author     Arto Merilainen <amerilainen@nvidia.com>    2014-03-19 09:38:25 +0200
committer  Terje Bergstrom <tbergstrom@nvidia.com>     2014-03-28 04:21:39 -0700
commit     e51e1033bd22dc5ea6a86f6704142baf89a2f7cb (patch)
tree       9b5f65258f5777273f3b62e4f59f8001ed7da543 /drivers/gpu/nvgpu
parent     1428ed474d1acb22321e89301c06be1bb9e5fe17 (diff)
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.

Bug 1482562

Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/Kconfig  60
-rw-r--r--  drivers/gpu/nvgpu/gk20a/Makefile  36
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c  293
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.h  50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c  2111
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h  172
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  356
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  102
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.c  865
-rw-r--r--  drivers/gpu/nvgpu/gk20a/clk_gk20a.h  94
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  240
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h  28
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c  699
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h  83
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c  295
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.h  25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.c  37
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.h  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c  1836
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h  164
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c  1681
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h  559
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.c  1247
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.h  177
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c  374
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h  39
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_scale.c  358
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_scale.h  51
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c  335
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c  333
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h  149
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c  256
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  6747
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h  406
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h  179
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal.c  33
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal.h  25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.c  50
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.h  28
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h  105
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h  113
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h  85
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h  245
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h  213
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h  565
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h  141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h  1141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h  3173
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h  221
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h  253
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h  469
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h  137
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h  226
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h  226
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h  69
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h  141
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h  737
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h  389
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h  2150
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h  225
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h  101
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h  137
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h  301
-rw-r--r--  drivers/gpu/nvgpu/gk20a/kind_gk20a.c  424
-rw-r--r--  drivers/gpu/nvgpu/gk20a/kind_gk20a.h  67
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_common.c  243
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c  203
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.h  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  2984
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h  464
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h  160
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c  35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c  561
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c  3796
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h  1097
-rw-r--r--  drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c  91
-rw-r--r--  drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/regops_gk20a.c  704
-rw-r--r--  drivers/gpu/nvgpu/gk20a/regops_gk20a.h  47
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sim_gk20a.h  62
-rw-r--r--  drivers/gpu/nvgpu/gk20a/therm_gk20a.c  142
-rw-r--r--  drivers/gpu/nvgpu/gk20a/therm_gk20a.h  33
82 files changed, 43318 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
new file mode 100644
index 000000000000..160ec8be94de
--- /dev/null
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -0,0 +1,60 @@
+config GK20A
+ bool "Nvidia GK20A GPU support"
+ help
+ Choose this option if you have an SoC with integrated
+ Nvidia GPU IP.
+
+config GK20A_DEFAULT_TIMEOUT
+ depends on GK20A
+ int "Default timeout for submits"
+ default 10000
+ help
+ Default timeout for jobs in milliseconds. Set to zero for no timeout.
+
+config GK20A_PMU
+ bool "Support GK20A PMU"
+ depends on GK20A
+ default n
+ help
+ Say Y here to enable GK20A PMU features.
+
+choice
+ depends on GK20A
+ prompt "Enable GK20A frequency scaling"
+ default GK20A_PERFMON
+ optional
+ help
+ Select this entry to enable gk20a frequency scaling.
+
+config GK20A_PERFMON
+ bool "Use Perfmon"
+ help
+ Select this to enable built-in perfmon scaling.
+ The built-in scaling option uses a simple
+ scaling mechanism: increase the frequency when the
+ GPU is busy and decrease it when idle.
+
+config GK20A_DEVFREQ
+ bool "Use Devfreq"
+ help
+ Select this to use devfreq-based scaling.
+ Devfreq is a common framework that allows using
+ a variety of governors and switching between
+ them on the fly. By default, no governor is
+ selected.
+
+endchoice
+
+config GK20A_CYCLE_STATS
+ bool "Support GK20A GPU CYCLE STATS"
+ depends on GK20A
+ default y
+ help
+ Say Y here to enable the cycle stats debugging features.
+
+config GK20A_PHYS_PAGE_TABLES
+ bool "Use physical addressing for gk20a page tables"
+ default y if TEGRA_SIMULATION_PLATFORM
+ help
+ Use physical addressing for gk20a page tables. If this is off, we
+ use SMMU translation.
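(A sketch for orientation only, not part of the patch: a hypothetical config fragment enabling the driver with the options introduced above; the exact selection depends on the target board.)

CONFIG_GK20A=y
CONFIG_GK20A_DEFAULT_TIMEOUT=10000
CONFIG_GK20A_PMU=y
CONFIG_GK20A_PERFMON=y
CONFIG_GK20A_CYCLE_STATS=y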
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
new file mode 100644
index 000000000000..f9b06b72eead
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -0,0 +1,36 @@
+
+GCOV_PROFILE := y
+ccflags-y += -Idrivers/devfreq
+ccflags-y += -Wno-multichar
+ccflags-y += -Werror
+
+obj-$(CONFIG_GK20A) += \
+ gk20a.o \
+ as_gk20a.o \
+ ctrl_gk20a.o \
+ fifo_gk20a.o \
+ channel_gk20a.o \
+ channel_sync_gk20a.o \
+ debug_gk20a.o \
+ dbg_gpu_gk20a.o \
+ regops_gk20a.o \
+ gr_gk20a.o \
+ kind_gk20a.o \
+ mm_gk20a.o \
+ pmu_gk20a.o \
+ priv_ring_gk20a.o \
+ clk_gk20a.o \
+ therm_gk20a.o \
+ gr_ctx_gk20a_sim.o \
+ gr_ctx_gk20a.o \
+ gk20a_gating_reglist.o \
+ gk20a_scale.o \
+ gk20a_sysfs.o \
+ ltc_gk20a.o \
+ fb_gk20a.o \
+ hal.o \
+ hal_gk20a.o \
+ gk20a_allocator.o
+
+obj-$(CONFIG_GK20A) += platform_gk20a_generic.o
+obj-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
new file mode 100644
index 000000000000..65c26938ea80
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -0,0 +1,293 @@
+/*
+ * drivers/video/tegra/host/gk20a/as_gk20a.c
+ *
+ * GK20A Address Spaces
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/uaccess.h>
+
+#include <trace/events/gk20a.h>
+
+#include "gk20a.h"
+
+/* dumb allocator... */
+static int generate_as_share_id(struct gk20a_as *as)
+{
+ gk20a_dbg_fn("");
+ return ++as->last_share_id;
+}
+/* still dumb */
+static void release_as_share_id(struct gk20a_as *as, int id)
+{
+ gk20a_dbg_fn("");
+ return;
+}
+
+static int gk20a_as_alloc_share(struct gk20a_as *as,
+ struct gk20a_as_share **out)
+{
+ struct gk20a_as_share *as_share;
+ int err = 0;
+
+ gk20a_dbg_fn("");
+
+ *out = 0;
+ as_share = kzalloc(sizeof(*as_share), GFP_KERNEL);
+ if (!as_share)
+ return -ENOMEM;
+
+ as_share->as = as;
+ as_share->id = generate_as_share_id(as_share->as);
+ as_share->ref_cnt.counter = 1;
+
+ /* this will set as_share->vm. */
+ err = gk20a_vm_alloc_share(as_share);
+ if (err)
+ goto failed;
+
+ *out = as_share;
+ return 0;
+
+ failed:
+ kfree(as_share);
+ return err;
+}
+
+/*
+ * channels and the device nodes call this to release.
+ * once the ref_cnt hits zero the share is deleted.
+ */
+int gk20a_as_release_share(struct gk20a_as_share *as_share)
+{
+ int err;
+
+ gk20a_dbg_fn("");
+
+ if (atomic_dec_return(&as_share->ref_cnt) > 0)
+ return 0;
+
+ err = gk20a_vm_release_share(as_share);
+ release_as_share_id(as_share->as, as_share->id);
+ kfree(as_share);
+ return err;
+}
+
+static int gk20a_as_ioctl_bind_channel(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_bind_channel_args *args)
+{
+ int err = 0;
+ struct channel_gk20a *ch;
+
+ gk20a_dbg_fn("");
+
+ ch = gk20a_get_channel_from_file(args->channel_fd);
+ if (!ch || gk20a_channel_as_bound(ch))
+ return -EINVAL;
+
+ atomic_inc(&as_share->ref_cnt);
+
+ /* this will set channel_gk20a->vm */
+ err = gk20a_vm_bind_channel(as_share, ch);
+ if (err) {
+ atomic_dec(&as_share->ref_cnt);
+ return err;
+ }
+
+ return err;
+}
+
+static int gk20a_as_ioctl_alloc_space(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_alloc_space_args *args)
+{
+ gk20a_dbg_fn("");
+ return gk20a_vm_alloc_space(as_share, args);
+}
+
+static int gk20a_as_ioctl_free_space(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_free_space_args *args)
+{
+ gk20a_dbg_fn("");
+ return gk20a_vm_free_space(as_share, args);
+}
+
+static int gk20a_as_ioctl_map_buffer_ex(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_map_buffer_ex_args *args)
+{
+ int i;
+
+ gk20a_dbg_fn("");
+
+ /* ensure that padding is not set. this is required for ensuring that
+ * we can safely use these fields later */
+ for (i = 0; i < ARRAY_SIZE(args->padding); i++)
+ if (args->padding[i])
+ return -EINVAL;
+
+ return gk20a_vm_map_buffer(as_share, args->dmabuf_fd,
+ &args->offset, args->flags,
+ args->kind);
+}
+
+static int gk20a_as_ioctl_map_buffer(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_map_buffer_args *args)
+{
+ gk20a_dbg_fn("");
+ return gk20a_vm_map_buffer(as_share, args->nvmap_handle,
+ &args->o_a.align,
+ args->flags, NV_KIND_DEFAULT);
+ /* args->o_a.offset will be set if !err */
+}
+
+static int gk20a_as_ioctl_unmap_buffer(
+ struct gk20a_as_share *as_share,
+ struct nvhost_as_unmap_buffer_args *args)
+{
+ gk20a_dbg_fn("");
+ return gk20a_vm_unmap_buffer(as_share, args->offset);
+}
+
+int gk20a_as_dev_open(struct inode *inode, struct file *filp)
+{
+ struct gk20a_as_share *as_share;
+ struct gk20a *g;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ g = container_of(inode->i_cdev, struct gk20a, as.cdev);
+
+ err = gk20a_get_client(g);
+ if (err) {
+ gk20a_dbg_fn("fail to get channel!");
+ return err;
+ }
+
+ err = gk20a_as_alloc_share(&g->as, &as_share);
+ if (err) {
+ gk20a_dbg_fn("failed to alloc share");
+ gk20a_put_client(g);
+ return err;
+ }
+
+ filp->private_data = as_share;
+ return 0;
+}
+
+int gk20a_as_dev_release(struct inode *inode, struct file *filp)
+{
+ struct gk20a_as_share *as_share = filp->private_data;
+ int ret;
+ struct gk20a *g = gk20a_from_as(as_share->as);
+
+ gk20a_dbg_fn("");
+
+ ret = gk20a_as_release_share(as_share);
+
+ gk20a_put_client(g);
+
+ return ret;
+}
+
+long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ int err = 0;
+ struct gk20a_as_share *as_share = filp->private_data;
+ struct gk20a *g = gk20a_from_as(as_share->as);
+
+ u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE];
+
+ if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) ||
+ (_IOC_NR(cmd) == 0) ||
+ (_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST))
+ return -EFAULT;
+
+ BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE);
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+ }
+
+ err = gk20a_channel_busy(g->dev);
+ if (err)
+ return err;
+
+ switch (cmd) {
+ case NVHOST_AS_IOCTL_BIND_CHANNEL:
+ trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_bind_channel(as_share,
+ (struct nvhost_as_bind_channel_args *)buf);
+
+ break;
+ case NVHOST32_AS_IOCTL_ALLOC_SPACE:
+ {
+ struct nvhost32_as_alloc_space_args *args32 =
+ (struct nvhost32_as_alloc_space_args *)buf;
+ struct nvhost_as_alloc_space_args args;
+
+ args.pages = args32->pages;
+ args.page_size = args32->page_size;
+ args.flags = args32->flags;
+ args.o_a.offset = args32->o_a.offset;
+ trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_alloc_space(as_share, &args);
+ args32->o_a.offset = args.o_a.offset;
+ break;
+ }
+ case NVHOST_AS_IOCTL_ALLOC_SPACE:
+ trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_alloc_space(as_share,
+ (struct nvhost_as_alloc_space_args *)buf);
+ break;
+ case NVHOST_AS_IOCTL_FREE_SPACE:
+ trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_free_space(as_share,
+ (struct nvhost_as_free_space_args *)buf);
+ break;
+ case NVHOST_AS_IOCTL_MAP_BUFFER:
+ trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_map_buffer(as_share,
+ (struct nvhost_as_map_buffer_args *)buf);
+ break;
+ case NVHOST_AS_IOCTL_MAP_BUFFER_EX:
+ trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_map_buffer_ex(as_share,
+ (struct nvhost_as_map_buffer_ex_args *)buf);
+ break;
+ case NVHOST_AS_IOCTL_UNMAP_BUFFER:
+ trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g)));
+ err = gk20a_as_ioctl_unmap_buffer(as_share,
+ (struct nvhost_as_unmap_buffer_args *)buf);
+ break;
+ default:
+ dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
+ err = -ENOTTY;
+ break;
+ }
+
+ gk20a_channel_idle(g->dev);
+
+ if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
+ err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
+
+ return err;
+}
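(A minimal user-space sketch, for orientation only and not part of the patch. It relies only on the NVHOST_AS_IOCTL_BIND_CHANNEL command and the channel_fd field handled by gk20a_as_ioctl_bind_channel() above; the uapi header path and the way the fds are opened are assumptions.)

#include <sys/ioctl.h>
#include <linux/nvhost_as_ioctl.h>

/* Bind an already-open GPU channel fd to an address-space fd. The kernel
 * side of this request is gk20a_as_dev_ioctl() ->
 * gk20a_as_ioctl_bind_channel(). */
static int bind_channel_to_as(int as_fd, int channel_fd)
{
	struct nvhost_as_bind_channel_args args = { .channel_fd = channel_fd };

	return ioctl(as_fd, NVHOST_AS_IOCTL_BIND_CHANNEL, &args);
}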
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
new file mode 100644
index 000000000000..be0e97075f5a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -0,0 +1,50 @@
+/*
+ * drivers/video/tegra/host/gk20a/as_gk20a.h
+ *
+ * GK20A Address Space
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#ifndef __GK20A_AS_H
+#define __GK20A_AS_H
+
+#include <linux/atomic.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+
+#include <linux/nvhost_as_ioctl.h>
+
+struct gk20a_as;
+struct gk20a_as_share;
+struct vm_gk20a;
+
+struct gk20a_as_share {
+ struct gk20a_as *as;
+ atomic_t ref_cnt;
+ int id;
+ struct vm_gk20a *vm;
+};
+
+struct gk20a_as {
+ int last_share_id; /* dummy allocator for now */
+ struct cdev cdev;
+ struct device *node;
+};
+
+int gk20a_as_release_share(struct gk20a_as_share *as_share);
+
+/* struct file_operations driver interface */
+int gk20a_as_dev_open(struct inode *inode, struct file *filp);
+int gk20a_as_dev_release(struct inode *inode, struct file *filp);
+long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
new file mode 100644
index 000000000000..6056f558359f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -0,0 +1,2111 @@
+/*
+ * drivers/video/tegra/host/gk20a/channel_gk20a.c
+ *
+ * GK20A Graphics channel
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/nvhost.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/highmem.h> /* need for nvmap.h*/
+#include <trace/events/gk20a.h>
+#include <linux/scatterlist.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/dma-buf.h>
+
+#include "debug_gk20a.h"
+
+#include "gk20a.h"
+#include "dbg_gpu_gk20a.h"
+
+#include "hw_ram_gk20a.h"
+#include "hw_fifo_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+#include "hw_ccsr_gk20a.h"
+#include "hw_ltc_gk20a.h"
+
+#define NVMAP_HANDLE_PARAM_SIZE 1
+
+static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
+static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
+
+static void free_priv_cmdbuf(struct channel_gk20a *c,
+ struct priv_cmd_entry *e);
+static void recycle_priv_cmdbuf(struct channel_gk20a *c);
+
+static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
+
+static int channel_gk20a_commit_userd(struct channel_gk20a *c);
+static int channel_gk20a_setup_userd(struct channel_gk20a *c);
+static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+ u64 gpfifo_base, u32 gpfifo_entries);
+
+static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
+static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
+
+static int channel_gk20a_alloc_inst(struct gk20a *g,
+ struct channel_gk20a *ch);
+static void channel_gk20a_free_inst(struct gk20a *g,
+ struct channel_gk20a *ch);
+
+static int channel_gk20a_update_runlist(struct channel_gk20a *c,
+ bool add);
+static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
+
+static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
+{
+ struct channel_gk20a *ch = NULL;
+ int chid;
+
+ mutex_lock(&f->ch_inuse_mutex);
+ for (chid = 0; chid < f->num_channels; chid++) {
+ if (!f->channel[chid].in_use) {
+ f->channel[chid].in_use = true;
+ ch = &f->channel[chid];
+ break;
+ }
+ }
+ mutex_unlock(&f->ch_inuse_mutex);
+
+ return ch;
+}
+
+static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
+{
+ mutex_lock(&f->ch_inuse_mutex);
+ f->channel[c->hw_chid].in_use = false;
+ mutex_unlock(&f->ch_inuse_mutex);
+}
+
+int channel_gk20a_commit_va(struct channel_gk20a *c)
+{
+ u64 addr;
+ u32 addr_lo;
+ u32 addr_hi;
+ void *inst_ptr;
+
+ gk20a_dbg_fn("");
+
+ inst_ptr = c->inst_block.cpuva;
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
+ addr_lo = u64_lo32(addr >> 12);
+ addr_hi = u64_hi32(addr);
+
+ gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
+ (u64)addr, addr_lo, addr_hi);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+ ram_in_page_dir_base_target_vid_mem_f() |
+ ram_in_page_dir_base_vol_true_f() |
+ ram_in_page_dir_base_lo_f(addr_lo));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+ ram_in_page_dir_base_hi_f(addr_hi));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+ u64_lo32(c->vm->va_limit) | 0xFFF);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+ ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+static int channel_gk20a_commit_userd(struct channel_gk20a *c)
+{
+ u32 addr_lo;
+ u32 addr_hi;
+ void *inst_ptr;
+
+ gk20a_dbg_fn("");
+
+ inst_ptr = c->inst_block.cpuva;
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
+ addr_hi = u64_hi32(c->userd_iova);
+
+ gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
+ c->hw_chid, (u64)c->userd_iova);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+ pbdma_userd_target_vid_mem_f() |
+ pbdma_userd_addr_f(addr_lo));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+ pbdma_userd_target_vid_mem_f() |
+ pbdma_userd_hi_addr_f(addr_hi));
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
+ u32 timeslice_timeout)
+{
+ void *inst_ptr;
+ int shift = 3;
+ int value = timeslice_timeout;
+
+ inst_ptr = c->inst_block.cpuva;
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ /* disable channel */
+ gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
+ gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
+ ccsr_channel_enable_clr_true_f());
+
+ /* preempt the channel */
+ WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+
+ /* flush GPU cache */
+ gk20a_mm_l2_flush(c->g, true);
+
+ /* value field is 8 bits long */
+ while (value >= 1 << 8) {
+ value >>= 1;
+ shift++;
+ }
+
+ /* time slice register is only 18bits long */
+ if ((value << shift) >= 1<<19) {
+ pr_err("Requested timeslice value is clamped to 18 bits\n");
+ value = 255;
+ shift = 10;
+ }
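+ /* Illustrative numbers (assumed, not from the original patch): for a
+ * requested timeslice_timeout of 10000, the loop above halves the value
+ * until it fits in 8 bits (10000 -> 5000 -> 2500 -> 1250 -> 625 -> 312
+ * -> 156) while shift grows from 3 to 9; 156 << 9 = 79872, so the clamp
+ * does not trigger. */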
+
+ /* set new timeslice */
+ gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
+ value | (shift << 12) |
+ fifo_eng_timeslice_enable_true_f());
+
+ /* enable channel */
+ gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
+ gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
+ ccsr_channel_enable_set_true_f());
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+ u64 gpfifo_base, u32 gpfifo_entries)
+{
+ void *inst_ptr;
+
+ gk20a_dbg_fn("");
+
+ inst_ptr = c->inst_block.cpuva;
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ memset(inst_ptr, 0, ram_fc_size_val_v());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+ pbdma_gp_base_offset_f(
+ u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+ pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
+ pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+ pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+ pbdma_formats_gp_fermi0_f() |
+ pbdma_formats_pb_fermi1_f() |
+ pbdma_formats_mp_fermi0_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+ pbdma_pb_header_priv_user_f() |
+ pbdma_pb_header_method_zero_f() |
+ pbdma_pb_header_subchannel_zero_f() |
+ pbdma_pb_header_level_main_f() |
+ pbdma_pb_header_first_true_f() |
+ pbdma_pb_header_type_inc_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+ pbdma_subdevice_id_f(1) |
+ pbdma_subdevice_status_active_f() |
+ pbdma_subdevice_channel_dma_enable_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+ pbdma_acquire_retry_man_2_f() |
+ pbdma_acquire_retry_exp_2_f() |
+ pbdma_acquire_timeout_exp_max_f() |
+ pbdma_acquire_timeout_man_max_f() |
+ pbdma_acquire_timeout_en_disable_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
+ fifo_eng_timeslice_timeout_128_f() |
+ fifo_eng_timeslice_timescale_3_f() |
+ fifo_eng_timeslice_enable_true_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
+ fifo_pb_timeslice_timeout_16_f() |
+ fifo_pb_timeslice_timescale_0_f() |
+ fifo_pb_timeslice_enable_true_f());
+
+ gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+
+ /* TBD: always priv mode? */
+ gk20a_mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
+ pbdma_hce_ctrl_hce_priv_mode_yes_f());
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+static int channel_gk20a_setup_userd(struct channel_gk20a *c)
+{
+ BUG_ON(!c->userd_cpu_va);
+
+ gk20a_dbg_fn("");
+
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
+ gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
+{
+ struct gk20a *g = ch_gk20a->g;
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_engine_info_gk20a *engine_info =
+ f->engine_info + ENGINE_GR_GK20A;
+
+ u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
+ >> ram_in_base_shift_v();
+
+ gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
+ ch_gk20a->hw_chid, inst_ptr);
+
+ ch_gk20a->bound = true;
+
+ gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
+ (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+ ~ccsr_channel_runlist_f(~0)) |
+ ccsr_channel_runlist_f(engine_info->runlist_id));
+
+ gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+ ccsr_channel_inst_ptr_f(inst_ptr) |
+ ccsr_channel_inst_target_vid_mem_f() |
+ ccsr_channel_inst_bind_true_f());
+
+ gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
+ (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+ ~ccsr_channel_enable_set_f(~0)) |
+ ccsr_channel_enable_set_true_f());
+}
+
+static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
+{
+ struct gk20a *g = ch_gk20a->g;
+
+ gk20a_dbg_fn("");
+
+ if (ch_gk20a->bound)
+ gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+ ccsr_channel_inst_ptr_f(0) |
+ ccsr_channel_inst_bind_false_f());
+
+ ch_gk20a->bound = false;
+}
+
+static int channel_gk20a_alloc_inst(struct gk20a *g,
+ struct channel_gk20a *ch)
+{
+ struct device *d = dev_from_gk20a(g);
+ int err = 0;
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ ch->inst_block.size = ram_in_alloc_size_v();
+ ch->inst_block.cpuva = dma_alloc_coherent(d,
+ ch->inst_block.size,
+ &iova,
+ GFP_KERNEL);
+ if (!ch->inst_block.cpuva) {
+ gk20a_err(d, "%s: memory allocation failed\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ ch->inst_block.iova = iova;
+ ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
+ ch->inst_block.iova);
+ if (!ch->inst_block.cpu_pa) {
+ gk20a_err(d, "%s: failed to get physical address\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
+ ch->hw_chid, (u64)ch->inst_block.cpu_pa);
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+clean_up:
+ gk20a_err(d, "fail");
+ channel_gk20a_free_inst(g, ch);
+ return err;
+}
+
+static void channel_gk20a_free_inst(struct gk20a *g,
+ struct channel_gk20a *ch)
+{
+ struct device *d = dev_from_gk20a(g);
+
+ if (ch->inst_block.cpuva)
+ dma_free_coherent(d, ch->inst_block.size,
+ ch->inst_block.cpuva, ch->inst_block.iova);
+ ch->inst_block.cpuva = NULL;
+ ch->inst_block.iova = 0;
+ memset(&ch->inst_block, 0, sizeof(struct inst_desc));
+}
+
+static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
+{
+ return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+}
+
+void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
+{
+ /* ensure no fences are pending */
+ if (ch->sync)
+ ch->sync->set_min_eq_max(ch->sync);
+
+ /* disable channel */
+ gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+ gk20a_readl(ch->g,
+ ccsr_channel_r(ch->hw_chid)) |
+ ccsr_channel_enable_clr_true_f());
+}
+
+static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
+{
+ bool channel_idle = false;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
+
+ do {
+ mutex_lock(&ch->jobs_lock);
+ channel_idle = list_empty(&ch->jobs);
+ mutex_unlock(&ch->jobs_lock);
+ if (channel_idle)
+ break;
+
+ usleep_range(1000, 3000);
+ } while (time_before(jiffies, end_jiffies)
+ || !tegra_platform_is_silicon());
+
+ if (!channel_idle)
+ gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed");
+
+ return 0;
+}
+
+void gk20a_disable_channel(struct channel_gk20a *ch,
+ bool finish,
+ unsigned long finish_timeout)
+{
+ if (finish) {
+ int err = gk20a_channel_finish(ch, finish_timeout);
+ WARN_ON(err);
+ }
+
+ /* disable the channel from hw and increment syncpoints */
+ gk20a_disable_channel_no_update(ch);
+
+ gk20a_wait_channel_idle(ch);
+
+ /* preempt the channel */
+ gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+
+ /* remove channel from runlist */
+ channel_gk20a_update_runlist(ch, false);
+}
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+
+static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
+{
+ /* disable existing cyclestats buffer */
+ mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
+ if (ch->cyclestate.cyclestate_buffer_handler) {
+ dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
+ ch->cyclestate.cyclestate_buffer);
+ dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
+ ch->cyclestate.cyclestate_buffer_handler = NULL;
+ ch->cyclestate.cyclestate_buffer = NULL;
+ ch->cyclestate.cyclestate_buffer_size = 0;
+ }
+ mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
+}
+
+static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
+ struct nvhost_cycle_stats_args *args)
+{
+ struct dma_buf *dmabuf;
+ void *virtual_address;
+
+ if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
+
+ /* set up new cyclestats buffer */
+ dmabuf = dma_buf_get(args->nvmap_handle);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ virtual_address = dma_buf_vmap(dmabuf);
+ if (!virtual_address)
+ return -ENOMEM;
+
+ ch->cyclestate.cyclestate_buffer_handler = dmabuf;
+ ch->cyclestate.cyclestate_buffer = virtual_address;
+ ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
+ return 0;
+
+ } else if (!args->nvmap_handle &&
+ ch->cyclestate.cyclestate_buffer_handler) {
+ gk20a_free_cycle_stats_buffer(ch);
+ return 0;
+
+ } else if (!args->nvmap_handle &&
+ !ch->cyclestate.cyclestate_buffer_handler) {
+ /* no request from GL */
+ return 0;
+
+ } else {
+ pr_err("channel already has cyclestats buffer\n");
+ return -EINVAL;
+ }
+}
+#endif
+
+static int gk20a_init_error_notifier(struct channel_gk20a *ch,
+ struct nvhost_set_error_notifier *args) {
+ void *va;
+
+ struct dma_buf *dmabuf;
+
+ if (!args->mem) {
+ pr_err("gk20a_init_error_notifier: invalid memory handle\n");
+ return -EINVAL;
+ }
+
+ dmabuf = dma_buf_get(args->mem);
+
+ if (ch->error_notifier_ref)
+ gk20a_free_error_notifiers(ch);
+
+ if (IS_ERR(dmabuf)) {
+ pr_err("Invalid handle: %d\n", args->mem);
+ return -EINVAL;
+ }
+ /* map handle */
+ va = dma_buf_vmap(dmabuf);
+ if (!va) {
+ dma_buf_put(dmabuf);
+ pr_err("Cannot map notifier handle\n");
+ return -ENOMEM;
+ }
+
+ /* set channel notifiers pointer */
+ ch->error_notifier_ref = dmabuf;
+ ch->error_notifier = va + args->offset;
+ ch->error_notifier_va = va;
+ memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
+ return 0;
+}
+
+void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
+{
+ if (ch->error_notifier_ref) {
+ struct timespec time_data;
+ u64 nsec;
+ getnstimeofday(&time_data);
+ nsec = ((u64)time_data.tv_sec) * 1000000000u +
+ (u64)time_data.tv_nsec;
+ ch->error_notifier->time_stamp.nanoseconds[0] =
+ (u32)nsec;
+ ch->error_notifier->time_stamp.nanoseconds[1] =
+ (u32)(nsec >> 32);
+ ch->error_notifier->info32 = error;
+ ch->error_notifier->status = 0xffff;
+ gk20a_err(dev_from_gk20a(ch->g),
+ "error notifier set to %d\n", error);
+ }
+}
+
+static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
+{
+ if (ch->error_notifier_ref) {
+ dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
+ dma_buf_put(ch->error_notifier_ref);
+ ch->error_notifier_ref = 0;
+ ch->error_notifier = 0;
+ ch->error_notifier_va = 0;
+ }
+}
+
+void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
+{
+ struct gk20a *g = ch->g;
+ struct device *d = dev_from_gk20a(g);
+ struct fifo_gk20a *f = &g->fifo;
+ struct gr_gk20a *gr = &g->gr;
+ struct vm_gk20a *ch_vm = ch->vm;
+ unsigned long timeout = gk20a_get_gr_idle_timeout(g);
+ struct dbg_session_gk20a *dbg_s;
+
+ gk20a_dbg_fn("");
+
+ /* if engine reset was deferred, perform it now */
+ mutex_lock(&f->deferred_reset_mutex);
+ if (g->fifo.deferred_reset_pending) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
+ " deferred, running now");
+ fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
+ g->fifo.mmu_fault_engines = 0;
+ g->fifo.deferred_reset_pending = false;
+ }
+ mutex_unlock(&f->deferred_reset_mutex);
+
+ if (!ch->bound)
+ return;
+
+ if (!gk20a_channel_as_bound(ch))
+ goto unbind;
+
+ gk20a_dbg_info("freeing bound channel context, timeout=%ld",
+ timeout);
+
+ gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
+
+ gk20a_free_error_notifiers(ch);
+
+ /* release channel ctx */
+ gk20a_free_channel_ctx(ch);
+
+ gk20a_gr_flush_channel_tlb(gr);
+
+ memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
+
+ /* free gpfifo */
+ if (ch->gpfifo.gpu_va)
+ gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
+ ch->gpfifo.size, gk20a_mem_flag_none);
+ if (ch->gpfifo.cpu_va)
+ dma_free_coherent(d, ch->gpfifo.size,
+ ch->gpfifo.cpu_va, ch->gpfifo.iova);
+ ch->gpfifo.cpu_va = NULL;
+ ch->gpfifo.iova = 0;
+
+ gk20a_mm_l2_invalidate(ch->g);
+
+ memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+ gk20a_free_cycle_stats_buffer(ch);
+#endif
+
+ channel_gk20a_free_priv_cmdbuf(ch);
+
+ if (ch->sync) {
+ ch->sync->destroy(ch->sync);
+ ch->sync = NULL;
+ }
+
+ /* release channel binding to the as_share */
+ gk20a_as_release_share(ch_vm->as_share);
+
+unbind:
+ channel_gk20a_unbind(ch);
+ channel_gk20a_free_inst(g, ch);
+
+ ch->vpr = false;
+ ch->vm = NULL;
+ WARN_ON(ch->sync);
+
+ /* unlink all debug sessions */
+ mutex_lock(&ch->dbg_s_lock);
+
+ list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
+ dbg_s->ch = NULL;
+ list_del_init(&dbg_s->dbg_s_list_node);
+ }
+
+ mutex_unlock(&ch->dbg_s_lock);
+
+ /* ALWAYS last */
+ release_used_channel(f, ch);
+}
+
+int gk20a_channel_release(struct inode *inode, struct file *filp)
+{
+ struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
+ struct gk20a *g = ch->g;
+
+ trace_gk20a_channel_release(dev_name(&g->dev->dev));
+
+ gk20a_channel_busy(ch->g->dev);
+ gk20a_free_channel(ch, true);
+ gk20a_channel_idle(ch->g->dev);
+
+ gk20a_put_client(g);
+ filp->private_data = NULL;
+ return 0;
+}
+
+static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch;
+
+ ch = acquire_unused_channel(f);
+ if (ch == NULL) {
+ /* TBD: we want to make this virtualizable */
+ gk20a_err(dev_from_gk20a(g), "out of hw chids");
+ return 0;
+ }
+
+ ch->g = g;
+
+ if (channel_gk20a_alloc_inst(g, ch)) {
+ ch->in_use = false;
+ gk20a_err(dev_from_gk20a(g),
+ "failed to open gk20a channel, out of inst mem");
+
+ return 0;
+ }
+ g->ops.fifo.bind_channel(ch);
+ ch->pid = current->pid;
+
+ /* reset timeout counter and update timestamp */
+ ch->timeout_accumulated_ms = 0;
+ ch->timeout_gpfifo_get = 0;
+ /* set gr host default timeout */
+ ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
+ ch->timeout_debug_dump = true;
+ ch->has_timedout = false;
+
+ /* The channel is *not* runnable at this point. It still needs to have
+ * an address space bound and allocate a gpfifo and grctx. */
+
+ init_waitqueue_head(&ch->notifier_wq);
+ init_waitqueue_head(&ch->semaphore_wq);
+ init_waitqueue_head(&ch->submit_wq);
+
+ return ch;
+}
+
+static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
+{
+ int err;
+ struct channel_gk20a *ch;
+
+ trace_gk20a_channel_open(dev_name(&g->dev->dev));
+
+ err = gk20a_get_client(g);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to get client ref");
+ return err;
+ }
+
+ err = gk20a_channel_busy(g->dev);
+ if (err) {
+ gk20a_put_client(g);
+ gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
+ return err;
+ }
+ ch = gk20a_open_new_channel(g);
+ gk20a_channel_idle(g->dev);
+ if (!ch) {
+ gk20a_put_client(g);
+ gk20a_err(dev_from_gk20a(g),
+ "failed to get f");
+ return -ENOMEM;
+ }
+
+ filp->private_data = ch;
+ return 0;
+}
+
+int gk20a_channel_open(struct inode *inode, struct file *filp)
+{
+ struct gk20a *g = container_of(inode->i_cdev,
+ struct gk20a, channel.cdev);
+ return __gk20a_channel_open(g, filp);
+}
+
+/* allocate private cmd buffer.
+ used for inserting commands before/after user submitted buffers. */
+static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
+{
+ struct device *d = dev_from_gk20a(c->g);
+ struct vm_gk20a *ch_vm = c->vm;
+ struct priv_cmd_queue *q = &c->priv_cmd_q;
+ struct priv_cmd_entry *e;
+ u32 i = 0, size;
+ int err = 0;
+ struct sg_table *sgt;
+ dma_addr_t iova;
+
+ /* Kernel can insert gpfifos before and after user gpfifos.
+ Before user gpfifos, kernel inserts fence_wait, which takes
+ syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
+ After user gpfifos, kernel inserts fence_get, which takes
+ wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
+ = 6 dwords.
+ Worst case, if the kernel adds both of them for every user gpfifo,
+ the max size of the priv_cmdbuf is:
+ (gpfifo entry number * (2 / 3) * (4 + 6) * 4) bytes. */
+ size = roundup_pow_of_two(
+ c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
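+ /* Illustrative sizing (assumed numbers, not from the patch): with 1024
+ * gpfifo entries, 1024 * 2 * 10 * 4 / 3 = 27306 bytes, which
+ * roundup_pow_of_two() grows to a 32768-byte queue. */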
+
+ q->mem.base_cpuva = dma_alloc_coherent(d, size,
+ &iova,
+ GFP_KERNEL);
+ if (!q->mem.base_cpuva) {
+ gk20a_err(d, "%s: memory allocation failed\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ q->mem.base_iova = iova;
+ q->mem.size = size;
+
+ err = gk20a_get_sgtable(d, &sgt,
+ q->mem.base_cpuva, q->mem.base_iova, size);
+ if (err) {
+ gk20a_err(d, "%s: failed to create sg table\n", __func__);
+ goto clean_up;
+ }
+
+ memset(q->mem.base_cpuva, 0, size);
+
+ q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
+ size,
+ 0, /* flags */
+ gk20a_mem_flag_none);
+ if (!q->base_gpuva) {
+ gk20a_err(d, "ch %d : failed to map gpu va"
+ "for priv cmd buffer", c->hw_chid);
+ err = -ENOMEM;
+ goto clean_up_sgt;
+ }
+
+ q->size = q->mem.size / sizeof(u32);
+
+ INIT_LIST_HEAD(&q->head);
+ INIT_LIST_HEAD(&q->free);
+
+ /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
+ for (i = 0; i < q->size / 4; i++) {
+ e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
+ if (!e) {
+ gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
+ c->hw_chid);
+ err = -ENOMEM;
+ goto clean_up_sgt;
+ }
+ e->pre_alloc = true;
+ list_add(&e->list, &q->free);
+ }
+
+ gk20a_free_sgtable(&sgt);
+
+ return 0;
+
+clean_up_sgt:
+ gk20a_free_sgtable(&sgt);
+clean_up:
+ channel_gk20a_free_priv_cmdbuf(c);
+ return err;
+}
+
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
+{
+ struct device *d = dev_from_gk20a(c->g);
+ struct vm_gk20a *ch_vm = c->vm;
+ struct priv_cmd_queue *q = &c->priv_cmd_q;
+ struct priv_cmd_entry *e;
+ struct list_head *pos, *tmp, *head;
+
+ if (q->size == 0)
+ return;
+
+ if (q->base_gpuva)
+ gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
+ q->mem.size, gk20a_mem_flag_none);
+ if (q->mem.base_cpuva)
+ dma_free_coherent(d, q->mem.size,
+ q->mem.base_cpuva, q->mem.base_iova);
+ q->mem.base_cpuva = NULL;
+ q->mem.base_iova = 0;
+
+ /* free used list */
+ head = &q->head;
+ list_for_each_safe(pos, tmp, head) {
+ e = container_of(pos, struct priv_cmd_entry, list);
+ free_priv_cmdbuf(c, e);
+ }
+
+ /* free free list */
+ head = &q->free;
+ list_for_each_safe(pos, tmp, head) {
+ e = container_of(pos, struct priv_cmd_entry, list);
+ e->pre_alloc = false;
+ free_priv_cmdbuf(c, e);
+ }
+
+ memset(q, 0, sizeof(struct priv_cmd_queue));
+}
+
+/* allocate a cmd buffer with given size. size is number of u32 entries */
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
+ struct priv_cmd_entry **entry)
+{
+ struct priv_cmd_queue *q = &c->priv_cmd_q;
+ struct priv_cmd_entry *e;
+ struct list_head *node;
+ u32 free_count;
+ u32 size = orig_size;
+ bool no_retry = false;
+
+ gk20a_dbg_fn("size %d", orig_size);
+
+ *entry = NULL;
+
+ /* if the free space at the end is less than requested, increase the
+ * size so that the real allocation starts from the beginning. */
+ if (q->put + size > q->size)
+ size = orig_size + (q->size - q->put);
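+ /* Example with assumed numbers: if q->size is 8192 words, q->put is
+ * 8000 and orig_size is 300, size grows to 300 + 192 = 492, so the
+ * free-space check below also covers the skipped 192-word tail. */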
+
+ gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
+ c->hw_chid, q->get, q->put);
+
+TRY_AGAIN:
+ free_count = (q->size - (q->put - q->get) - 1) % q->size;
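+ /* Example with assumed numbers: q->size = 8192, put = 100, get = 40
+ * gives free_count = (8192 - 60 - 1) % 8192 = 8131 words; one slot is
+ * always kept free to distinguish a full queue from an empty one. */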
+
+ if (size > free_count) {
+ if (!no_retry) {
+ recycle_priv_cmdbuf(c);
+ no_retry = true;
+ goto TRY_AGAIN;
+ } else
+ return -EAGAIN;
+ }
+
+ if (unlikely(list_empty(&q->free))) {
+
+ gk20a_dbg_info("ch %d: run out of pre-alloc entries",
+ c->hw_chid);
+
+ e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
+ if (!e) {
+ gk20a_err(dev_from_gk20a(c->g),
+ "ch %d: fail to allocate priv cmd entry",
+ c->hw_chid);
+ return -ENOMEM;
+ }
+ } else {
+ node = q->free.next;
+ list_del(node);
+ e = container_of(node, struct priv_cmd_entry, list);
+ }
+
+ e->size = orig_size;
+ e->gp_get = c->gpfifo.get;
+ e->gp_put = c->gpfifo.put;
+ e->gp_wrap = c->gpfifo.wrap;
+
+ /* if we have increased size to skip free space in the end, set put
+ to beginning of cmd buffer (0) + size */
+ if (size != orig_size) {
+ e->ptr = q->mem.base_cpuva;
+ e->gva = q->base_gpuva;
+ q->put = orig_size;
+ } else {
+ e->ptr = q->mem.base_cpuva + q->put;
+ e->gva = q->base_gpuva + q->put * sizeof(u32);
+ q->put = (q->put + orig_size) & (q->size - 1);
+ }
+
+ /* we already handled q->put + size > q->size so BUG_ON this */
+ BUG_ON(q->put > q->size);
+
+ /* add new entry to head since we free from head */
+ list_add(&e->list, &q->head);
+
+ *entry = e;
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+}
+
+/* Don't call this to free an explicit cmd entry.
+ * It doesn't update priv_cmd_queue get/put */
+static void free_priv_cmdbuf(struct channel_gk20a *c,
+ struct priv_cmd_entry *e)
+{
+ struct priv_cmd_queue *q = &c->priv_cmd_q;
+
+ if (!e)
+ return;
+
+ list_del(&e->list);
+
+ if (unlikely(!e->pre_alloc))
+ kfree(e);
+ else {
+ memset(e, 0, sizeof(struct priv_cmd_entry));
+ e->pre_alloc = true;
+ list_add(&e->list, &q->free);
+ }
+}
+
+/* free entries if they're no longer being used */
+static void recycle_priv_cmdbuf(struct channel_gk20a *c)
+{
+ struct priv_cmd_queue *q = &c->priv_cmd_q;
+ struct priv_cmd_entry *e, *tmp;
+ struct list_head *head = &q->head;
+ bool wrap_around, found = false;
+
+ gk20a_dbg_fn("");
+
+ /* Find the most recent free entry. Free it and everything before it */
+ list_for_each_entry(e, head, list) {
+
+ gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
+ "curr get:put:wrap %d:%d:%d",
+ c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
+ c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
+
+ wrap_around = (c->gpfifo.wrap != e->gp_wrap);
+ if (e->gp_get < e->gp_put) {
+ if (c->gpfifo.get >= e->gp_put ||
+ wrap_around) {
+ found = true;
+ break;
+ } else
+ e->gp_get = c->gpfifo.get;
+ } else if (e->gp_get > e->gp_put) {
+ if (wrap_around &&
+ c->gpfifo.get >= e->gp_put) {
+ found = true;
+ break;
+ } else
+ e->gp_get = c->gpfifo.get;
+ }
+ }
+
+ if (found)
+ q->get = (e->ptr - q->mem.base_cpuva) + e->size;
+ else {
+ gk20a_dbg_info("no free entry recycled");
+ return;
+ }
+
+ list_for_each_entry_safe_continue(e, tmp, head, list) {
+ free_priv_cmdbuf(c, e);
+ }
+
+ gk20a_dbg_fn("done");
+}
+
+
+static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
+ struct nvhost_alloc_gpfifo_args *args)
+{
+ struct gk20a *g = c->g;
+ struct device *d = dev_from_gk20a(g);
+ struct vm_gk20a *ch_vm;
+ u32 gpfifo_size;
+ int err = 0;
+ struct sg_table *sgt;
+ dma_addr_t iova;
+
+ /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
+ and another one after, for internal usage. Triple the requested size. */
+ gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
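+ /* Illustrative example (assumed request size): asking for 512 entries
+ * yields 512 * 3 = 1536, which roundup_pow_of_two() grows to 2048. */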
+
+ if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+ c->vpr = true;
+
+ /* an address space needs to have been bound at this point. */
+ if (!gk20a_channel_as_bound(c)) {
+ gk20a_err(d,
+ "not bound to an address space at time of gpfifo"
+ " allocation. Attempting to create and bind to"
+ " one...");
+ return -EINVAL;
+ }
+ ch_vm = c->vm;
+
+ c->cmds_pending = false;
+ c->last_submit_fence.valid = false;
+
+ c->ramfc.offset = 0;
+ c->ramfc.size = ram_in_ramfc_s() / 8;
+
+ if (c->gpfifo.cpu_va) {
+ gk20a_err(d, "channel %d :"
+ "gpfifo already allocated", c->hw_chid);
+ return -EEXIST;
+ }
+
+ c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
+ c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
+ c->gpfifo.size,
+ &iova,
+ GFP_KERNEL);
+ if (!c->gpfifo.cpu_va) {
+ gk20a_err(d, "%s: memory allocation failed\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ c->gpfifo.iova = iova;
+ c->gpfifo.entry_num = gpfifo_size;
+
+ c->gpfifo.get = c->gpfifo.put = 0;
+
+ err = gk20a_get_sgtable(d, &sgt,
+ c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
+ if (err) {
+ gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
+ goto clean_up;
+ }
+
+ c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
+ &sgt,
+ c->gpfifo.size,
+ 0, /* flags */
+ gk20a_mem_flag_none);
+ if (!c->gpfifo.gpu_va) {
+ gk20a_err(d, "channel %d : failed to map"
+ " gpu_va for gpfifo", c->hw_chid);
+ err = -ENOMEM;
+ goto clean_up_sgt;
+ }
+
+ gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
+ c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+
+ channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+
+ channel_gk20a_setup_userd(c);
+ channel_gk20a_commit_userd(c);
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ /* TBD: setup engine contexts */
+
+ err = channel_gk20a_alloc_priv_cmdbuf(c);
+ if (err)
+ goto clean_up_unmap;
+
+ err = channel_gk20a_update_runlist(c, true);
+ if (err)
+ goto clean_up_unmap;
+
+ gk20a_free_sgtable(&sgt);
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+clean_up_unmap:
+ gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
+ c->gpfifo.size, gk20a_mem_flag_none);
+clean_up_sgt:
+ gk20a_free_sgtable(&sgt);
+clean_up:
+ dma_free_coherent(d, c->gpfifo.size,
+ c->gpfifo.cpu_va, c->gpfifo.iova);
+ c->gpfifo.cpu_va = NULL;
+ c->gpfifo.iova = 0;
+ memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
+ gk20a_err(d, "fail");
+ return err;
+}
+
+static inline int wfi_cmd_size(void)
+{
+ return 2;
+}
+
+void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
+{
+ /* wfi */
+ cmd->ptr[(*i)++] = 0x2001001E;
+ /* handle, ignored */
+ cmd->ptr[(*i)++] = 0x00000000;
+}
+
+static inline bool check_gp_put(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ u32 put;
+ /* gp_put changed unexpectedly since last update? */
+ put = gk20a_bar1_readl(g,
+ c->userd_gpu_va + 4 * ram_userd_gp_put_w());
+ if (c->gpfifo.put != put) {
+ /*TBD: BUG_ON/teardown on this*/
+ gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
+ "since last update");
+ c->gpfifo.put = put;
+ return false; /* surprise! */
+ }
+ return true; /* checked out ok */
+}
+
+/* Update with this periodically to determine how the gpfifo is draining. */
+static inline u32 update_gp_get(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ u32 new_get = gk20a_bar1_readl(g,
+ c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+ if (new_get < c->gpfifo.get)
+ c->gpfifo.wrap = !c->gpfifo.wrap;
+ c->gpfifo.get = new_get;
+ return new_get;
+}
+
+static inline u32 gp_free_count(struct channel_gk20a *c)
+{
+ return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
+ c->gpfifo.entry_num;
+}
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+ u32 timeout_delta_ms)
+{
+ u32 gpfifo_get = update_gp_get(ch->g, ch);
+ /* Count consequent timeout isr */
+ if (gpfifo_get == ch->timeout_gpfifo_get) {
+ /* we didn't advance since previous channel timeout check */
+ ch->timeout_accumulated_ms += timeout_delta_ms;
+ } else {
+ /* first timeout isr encountered */
+ ch->timeout_accumulated_ms = timeout_delta_ms;
+ }
+
+ ch->timeout_gpfifo_get = gpfifo_get;
+
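+ /* Rough illustration (assumed values, not from the patch): with a
+ * 10000 ms timeout_ms_max and 100 ms of timeout_delta_ms per ISR,
+ * roughly a hundred consecutive ISRs without gpfifo progress are
+ * needed before this function reports a timeout. */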
+ return ch->g->timeouts_enabled &&
+ ch->timeout_accumulated_ms > ch->timeout_ms_max;
+}
+
+
+/* Issue a syncpoint increment *preceded* by a wait-for-idle
+ * command. All commands on the channel will have been
+ * consumed at the time the fence syncpoint increment occurs.
+ */
+static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
+{
+ struct priv_cmd_entry *cmd = NULL;
+ struct gk20a *g = c->g;
+ u32 free_count;
+ int err;
+
+ if (c->has_timedout)
+ return -ETIMEDOUT;
+
+ if (!c->sync) {
+ c->sync = gk20a_channel_sync_create(c);
+ if (!c->sync)
+ return -ENOMEM;
+ }
+
+ update_gp_get(g, c);
+ free_count = gp_free_count(c);
+ if (unlikely(!free_count)) {
+ gk20a_err(dev_from_gk20a(g),
+ "not enough gpfifo space");
+ return -EAGAIN;
+ }
+
+ err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
+ if (unlikely(err))
+ return err;
+
+ WARN_ON(!c->last_submit_fence.wfi);
+
+ c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
+ c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
+ pbdma_gp_entry1_length_f(cmd->size);
+
+ c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
+
+ /* save gp_put */
+ cmd->gp_put = c->gpfifo.put;
+
+ gk20a_bar1_writel(g,
+ c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
+ c->gpfifo.put);
+
+ gk20a_dbg_info("post-submit put %d, get %d, size %d",
+ c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+ return 0;
+}
+
+static u32 get_gp_free_count(struct channel_gk20a *c)
+{
+ update_gp_get(c->g, c);
+ return gp_free_count(c);
+}
+
+static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
+{
+ void *mem = NULL;
+ unsigned int words;
+ u64 offset;
+ struct dma_buf *dmabuf = NULL;
+
+ if (gk20a_debug_trace_cmdbuf) {
+ u64 gpu_va = (u64)g->entry0 |
+ (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
+ int err;
+
+ words = pbdma_gp_entry1_length_v(g->entry1);
+ err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
+ if (!err)
+ mem = dma_buf_vmap(dmabuf);
+ }
+
+ if (mem) {
+ u32 i;
+ /*
+ * Write in batches of 128 as there seems to be a limit
+ * of how much you can output to ftrace at once.
+ */
+ for (i = 0; i < words; i += 128U) {
+ trace_gk20a_push_cmdbuf(
+ c->g->dev->name,
+ 0,
+ min(words - i, 128U),
+ offset + i * sizeof(u32),
+ mem);
+ }
+ dma_buf_vunmap(dmabuf, mem);
+ }
+}
+
+static int gk20a_channel_add_job(struct channel_gk20a *c,
+ struct gk20a_channel_fence *fence)
+{
+ struct vm_gk20a *vm = c->vm;
+ struct channel_gk20a_job *job = NULL;
+ struct mapped_buffer_node **mapped_buffers = NULL;
+ int err = 0, num_mapped_buffers;
+
+ /* job needs reference to this vm */
+ gk20a_vm_get(vm);
+
+ err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
+ if (err) {
+ gk20a_vm_put(vm);
+ return err;
+ }
+
+ job = kzalloc(sizeof(*job), GFP_KERNEL);
+ if (!job) {
+ gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
+ gk20a_vm_put(vm);
+ return -ENOMEM;
+ }
+
+ job->num_mapped_buffers = num_mapped_buffers;
+ job->mapped_buffers = mapped_buffers;
+ job->fence = *fence;
+
+ mutex_lock(&c->jobs_lock);
+ list_add_tail(&job->list, &c->jobs);
+ mutex_unlock(&c->jobs_lock);
+
+ return 0;
+}
+
+void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+{
+ struct gk20a *g = c->g;
+ struct vm_gk20a *vm = c->vm;
+ struct channel_gk20a_job *job, *n;
+ int i;
+
+ wake_up(&c->submit_wq);
+
+ mutex_lock(&c->jobs_lock);
+ list_for_each_entry_safe(job, n, &c->jobs, list) {
+ bool completed = WARN_ON(!c->sync) ||
+ c->sync->is_expired(c->sync, &job->fence);
+ if (!completed)
+ break;
+
+ gk20a_vm_put_buffers(vm, job->mapped_buffers,
+ job->num_mapped_buffers);
+
+ /* job is done. release its reference to vm */
+ gk20a_vm_put(vm);
+
+ list_del_init(&job->list);
+ kfree(job);
+ gk20a_channel_idle(g->dev);
+ }
+ mutex_unlock(&c->jobs_lock);
+
+ for (i = 0; i < nr_completed; i++)
+ gk20a_channel_idle(c->g->dev);
+}
+
+static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
+ struct nvhost_gpfifo *gpfifo,
+ u32 num_entries,
+ struct nvhost_fence *fence,
+ u32 flags)
+{
+ struct gk20a *g = c->g;
+ struct device *d = dev_from_gk20a(g);
+ u32 err = 0;
+ int i;
+ struct priv_cmd_entry *wait_cmd = NULL;
+ struct priv_cmd_entry *incr_cmd = NULL;
+ /* we might need two extra gpfifo entries - one for pre fence
+ * and one for post fence. */
+ const int extra_entries = 2;
+
+ if (c->has_timedout)
+ return -ETIMEDOUT;
+
+ if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
+ NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
+ !fence)
+ return -EINVAL;
+
+ if (!c->sync) {
+ c->sync = gk20a_channel_sync_create(c);
+ if (!c->sync)
+ return -ENOMEM;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ /* update debug settings */
+ if (g->ops.ltc.sync_debugfs)
+ g->ops.ltc.sync_debugfs(g);
+#endif
+
+ gk20a_dbg_info("channel %d", c->hw_chid);
+
+ /* gk20a_channel_update releases this ref. */
+ gk20a_channel_busy(g->dev);
+
+ trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
+ c->hw_chid,
+ num_entries,
+ flags,
+ fence->syncpt_id, fence->value);
+ check_gp_put(g, c);
+ update_gp_get(g, c);
+
+ gk20a_dbg_info("pre-submit put %d, get %d, size %d",
+ c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+ /* Invalidate tlb if it's dirty... */
+ /* TBD: this should be done in the cmd stream, not with PRIs. */
+ /* We don't know what context is currently running... */
+ /* Note also: there can be more than one context associated with the */
+ /* address space (vm). */
+ gk20a_mm_tlb_invalidate(c->vm);
+
+ /* Make sure we have enough space for gpfifo entries. If not,
+ * wait for signals from completed submits */
+ if (gp_free_count(c) < num_entries + extra_entries) {
+ err = wait_event_interruptible(c->submit_wq,
+ get_gp_free_count(c) >= num_entries + extra_entries ||
+ c->has_timedout);
+ }
+
+ if (c->has_timedout) {
+ err = -ETIMEDOUT;
+ goto clean_up;
+ }
+
+ if (err) {
+ gk20a_err(d, "not enough gpfifo space");
+ err = -EAGAIN;
+ goto clean_up;
+ }
+
+ /*
+ * optionally insert syncpt wait in the beginning of gpfifo submission
+ * when user requested and the wait hasn't expired.
+ * validate that the id makes sense, elide if not
+ * the only reason this isn't being unceremoniously killed is to
+ * keep running some tests which trigger this condition
+ */
+ if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
+ if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
+ err = c->sync->wait_fd(c->sync, fence->syncpt_id,
+ &wait_cmd);
+ else
+ err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
+ fence->value, &wait_cmd);
+ }
+ if (err)
+ goto clean_up;
+
+
+ /* always insert syncpt increment at end of gpfifo submission
+ to keep track of method completion for idle railgating */
+ if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
+ flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
+ err = c->sync->incr_user_fd(c->sync, &incr_cmd,
+ &c->last_submit_fence,
+ &fence->syncpt_id);
+ else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+ err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
+ &c->last_submit_fence,
+ &fence->syncpt_id,
+ &fence->value);
+ else
+ err = c->sync->incr(c->sync, &incr_cmd,
+ &c->last_submit_fence);
+ if (err)
+ goto clean_up;
+
+ if (wait_cmd) {
+ c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+ u64_lo32(wait_cmd->gva);
+ c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+ u64_hi32(wait_cmd->gva) |
+ pbdma_gp_entry1_length_f(wait_cmd->size);
+ trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+
+ c->gpfifo.put = (c->gpfifo.put + 1) &
+ (c->gpfifo.entry_num - 1);
+
+ /* save gp_put */
+ wait_cmd->gp_put = c->gpfifo.put;
+ }
+
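+ /* gpfifo.entry_num is expected to be a power of two (an assumption the
+ * masking below relies on), so ANDing the incremented put pointer with
+ * (entry_num - 1) wraps it around the ring. */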
+ for (i = 0; i < num_entries; i++) {
+ c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+ gpfifo[i].entry0; /* cmd buf va low 32 */
+ c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+ gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
+ trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+ c->gpfifo.put = (c->gpfifo.put + 1) &
+ (c->gpfifo.entry_num - 1);
+ }
+
+ if (incr_cmd) {
+ c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+ u64_lo32(incr_cmd->gva);
+ c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+ u64_hi32(incr_cmd->gva) |
+ pbdma_gp_entry1_length_f(incr_cmd->size);
+ trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+
+ c->gpfifo.put = (c->gpfifo.put + 1) &
+ (c->gpfifo.entry_num - 1);
+
+ /* save gp_put */
+ incr_cmd->gp_put = c->gpfifo.put;
+ }
+
+ /* Invalidate tlb if it's dirty...
+ * TBD: this should be done in the cmd stream, not with PRIs.
+ * We don't know what context is currently running...
+ * Note also: there can be more than one context associated with the
+ * address space (vm). */
+ gk20a_mm_tlb_invalidate(c->vm);
+
+ trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
+ c->hw_chid,
+ num_entries,
+ flags,
+ fence->syncpt_id, fence->value);
+
+ /* TODO! Check for errors... */
+ gk20a_channel_add_job(c, &c->last_submit_fence);
+
+ c->cmds_pending = true;
+ gk20a_bar1_writel(g,
+ c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
+ c->gpfifo.put);
+
+ gk20a_dbg_info("post-submit put %d, get %d, size %d",
+ c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+ gk20a_dbg_fn("done");
+ return err;
+
+clean_up:
+ gk20a_err(d, "fail");
+ free_priv_cmdbuf(c, wait_cmd);
+ free_priv_cmdbuf(c, incr_cmd);
+ gk20a_channel_idle(g->dev);
+ return err;
+}
+
+void gk20a_remove_channel_support(struct channel_gk20a *c)
+{
+
+}
+
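+/* Software-only initialization of per-channel state; no hardware is touched
+ * here. Binding to hardware happens later (see g->ops.fifo.bind_channel()). */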
+int gk20a_init_channel_support(struct gk20a *g, u32 chid)
+{
+ struct channel_gk20a *c = g->fifo.channel+chid;
+ c->g = g;
+ c->in_use = false;
+ c->hw_chid = chid;
+ c->bound = false;
+ c->remove_support = gk20a_remove_channel_support;
+ mutex_init(&c->jobs_lock);
+ INIT_LIST_HEAD(&c->jobs);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+ mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
+#endif
+ INIT_LIST_HEAD(&c->dbg_s_list);
+ mutex_init(&c->dbg_s_lock);
+
+ return 0;
+}
+
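+/* Wait until all work submitted on the channel has completed: if the last
+ * submit did not already end with a wfi-tagged fence, submit a wfi +
+ * increment first, then wait on that fence from the CPU. */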
+int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
+{
+ int err = 0;
+
+ if (!ch->cmds_pending)
+ return 0;
+
+ /* Do not wait for a timedout channel */
+ if (ch->has_timedout)
+ return -ETIMEDOUT;
+
+ if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
+ gk20a_dbg_fn("issuing wfi, incr to finish the channel");
+ err = gk20a_channel_submit_wfi(ch);
+ }
+ if (err)
+ return err;
+
+ BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
+
+ gk20a_dbg_fn("waiting for channel to finish thresh:%d",
+ ch->last_submit_fence.thresh);
+
+ err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout);
+ if (WARN_ON(err))
+ dev_warn(dev_from_gk20a(ch->g),
+ "timed out waiting for gk20a channel to finish");
+ else
+ ch->cmds_pending = false;
+
+ return err;
+}
+
+static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
+ ulong id, u32 offset,
+ u32 payload, long timeout)
+{
+ struct platform_device *pdev = ch->g->dev;
+ struct dma_buf *dmabuf;
+ void *data;
+ u32 *semaphore;
+ int ret = 0;
+ long remain;
+
+ /* do not wait if channel has timed out */
+ if (ch->has_timedout)
+ return -ETIMEDOUT;
+
+ dmabuf = dma_buf_get(id);
+ if (IS_ERR(dmabuf)) {
+ gk20a_err(&pdev->dev, "invalid semaphore nvmap handle 0x%lx",
+ id);
+ return -EINVAL;
+ }
+
+ data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
+ if (!data) {
+ gk20a_err(&pdev->dev, "failed to map notifier memory");
+ ret = -EINVAL;
+ goto cleanup_put;
+ }
+
+ semaphore = data + (offset & ~PAGE_MASK);
+
+ remain = wait_event_interruptible_timeout(
+ ch->semaphore_wq,
+ *semaphore == payload || ch->has_timedout,
+ timeout);
+
+ if (remain == 0 && *semaphore != payload)
+ ret = -ETIMEDOUT;
+ else if (remain < 0)
+ ret = remain;
+
+ dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
+cleanup_put:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
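+/* Handle the CHANNEL_WAIT ioctl: block, with the requested timeout, until
+ * the channel signals either the user-supplied notifier or the given
+ * semaphore value. */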
+static int gk20a_channel_wait(struct channel_gk20a *ch,
+ struct nvhost_wait_args *args)
+{
+ struct device *d = dev_from_gk20a(ch->g);
+ struct dma_buf *dmabuf;
+ struct notification *notif;
+ void *notif_va;
+ struct timespec tv;
+ u64 jiffies;
+ ulong id;
+ u32 offset;
+ unsigned long timeout;
+ int remain, ret = 0;
+
+ gk20a_dbg_fn("");
+
+ if (ch->has_timedout)
+ return -ETIMEDOUT;
+
+ if (args->timeout == NVHOST_NO_TIMEOUT)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ else
+ timeout = (u32)msecs_to_jiffies(args->timeout);
+
+ switch (args->type) {
+ case NVHOST_WAIT_TYPE_NOTIFIER:
+ id = args->condition.notifier.nvmap_handle;
+ offset = args->condition.notifier.offset;
+
+ dmabuf = dma_buf_get(id);
+ if (IS_ERR(dmabuf)) {
+ gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
+ id);
+ return -EINVAL;
+ }
+
+ notif_va = dma_buf_vmap(dmabuf);
+ if (!notif_va) {
+ gk20a_err(d, "failed to map notifier memory");
+ dma_buf_put(dmabuf);
+ return -ENOMEM;
+ }
+
+ notif = (struct notification *)((uintptr_t)notif_va + offset);
+
+ /* user should set status pending before
+ * calling this ioctl */
+ remain = wait_event_interruptible_timeout(
+ ch->notifier_wq,
+ notif->status == 0 || ch->has_timedout,
+ timeout);
+
+ if (remain == 0 && notif->status != 0) {
+ ret = -ETIMEDOUT;
+ goto notif_clean_up;
+ } else if (remain < 0) {
+ ret = -EINTR;
+ goto notif_clean_up;
+ }
+
+ /* TBD: fill in correct information */
+ jiffies = get_jiffies_64();
+ jiffies_to_timespec(jiffies, &tv);
+ notif->timestamp.nanoseconds[0] = tv.tv_nsec;
+ notif->timestamp.nanoseconds[1] = tv.tv_sec;
+ notif->info32 = 0xDEADBEEF; /* should be object name */
+ notif->info16 = ch->hw_chid; /* should be method offset */
+
+notif_clean_up:
+ dma_buf_vunmap(dmabuf, notif_va);
+ dma_buf_put(dmabuf);
+ return ret;
+
+ case NVHOST_WAIT_TYPE_SEMAPHORE:
+ ret = gk20a_channel_wait_semaphore(ch,
+ args->condition.semaphore.nvmap_handle,
+ args->condition.semaphore.offset,
+ args->condition.semaphore.payload,
+ timeout);
+
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int gk20a_channel_set_priority(struct channel_gk20a *ch,
+ u32 priority)
+{
+ u32 timeslice_timeout;
+ /* set priority of graphics channel */
+ switch (priority) {
+ case NVHOST_PRIORITY_LOW:
+ /* 64 << 3 = 512us */
+ timeslice_timeout = 64;
+ break;
+ case NVHOST_PRIORITY_MEDIUM:
+ /* 128 << 3 = 1024us */
+ timeslice_timeout = 128;
+ break;
+ case NVHOST_PRIORITY_HIGH:
+ /* 255 << 3 = 2040us */
+ timeslice_timeout = 255;
+ break;
+ default:
+ pr_err("Unsupported priority");
+ return -EINVAL;
+ }
+ channel_gk20a_set_schedule_params(ch,
+ timeslice_timeout);
+ return 0;
+}
+
+static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
+ struct nvhost_zcull_bind_args *args)
+{
+ struct gk20a *g = ch->g;
+ struct gr_gk20a *gr = &g->gr;
+
+ gk20a_dbg_fn("");
+
+ return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+ args->gpu_va, args->mode);
+}
+
+/* in this context the "channel" is the host1x channel which
+ * maps to *all* gk20a channels */
+int gk20a_channel_suspend(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ u32 chid;
+ bool channels_in_use = false;
+ struct device *d = dev_from_gk20a(g);
+ int err;
+
+ gk20a_dbg_fn("");
+
+ /* idle the engine by submitting WFI on non-KEPLER_C channel */
+ for (chid = 0; chid < f->num_channels; chid++) {
+ struct channel_gk20a *c = &f->channel[chid];
+ if (c->in_use && c->obj_class != KEPLER_C) {
+ err = gk20a_channel_submit_wfi(c);
+ if (err) {
+ gk20a_err(d, "cannot idle channel %d\n",
+ chid);
+ return err;
+ }
+
+ c->sync->wait_cpu(c->sync, &c->last_submit_fence,
+ 500000);
+ break;
+ }
+ }
+
+ for (chid = 0; chid < f->num_channels; chid++) {
+ if (f->channel[chid].in_use) {
+
+ gk20a_dbg_info("suspend channel %d", chid);
+ /* disable channel */
+ gk20a_writel(g, ccsr_channel_r(chid),
+ gk20a_readl(g, ccsr_channel_r(chid)) |
+ ccsr_channel_enable_clr_true_f());
+ /* preempt the channel */
+ gk20a_fifo_preempt_channel(g, chid);
+
+ channels_in_use = true;
+ }
+ }
+
+ if (channels_in_use) {
+ gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+
+ for (chid = 0; chid < f->num_channels; chid++) {
+ if (f->channel[chid].in_use)
+ channel_gk20a_unbind(&f->channel[chid]);
+ }
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+/* in this context the "channel" is the host1x channel which
+ * maps to *all* gk20a channels */
+int gk20a_channel_resume(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ u32 chid;
+ bool channels_in_use = false;
+
+ gk20a_dbg_fn("");
+
+ for (chid = 0; chid < f->num_channels; chid++) {
+ if (f->channel[chid].in_use) {
+ gk20a_dbg_info("resume channel %d", chid);
+ g->ops.fifo.bind_channel(&f->channel[chid]);
+ channels_in_use = true;
+ }
+ }
+
+ if (channels_in_use)
+ gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+void gk20a_channel_semaphore_wakeup(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ u32 chid;
+
+ gk20a_dbg_fn("");
+
+ for (chid = 0; chid < f->num_channels; chid++) {
+ struct channel_gk20a *c = g->fifo.channel+chid;
+ if (c->in_use)
+ wake_up_interruptible_all(&c->semaphore_wq);
+ }
+}
+
+static int gk20a_ioctl_channel_submit_gpfifo(
+ struct channel_gk20a *ch,
+ struct nvhost_submit_gpfifo_args *args)
+{
+ void *gpfifo;
+ u32 size;
+ int ret = 0;
+
+ gk20a_dbg_fn("");
+
+ if (ch->has_timedout)
+ return -ETIMEDOUT;
+
+ size = args->num_entries * sizeof(struct nvhost_gpfifo);
+
+ gpfifo = kzalloc(size, GFP_KERNEL);
+ if (!gpfifo)
+ return -ENOMEM;
+
+ if (copy_from_user(gpfifo,
+ (void __user *)(uintptr_t)args->gpfifo, size)) {
+ ret = -EINVAL;
+ goto clean_up;
+ }
+
+ ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
+ &args->fence, args->flags);
+
+clean_up:
+ kfree(gpfifo);
+ return ret;
+}
+
+void gk20a_init_fifo(struct gpu_ops *gops)
+{
+ gops->fifo.bind_channel = channel_gk20a_bind;
+}
+
+long gk20a_channel_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct channel_gk20a *ch = filp->private_data;
+ struct platform_device *dev = ch->g->dev;
+ u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
+ int err = 0;
+
+ if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
+ (_IOC_NR(cmd) == 0) ||
+ (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
+ (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
+ return -EFAULT;
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+ }
+
+ switch (cmd) {
+ case NVHOST_IOCTL_CHANNEL_OPEN:
+ {
+ int fd;
+ struct file *file;
+ char *name;
+
+ err = get_unused_fd_flags(O_RDWR);
+ if (err < 0)
+ break;
+ fd = err;
+
+ name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+ dev_name(&dev->dev), fd);
+ if (!name) {
+ err = -ENOMEM;
+ put_unused_fd(fd);
+ break;
+ }
+
+ file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
+ kfree(name);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ put_unused_fd(fd);
+ break;
+ }
+ fd_install(fd, file);
+
+ err = __gk20a_channel_open(ch->g, file);
+ if (err) {
+ put_unused_fd(fd);
+ fput(file);
+ break;
+ }
+
+ ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
+ break;
+ }
+ case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
+ break;
+ case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
+ gk20a_channel_busy(dev);
+ err = gk20a_alloc_obj_ctx(ch,
+ (struct nvhost_alloc_obj_ctx_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+ case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
+ gk20a_channel_busy(dev);
+ err = gk20a_free_obj_ctx(ch,
+ (struct nvhost_free_obj_ctx_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+ case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
+ gk20a_channel_busy(dev);
+ err = gk20a_alloc_channel_gpfifo(ch,
+ (struct nvhost_alloc_gpfifo_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+ case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
+ err = gk20a_ioctl_channel_submit_gpfifo(ch,
+ (struct nvhost_submit_gpfifo_args *)buf);
+ break;
+ case NVHOST_IOCTL_CHANNEL_WAIT:
+ gk20a_channel_busy(dev);
+ err = gk20a_channel_wait(ch,
+ (struct nvhost_wait_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+ case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
+ gk20a_channel_busy(dev);
+ err = gk20a_channel_zcull_bind(ch,
+ (struct nvhost_zcull_bind_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+ case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
+ gk20a_channel_busy(dev);
+ err = gk20a_init_error_notifier(ch,
+ (struct nvhost_set_error_notifier *)buf);
+ gk20a_channel_idle(dev);
+ break;
+#ifdef CONFIG_GK20A_CYCLE_STATS
+ case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
+ gk20a_channel_busy(dev);
+ err = gk20a_channel_cycle_stats(ch,
+ (struct nvhost_cycle_stats_args *)buf);
+ gk20a_channel_idle(dev);
+ break;
+#endif
+ case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
+ {
+ u32 timeout =
+ (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+ gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+ timeout, ch->hw_chid);
+ ch->timeout_ms_max = timeout;
+ break;
+ }
+ case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
+ {
+ u32 timeout =
+ (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+ bool timeout_debug_dump = !((u32)
+ ((struct nvhost_set_timeout_ex_args *)buf)->flags &
+ (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
+ gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+ timeout, ch->hw_chid);
+ ch->timeout_ms_max = timeout;
+ ch->timeout_debug_dump = timeout_debug_dump;
+ break;
+ }
+ case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
+ ((struct nvhost_get_param_args *)buf)->value =
+ ch->has_timedout;
+ break;
+ case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
+ gk20a_channel_busy(dev);
+ gk20a_channel_set_priority(ch,
+ ((struct nvhost_set_priority_args *)buf)->priority);
+ gk20a_channel_idle(dev);
+ break;
+ default:
+ dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
+ err = -ENOTTY;
+ break;
+ }
+
+ if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
+ copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
+ err = -EFAULT;
+
+ return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
new file mode 100644
index 000000000000..429db85d4177
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -0,0 +1,172 @@
+/*
+ * drivers/video/tegra/host/gk20a/channel_gk20a.h
+ *
+ * GK20A graphics channel
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __CHANNEL_GK20A_H__
+#define __CHANNEL_GK20A_H__
+
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/nvhost_ioctl.h>
+struct gk20a;
+struct gr_gk20a;
+struct dbg_session_gk20a;
+
+#include "channel_sync_gk20a.h"
+
+#include "mm_gk20a.h"
+#include "gr_gk20a.h"
+
+struct gpfifo {
+ u32 entry0;
+ u32 entry1;
+};
+
+struct notification {
+ struct {
+ u32 nanoseconds[2];
+ } timestamp;
+ u32 info32;
+ u16 info16;
+ u16 status;
+};
+
+struct fence {
+ u32 hw_chid;
+ u32 syncpt_val;
+};
+
+/* contexts associated with a channel */
+struct channel_ctx_gk20a {
+ struct gr_ctx_desc gr_ctx;
+ struct pm_ctx_desc pm_ctx;
+ struct patch_desc patch_ctx;
+ struct zcull_ctx_desc zcull_ctx;
+ u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
+ u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
+ bool global_ctx_buffer_mapped;
+};
+
+struct channel_gk20a_job {
+ struct mapped_buffer_node **mapped_buffers;
+ int num_mapped_buffers;
+ struct gk20a_channel_fence fence;
+ struct list_head list;
+};
+
+/* this is the priv element of struct nvhost_channel */
+struct channel_gk20a {
+ struct gk20a *g;
+ bool in_use;
+ int hw_chid;
+ bool bound;
+ bool first_init;
+ bool vpr;
+ pid_t pid;
+
+ struct list_head jobs;
+ struct mutex jobs_lock;
+
+ struct vm_gk20a *vm;
+
+ struct gpfifo_desc gpfifo;
+
+ struct channel_ctx_gk20a ch_ctx;
+
+ struct inst_desc inst_block;
+ struct mem_desc_sub ramfc;
+
+ void *userd_cpu_va;
+ u64 userd_iova;
+ u64 userd_gpu_va;
+
+ s32 num_objects;
+ u32 obj_class; /* we support only one obj per channel */
+
+ struct priv_cmd_queue priv_cmd_q;
+
+ wait_queue_head_t notifier_wq;
+ wait_queue_head_t semaphore_wq;
+ wait_queue_head_t submit_wq;
+
+ u32 timeout_accumulated_ms;
+ u32 timeout_gpfifo_get;
+
+ bool cmds_pending;
+ struct gk20a_channel_fence last_submit_fence;
+
+ void (*remove_support)(struct channel_gk20a *);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+ struct {
+ void *cyclestate_buffer;
+ u32 cyclestate_buffer_size;
+ struct dma_buf *cyclestate_buffer_handler;
+ struct mutex cyclestate_buffer_mutex;
+ } cyclestate;
+#endif
+ struct mutex dbg_s_lock;
+ struct list_head dbg_s_list;
+
+ bool has_timedout;
+ u32 timeout_ms_max;
+ bool timeout_debug_dump;
+
+ struct dma_buf *error_notifier_ref;
+ struct nvhost_notification *error_notifier;
+ void *error_notifier_va;
+
+ struct gk20a_channel_sync *sync;
+};
+
+static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
+{
+ return !!ch->vm;
+}
+int channel_gk20a_commit_va(struct channel_gk20a *c);
+int gk20a_init_channel_support(struct gk20a *, u32 chid);
+void gk20a_free_channel(struct channel_gk20a *ch, bool finish);
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+ u32 timeout_delta_ms);
+void gk20a_disable_channel(struct channel_gk20a *ch,
+ bool wait_for_finish,
+ unsigned long finish_timeout);
+void gk20a_disable_channel_no_update(struct channel_gk20a *ch);
+int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout);
+void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
+void gk20a_channel_semaphore_wakeup(struct gk20a *g);
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
+ struct priv_cmd_entry **entry);
+
+int gk20a_channel_suspend(struct gk20a *g);
+int gk20a_channel_resume(struct gk20a *g);
+
+/* Channel file operations */
+int gk20a_channel_open(struct inode *inode, struct file *filp);
+long gk20a_channel_ioctl(struct file *filp,
+ unsigned int cmd,
+ unsigned long arg);
+int gk20a_channel_release(struct inode *inode, struct file *filp);
+struct channel_gk20a *gk20a_get_channel_from_file(int fd);
+void gk20a_channel_update(struct channel_gk20a *c, int nr_completed);
+
+void gk20a_init_fifo(struct gpu_ops *gops);
+
+#endif /*__CHANNEL_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
new file mode 100644
index 000000000000..9f9c3ba7ac71
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -0,0 +1,356 @@
+/*
+ * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
+ *
+ * GK20A Channel Synchronization Abstraction
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/gk20a.h>
+
+#include "channel_sync_gk20a.h"
+#include "gk20a.h"
+
+#ifdef CONFIG_SYNC
+#include "../../../staging/android/sync.h"
+#endif
+
+#ifdef CONFIG_TEGRA_GK20A
+#include <linux/nvhost.h>
+#endif
+
+#ifdef CONFIG_TEGRA_GK20A
+
+struct gk20a_channel_syncpt {
+ struct gk20a_channel_sync ops;
+ struct channel_gk20a *c;
+ struct platform_device *host1x_pdev;
+ u32 id;
+};
+
+static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
+{
+ /* syncpoint_a */
+ ptr[0] = 0x2001001C;
+ /* payload */
+ ptr[1] = thresh;
+ /* syncpoint_b */
+ ptr[2] = 0x2001001D;
+ /* syncpt_id, switch_en, wait */
+ ptr[3] = (id << 8) | 0x10;
+}
+
+int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s,
+ struct gk20a_channel_fence *fence,
+ int timeout)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ if (!fence->valid)
+ return 0;
+ return nvhost_syncpt_wait_timeout_ext(
+ sp->host1x_pdev, sp->id, fence->thresh,
+ timeout, NULL, NULL);
+}
+
+bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
+ struct gk20a_channel_fence *fence)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ if (!fence->valid)
+ return true;
+ return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id,
+ fence->thresh);
+}
+
+int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
+ u32 thresh, struct priv_cmd_entry **entry)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ struct priv_cmd_entry *wait_cmd = NULL;
+
+ if (id >= nvhost_syncpt_nb_pts_ext(sp->host1x_pdev)) {
+ dev_warn(dev_from_gk20a(sp->c->g),
+ "invalid wait id in gpfifo submit, elided");
+ return 0;
+ }
+
+ if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
+ return 0;
+
+ gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd);
+ if (wait_cmd == NULL) {
+ gk20a_err(dev_from_gk20a(sp->c->g),
+ "not enough priv cmd buffer space");
+ return -EAGAIN;
+ }
+
+ add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
+
+ *entry = wait_cmd;
+ return 0;
+}
+
+int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
+ struct priv_cmd_entry **entry)
+{
+#ifdef CONFIG_SYNC
+ int i;
+ int num_wait_cmds;
+ struct sync_pt *pt;
+ struct sync_fence *sync_fence;
+ struct priv_cmd_entry *wait_cmd = NULL;
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ struct channel_gk20a *c = sp->c;
+
+ sync_fence = nvhost_sync_fdget(fd);
+ if (!sync_fence)
+ return -EINVAL;
+
+ num_wait_cmds = nvhost_sync_num_pts(sync_fence);
+ gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
+ if (wait_cmd == NULL) {
+ gk20a_err(dev_from_gk20a(c->g),
+ "not enough priv cmd buffer space");
+ sync_fence_put(sync_fence);
+ return -EAGAIN;
+ }
+
+ i = 0;
+ list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
+ u32 wait_id = nvhost_sync_pt_id(pt);
+ u32 wait_value = nvhost_sync_pt_thresh(pt);
+
+ if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
+ wait_id, wait_value)) {
+ wait_cmd->ptr[i * 4 + 0] = 0;
+ wait_cmd->ptr[i * 4 + 1] = 0;
+ wait_cmd->ptr[i * 4 + 2] = 0;
+ wait_cmd->ptr[i * 4 + 3] = 0;
+ } else
+ add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,
+ wait_value);
+ i++;
+ }
+ WARN_ON(i != num_wait_cmds);
+ sync_fence_put(sync_fence);
+
+ *entry = wait_cmd;
+ return 0;
+#else
+ return -ENODEV;
+#endif
+}
+
+static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
+{
+ struct channel_gk20a *ch20a = priv;
+ gk20a_channel_update(ch20a, nr_completed);
+}
+
+static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
+ bool gfx_class, bool wfi_cmd,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence)
+{
+ u32 thresh;
+ int incr_cmd_size;
+ int j = 0;
+ int err;
+ struct priv_cmd_entry *incr_cmd = NULL;
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ struct channel_gk20a *c = sp->c;
+
+ /* nvhost action_gpfifo_submit_complete releases this ref. */
+ err = gk20a_channel_busy(c->g->dev);
+ if (err)
+ return err;
+
+ incr_cmd_size = 4;
+ if (wfi_cmd)
+ incr_cmd_size += 2;
+
+ gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+ if (incr_cmd == NULL) {
+ gk20a_channel_idle(c->g->dev);
+ gk20a_err(dev_from_gk20a(c->g),
+ "not enough priv cmd buffer space");
+ return -EAGAIN;
+ }
+
+ if (gfx_class) {
+ WARN_ON(wfi_cmd); /* It makes no sense to use the gfx class with wfi. */
+ /* setobject KEPLER_C */
+ incr_cmd->ptr[j++] = 0x20010000;
+ incr_cmd->ptr[j++] = KEPLER_C;
+ /* syncpt incr */
+ incr_cmd->ptr[j++] = 0x200100B2;
+ incr_cmd->ptr[j++] = sp->id |
+ (0x1 << 20) | (0x1 << 16);
+ } else {
+ if (wfi_cmd) {
+ /* wfi */
+ incr_cmd->ptr[j++] = 0x2001001E;
+ /* handle, ignored */
+ incr_cmd->ptr[j++] = 0x00000000;
+ }
+ /* syncpoint_a */
+ incr_cmd->ptr[j++] = 0x2001001C;
+ /* payload, ignored */
+ incr_cmd->ptr[j++] = 0;
+ /* syncpoint_b */
+ incr_cmd->ptr[j++] = 0x2001001D;
+ /* syncpt_id, incr */
+ incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
+ }
+ WARN_ON(j != incr_cmd_size);
+
+ thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 1);
+
+ err = nvhost_intr_register_notifier(sp->host1x_pdev, sp->id, thresh,
+ gk20a_channel_syncpt_update, c);
+
+ /* Adding the interrupt action should never fail. Proper error handling
+ * here would require us to decrement the syncpt max back to its
+ * original value. */
+ if (WARN(err, "failed to set submit complete interrupt")) {
+ gk20a_channel_idle(c->g->dev);
+ err = 0; /* Ignore this error. */
+ }
+
+ fence->thresh = thresh;
+ fence->valid = true;
+ fence->wfi = wfi_cmd;
+ *entry = incr_cmd;
+ return 0;
+}
+
+int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence)
+{
+ return __gk20a_channel_syncpt_incr(s,
+ false /* use host class */,
+ true /* wfi */,
+ entry, fence);
+}
+
+int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ /* Don't add a wfi cmd here since we're not returning
+ * a fence to user space. */
+ return __gk20a_channel_syncpt_incr(s,
+ sp->c->obj_class == KEPLER_C /* may use gfx class */,
+ false /* no wfi */,
+ entry, fence);
+}
+
+int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence,
+ u32 *id, u32 *thresh)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence
+ * to user space. */
+ int err = __gk20a_channel_syncpt_incr(s,
+ sp->c->obj_class == KEPLER_C /* use gfx class? */,
+ sp->c->obj_class != KEPLER_C /* wfi if host class */,
+ entry, fence);
+ if (err)
+ return err;
+ *id = sp->id;
+ *thresh = fence->thresh;
+ return 0;
+}
+
+int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence,
+ int *fd)
+{
+#ifdef CONFIG_SYNC
+ int err;
+ struct nvhost_ctrl_sync_fence_info pt;
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence,
+ &pt.id, &pt.thresh);
+ if (err)
+ return err;
+ return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1,
+ "fence", fd);
+#else
+ return -ENODEV;
+#endif
+}
+
+void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
+}
+
+static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
+{
+ struct gk20a_channel_syncpt *sp =
+ container_of(s, struct gk20a_channel_syncpt, ops);
+ nvhost_free_syncpt(sp->id);
+ kfree(sp);
+}
+
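+/* Allocate a host-managed syncpoint for this channel and wire up the
+ * syncpoint-backed implementation of the sync ops; destroy() releases the
+ * syncpoint again. */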
+static struct gk20a_channel_sync *
+gk20a_channel_syncpt_create(struct channel_gk20a *c)
+{
+ struct gk20a_channel_syncpt *sp;
+
+ sp = kzalloc(sizeof(*sp), GFP_KERNEL);
+ if (!sp)
+ return NULL;
+
+ sp->c = c;
+ sp->host1x_pdev = to_platform_device(c->g->dev->dev.parent);
+ sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev, c->hw_chid);
+
+ sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu;
+ sp->ops.is_expired = gk20a_channel_syncpt_is_expired;
+ sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
+ sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
+ sp->ops.incr = gk20a_channel_syncpt_incr;
+ sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
+ sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt;
+ sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd;
+ sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
+ sp->ops.destroy = gk20a_channel_syncpt_destroy;
+ return &sp->ops;
+}
+#endif /* CONFIG_TEGRA_GK20A */
+
+struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
+{
+#ifdef CONFIG_TEGRA_GK20A
+ if (gk20a_platform_has_syncpoints(c->g->dev))
+ return gk20a_channel_syncpt_create(c);
+#endif
+ WARN_ON(1);
+ return NULL;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
new file mode 100644
index 000000000000..69feb89f0c3e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -0,0 +1,102 @@
+/*
+ * drivers/video/tegra/host/gk20a/channel_sync_gk20a.h
+ *
+ * GK20A Channel Synchronization Abstraction
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GK20A_CHANNEL_SYNC_H_
+#define _GK20A_CHANNEL_SYNC_H_
+
+#include <linux/types.h>
+
+struct gk20a_channel_sync;
+struct priv_cmd_entry;
+struct channel_gk20a;
+
+struct gk20a_channel_fence {
+ bool valid;
+ bool wfi; /* was issued with preceding wfi */
+ u32 thresh; /* either semaphore or syncpoint value */
+};
+
+struct gk20a_channel_sync {
+ /* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */
+ int (*wait_cpu)(struct gk20a_channel_sync *s,
+ struct gk20a_channel_fence *fence,
+ int timeout);
+
+ /* Test whether a fence returned by incr_syncpt() or incr_fd() is
+ * expired. */
+ bool (*is_expired)(struct gk20a_channel_sync *s,
+ struct gk20a_channel_fence *fence);
+
+ /* Generate a gpu wait cmdbuf from syncpoint. */
+ int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
+ struct priv_cmd_entry **entry);
+
+ /* Generate a gpu wait cmdbuf from sync fd. */
+ int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
+ struct priv_cmd_entry **entry);
+
+ /* Increment syncpoint/semaphore.
+ * Returns
+ * - a gpu cmdbuf that performs the increment when executed,
+ * - a fence that can be passed to wait_cpu() and is_expired().
+ */
+ int (*incr)(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence);
+
+ /* Increment syncpoint/semaphore, preceded by a wfi.
+ * Returns
+ * - a gpu cmdbuf that performs the increment when executed,
+ * - a fence that can be passed to wait_cpu() and is_expired().
+ */
+ int (*incr_wfi)(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence);
+
+ /* Increment syncpoint, so that the returned fence represents
+ * work completion (may need wfi) and can be returned to user space.
+ * Returns
+ * - a gpu cmdbuf that performs the increment when executed,
+ * - a fence that can be passed to wait_cpu() and is_expired(),
+ * - a syncpoint id/value pair that can be returned to user space.
+ */
+ int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence,
+ u32 *id, u32 *thresh);
+
+ /* Increment syncpoint/semaphore, so that the returned fence represents
+ * work completion (may need wfi) and can be returned to user space.
+ * Returns
+ * - a gpu cmdbuf that performs the increment when executed,
+ * - a fence that can be passed to wait_cpu() and is_expired(),
+ * - a sync fd that can be returned to user space.
+ */
+ int (*incr_user_fd)(struct gk20a_channel_sync *s,
+ struct priv_cmd_entry **entry,
+ struct gk20a_channel_fence *fence,
+ int *fd);
+
+ /* Reset the channel syncpoint/semaphore. */
+ void (*set_min_eq_max)(struct gk20a_channel_sync *s);
+
+ /* Free the resources allocated by gk20a_channel_sync_create. */
+ void (*destroy)(struct gk20a_channel_sync *s);
+};
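+
+/* Informational sketch of how the submit path in channel_gk20a.c uses these
+ * ops: wait_fd()/wait_syncpt() build an optional pre-fence wait cmdbuf,
+ * one of incr()/incr_user_syncpt()/incr_user_fd() builds the post-fence
+ * increment, and wait_cpu()/is_expired() are later used to wait for or
+ * poll that fence from the CPU. */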
+
+struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
new file mode 100644
index 000000000000..151a332b8cbd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c
@@ -0,0 +1,865 @@
+/*
+ * drivers/video/tegra/host/gk20a/clk_gk20a.c
+ *
+ * GK20A Clocks
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h> /* for mdelay */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/clk/tegra.h>
+#include <mach/thermal.h>
+
+#include "gk20a.h"
+#include "hw_trim_gk20a.h"
+#include "hw_timer_gk20a.h"
+
+#define gk20a_dbg_clk(fmt, arg...) \
+ gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
+
+/* from vbios PLL info table */
+struct pll_parms gpc_pll_params = {
+ 144, 2064, /* freq */
+ 1000, 2064, /* vco */
+ 12, 38, /* u */
+ 1, 255, /* M */
+ 8, 255, /* N */
+ 1, 32, /* PL */
+};
+
+static int num_gpu_cooling_freq;
+static struct gpufreq_table_data *gpu_cooling_freq;
+
+struct gpufreq_table_data *tegra_gpufreq_table_get(void)
+{
+ return gpu_cooling_freq;
+}
+
+unsigned int tegra_gpufreq_table_size_get(void)
+{
+ return num_gpu_cooling_freq;
+}
+
+static u8 pl_to_div[] = {
+/* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */
+/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 };
+
+/* Calculate and update M/N/PL as well as pll->freq
+ * ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a)
+ * u_f = ref_clk_f / M;
+ * PLL output = vco_f = u_f * N = ref_clk_f * N / M;
+ * gpc2clk = target clock frequency = vco_f / PL;
+ * gpcclk = gpc2clk / 2; */
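+/* Worked example with hypothetical numbers (not taken from this change):
+ * ref_clk_f = 12 MHz, M = 1, N = 120 and PL index 1 (divider 2) give
+ * vco_f = 12 * 120 / 1 = 1440 MHz, gpc2clk = 1440 / 2 = 720 MHz and
+ * gpcclk = 720 / 2 = 360 MHz. */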
+static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
+ struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
+{
+ u32 min_vco_f, max_vco_f;
+ u32 best_M, best_N;
+ u32 low_PL, high_PL, best_PL;
+ u32 m, n, n2;
+ u32 target_vco_f, vco_f;
+ u32 ref_clk_f, target_clk_f, u_f;
+ u32 delta, lwv, best_delta = ~0;
+ int pl;
+
+ BUG_ON(target_freq == NULL);
+
+ gk20a_dbg_fn("request target freq %d MHz", *target_freq);
+
+ ref_clk_f = pll->clk_in;
+ target_clk_f = *target_freq;
+ max_vco_f = pll_params->max_vco;
+ min_vco_f = pll_params->min_vco;
+ best_M = pll_params->max_M;
+ best_N = pll_params->min_N;
+ best_PL = pll_params->min_PL;
+
+ target_vco_f = target_clk_f + target_clk_f / 50;
+ if (max_vco_f < target_vco_f)
+ max_vco_f = target_vco_f;
+
+ high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f;
+ high_PL = min(high_PL, pll_params->max_PL);
+ high_PL = max(high_PL, pll_params->min_PL);
+
+ low_PL = min_vco_f / target_vco_f;
+ low_PL = min(low_PL, pll_params->max_PL);
+ low_PL = max(low_PL, pll_params->min_PL);
+
+ /* Find Indices of high_PL and low_PL */
+ for (pl = 0; pl < 14; pl++) {
+ if (pl_to_div[pl] >= low_PL) {
+ low_PL = pl;
+ break;
+ }
+ }
+ for (pl = 0; pl < 14; pl++) {
+ if (pl_to_div[pl] >= high_PL) {
+ high_PL = pl;
+ break;
+ }
+ }
+ gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
+ low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]);
+
+ for (pl = low_PL; pl <= high_PL; pl++) {
+ target_vco_f = target_clk_f * pl_to_div[pl];
+
+ for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
+ u_f = ref_clk_f / m;
+
+ if (u_f < pll_params->min_u)
+ break;
+ if (u_f > pll_params->max_u)
+ continue;
+
+ n = (target_vco_f * m) / ref_clk_f;
+ n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;
+
+ if (n > pll_params->max_N)
+ break;
+
+ for (; n <= n2; n++) {
+ if (n < pll_params->min_N)
+ continue;
+ if (n > pll_params->max_N)
+ break;
+
+ vco_f = ref_clk_f * n / m;
+
+ if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
+ lwv = (vco_f + (pl_to_div[pl] / 2))
+ / pl_to_div[pl];
+ delta = abs(lwv - target_clk_f);
+
+ if (delta < best_delta) {
+ best_delta = delta;
+ best_M = m;
+ best_N = n;
+ best_PL = pl;
+
+ if (best_delta == 0 ||
+ /* 0.45% for non best fit */
+ (!best_fit && (vco_f / best_delta > 218))) {
+ goto found_match;
+ }
+
+ gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
+ delta, m, n, pl);
+ }
+ }
+ }
+ }
+ }
+
+found_match:
+ BUG_ON(best_delta == ~0);
+
+ if (best_fit && best_delta != 0)
+ gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
+ target_clk_f);
+
+ pll->M = best_M;
+ pll->N = best_N;
+ pll->PL = best_PL;
+
+ /* save current frequency */
+ pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]);
+
+ *target_freq = pll->freq;
+
+ gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
+ *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]);
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+}
+
+static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
+{
+ u32 data, coeff;
+ u32 nold;
+ int ramp_timeout = 500;
+
+ /* get old coefficients */
+ coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
+ nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);
+
+ /* do nothing if NDIV is same */
+ if (n == nold)
+ return 0;
+
+ /* setup */
+ data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
+ data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
+ trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
+ gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
+ data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
+ data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
+ trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
+ gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
+
+ /* pll slowdown mode */
+ data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
+ data = set_field(data,
+ trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
+ trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
+ gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
+
+ /* new ndiv ready for ramp */
+ coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
+ coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
+ trim_sys_gpcpll_coeff_ndiv_f(n));
+ udelay(1);
+ gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
+
+ /* dynamic ramp to new ndiv */
+ data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
+ data = set_field(data,
+ trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
+ trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
+ udelay(1);
+ gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
+
+ do {
+ udelay(1);
+ ramp_timeout--;
+ data = gk20a_readl(
+ g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
+ if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
+ break;
+ } while (ramp_timeout > 0);
+
+ /* exit slowdown mode */
+ data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
+ data = set_field(data,
+ trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
+ trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
+ data = set_field(data,
+ trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
+ trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
+ gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
+ gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
+
+ if (ramp_timeout <= 0) {
+ gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
+ return -ETIMEDOUT;
+ }
+ return 0;
+}
+
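+/* Program the GPC PLL to the M/N/PL held in clk->gpc_pll. If allow_slide
+ * is set and only NDIV changes, the new frequency is reached with a dynamic
+ * NDIV slide; otherwise: slide down to NDIV_LO, bypass and disable the PLL,
+ * write the new coefficients, re-enable it, wait for lock, switch back to
+ * VCO output and slide up to the target NDIV. */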
+static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk,
+ int allow_slide)
+{
+ u32 data, cfg, coeff, timeout;
+ u32 m, n, pl;
+ u32 nlo;
+
+ gk20a_dbg_fn("");
+
+ if (!tegra_platform_is_silicon())
+ return 0;
+
+ /* get old coefficients */
+ coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
+ m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
+ n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
+ pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);
+
+ /* do NDIV slide if there is no change in M and PL */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ if (allow_slide && clk->gpc_pll.M == m && clk->gpc_pll.PL == pl
+ && trim_sys_gpcpll_cfg_enable_v(cfg)) {
+ return clk_slide_gpc_pll(g, clk->gpc_pll.N);
+ }
+
+ /* slide down to NDIV_LO */
+ nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in);
+ if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
+ int ret = clk_slide_gpc_pll(g, nlo);
+ if (ret)
+ return ret;
+ }
+
+ /* split FO-to-bypass jump in halves by setting out divider 1:2 */
+ data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
+ data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
+ trim_sys_gpc2clk_out_vcodiv_f(2));
+ gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
+
+ /* put PLL in bypass before programming it */
+ data = gk20a_readl(g, trim_sys_sel_vco_r());
+ data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
+ trim_sys_sel_vco_gpc2clk_out_bypass_f());
+ udelay(2);
+ gk20a_writel(g, trim_sys_sel_vco_r(), data);
+
+ /* get out from IDDQ */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
+ cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
+ trim_sys_gpcpll_cfg_iddq_power_on_v());
+ gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
+ gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ udelay(2);
+ }
+
+ /* disable PLL before changing coefficients */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
+ trim_sys_gpcpll_cfg_enable_no_f());
+ gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
+ gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+
+ /* change coefficients */
+ nlo = DIV_ROUND_UP(clk->gpc_pll.M * gpc_pll_params.min_vco,
+ clk->gpc_pll.clk_in);
+ coeff = trim_sys_gpcpll_coeff_mdiv_f(clk->gpc_pll.M) |
+ trim_sys_gpcpll_coeff_ndiv_f(allow_slide ?
+ nlo : clk->gpc_pll.N) |
+ trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL);
+ gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
+
+ /* enable PLL after changing coefficients */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
+ trim_sys_gpcpll_cfg_enable_yes_f());
+ gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
+
+ /* lock pll */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()) {
+ cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
+ trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
+ gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
+ }
+
+ /* wait pll lock */
+ timeout = clk->pll_delay / 2 + 1;
+ do {
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
+ goto pll_locked;
+ udelay(2);
+ } while (--timeout > 0);
+
+ /* PLL is messed up. What can we do here? */
+ BUG();
+ return -EBUSY;
+
+pll_locked:
+ /* put PLL back on vco */
+ data = gk20a_readl(g, trim_sys_sel_vco_r());
+ data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
+ trim_sys_sel_vco_gpc2clk_out_vco_f());
+ gk20a_writel(g, trim_sys_sel_vco_r(), data);
+ clk->gpc_pll.enabled = true;
+
+ /* restore out divider 1:1 */
+ data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
+ data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
+ trim_sys_gpc2clk_out_vcodiv_by1_f());
+ udelay(2);
+ gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
+
+ /* slide up to target NDIV */
+ return clk_slide_gpc_pll(g, clk->gpc_pll.N);
+}
+
+static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
+{
+ u32 cfg, coeff, m, nlo;
+ struct clk_gk20a *clk = &g->clk;
+
+ /* slide to VCO min */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
+ coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
+ m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
+ nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco,
+ clk->gpc_pll.clk_in);
+ clk_slide_gpc_pll(g, nlo);
+ }
+
+ /* put PLL in bypass before disabling it */
+ cfg = gk20a_readl(g, trim_sys_sel_vco_r());
+ cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
+ trim_sys_sel_vco_gpc2clk_out_bypass_f());
+ gk20a_writel(g, trim_sys_sel_vco_r(), cfg);
+
+ /* disable PLL */
+ cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
+ trim_sys_gpcpll_cfg_enable_no_f());
+ gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
+ gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+
+ clk->gpc_pll.enabled = false;
+ return 0;
+}
+
+static int gk20a_init_clk_reset_enable_hw(struct gk20a *g)
+{
+ gk20a_dbg_fn("");
+ return 0;
+}
+
+struct clk *gk20a_clk_get(struct gk20a *g)
+{
+ if (!g->clk.tegra_clk) {
+ struct clk *clk;
+
+ clk = clk_get_sys("tegra_gk20a", "gpu");
+ if (IS_ERR(clk)) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to get tegra gpu clk tegra_gk20a/gpu");
+ return NULL;
+ }
+ g->clk.tegra_clk = clk;
+ }
+
+ return g->clk.tegra_clk;
+}
+
+static int gk20a_init_clk_setup_sw(struct gk20a *g)
+{
+ struct clk_gk20a *clk = &g->clk;
+ static int initialized;
+ unsigned long *freqs;
+ int err, num_freqs;
+ struct clk *ref;
+ unsigned long ref_rate;
+
+ gk20a_dbg_fn("");
+
+ if (clk->sw_ready) {
+ gk20a_dbg_fn("skip init");
+ return 0;
+ }
+
+ if (!gk20a_clk_get(g))
+ return -EINVAL;
+
+ ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
+ if (IS_ERR(ref)) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to get GPCPLL reference clock");
+ return -EINVAL;
+ }
+ ref_rate = clk_get_rate(ref);
+
+ clk->pll_delay = 300; /* usec */
+
+ clk->gpc_pll.id = GK20A_GPC_PLL;
+ clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */
+
+ /* Decide initial frequency */
+ if (!initialized) {
+ initialized = 1;
+ clk->gpc_pll.M = 1;
+ clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
+ clk->gpc_pll.clk_in);
+ clk->gpc_pll.PL = 1;
+ clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
+ clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
+ }
+
+ err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk),
+ &freqs, &num_freqs);
+ if (!err) {
+ int i, j;
+
+ /* init j for inverse traversal of frequencies */
+ j = num_freqs - 1;
+
+ gpu_cooling_freq = kzalloc(
+ (1 + num_freqs) * sizeof(*gpu_cooling_freq),
+ GFP_KERNEL);
+ if (!gpu_cooling_freq)
+ return -ENOMEM;
+
+ /* store frequencies in inverse order */
+ for (i = 0; i < num_freqs; ++i, --j) {
+ gpu_cooling_freq[i].index = i;
+ gpu_cooling_freq[i].frequency = freqs[j];
+ }
+
+ /* add 'end of table' marker */
+ gpu_cooling_freq[i].index = i;
+ gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END;
+
+ /* store number of frequencies */
+ num_gpu_cooling_freq = num_freqs + 1;
+ }
+
+ mutex_init(&clk->clk_mutex);
+
+ clk->sw_ready = true;
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+static int gk20a_init_clk_setup_hw(struct gk20a *g)
+{
+ u32 data;
+
+ gk20a_dbg_fn("");
+
+ data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
+ data = set_field(data,
+ trim_sys_gpc2clk_out_sdiv14_m() |
+ trim_sys_gpc2clk_out_vcodiv_m() |
+ trim_sys_gpc2clk_out_bypdiv_m(),
+ trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
+ trim_sys_gpc2clk_out_vcodiv_by1_f() |
+ trim_sys_gpc2clk_out_bypdiv_f(0));
+ gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
+
+ return 0;
+}
+
+static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
+{
+ struct clk_gk20a *clk = &g->clk;
+
+ if (freq > gpc_pll_params.max_freq)
+ freq = gpc_pll_params.max_freq;
+ else if (freq < gpc_pll_params.min_freq)
+ freq = gpc_pll_params.min_freq;
+
+ if (freq != old_freq) {
+ /* gpc_pll.freq is changed to new value here */
+ if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
+ &freq, true)) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to set pll target for %d", freq);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
+{
+ struct clk_gk20a *clk = &g->clk;
+ int err = 0;
+
+ gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);
+
+ if ((freq == old_freq) && clk->gpc_pll.enabled)
+ return 0;
+
+ /* change frequency only if power is on */
+ if (g->clk.clk_hw_on) {
+ err = clk_program_gpc_pll(g, clk, 1);
+ if (err)
+ err = clk_program_gpc_pll(g, clk, 0);
+ }
+
+ /* Just report the error without restoring the PLL, since dvfs could
+ * already have changed the voltage even when it returns an error. */
+ if (err)
+ gk20a_err(dev_from_gk20a(g),
+ "failed to set pll to %d", freq);
+ return err;
+}
+
+static int gk20a_clk_export_set_rate(void *data, unsigned long *rate)
+{
+ u32 old_freq;
+ int ret = -ENODATA;
+ struct gk20a *g = data;
+ struct clk_gk20a *clk = &g->clk;
+
+ if (rate) {
+ mutex_lock(&clk->clk_mutex);
+ old_freq = clk->gpc_pll.freq;
+ ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
+ if (!ret && clk->gpc_pll.enabled)
+ ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq);
+ if (!ret)
+ *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
+ mutex_unlock(&clk->clk_mutex);
+ }
+ return ret;
+}
+
+static int gk20a_clk_export_enable(void *data)
+{
+ int ret;
+ struct gk20a *g = data;
+ struct clk_gk20a *clk = &g->clk;
+
+ mutex_lock(&clk->clk_mutex);
+ ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
+ mutex_unlock(&clk->clk_mutex);
+ return ret;
+}
+
+static void gk20a_clk_export_disable(void *data)
+{
+ struct gk20a *g = data;
+ struct clk_gk20a *clk = &g->clk;
+
+ mutex_lock(&clk->clk_mutex);
+ if (g->clk.clk_hw_on)
+ clk_disable_gpcpll(g, 1);
+ mutex_unlock(&clk->clk_mutex);
+}
+
+static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state)
+{
+ struct gk20a *g = data;
+ struct clk_gk20a *clk = &g->clk;
+
+ mutex_lock(&clk->clk_mutex);
+ if (state)
+ *state = clk->gpc_pll.enabled;
+ if (rate)
+ *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
+ mutex_unlock(&clk->clk_mutex);
+}
+
+static struct tegra_clk_export_ops gk20a_clk_export_ops = {
+ .init = gk20a_clk_export_init,
+ .enable = gk20a_clk_export_enable,
+ .disable = gk20a_clk_export_disable,
+ .set_rate = gk20a_clk_export_set_rate,
+};
+
+static int gk20a_clk_register_export_ops(struct gk20a *g)
+{
+ int ret;
+ struct clk *c;
+
+ if (gk20a_clk_export_ops.data)
+ return 0;
+
+ gk20a_clk_export_ops.data = (void *)g;
+ c = g->clk.tegra_clk;
+ if (!c || !clk_get_parent(c))
+ return -ENOSYS;
+
+ ret = tegra_clk_register_export_ops(clk_get_parent(c),
+ &gk20a_clk_export_ops);
+
+ return ret;
+}
+
+int gk20a_init_clk_support(struct gk20a *g)
+{
+ struct clk_gk20a *clk = &g->clk;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ clk->g = g;
+
+ err = gk20a_init_clk_reset_enable_hw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_clk_setup_sw(g);
+ if (err)
+ return err;
+
+ mutex_lock(&clk->clk_mutex);
+ clk->clk_hw_on = true;
+
+ err = gk20a_init_clk_setup_hw(g);
+ mutex_unlock(&clk->clk_mutex);
+ if (err)
+ return err;
+
+ err = gk20a_clk_register_export_ops(g);
+ if (err)
+ return err;
+
+ /* FIXME: this effectively prevents host level clock gating */
+ err = clk_enable(g->clk.tegra_clk);
+ if (err)
+ return err;
+
+ /* The prev call may not enable PLL if gbus is unbalanced - force it */
+ mutex_lock(&clk->clk_mutex);
+ err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
+ mutex_unlock(&clk->clk_mutex);
+ if (err)
+ return err;
+
+ return err;
+}
+
+unsigned long gk20a_clk_get_rate(struct gk20a *g)
+{
+ struct clk_gk20a *clk = &g->clk;
+ return rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
+}
+
+long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate)
+{
+ /* make sure the clock is available */
+ if (!gk20a_clk_get(g))
+ return rate;
+
+ return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
+}
+
+int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate)
+{
+ return clk_set_rate(g->clk.tegra_clk, rate);
+}
+
+int gk20a_suspend_clk_support(struct gk20a *g)
+{
+ int ret;
+
+ clk_disable(g->clk.tegra_clk);
+
+ /* The prev call may not disable PLL if gbus is unbalanced - force it */
+ mutex_lock(&g->clk.clk_mutex);
+ ret = clk_disable_gpcpll(g, 1);
+ g->clk.clk_hw_on = false;
+ mutex_unlock(&g->clk.clk_mutex);
+ return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+static int rate_get(void *data, u64 *val)
+{
+ struct gk20a *g = (struct gk20a *)data;
+ *val = (u64)gk20a_clk_get_rate(g);
+ return 0;
+}
+static int rate_set(void *data, u64 val)
+{
+ struct gk20a *g = (struct gk20a *)data;
+ return gk20a_clk_set_rate(g, (u32)val);
+}
+DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
+
+static int pll_reg_show(struct seq_file *s, void *data)
+{
+ struct gk20a *g = s->private;
+ u32 reg, m, n, pl, f;
+
+ mutex_lock(&g->clk.clk_mutex);
+ if (!g->clk.clk_hw_on) {
+ seq_printf(s, "gk20a powered down - no access to registers\n");
+ mutex_unlock(&g->clk.clk_mutex);
+ return 0;
+ }
+
+ reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
+ seq_printf(s, "cfg = 0x%x : %s : %s\n", reg,
+ trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
+ trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked");
+
+ reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
+ m = trim_sys_gpcpll_coeff_mdiv_v(reg);
+ n = trim_sys_gpcpll_coeff_ndiv_v(reg);
+ pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
+ f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]);
+ seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
+ seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2);
+ mutex_unlock(&g->clk.clk_mutex);
+ return 0;
+}
+
+static int pll_reg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pll_reg_show, inode->i_private);
+}
+
+static const struct file_operations pll_reg_fops = {
+ .open = pll_reg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
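+/* Read the GPCCLK rate with the on-chip clock counter: the counter runs for
+ * ncycle cycles of the input clock, so the measured frequency (in the same
+ * MHz units as clk_in) is count * clkin / ncycle. */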
+static int monitor_get(void *data, u64 *val)
+{
+ struct gk20a *g = (struct gk20a *)data;
+ struct clk_gk20a *clk = &g->clk;
+ int err;
+
+ u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
+ u32 clkin = clk->gpc_pll.clk_in;
+ u32 count1, count2;
+
+ err = gk20a_busy(g->dev);
+ if (err)
+ return err;
+
+ gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
+ trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
+ gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
+ trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
+ trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
+ trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
+ /* start */
+
+ /* It should take about 8us to finish 100 cycles of the 12MHz input,
+ * but a delay longer than 100us is required here. */
+ gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
+ udelay(2000);
+
+ count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
+ udelay(100);
+ count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
+ *val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle);
+ gk20a_idle(g->dev);
+
+ if (count1 != count2)
+ return -EBUSY;
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
+
+int clk_gk20a_debugfs_init(struct platform_device *dev)
+{
+ struct dentry *d;
+ struct gk20a_platform *platform = platform_get_drvdata(dev);
+ struct gk20a *g = get_gk20a(dev);
+
+ d = debugfs_create_file(
+ "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
+ if (!d)
+ goto err_out;
+
+ d = debugfs_create_file(
+ "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
+ if (!d)
+ goto err_out;
+
+ d = debugfs_create_file(
+ "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
+ if (!d)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ pr_err("%s: Failed to make debugfs node\n", __func__);
+ debugfs_remove_recursive(platform->debugfs);
+ return -ENOMEM;
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.h b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
new file mode 100644
index 000000000000..d2665259b0fe
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.h
@@ -0,0 +1,94 @@
+/*
+ * drivers/video/tegra/host/gk20a/clk_gk20a.h
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011 - 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _NVHOST_CLK_GK20A_H_
+#define _NVHOST_CLK_GK20A_H_
+
+#include <linux/mutex.h>
+
+#define GPUFREQ_TABLE_END ~(u32)1
+enum {
+ /* only one PLL for gk20a */
+ GK20A_GPC_PLL = 0,
+};
+
+struct pll {
+ u32 id;
+ u32 clk_in; /* MHz */
+ u32 M;
+ u32 N;
+ u32 PL;
+ u32 freq; /* MHz */
+ bool enabled;
+};
+
+struct pll_parms {
+ u32 min_freq, max_freq; /* MHz */
+ u32 min_vco, max_vco; /* MHz */
+ u32 min_u, max_u; /* MHz */
+ u32 min_M, max_M;
+ u32 min_N, max_N;
+ u32 min_PL, max_PL;
+};
+
+struct clk_gk20a {
+ struct gk20a *g;
+ struct clk *tegra_clk;
+ struct pll gpc_pll;
+ u32 pll_delay; /* default PLL settle time */
+ struct mutex clk_mutex;
+ bool sw_ready;
+ bool clk_hw_on;
+};
+
+struct gpufreq_table_data {
+ unsigned int index;
+ unsigned int frequency; /* MHz */
+};
+
+struct gpufreq_table_data *tegra_gpufreq_table_get(void);
+
+unsigned int tegra_gpufreq_table_size_get(void);
+
+int gk20a_init_clk_support(struct gk20a *g);
+
+unsigned long gk20a_clk_get_rate(struct gk20a *g);
+int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate);
+int gk20a_suspend_clk_support(struct gk20a *g);
+struct clk *gk20a_clk_get(struct gk20a *g);
+long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate);
+
+extern struct pll_parms gpc_pll_params;
+
+#define KHZ 1000
+#define MHZ 1000000
+
+static inline unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
+{
+ /* convert the MHz gpc2clk frequency to the Hz GPU (gpcclk) frequency */
+ return (rate * MHZ) / 2;
+}
+static inline unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
+{
+ /* convert the Hz GPU (gpcclk) frequency to the MHz gpc2clk frequency */
+ return (rate * 2) / MHZ;
+}
+
+#endif /* _NVHOST_CLK_GK20A_H_ */
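The two inline helpers above hide both a unit change (MHz vs. Hz) and the factor of two between gpc2clk and the GPU clock. Below is a standalone, illustrative round trip; the 1404 MHz figure is made up, and the helper bodies are copied from the header so the snippet compiles on its own:

    #include <stdio.h>

    #define MHZ 1000000

    /* MHz gpc2clk -> Hz GPU (gpcclk) rate, as in the header above */
    static unsigned long rate_gpc2clk_to_gpu(unsigned long rate)
    {
            return (rate * MHZ) / 2;
    }

    /* Hz GPU (gpcclk) rate -> MHz gpc2clk, as in the header above */
    static unsigned long rate_gpu_to_gpc2clk(unsigned long rate)
    {
            return (rate * 2) / MHZ;
    }

    int main(void)
    {
            unsigned long gpu_hz = rate_gpc2clk_to_gpu(1404);        /* 702000000 */
            unsigned long gpc2clk_mhz = rate_gpu_to_gpc2clk(gpu_hz); /* 1404 */

            printf("%lu Hz <-> %lu MHz\n", gpu_hz, gpc2clk_mhz);
            return 0;
    }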
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
new file mode 100644
index 000000000000..9128959f60a7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -0,0 +1,240 @@
+/*
+ * GK20A Ctrl
+ *
+ * Copyright (c) 2011-2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/highmem.h>
+#include <linux/cdev.h>
+#include <linux/nvhost_gpu_ioctl.h>
+
+#include "gk20a.h"
+
+int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
+{
+ int err;
+ struct gk20a *g;
+
+ gk20a_dbg_fn("");
+
+ g = container_of(inode->i_cdev,
+ struct gk20a, ctrl.cdev);
+
+ filp->private_data = g->dev;
+
+ err = gk20a_get_client(g);
+ if (err) {
+ gk20a_dbg_fn("fail to get channel!");
+ return err;
+ }
+
+ return 0;
+}
+
+int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
+{
+ struct platform_device *dev = filp->private_data;
+
+ gk20a_dbg_fn("");
+
+ gk20a_put_client(get_gk20a(dev));
+ return 0;
+}
+
+static long
+gk20a_ctrl_ioctl_gpu_characteristics(
+ struct gk20a *g,
+ struct nvhost_gpu_get_characteristics *request)
+{
+ struct nvhost_gpu_characteristics *pgpu = &g->gpu_characteristics;
+ long err = 0;
+
+ if (request->gpu_characteristics_buf_size > 0) {
+ size_t write_size = sizeof(*pgpu);
+
+ if (write_size > request->gpu_characteristics_buf_size)
+ write_size = request->gpu_characteristics_buf_size;
+
+ /* copy_to_user() returns the number of uncopied bytes, not an errno */
+ if (copy_to_user((void __user *)(uintptr_t)
+ request->gpu_characteristics_buf_addr,
+ pgpu, write_size))
+ err = -EFAULT;
+ }
+
+ if (err == 0)
+ request->gpu_characteristics_buf_size = sizeof(*pgpu);
+
+ return err;
+}
+
+long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct platform_device *dev = filp->private_data;
+ struct gk20a *g = get_gk20a(dev);
+ struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
+ struct nvhost_gpu_zcull_get_info_args *get_info_args;
+ struct nvhost_gpu_zbc_set_table_args *set_table_args;
+ struct nvhost_gpu_zbc_query_table_args *query_table_args;
+ u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE];
+ struct gr_zcull_info *zcull_info;
+ struct zbc_entry *zbc_val;
+ struct zbc_query_params *zbc_tbl;
+ int i, err = 0;
+
+ gk20a_dbg_fn("");
+
+ if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) ||
+ (_IOC_NR(cmd) == 0) ||
+ (_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST))
+ return -EFAULT;
+
+ BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE);
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+ }
+
+ if (!g->gr.sw_ready) {
+ err = gk20a_busy(g->dev);
+ if (err)
+ return err;
+
+ gk20a_idle(g->dev);
+ }
+
+ switch (cmd) {
+ case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
+ get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf;
+
+ get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);
+
+ break;
+ case NVHOST_GPU_IOCTL_ZCULL_GET_INFO:
+ get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf;
+
+ memset(get_info_args, 0, sizeof(struct nvhost_gpu_zcull_get_info_args));
+
+ zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL);
+ if (zcull_info == NULL)
+ return -ENOMEM;
+
+ err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
+ if (err) {
+ kfree(zcull_info);
+ break;
+ }
+
+ get_info_args->width_align_pixels = zcull_info->width_align_pixels;
+ get_info_args->height_align_pixels = zcull_info->height_align_pixels;
+ get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
+ get_info_args->aliquot_total = zcull_info->aliquot_total;
+ get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
+ get_info_args->region_header_size = zcull_info->region_header_size;
+ get_info_args->subregion_header_size = zcull_info->subregion_header_size;
+ get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
+ get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
+ get_info_args->subregion_count = zcull_info->subregion_count;
+
+ kfree(zcull_info);
+ break;
+ case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
+ set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;
+
+ zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
+ if (zbc_val == NULL)
+ return -ENOMEM;
+
+ zbc_val->format = set_table_args->format;
+ zbc_val->type = set_table_args->type;
+
+ switch (zbc_val->type) {
+ case GK20A_ZBC_TYPE_COLOR:
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ zbc_val->color_ds[i] = set_table_args->color_ds[i];
+ zbc_val->color_l2[i] = set_table_args->color_l2[i];
+ }
+ break;
+ case GK20A_ZBC_TYPE_DEPTH:
+ zbc_val->depth = set_table_args->depth;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ gk20a_busy(dev);
+ err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val);
+ gk20a_idle(dev);
+ }
+
+ kfree(zbc_val);
+ break;
+ case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE:
+ query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf;
+
+ zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL);
+ if (zbc_tbl == NULL)
+ return -ENOMEM;
+
+ zbc_tbl->type = query_table_args->type;
+ zbc_tbl->index_size = query_table_args->index_size;
+
+ err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl);
+
+ if (!err) {
+ switch (zbc_tbl->type) {
+ case GK20A_ZBC_TYPE_COLOR:
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
+ query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
+ }
+ break;
+ case GK20A_ZBC_TYPE_DEPTH:
+ query_table_args->depth = zbc_tbl->depth;
+ break;
+ case GK20A_ZBC_TYPE_INVALID:
+ query_table_args->index_size = zbc_tbl->index_size;
+ break;
+ default:
+ err = -EINVAL;
+ }
+ if (!err) {
+ query_table_args->format = zbc_tbl->format;
+ query_table_args->ref_cnt = zbc_tbl->ref_cnt;
+ }
+ }
+
+ kfree(zbc_tbl);
+ break;
+
+ case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS:
+ err = gk20a_ctrl_ioctl_gpu_characteristics(
+ g, (struct nvhost_gpu_get_characteristics *)buf);
+ break;
+
+ default:
+ gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
+ err = -ENOTTY;
+ break;
+ }
+
+ if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
+ copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
+ err = -EFAULT;
+
+ return err;
+}
+
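For context, the GET_CHARACTERISTICS handler above supports the usual two-step size query: calling with gpu_characteristics_buf_size set to 0 makes the driver report the size it can fill, and a second call with a real buffer copies the data. The user-space sketch below assumes the nvhost_gpu_ioctl.h uapi header and a /dev/nvhost-ctrl-gpu device node, neither of which is defined in this file:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/nvhost_gpu_ioctl.h>

    int main(void)
    {
            struct nvhost_gpu_get_characteristics req;
            struct nvhost_gpu_characteristics chars;
            int fd = open("/dev/nvhost-ctrl-gpu", O_RDONLY);  /* node name is an assumption */

            if (fd < 0)
                    return 1;

            memset(&req, 0, sizeof(req));
            /* size query: buf_size == 0, the driver fills in the real size */
            if (ioctl(fd, NVHOST_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0) {
                    req.gpu_characteristics_buf_size = sizeof(chars);
                    req.gpu_characteristics_buf_addr = (uintptr_t)&chars;
                    if (ioctl(fd, NVHOST_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0)
                            printf("characteristics: %llu bytes\n",
                                   (unsigned long long)req.gpu_characteristics_buf_size);
            }
            close(fd);
            return 0;
    }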
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
new file mode 100644
index 000000000000..ac9c253ec696
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
@@ -0,0 +1,28 @@
+/*
+ * drivers/video/tegra/host/gk20a/gk20a_ctrl.h
+ *
+ * GK20A Ctrl
+ *
+ * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _NVHOST_GK20A_CTRL_H_
+#define _NVHOST_GK20A_CTRL_H_
+
+int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
+int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
+long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+
+#endif /* _NVHOST_GK20A_CTRL_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
new file mode 100644
index 000000000000..da7d733e3fd0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -0,0 +1,699 @@
+/*
+ * Tegra GK20A GPU Debugger/Profiler Driver
+ *
+ * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+#include <linux/uaccess.h>
+#include <linux/nvhost.h>
+#include <linux/nvhost_dbg_gpu_ioctl.h>
+
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "dbg_gpu_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_therm_gk20a.h"
+
+struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a = {
+ .exec_reg_ops = exec_regops_gk20a,
+};
+
+/* silly allocator - just increment session id */
+static atomic_t session_id = ATOMIC_INIT(0);
+static int generate_session_id(void)
+{
+ return atomic_add_return(1, &session_id);
+}
+
+static int alloc_session(struct dbg_session_gk20a **_dbg_s)
+{
+ struct dbg_session_gk20a *dbg_s;
+ *_dbg_s = NULL;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ dbg_s = kzalloc(sizeof(*dbg_s), GFP_KERNEL);
+ if (!dbg_s)
+ return -ENOMEM;
+
+ dbg_s->id = generate_session_id();
+ dbg_s->ops = &dbg_gpu_session_ops_gk20a;
+ *_dbg_s = dbg_s;
+ return 0;
+}
+
+int gk20a_dbg_gpu_do_dev_open(struct inode *inode, struct file *filp, bool is_profiler)
+{
+ struct dbg_session_gk20a *dbg_session;
+ struct gk20a *g;
+
+ struct platform_device *pdev;
+ struct device *dev;
+
+ int err;
+
+ if (!is_profiler)
+ g = container_of(inode->i_cdev,
+ struct gk20a, dbg.cdev);
+ else
+ g = container_of(inode->i_cdev,
+ struct gk20a, prof.cdev);
+ pdev = g->dev;
+ dev = &pdev->dev;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));
+
+ err = alloc_session(&dbg_session);
+ if (err)
+ return err;
+
+ filp->private_data = dbg_session;
+ dbg_session->pdev = pdev;
+ dbg_session->dev = dev;
+ dbg_session->g = g;
+ dbg_session->is_profiler = is_profiler;
+ dbg_session->is_pg_disabled = false;
+
+ INIT_LIST_HEAD(&dbg_session->dbg_s_list_node);
+ init_waitqueue_head(&dbg_session->dbg_events.wait_queue);
+ dbg_session->dbg_events.events_enabled = false;
+ dbg_session->dbg_events.num_pending_events = 0;
+
+ return 0;
+}
+
+/* Used in scenarios where the debugger session can take just the per-channel
+ * lock for performance, but the profiler session must take the per-GPU lock
+ * since it might not have an associated channel. */
+static void gk20a_dbg_session_mutex_lock(struct dbg_session_gk20a *dbg_s)
+{
+ if (dbg_s->is_profiler)
+ mutex_lock(&dbg_s->g->dbg_sessions_lock);
+ else
+ mutex_lock(&dbg_s->ch->dbg_s_lock);
+}
+
+static void gk20a_dbg_session_mutex_unlock(struct dbg_session_gk20a *dbg_s)
+{
+ if (dbg_s->is_profiler)
+ mutex_unlock(&dbg_s->g->dbg_sessions_lock);
+ else
+ mutex_unlock(&dbg_s->ch->dbg_s_lock);
+}
+
+static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ gk20a_dbg_session_mutex_lock(dbg_s);
+
+ dbg_s->dbg_events.events_enabled = true;
+ dbg_s->dbg_events.num_pending_events = 0;
+
+ gk20a_dbg_session_mutex_unlock(dbg_s);
+}
+
+static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ gk20a_dbg_session_mutex_lock(dbg_s);
+
+ dbg_s->dbg_events.events_enabled = false;
+ dbg_s->dbg_events.num_pending_events = 0;
+
+ gk20a_dbg_session_mutex_unlock(dbg_s);
+}
+
+static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ gk20a_dbg_session_mutex_lock(dbg_s);
+
+ if (dbg_s->dbg_events.events_enabled &&
+ dbg_s->dbg_events.num_pending_events > 0)
+ dbg_s->dbg_events.num_pending_events--;
+
+ gk20a_dbg_session_mutex_unlock(dbg_s);
+}
+
+static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_events_ctrl_args *args)
+{
+ int ret = 0;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd);
+
+ if (!dbg_s->ch) {
+ gk20a_err(dev_from_gk20a(dbg_s->g),
+ "no channel bound to dbg session\n");
+ return -EINVAL;
+ }
+
+ switch (args->cmd) {
+ case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
+ gk20a_dbg_gpu_events_enable(dbg_s);
+ break;
+
+ case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_DISABLE:
+ gk20a_dbg_gpu_events_disable(dbg_s);
+ break;
+
+ case NVHOST_DBG_GPU_EVENTS_CTRL_CMD_CLEAR:
+ gk20a_dbg_gpu_events_clear(dbg_s);
+ break;
+
+ default:
+ gk20a_err(dev_from_gk20a(dbg_s->g),
+ "unrecognized dbg gpu events ctrl cmd: 0x%x",
+ args->cmd);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
+{
+ unsigned int mask = 0;
+ struct dbg_session_gk20a *dbg_s = filep->private_data;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ poll_wait(filep, &dbg_s->dbg_events.wait_queue, wait);
+
+ gk20a_dbg_session_mutex_lock(dbg_s);
+
+ if (dbg_s->dbg_events.events_enabled &&
+ dbg_s->dbg_events.num_pending_events > 0) {
+ gk20a_dbg(gpu_dbg_gpu_dbg, "found pending event on session id %d",
+ dbg_s->id);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
+ dbg_s->dbg_events.num_pending_events);
+ mask = (POLLPRI | POLLIN);
+ }
+
+ gk20a_dbg_session_mutex_unlock(dbg_s);
+
+ return mask;
+}
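The poll handler above only reports readiness; a client still has to block on the file descriptor itself. The fragment below is a minimal user-space sketch of that side, assuming a debugger device fd obtained elsewhere (the device node name is not part of this file):

    #include <poll.h>

    /* returns 1 when a debug event is pending, 0 on timeout, <0 on error */
    static int wait_for_dbg_event(int dbg_fd, int timeout_ms)
    {
            struct pollfd pfd = { .fd = dbg_fd, .events = POLLPRI | POLLIN };
            int ret = poll(&pfd, 1, timeout_ms);

            if (ret <= 0)
                    return ret;
            /* gk20a_dbg_gpu_dev_poll() sets POLLPRI | POLLIN when events are
             * enabled and at least one is pending */
            return (pfd.revents & (POLLPRI | POLLIN)) ? 1 : 0;
    }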
+
+int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+ return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
+}
+
+int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+ return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */);
+}
+
+void gk20a_dbg_gpu_post_events(struct channel_gk20a *ch)
+{
+ struct dbg_session_gk20a *dbg_s;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ /* guard against the session list being modified */
+ mutex_lock(&ch->dbg_s_lock);
+
+ list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
+ if (dbg_s->dbg_events.events_enabled) {
+ gk20a_dbg(gpu_dbg_gpu_dbg, "posting event on session id %d",
+ dbg_s->id);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "%d events pending",
+ dbg_s->dbg_events.num_pending_events);
+
+ dbg_s->dbg_events.num_pending_events++;
+
+ wake_up_interruptible_all(&dbg_s->dbg_events.wait_queue);
+ }
+ }
+
+ mutex_unlock(&ch->dbg_s_lock);
+}
+
+
+static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
+ __u32 powermode);
+
+static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s)
+{
+ struct channel_gk20a *ch_gk20a = dbg_s->ch;
+ struct gk20a *g = dbg_s->g;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ /* wasn't bound to start with ? */
+ if (!ch_gk20a) {
+ gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "not bound already?");
+ return -ENODEV;
+ }
+
+ mutex_lock(&g->dbg_sessions_lock);
+ mutex_lock(&ch_gk20a->dbg_s_lock);
+
+ --g->dbg_sessions;
+
+ /* Powergate enable is called here because a dbg_session that issued
+  * the powergate disable ioctl may be killed without ever issuing the
+  * matching powergate enable ioctl.
+  */
+ dbg_set_powergate(dbg_s, NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE);
+
+ dbg_s->ch = NULL;
+ fput(dbg_s->ch_f);
+ dbg_s->ch_f = NULL;
+
+ list_del_init(&dbg_s->dbg_s_list_node);
+
+ mutex_unlock(&ch_gk20a->dbg_s_lock);
+ mutex_unlock(&g->dbg_sessions_lock);
+
+ return 0;
+}
+
+int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
+{
+ struct dbg_session_gk20a *dbg_s = filp->private_data;
+
+ gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
+
+ /* unbind if it was bound; free the session either way so an
+  * unbound session is not leaked on release */
+ if (dbg_s->ch)
+ dbg_unbind_channel_gk20a(dbg_s);
+
+ kfree(dbg_s);
+ return 0;
+}
+
+static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_bind_channel_args *args)
+{
+ struct file *f;
+ struct gk20a *g;
+ struct channel_gk20a *ch;
+
+ gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
+ dev_name(dbg_s->dev), args->channel_fd);
+
+ if (args->channel_fd == ~0)
+ return dbg_unbind_channel_gk20a(dbg_s);
+
+ /* get_file_channel takes this reference too, but it also releases it;
+  * by holding our own reference here we keep the channel file from
+  * disappearing while the debugger session is active */
+ f = fget(args->channel_fd);
+ if (!f)
+ return -ENODEV;
+
+ ch = gk20a_get_channel_from_file(args->channel_fd);
+ if (!ch) {
+ gk20a_dbg_fn("no channel found for fd");
+ fput(f);
+ return -EINVAL;
+ }
+
+ g = dbg_s->g;
+ gk20a_dbg_fn("%s hwchid=%d", dev_name(dbg_s->dev), ch->hw_chid);
+
+ mutex_lock(&g->dbg_sessions_lock);
+ mutex_lock(&ch->dbg_s_lock);
+
+ dbg_s->ch_f = f;
+ dbg_s->ch = ch;
+ list_add(&dbg_s->dbg_s_list_node, &dbg_s->ch->dbg_s_list);
+
+ g->dbg_sessions++;
+
+ mutex_unlock(&ch->dbg_s_lock);
+ mutex_unlock(&g->dbg_sessions_lock);
+ return 0;
+}
+
+static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_exec_reg_ops_args *args);
+
+static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_powergate_args *args);
+
+static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);
+
+long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct dbg_session_gk20a *dbg_s = filp->private_data;
+ struct gk20a *g = get_gk20a(dbg_s->pdev);
+ u8 buf[NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE];
+ int err = 0;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ if ((_IOC_TYPE(cmd) != NVHOST_DBG_GPU_IOCTL_MAGIC) ||
+ (_IOC_NR(cmd) == 0) ||
+ (_IOC_NR(cmd) > NVHOST_DBG_GPU_IOCTL_LAST))
+ return -EFAULT;
+
+ BUG_ON(_IOC_SIZE(cmd) > NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE);
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+ }
+
+ if (!g->gr.sw_ready) {
+ err = gk20a_busy(g->dev);
+ if (err)
+ return err;
+
+ gk20a_idle(g->dev);
+ }
+
+ switch (cmd) {
+ case NVHOST_DBG_GPU_IOCTL_BIND_CHANNEL:
+ err = dbg_bind_channel_gk20a(dbg_s,
+ (struct nvhost_dbg_gpu_bind_channel_args *)buf);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+ break;
+
+ case NVHOST_DBG_GPU_IOCTL_REG_OPS:
+ err = nvhost_ioctl_channel_reg_ops(dbg_s,
+ (struct nvhost_dbg_gpu_exec_reg_ops_args *)buf);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+ break;
+
+ case NVHOST_DBG_GPU_IOCTL_POWERGATE:
+ err = nvhost_ioctl_powergate_gk20a(dbg_s,
+ (struct nvhost_dbg_gpu_powergate_args *)buf);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+ break;
+
+ case NVHOST_DBG_GPU_IOCTL_EVENTS_CTRL:
+ err = gk20a_dbg_gpu_events_ctrl(dbg_s,
+ (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
+ break;
+
+ case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
+ err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
+ (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
+ break;
+
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "unrecognized dbg gpu ioctl cmd: 0x%x",
+ cmd);
+ err = -ENOTTY;
+ break;
+ }
+
+ if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
+ copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
+ err = -EFAULT;
+
+ return err;
+}
+
+/* In order to perform a context relative op the context has
+ * to be created already... which would imply that the
+ * context switch mechanism has already been put in place.
+ * So by the time we perform such an operation it should always
+ * be possible to query for the appropriate context offsets, etc.
+ *
+ * But note: while the dbg_gpu bind requires a channel fd,
+ * it doesn't require an allocated gr/compute obj at that point...
+ */
+static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
+ struct gr_gk20a *gr)
+{
+ bool initialized;
+
+ mutex_lock(&gr->ctx_mutex);
+ initialized = gr->ctx_vars.golden_image_initialized;
+ mutex_unlock(&gr->ctx_mutex);
+
+ return initialized;
+}
+
+static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_exec_reg_ops_args *args)
+{
+ int err;
+ struct device *dev = dbg_s->dev;
+ struct gk20a *g = get_gk20a(dbg_s->pdev);
+ struct nvhost_dbg_gpu_reg_op *ops;
+ u64 ops_size = sizeof(ops[0]) * args->num_ops;
+
+ gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
+
+ if (!dbg_s->ops) {
+ gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
+ return -EINVAL;
+ }
+
+ if (!dbg_s->is_profiler && !dbg_s->ch) {
+ gk20a_err(dev, "bind a channel before regops for a debugging session");
+ return -EINVAL;
+ }
+
+ /* be sure that ctx info is in place */
+ if (!gr_context_info_available(dbg_s, &g->gr)) {
+ gk20a_err(dev, "gr context data not available\n");
+ return -ENODEV;
+ }
+
+ ops = kzalloc(ops_size, GFP_KERNEL);
+ if (!ops) {
+ gk20a_err(dev, "Allocating memory failed!");
+ return -ENOMEM;
+ }
+
+ gk20a_dbg_fn("Copying regops from userspace");
+
+ if (copy_from_user(ops, (void *)(uintptr_t)args->ops, ops_size)) {
+ dev_err(dev, "copy_from_user failed!");
+ err = -EFAULT;
+ goto clean_up;
+ }
+
+ /* since exec_reg_ops sends methods to the ucode, it must take the
+ * global gpu lock to protect against mixing methods from debug sessions
+ * on other channels */
+ mutex_lock(&g->dbg_sessions_lock);
+
+ err = dbg_s->ops->exec_reg_ops(dbg_s, ops, args->num_ops);
+
+ mutex_unlock(&g->dbg_sessions_lock);
+
+ if (err) {
+ gk20a_err(dev, "dbg regops failed");
+ goto clean_up;
+ }
+
+ gk20a_dbg_fn("Copying result to userspace");
+
+ if (copy_to_user((void *)(uintptr_t)args->ops, ops, ops_size)) {
+ dev_err(dev, "copy_to_user failed!");
+ err = -EFAULT;
+ goto clean_up;
+ }
+ err = 0;
+ /* fall through so ops is freed on the success path as well */
+ clean_up:
+ kfree(ops);
+ return err;
+}
+
+static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
+ __u32 powermode)
+{
+ int err = 0;
+ struct gk20a *g = get_gk20a(dbg_s->pdev);
+
+ /* This function must be called with g->dbg_sessions_lock held */
+
+ gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %d",
+ dev_name(dbg_s->dev), powermode);
+
+ switch (powermode) {
+ case NVHOST_DBG_GPU_POWERGATE_MODE_DISABLE:
+ /* save off current powergate, clk state.
+ * set gpu module's can_powergate = 0.
+ * set gpu module's clk to max.
+ * while *a* debug session is active there will be no power or
+ * clocking state changes allowed from mainline code (but they
+ * should be saved).
+ */
+ /* Only disable powergating if this dbg_session hasn't already
+  * requested it and the global powergating_disabled_refcount is zero.
+  */
+
+ if ((dbg_s->is_pg_disabled == false) &&
+ (g->dbg_powergating_disabled_refcount++ == 0)) {
+
+ gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module busy");
+ gk20a_busy(g->dev);
+ gk20a_channel_busy(dbg_s->pdev);
+
+ g->ops.clock_gating.slcg_gr_load_gating_prod(g,
+ false);
+ g->ops.clock_gating.slcg_perf_load_gating_prod(g,
+ false);
+ gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A);
+
+ g->elcg_enabled = false;
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
+
+ gk20a_pmu_disable_elpg(g);
+ }
+
+ dbg_s->is_pg_disabled = true;
+ break;
+
+ case NVHOST_DBG_GPU_POWERGATE_MODE_ENABLE:
+ /* restore (can) powergate, clk state */
+ /* release pending exceptions to fault/be handled as usual */
+ /*TBD: ordering of these? */
+
+ /* Re-enable powergating only when no other session still wants it
+  * disabled and this dbg session had requested the disable via ioctl.
+  */
+ if (dbg_s->is_pg_disabled &&
+ --g->dbg_powergating_disabled_refcount == 0) {
+
+ g->elcg_enabled = true;
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
+ gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A);
+
+ g->ops.clock_gating.slcg_gr_load_gating_prod(g,
+ g->slcg_enabled);
+ g->ops.clock_gating.slcg_perf_load_gating_prod(g,
+ g->slcg_enabled);
+
+ gk20a_pmu_enable_elpg(g);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
+ gk20a_channel_idle(dbg_s->pdev);
+ gk20a_idle(g->dev);
+ }
+
+ dbg_s->is_pg_disabled = false;
+ break;
+
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "unrecognized dbg gpu powergate mode: 0x%x",
+ powermode);
+ err = -ENOTTY;
+ break;
+ }
+
+ return err;
+}
+
+static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_powergate_args *args)
+{
+ int err;
+ struct gk20a *g = get_gk20a(dbg_s->pdev);
+ gk20a_dbg_fn("%s powergate mode = %d",
+ dev_name(dbg_s->dev), args->mode);
+
+ mutex_lock(&g->dbg_sessions_lock);
+ err = dbg_set_powergate(dbg_s, args->mode);
+ mutex_unlock(&g->dbg_sessions_lock);
+ return err;
+}
+
+static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
+{
+ int err;
+ struct gk20a *g = get_gk20a(dbg_s->pdev);
+ struct channel_gk20a *ch_gk20a;
+
+ gk20a_dbg_fn("%s smpc ctxsw mode = %d",
+ dev_name(dbg_s->dev), args->mode);
+
+ /* Take the global lock, since we'll be doing global regops */
+ mutex_lock(&g->dbg_sessions_lock);
+
+ ch_gk20a = dbg_s->ch;
+
+ if (!ch_gk20a) {
+ gk20a_err(dev_from_gk20a(dbg_s->g),
+ "no bound channel for smpc ctxsw mode update\n");
+ err = -EINVAL;
+ goto clean_up;
+ }
+
+ err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
+ args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+ if (err) {
+ gk20a_err(dev_from_gk20a(dbg_s->g),
+ "error (%d) during smpc ctxsw mode update\n", err);
+ goto clean_up;
+ }
+ /* The following regops are a hack/workaround (WAR) to make up for the
+  * fact that we just scribbled into the ctxsw image without really
+  * knowing whether it had already been swapped in/out once or not, etc.
+  */
+ {
+ struct nvhost_dbg_gpu_reg_op ops[4];
+ int i;
+ for (i = 0; i < ARRAY_SIZE(ops); i++) {
+ ops[i].op = NVHOST_DBG_GPU_REG_OP_WRITE_32;
+ ops[i].type = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
+ ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
+ ops[i].value_hi = 0;
+ ops[i].and_n_mask_lo = 0;
+ ops[i].and_n_mask_hi = 0;
+ }
+ /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
+ ops[0].offset = 0x00419e08;
+ ops[0].value_lo = 0x1d;
+
+ /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
+ ops[1].offset = 0x00419e58;
+ ops[1].value_lo = 0x1;
+
+ /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
+ ops[2].offset = 0x00419e68;
+ ops[2].value_lo = 0xaaaa;
+
+ /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
+ ops[3].offset = 0x00419f40;
+ ops[3].value_lo = 0x18;
+
+ err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
+ }
+
+ clean_up:
+ mutex_unlock(&g->dbg_sessions_lock);
+ return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
new file mode 100644
index 000000000000..49827608436c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -0,0 +1,83 @@
+/*
+ * Tegra GK20A GPU Debugger Driver
+ *
+ * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __DBG_GPU_GK20A_H_
+#define __DBG_GPU_GK20A_H_
+#include <linux/poll.h>
+
+/* module debug driver interface */
+int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
+int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
+long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
+
+/* used by profiler driver interface */
+int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
+
+/* used by the interrupt handler to post events */
+void gk20a_dbg_gpu_post_events(struct channel_gk20a *fault_ch);
+
+struct dbg_gpu_session_ops {
+ int (*exec_reg_ops)(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *ops,
+ u64 num_ops);
+};
+
+struct dbg_gpu_session_events {
+ wait_queue_head_t wait_queue;
+ bool events_enabled;
+ int num_pending_events;
+};
+
+struct dbg_session_gk20a {
+ /* dbg session id used for trace/prints */
+ int id;
+
+ /* profiler session, if any */
+ bool is_profiler;
+
+ /* power enabled or disabled */
+ bool is_pg_disabled;
+
+ /*
+ * There can be different versions of the whitelists for the global
+ * and per-context register sets, as well as for the debugger and
+ * profiler interfaces.
+ */
+ struct regops_whitelist *global;
+ struct regops_whitelist *per_context;
+
+ /* gpu module vagaries */
+ struct device *dev;
+ struct platform_device *pdev;
+ struct gk20a *g;
+
+ /* bound channel, if any */
+ struct file *ch_f;
+ struct channel_gk20a *ch;
+
+ /* session operations */
+ struct dbg_gpu_session_ops *ops;
+
+ /* event support */
+ struct dbg_gpu_session_events dbg_events;
+ struct list_head dbg_s_list_node;
+};
+
+extern struct dbg_gpu_session_ops dbg_gpu_session_ops_gk20a;
+
+#endif /* __DBG_GPU_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
new file mode 100644
index 000000000000..c5b6953cfd02
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -0,0 +1,295 @@
+/*
+ * drivers/video/tegra/host/t20/debug_gk20a.c
+ *
+ * Copyright (C) 2011-2014 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/nvhost.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/io.h>
+
+#include "gk20a.h"
+#include "debug_gk20a.h"
+
+#include "hw_ram_gk20a.h"
+#include "hw_fifo_gk20a.h"
+#include "hw_ccsr_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+
+unsigned int gk20a_debug_trace_cmdbuf;
+struct platform_device *gk20a_device;
+
+struct gk20a_debug_output {
+ void (*fn)(void *ctx, const char *str, size_t len);
+ void *ctx;
+ char buf[256];
+};
+
+static const char * const ccsr_chan_status_str[] = {
+ "idle",
+ "pending",
+ "pending_ctx_reload",
+ "pending_acquire",
+ "pending_acq_ctx_reload",
+ "on_pbdma",
+ "on_pbdma_and_eng",
+ "on_eng",
+ "on_eng_pending_acquire",
+ "on_eng_pending",
+ "on_pbdma_ctx_reload",
+ "on_pbdma_and_eng_ctx_reload",
+ "on_eng_ctx_reload",
+ "on_eng_pending_ctx_reload",
+ "on_eng_pending_acq_ctx_reload",
+};
+
+static const char * const chan_status_str[] = {
+ "invalid",
+ "valid",
+ "chsw_load",
+ "chsw_save",
+ "chsw_switch",
+};
+
+static const char * const ctx_status_str[] = {
+ "invalid",
+ "valid",
+ NULL,
+ NULL,
+ NULL,
+ "ctxsw_load",
+ "ctxsw_save",
+ "ctxsw_switch",
+};
+
+static inline void gk20a_debug_write_printk(void *ctx, const char *str,
+ size_t len)
+{
+ pr_info("%s", str);
+}
+
+static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
+ size_t len)
+{
+ seq_write((struct seq_file *)ctx, str, len);
+}
+
+void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+ va_end(args);
+ o->fn(o->ctx, o->buf, len);
+}
+
+static void gk20a_debug_show_channel(struct gk20a *g,
+ struct gk20a_debug_output *o,
+ struct channel_gk20a *ch)
+{
+ u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
+ u32 status = ccsr_channel_status_v(channel);
+ u32 syncpointa, syncpointb;
+ void *inst_ptr;
+
+ inst_ptr = ch->inst_block.cpuva;
+ if (!inst_ptr)
+ return;
+
+ syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
+ syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
+
+ gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
+ ch->g->dev->name,
+ ch->pid);
+ gk20a_debug_output(o, "%s in use %s %s\n",
+ ccsr_channel_enable_v(channel) ? "" : "not",
+ ccsr_chan_status_str[status],
+ ccsr_channel_busy_v(channel) ? "busy" : "not busy");
+ gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
+ "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
+ "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
+ (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) +
+ ((u64)gk20a_mem_rd32(inst_ptr,
+ ram_fc_pb_top_level_get_hi_w()) << 32ULL),
+ (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) +
+ ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL),
+ (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) +
+ ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL),
+ (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) +
+ ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL),
+ gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()),
+ gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
+ syncpointa,
+ syncpointb,
+ gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()),
+ gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()),
+ gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()),
+ gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w()));
+
+ if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
+ && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
+ pbdma_syncpointb_wait_switch_en_v()))
+ gk20a_debug_output(o, "Waiting on syncpt %u (%s) val %u\n",
+ pbdma_syncpointb_syncpt_index_v(syncpointb),
+ nvhost_syncpt_get_name(
+ to_platform_device(g->dev->dev.parent),
+ pbdma_syncpointb_syncpt_index_v(syncpointb)),
+ pbdma_syncpointa_payload_v(syncpointa));
+
+ gk20a_debug_output(o, "\n");
+}
+
+void gk20a_debug_show_dump(struct platform_device *pdev,
+ struct gk20a_debug_output *o)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(pdev);
+ struct gk20a *g = platform->g;
+ struct fifo_gk20a *f = &g->fifo;
+ u32 chid;
+ int i;
+
+ gk20a_busy(g->dev);
+ for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) {
+ u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
+ u32 chan_status = fifo_pbdma_status_chan_status_v(status);
+
+ gk20a_debug_output(o, "%s pbdma %d: ", g->dev->name, i);
+ gk20a_debug_output(o,
+ "id: %d (%s), next_id: %d (%s) status: %s\n",
+ fifo_pbdma_status_id_v(status),
+ fifo_pbdma_status_id_type_v(status) ?
+ "tsg" : "channel",
+ fifo_pbdma_status_next_id_v(status),
+ fifo_pbdma_status_next_id_type_v(status) ?
+ "tsg" : "channel",
+ chan_status_str[chan_status]);
+ gk20a_debug_output(o, "PUT: %016llx GET: %016llx "
+ "FETCH: %08x HEADER: %08x\n",
+ (u64)gk20a_readl(g, pbdma_put_r(i)) +
+ ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
+ (u64)gk20a_readl(g, pbdma_get_r(i)) +
+ ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
+ gk20a_readl(g, pbdma_gp_fetch_r(i)),
+ gk20a_readl(g, pbdma_pb_header_r(i)));
+ }
+ gk20a_debug_output(o, "\n");
+
+ for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
+ u32 status = gk20a_readl(g, fifo_engine_status_r(i));
+ u32 ctx_status = fifo_engine_status_ctx_status_v(status);
+
+ gk20a_debug_output(o, "%s eng %d: ", g->dev->name, i);
+ gk20a_debug_output(o,
+ "id: %d (%s), next_id: %d (%s), ctx: %s ",
+ fifo_engine_status_id_v(status),
+ fifo_engine_status_id_type_v(status) ?
+ "tsg" : "channel",
+ fifo_engine_status_next_id_v(status),
+ fifo_engine_status_next_id_type_v(status) ?
+ "tsg" : "channel",
+ ctx_status_str[ctx_status]);
+
+ if (fifo_engine_status_faulted_v(status))
+ gk20a_debug_output(o, "faulted ");
+ if (fifo_engine_status_engine_v(status))
+ gk20a_debug_output(o, "busy ");
+ gk20a_debug_output(o, "\n");
+ }
+ gk20a_debug_output(o, "\n");
+
+ for (chid = 0; chid < f->num_channels; chid++) {
+ if (f->channel[chid].in_use) {
+ struct channel_gk20a *gpu_ch = &f->channel[chid];
+ gk20a_debug_show_channel(g, o, gpu_ch);
+ }
+ }
+ gk20a_idle(g->dev);
+}
+
+void gk20a_debug_dump(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(pdev);
+ struct gk20a_debug_output o = {
+ .fn = gk20a_debug_write_printk
+ };
+
+ if (platform->dump_platform_dependencies)
+ platform->dump_platform_dependencies(pdev);
+
+ gk20a_debug_show_dump(pdev, &o);
+}
+
+void gk20a_debug_dump_device(struct platform_device *pdev)
+{
+ struct gk20a_debug_output o = {
+ .fn = gk20a_debug_write_printk
+ };
+
+ /* Dump the first device if no info is provided */
+ if (!pdev && gk20a_device)
+ pdev = gk20a_device;
+
+ gk20a_debug_show_dump(pdev, &o);
+}
+EXPORT_SYMBOL(gk20a_debug_dump_device);
+
+static int gk20a_debug_show(struct seq_file *s, void *unused)
+{
+ struct platform_device *pdev = s->private;
+ struct gk20a_debug_output o = {
+ .fn = gk20a_debug_write_to_seqfile,
+ .ctx = s,
+ };
+ gk20a_debug_show_dump(pdev, &o);
+ return 0;
+}
+
+static int gk20a_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, gk20a_debug_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_debug_fops = {
+ .open = gk20a_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void gk20a_debug_init(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+
+ /* Store the first device */
+ if (!gk20a_device)
+ gk20a_device = pdev;
+
+ platform->debugfs = debugfs_create_dir(pdev->name, NULL);
+
+ debugfs_create_file("status", S_IRUGO, platform->debugfs,
+ pdev, &gk20a_debug_fops);
+ debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs,
+ &gk20a_debug_trace_cmdbuf);
+
+#if defined(GK20A_DEBUG)
+ debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs,
+ &gk20a_dbg_mask);
+ debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR, platform->debugfs,
+ &gk20a_dbg_ftrace);
+#endif
+}
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
new file mode 100644
index 000000000000..cd2e09c31f91
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
@@ -0,0 +1,25 @@
+/*
+ * GK20A Debug functionality
+ *
+ * Copyright (C) 2011-2014 NVIDIA CORPORATION. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _DEBUG_GK20A_H_
+#define _DEBUG_GK20A_H_
+
+extern unsigned int gk20a_debug_trace_cmdbuf;
+
+void gk20a_debug_dump(struct platform_device *pdev);
+void gk20a_debug_init(struct platform_device *pdev);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
new file mode 100644
index 000000000000..52f2db4d9e28
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -0,0 +1,37 @@
+/*
+ * GK20A memory interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a.h"
+#include "kind_gk20a.h"
+#include "hw_mc_gk20a.h"
+
+static void fb_gk20a_reset(struct gk20a *g)
+{
+ gk20a_dbg_info("reset gk20a fb");
+
+ gk20a_reset(g, mc_enable_pfb_enabled_f()
+ | mc_enable_l2_enabled_f()
+ | mc_enable_xbar_enabled_f()
+ | mc_enable_hub_enabled_f());
+}
+
+void gk20a_init_fb(struct gpu_ops *gops)
+{
+ gops->fb.reset = fb_gk20a_reset;
+ gk20a_init_uncompressed_kind_map();
+ gk20a_init_kind_attr();
+}
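The init function above illustrates the HAL pattern used throughout this driver: each unit exposes an init hook that installs its function pointers into the shared gpu_ops table. The sketch below is illustrative only; apart from gk20a_init_fb() itself, the wrapper function and the other units mentioned in the comment are assumptions, not code from this patch:

    #include "gk20a.h"      /* struct gpu_ops */
    #include "fb_gk20a.h"

    /* hypothetical helper showing how a chip HAL might populate its ops */
    static int example_init_hal_fb(struct gpu_ops *gops)
    {
            gk20a_init_fb(gops);  /* installs fb_gk20a_reset as gops->fb.reset */
            /* other units (fifo, gr, ltc, ...) install their ops the same way */
            return 0;
    }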
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h
new file mode 100644
index 000000000000..34c21c9b2e13
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h
@@ -0,0 +1,21 @@
+/*
+ * GK20A FB
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVHOST_GK20A_FB
+#define _NVHOST_GK20A_FB
+struct gk20a;
+
+void gk20a_init_fb(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
new file mode 100644
index 000000000000..5575b995a100
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -0,0 +1,1836 @@
+/*
+ * drivers/video/tegra/host/gk20a/fifo_gk20a.c
+ *
+ * GK20A Graphics FIFO (gr host)
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <trace/events/gk20a.h>
+#include <linux/dma-mapping.h>
+#include <linux/nvhost.h>
+
+#include "gk20a.h"
+#include "debug_gk20a.h"
+#include "hw_fifo_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+#include "hw_ccsr_gk20a.h"
+#include "hw_ram_gk20a.h"
+#include "hw_proj_gk20a.h"
+#include "hw_top_gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_gr_gk20a.h"
+
+static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+ u32 hw_chid, bool add,
+ bool wait_for_finish);
+static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work);
+
+/*
+ * Link engine IDs to MMU IDs and vice versa.
+ */
+
+static inline u32 gk20a_engine_id_to_mmu_id(u32 engine_id)
+{
+ switch (engine_id) {
+ case ENGINE_GR_GK20A:
+ return 0x00;
+ case ENGINE_CE2_GK20A:
+ return 0x1b;
+ default:
+ return ~0;
+ }
+}
+
+static inline u32 gk20a_mmu_id_to_engine_id(u32 mmu_id)
+{
+ switch (mmu_id) {
+ case 0x00:
+ return ENGINE_GR_GK20A;
+ case 0x1b:
+ return ENGINE_CE2_GK20A;
+ default:
+ return ~0;
+ }
+}
+
+
+static int init_engine_info(struct fifo_gk20a *f)
+{
+ struct gk20a *g = f->g;
+ struct device *d = dev_from_gk20a(g);
+ struct fifo_engine_info_gk20a *gr_info;
+ const u32 gr_sw_id = ENGINE_GR_GK20A;
+ u32 i;
+ u32 max_info_entries = top_device_info__size_1_v();
+
+ gk20a_dbg_fn("");
+
+ /* all we really care about finding is the graphics entry;
+  * especially early on in simulation the device info may claim more */
+ f->num_engines = 1;
+
+ gr_info = f->engine_info + gr_sw_id;
+
+ gr_info->sw_id = gr_sw_id;
+ gr_info->name = "gr";
+ gr_info->dev_info_id = top_device_info_type_enum_graphics_v();
+ gr_info->mmu_fault_id = fifo_intr_mmu_fault_eng_id_graphics_v();
+ gr_info->runlist_id = ~0;
+ gr_info->pbdma_id = ~0;
+ gr_info->engine_id = ~0;
+
+ for (i = 0; i < max_info_entries; i++) {
+ u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
+ u32 entry = top_device_info_entry_v(table_entry);
+ u32 engine_enum = top_device_info_type_enum_v(table_entry);
+ u32 table_entry2 = 0;
+
+ if (entry == top_device_info_entry_not_valid_v())
+ continue;
+
+ if (top_device_info_chain_v(table_entry) ==
+ top_device_info_chain_enable_v()) {
+
+ table_entry2 = gk20a_readl(f->g,
+ top_device_info_r(++i));
+
+ engine_enum = top_device_info_type_enum_v(table_entry2);
+ }
+
+ /* we only care about GR engine here */
+ if (entry == top_device_info_entry_enum_v() &&
+ engine_enum == gr_info->dev_info_id) {
+ int pbdma_id;
+ u32 runlist_bit;
+
+ gr_info->runlist_id =
+ top_device_info_runlist_enum_v(table_entry);
+ gk20a_dbg_info("gr info: runlist_id %d", gr_info->runlist_id);
+
+ gr_info->engine_id =
+ top_device_info_engine_enum_v(table_entry);
+ gk20a_dbg_info("gr info: engine_id %d", gr_info->engine_id);
+
+ runlist_bit = 1 << gr_info->runlist_id;
+
+ for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
+ gk20a_dbg_info("gr info: pbdma_map[%d]=%d",
+ pbdma_id, f->pbdma_map[pbdma_id]);
+ if (f->pbdma_map[pbdma_id] & runlist_bit)
+ break;
+ }
+
+ if (pbdma_id == f->num_pbdma) {
+ gk20a_err(d, "busted pbmda map");
+ return -EINVAL;
+ }
+ gr_info->pbdma_id = pbdma_id;
+
+ break;
+ }
+ }
+
+ if (gr_info->runlist_id == ~0) {
+ gk20a_err(d, "busted device info");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void gk20a_remove_fifo_support(struct fifo_gk20a *f)
+{
+ struct gk20a *g = f->g;
+ struct device *d = dev_from_gk20a(g);
+ struct fifo_engine_info_gk20a *engine_info;
+ struct fifo_runlist_info_gk20a *runlist;
+ u32 runlist_id;
+ u32 i;
+
+ gk20a_dbg_fn("");
+
+ if (f->channel) {
+ int c;
+ for (c = 0; c < f->num_channels; c++) {
+ if (f->channel[c].remove_support)
+ f->channel[c].remove_support(f->channel+c);
+ }
+ kfree(f->channel);
+ }
+ if (f->userd.gpu_va)
+ gk20a_gmmu_unmap(&g->mm.bar1.vm,
+ f->userd.gpu_va,
+ f->userd.size,
+ gk20a_mem_flag_none);
+
+ if (f->userd.sgt)
+ gk20a_free_sgtable(&f->userd.sgt);
+
+ if (f->userd.cpuva)
+ dma_free_coherent(d,
+ f->userd_total_size,
+ f->userd.cpuva,
+ f->userd.iova);
+ f->userd.cpuva = NULL;
+ f->userd.iova = 0;
+
+ engine_info = f->engine_info + ENGINE_GR_GK20A;
+ runlist_id = engine_info->runlist_id;
+ runlist = &f->runlist_info[runlist_id];
+
+ for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+ if (runlist->mem[i].cpuva)
+ dma_free_coherent(d,
+ runlist->mem[i].size,
+ runlist->mem[i].cpuva,
+ runlist->mem[i].iova);
+ runlist->mem[i].cpuva = NULL;
+ runlist->mem[i].iova = 0;
+ }
+
+ kfree(runlist->active_channels);
+
+ kfree(f->runlist_info);
+ kfree(f->pbdma_map);
+ kfree(f->engine_info);
+}
+
+/* reads info from hardware and fills in pbdma exception info record */
+static inline void get_exception_pbdma_info(
+ struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info)
+{
+ struct fifo_pbdma_exception_info_gk20a *e =
+ &eng_info->pbdma_exception_info;
+
+ u32 pbdma_status_r = e->status_r = gk20a_readl(g,
+ fifo_pbdma_status_r(eng_info->pbdma_id));
+ e->id = fifo_pbdma_status_id_v(pbdma_status_r); /* vs. id_hw_v()? */
+ e->id_is_chid = fifo_pbdma_status_id_type_v(pbdma_status_r) ==
+ fifo_pbdma_status_id_type_chid_v();
+ e->chan_status_v = fifo_pbdma_status_chan_status_v(pbdma_status_r);
+ e->next_id_is_chid =
+ fifo_pbdma_status_next_id_type_v(pbdma_status_r) ==
+ fifo_pbdma_status_next_id_type_chid_v();
+ e->next_id = fifo_pbdma_status_next_id_v(pbdma_status_r);
+ e->chsw_in_progress =
+ fifo_pbdma_status_chsw_v(pbdma_status_r) ==
+ fifo_pbdma_status_chsw_in_progress_v();
+}
+
+static void fifo_pbdma_exception_status(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info)
+{
+ struct fifo_pbdma_exception_info_gk20a *e;
+ get_exception_pbdma_info(g, eng_info);
+ e = &eng_info->pbdma_exception_info;
+
+ gk20a_dbg_fn("pbdma_id %d, "
+ "id_type %s, id %d, chan_status %d, "
+ "next_id_type %s, next_id %d, "
+ "chsw_in_progress %d",
+ eng_info->pbdma_id,
+ e->id_is_chid ? "chid" : "tsgid", e->id, e->chan_status_v,
+ e->next_id_is_chid ? "chid" : "tsgid", e->next_id,
+ e->chsw_in_progress);
+}
+
+/* reads info from hardware and fills in engine exception info record */
+static inline void get_exception_engine_info(
+ struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info)
+{
+ struct fifo_engine_exception_info_gk20a *e =
+ &eng_info->engine_exception_info;
+ u32 engine_status_r = e->status_r =
+ gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
+ e->id = fifo_engine_status_id_v(engine_status_r); /* vs. id_hw_v()? */
+ e->id_is_chid = fifo_engine_status_id_type_v(engine_status_r) ==
+ fifo_engine_status_id_type_chid_v();
+ e->ctx_status_v = fifo_engine_status_ctx_status_v(engine_status_r);
+ e->faulted =
+ fifo_engine_status_faulted_v(engine_status_r) ==
+ fifo_engine_status_faulted_true_v();
+ e->idle =
+ fifo_engine_status_engine_v(engine_status_r) ==
+ fifo_engine_status_engine_idle_v();
+ e->ctxsw_in_progress =
+ fifo_engine_status_ctxsw_v(engine_status_r) ==
+ fifo_engine_status_ctxsw_in_progress_v();
+}
+
+static void fifo_engine_exception_status(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info)
+{
+ struct fifo_engine_exception_info_gk20a *e;
+ get_exception_engine_info(g, eng_info);
+ e = &eng_info->engine_exception_info;
+
+ gk20a_dbg_fn("engine_id %d, id_type %s, id %d, ctx_status %d, "
+ "faulted %d, idle %d, ctxsw_in_progress %d, ",
+ eng_info->engine_id, e->id_is_chid ? "chid" : "tsgid",
+ e->id, e->ctx_status_v,
+ e->faulted, e->idle, e->ctxsw_in_progress);
+}
+
+static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+{
+ struct fifo_engine_info_gk20a *engine_info;
+ struct fifo_runlist_info_gk20a *runlist;
+ struct device *d = dev_from_gk20a(g);
+ u32 runlist_id;
+ u32 i;
+ u64 runlist_size;
+
+ gk20a_dbg_fn("");
+
+ f->max_runlists = fifo_eng_runlist_base__size_1_v();
+ f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
+ f->max_runlists, GFP_KERNEL);
+ if (!f->runlist_info)
+ goto clean_up;
+
+ engine_info = f->engine_info + ENGINE_GR_GK20A;
+ runlist_id = engine_info->runlist_id;
+ runlist = &f->runlist_info[runlist_id];
+
+ runlist->active_channels =
+ kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+ GFP_KERNEL);
+ if (!runlist->active_channels)
+ goto clean_up_runlist_info;
+
+ runlist_size = ram_rl_entry_size_v() * f->num_channels;
+ for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+ dma_addr_t iova;
+
+ runlist->mem[i].cpuva =
+ dma_alloc_coherent(d,
+ runlist_size,
+ &iova,
+ GFP_KERNEL);
+ if (!runlist->mem[i].cpuva) {
+ dev_err(d, "memory allocation failed\n");
+ goto clean_up_runlist;
+ }
+ runlist->mem[i].iova = iova;
+ runlist->mem[i].size = runlist_size;
+ }
+ mutex_init(&runlist->mutex);
+ init_waitqueue_head(&runlist->runlist_wq);
+
+ /* None of the buffers is pinned if this value doesn't change.
+    Otherwise, one of them (cur_buffer) must have been pinned. */
+ runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+clean_up_runlist:
+ for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+ if (runlist->mem[i].cpuva)
+ dma_free_coherent(d,
+ runlist->mem[i].size,
+ runlist->mem[i].cpuva,
+ runlist->mem[i].iova);
+ runlist->mem[i].cpuva = NULL;
+ runlist->mem[i].iova = 0;
+ }
+
+ kfree(runlist->active_channels);
+ runlist->active_channels = NULL;
+
+clean_up_runlist_info:
+ kfree(f->runlist_info);
+ f->runlist_info = NULL;
+
+clean_up:
+ gk20a_dbg_fn("fail");
+ return -ENOMEM;
+}
+
+#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
+
+int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
+{
+ u32 intr_stall;
+ u32 mask;
+ u32 timeout;
+ int i;
+
+ gk20a_dbg_fn("");
+ /* enable pmc pfifo */
+ gk20a_reset(g, mc_enable_pfifo_enabled_f()
+ | mc_enable_ce2_enabled_f());
+
+ /* enable pbdma */
+ mask = 0;
+ for (i = 0; i < proj_host_num_pbdma_v(); ++i)
+ mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i);
+ gk20a_writel(g, mc_enable_pb_r(), mask);
+
+ /* enable pfifo interrupt */
+ gk20a_writel(g, fifo_intr_0_r(), 0xFFFFFFFF);
+ gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF);
+ gk20a_writel(g, fifo_intr_en_1_r(), 0x80000000);
+
+ /* enable pbdma interrupt */
+ mask = 0;
+ for (i = 0; i < proj_host_num_pbdma_v(); i++) {
+ intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
+ intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
+ gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
+ gk20a_writel(g, pbdma_intr_0_r(i), 0xFFFFFFFF);
+ gk20a_writel(g, pbdma_intr_en_0_r(i),
+ (~0) & ~pbdma_intr_en_0_lbreq_enabled_f());
+ gk20a_writel(g, pbdma_intr_1_r(i), 0xFFFFFFFF);
+ gk20a_writel(g, pbdma_intr_en_1_r(i), 0xFFFFFFFF);
+ }
+
+ /* TBD: apply overrides */
+
+ /* TBD: BLCG prod */
+
+ /* reset runlist interrupts */
+ gk20a_writel(g, fifo_intr_runlist_r(), ~0);
+
+ /* TBD: do we need those? */
+ timeout = gk20a_readl(g, fifo_fb_timeout_r());
+ timeout = set_field(timeout, fifo_fb_timeout_period_m(),
+ fifo_fb_timeout_period_max_f());
+ gk20a_writel(g, fifo_fb_timeout_r(), timeout);
+
+ if (tegra_platform_is_silicon()) {
+ timeout = gk20a_readl(g, fifo_pb_timeout_r());
+ timeout &= ~fifo_pb_timeout_detection_enabled_f();
+ gk20a_writel(g, fifo_pb_timeout_r(), timeout);
+ }
+
+ timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US |
+ fifo_eng_timeout_detection_enabled_f();
+ gk20a_writel(g, fifo_eng_timeout_r(), timeout);
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+}
+
+static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f)
+{
+ /* These are all errors which indicate something really wrong
+ * going on in the device. */
+ f->intr.pbdma.device_fatal_0 =
+ pbdma_intr_0_memreq_pending_f() |
+ pbdma_intr_0_memack_timeout_pending_f() |
+ pbdma_intr_0_memack_extra_pending_f() |
+ pbdma_intr_0_memdat_timeout_pending_f() |
+ pbdma_intr_0_memdat_extra_pending_f() |
+ pbdma_intr_0_memflush_pending_f() |
+ pbdma_intr_0_memop_pending_f() |
+ pbdma_intr_0_lbconnect_pending_f() |
+ pbdma_intr_0_lbreq_pending_f() |
+ pbdma_intr_0_lback_timeout_pending_f() |
+ pbdma_intr_0_lback_extra_pending_f() |
+ pbdma_intr_0_lbdat_timeout_pending_f() |
+ pbdma_intr_0_lbdat_extra_pending_f() |
+ pbdma_intr_0_xbarconnect_pending_f() |
+ pbdma_intr_0_pri_pending_f();
+
+	/* These are data parsing or framing errors, or other errors which
+	 * can be recovered from with intervention... or by just resetting
+	 * the channel. */
+ f->intr.pbdma.channel_fatal_0 =
+ pbdma_intr_0_gpfifo_pending_f() |
+ pbdma_intr_0_gpptr_pending_f() |
+ pbdma_intr_0_gpentry_pending_f() |
+ pbdma_intr_0_gpcrc_pending_f() |
+ pbdma_intr_0_pbptr_pending_f() |
+ pbdma_intr_0_pbentry_pending_f() |
+ pbdma_intr_0_pbcrc_pending_f() |
+ pbdma_intr_0_method_pending_f() |
+ pbdma_intr_0_methodcrc_pending_f() |
+ pbdma_intr_0_pbseg_pending_f() |
+ pbdma_intr_0_signature_pending_f();
+
+ /* Can be used for sw-methods, or represents
+ * a recoverable timeout. */
+ f->intr.pbdma.restartable_0 =
+ pbdma_intr_0_device_pending_f() |
+ pbdma_intr_0_acquire_pending_f();
+}
+
+static int gk20a_init_fifo_setup_sw(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct device *d = dev_from_gk20a(g);
+ int chid, i, err = 0;
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ if (f->sw_ready) {
+ gk20a_dbg_fn("skip init");
+ return 0;
+ }
+
+ f->g = g;
+
+ INIT_WORK(&f->fault_restore_thread,
+ gk20a_fifo_handle_mmu_fault_thread);
+ mutex_init(&f->intr.isr.mutex);
+ gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */
+
+ f->num_channels = ccsr_channel__size_1_v();
+ f->num_pbdma = proj_host_num_pbdma_v();
+ f->max_engines = ENGINE_INVAL_GK20A;
+
+ f->userd_entry_size = 1 << ram_userd_base_shift_v();
+ f->userd_total_size = f->userd_entry_size * f->num_channels;
+
+ f->userd.cpuva = dma_alloc_coherent(d,
+ f->userd_total_size,
+ &iova,
+ GFP_KERNEL);
+ if (!f->userd.cpuva) {
+ dev_err(d, "memory allocation failed\n");
+ goto clean_up;
+ }
+
+ f->userd.iova = iova;
+ err = gk20a_get_sgtable(d, &f->userd.sgt,
+ f->userd.cpuva, f->userd.iova,
+ f->userd_total_size);
+ if (err) {
+ dev_err(d, "failed to create sg table\n");
+ goto clean_up;
+ }
+
+ /* bar1 va */
+ f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm,
+ &f->userd.sgt,
+ f->userd_total_size,
+ 0, /* flags */
+ gk20a_mem_flag_none);
+ if (!f->userd.gpu_va) {
+ dev_err(d, "gmmu mapping failed\n");
+ goto clean_up;
+ }
+
+ gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
+
+ f->userd.size = f->userd_total_size;
+
+ f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
+ GFP_KERNEL);
+ f->pbdma_map = kzalloc(f->num_pbdma * sizeof(*f->pbdma_map),
+ GFP_KERNEL);
+ f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
+ GFP_KERNEL);
+
+ if (!(f->channel && f->pbdma_map && f->engine_info)) {
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ /* pbdma map needs to be in place before calling engine info init */
+ for (i = 0; i < f->num_pbdma; ++i)
+ f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
+
+ init_engine_info(f);
+
+ init_runlist(g, f);
+
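+	/* Carve the contiguous userd block into per-channel slices, recording
+	 * the cpu, SMMU iova and bar1 gpu views of each slice. */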
+ for (chid = 0; chid < f->num_channels; chid++) {
+ f->channel[chid].userd_cpu_va =
+ f->userd.cpuva + chid * f->userd_entry_size;
+ f->channel[chid].userd_iova =
+ NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
+ + chid * f->userd_entry_size;
+ f->channel[chid].userd_gpu_va =
+ f->userd.gpu_va + chid * f->userd_entry_size;
+
+ gk20a_init_channel_support(g, chid);
+ }
+ mutex_init(&f->ch_inuse_mutex);
+
+ f->remove_support = gk20a_remove_fifo_support;
+
+ f->deferred_reset_pending = false;
+ mutex_init(&f->deferred_reset_mutex);
+
+ f->sw_ready = true;
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+clean_up:
+ gk20a_dbg_fn("fail");
+ if (f->userd.gpu_va)
+ gk20a_gmmu_unmap(&g->mm.bar1.vm,
+ f->userd.gpu_va,
+ f->userd.size,
+ gk20a_mem_flag_none);
+ if (f->userd.sgt)
+ gk20a_free_sgtable(&f->userd.sgt);
+ if (f->userd.cpuva)
+ dma_free_coherent(d,
+ f->userd_total_size,
+ f->userd.cpuva,
+ f->userd.iova);
+ f->userd.cpuva = NULL;
+ f->userd.iova = 0;
+
+ memset(&f->userd, 0, sizeof(struct userd_desc));
+
+ kfree(f->channel);
+ f->channel = NULL;
+ kfree(f->pbdma_map);
+ f->pbdma_map = NULL;
+ kfree(f->engine_info);
+ f->engine_info = NULL;
+
+ return err;
+}
+
+static void gk20a_fifo_handle_runlist_event(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_runlist_info_gk20a *runlist;
+ unsigned long runlist_event;
+ u32 runlist_id;
+
+ runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
+ gk20a_writel(g, fifo_intr_runlist_r(), runlist_event);
+
+ for_each_set_bit(runlist_id, &runlist_event, f->max_runlists) {
+ runlist = &f->runlist_info[runlist_id];
+ wake_up(&runlist->runlist_wq);
+ }
+
+}
+
+static int gk20a_init_fifo_setup_hw(struct gk20a *g)
+{
+ struct fifo_gk20a *f = &g->fifo;
+
+ gk20a_dbg_fn("");
+
+ /* test write, read through bar1 @ userd region before
+ * turning on the snooping */
+ {
+ struct fifo_gk20a *f = &g->fifo;
+ u32 v, v1 = 0x33, v2 = 0x55;
+
+ u32 bar1_vaddr = f->userd.gpu_va;
+ volatile u32 *cpu_vaddr = f->userd.cpuva;
+
+ gk20a_dbg_info("test bar1 @ vaddr 0x%x",
+ bar1_vaddr);
+
+ v = gk20a_bar1_readl(g, bar1_vaddr);
+
+ *cpu_vaddr = v1;
+ smp_mb();
+
+ if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
+ gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+ return -EINVAL;
+ }
+
+ gk20a_bar1_writel(g, bar1_vaddr, v2);
+
+ if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
+ gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+ return -EINVAL;
+ }
+
+ /* is it visible to the cpu? */
+ if (*cpu_vaddr != v2) {
+ gk20a_err(dev_from_gk20a(g),
+ "cpu didn't see bar1 write @ %p!",
+ cpu_vaddr);
+ }
+
+ /* put it back */
+ gk20a_bar1_writel(g, bar1_vaddr, v);
+ }
+
+ /*XXX all manner of flushes and caching worries, etc */
+
+ /* set the base for the userd region now */
+ gk20a_writel(g, fifo_bar1_base_r(),
+ fifo_bar1_base_ptr_f(f->userd.gpu_va >> 12) |
+ fifo_bar1_base_valid_true_f());
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+}
+
+int gk20a_init_fifo_support(struct gk20a *g)
+{
+ u32 err;
+
+ err = gk20a_init_fifo_setup_sw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_fifo_setup_hw(g);
+ if (err)
+ return err;
+
+ return err;
+}
+
+static struct channel_gk20a *
+channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
+{
+ int ci;
+ if (unlikely(!f->channel))
+ return NULL;
+ for (ci = 0; ci < f->num_channels; ci++) {
+ struct channel_gk20a *c = f->channel+ci;
+ if (c->inst_block.cpuva &&
+ (inst_ptr == c->inst_block.cpu_pa))
+ return f->channel+ci;
+ }
+ return NULL;
+}
+
+/* fault info/descriptions.
+ * tbd: move to setup
+ */
+static const char * const fault_type_descs[] = {
+ "pde", /*fifo_intr_mmu_fault_info_type_pde_v() == 0 */
+ "pde size",
+ "pte",
+ "va limit viol",
+ "unbound inst",
+ "priv viol",
+ "ro viol",
+ "wo viol",
+ "pitch mask",
+ "work creation",
+ "bad aperture",
+ "compression failure",
+ "bad kind",
+ "region viol",
+ "dual ptes",
+ "poisoned",
+};
+/* engine descriptions */
+static const char * const engine_subid_descs[] = {
+ "gpc",
+ "hub",
+};
+
+static const char * const hub_client_descs[] = {
+ "vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
+ "host cpu nb", "iso", "mmu", "mspdec", "msppp", "msvld",
+ "niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
+ "scc nb", "sec", "ssync", "gr copy", "ce2", "xv", "mmu nb",
+ "msenc", "d falcon", "sked", "a falcon", "n/a",
+};
+
+static const char * const gpc_client_descs[] = {
+ "l1 0", "t1 0", "pe 0",
+ "l1 1", "t1 1", "pe 1",
+ "l1 2", "t1 2", "pe 2",
+ "l1 3", "t1 3", "pe 3",
+ "rast", "gcc", "gpccs",
+ "prop 0", "prop 1", "prop 2", "prop 3",
+ "l1 4", "t1 4", "pe 4",
+ "l1 5", "t1 5", "pe 5",
+ "l1 6", "t1 6", "pe 6",
+ "l1 7", "t1 7", "pe 7",
+ "gpm",
+ "ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
+ "rgg utlb",
+};
+
+/* reads info from hardware and fills in mmu fault info record */
+static inline void get_exception_mmu_fault_info(
+ struct gk20a *g, u32 engine_id,
+ struct fifo_mmu_fault_info_gk20a *f)
+{
+ u32 fault_info_v;
+
+ gk20a_dbg_fn("engine_id %d", engine_id);
+
+ memset(f, 0, sizeof(*f));
+
+ f->fault_info_v = fault_info_v = gk20a_readl(g,
+ fifo_intr_mmu_fault_info_r(engine_id));
+ f->fault_type_v =
+ fifo_intr_mmu_fault_info_type_v(fault_info_v);
+ f->engine_subid_v =
+ fifo_intr_mmu_fault_info_engine_subid_v(fault_info_v);
+ f->client_v = fifo_intr_mmu_fault_info_client_v(fault_info_v);
+
+ BUG_ON(f->fault_type_v >= ARRAY_SIZE(fault_type_descs));
+ f->fault_type_desc = fault_type_descs[f->fault_type_v];
+
+ BUG_ON(f->engine_subid_v >= ARRAY_SIZE(engine_subid_descs));
+ f->engine_subid_desc = engine_subid_descs[f->engine_subid_v];
+
+ if (f->engine_subid_v ==
+ fifo_intr_mmu_fault_info_engine_subid_hub_v()) {
+
+ BUG_ON(f->client_v >= ARRAY_SIZE(hub_client_descs));
+ f->client_desc = hub_client_descs[f->client_v];
+ } else if (f->engine_subid_v ==
+ fifo_intr_mmu_fault_info_engine_subid_gpc_v()) {
+ BUG_ON(f->client_v >= ARRAY_SIZE(gpc_client_descs));
+ f->client_desc = gpc_client_descs[f->client_v];
+ } else {
+ BUG_ON(1);
+ }
+
+ f->fault_hi_v = gk20a_readl(g, fifo_intr_mmu_fault_hi_r(engine_id));
+ f->fault_lo_v = gk20a_readl(g, fifo_intr_mmu_fault_lo_r(engine_id));
+ /* note:ignoring aperture on gk20a... */
+ f->inst_ptr = fifo_intr_mmu_fault_inst_ptr_v(
+ gk20a_readl(g, fifo_intr_mmu_fault_inst_r(engine_id)));
+ /* note: inst_ptr is a 40b phys addr. */
+ f->inst_ptr <<= fifo_intr_mmu_fault_inst_ptr_align_shift_v();
+}
+
+static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
+{
+ gk20a_dbg_fn("");
+
+ if (engine_id == top_device_info_type_enum_graphics_v()) {
+		/* resetting the engine using mc_enable_r() is not enough;
+		 * we do the full init sequence */
+ gk20a_gr_reset(g);
+ }
+ if (engine_id == top_device_info_type_enum_copy0_v())
+ gk20a_reset(g, mc_enable_ce2_m());
+}
+
+static void gk20a_fifo_handle_mmu_fault_thread(struct work_struct *work)
+{
+ struct fifo_gk20a *f = container_of(work, struct fifo_gk20a,
+ fault_restore_thread);
+ struct gk20a *g = f->g;
+ int i;
+
+ /* Reinitialise FECS and GR */
+ gk20a_init_pmu_setup_hw2(g);
+
+ /* It is safe to enable ELPG again. */
+ gk20a_pmu_enable_elpg(g);
+
+ /* Restore the runlist */
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ gk20a_fifo_update_runlist_locked(g, i, ~0, true, true);
+
+ /* unlock all runlists */
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ mutex_unlock(&g->fifo.runlist_info[i].mutex);
+
+}
+
+static void gk20a_fifo_handle_chsw_fault(struct gk20a *g)
+{
+ u32 intr;
+
+ intr = gk20a_readl(g, fifo_intr_chsw_error_r());
+ gk20a_err(dev_from_gk20a(g), "chsw: %08x\n", intr);
+ gk20a_fecs_dump_falcon_stats(g);
+ gk20a_writel(g, fifo_intr_chsw_error_r(), intr);
+}
+
+static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
+{
+ struct device *dev = dev_from_gk20a(g);
+ u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
+ gk20a_err(dev, "dropped mmu fault (0x%08x)", fault_id);
+}
+
+static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
+ struct fifo_mmu_fault_info_gk20a *f, bool fake_fault)
+{
+	/* channel recovery is only deferred if an sm debugger
+	   is attached and MMU debug mode is enabled */
+ if (!gk20a_gr_sm_debugger_attached(g) ||
+ !gk20a_mm_mmu_debug_mode_enabled(g))
+ return false;
+
+ /* if this fault is fake (due to RC recovery), don't defer recovery */
+ if (fake_fault)
+ return false;
+
+ if (engine_id != ENGINE_GR_GK20A ||
+ f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v())
+ return false;
+
+ return true;
+}
+
+void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
+ unsigned long fault_id) {
+ u32 engine_mmu_id;
+ int i;
+
+ /* reset engines */
+ for_each_set_bit(engine_mmu_id, &fault_id, 32) {
+ u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
+ if (engine_id != ~0)
+ gk20a_fifo_reset_engine(g, engine_id);
+ }
+
+ /* CLEAR the runlists. Do not wait for runlist to start as
+ * some engines may not be available right now */
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ gk20a_fifo_update_runlist_locked(g, i, ~0, false, false);
+
+ /* clear interrupt */
+ gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
+
+ /* resume scheduler */
+ gk20a_writel(g, fifo_error_sched_disable_r(),
+ gk20a_readl(g, fifo_error_sched_disable_r()));
+
+ /* Spawn a work to enable PMU and restore runlists */
+ schedule_work(&g->fifo.fault_restore_thread);
+}
+
+static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
+ struct channel_gk20a *ch) {
+ bool verbose = true;
+ if (!ch)
+ return verbose;
+
+ gk20a_err(dev_from_gk20a(g),
+ "channel %d generated a mmu fault",
+ ch->hw_chid);
+ if (ch->error_notifier) {
+ u32 err = ch->error_notifier->info32;
+ if (ch->error_notifier->status == 0xffff) {
+			/* If the error code is already set, this mmu fault
+			 * was triggered as part of recovery from another
+			 * error condition.
+			 * Don't overwrite the error flag. */
+ /* Fifo timeout debug spew is controlled by user */
+ if (err == NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
+ verbose = ch->timeout_debug_dump;
+ } else {
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
+ }
+ }
+ /* mark channel as faulted */
+ ch->has_timedout = true;
+ wmb();
+ /* unblock pending waits */
+ wake_up(&ch->semaphore_wq);
+ wake_up(&ch->notifier_wq);
+ wake_up(&ch->submit_wq);
+ return verbose;
+}
+
+
+static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
+{
+ bool fake_fault;
+ unsigned long fault_id;
+ unsigned long engine_mmu_id;
+ int i;
+ bool verbose = true;
+ gk20a_dbg_fn("");
+
+ g->fifo.deferred_reset_pending = false;
+
+ /* Disable ELPG */
+ gk20a_pmu_disable_elpg(g);
+
+ /* If we have recovery in progress, MMU fault id is invalid */
+ if (g->fifo.mmu_fault_engines) {
+ fault_id = g->fifo.mmu_fault_engines;
+ g->fifo.mmu_fault_engines = 0;
+ fake_fault = true;
+ } else {
+ fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
+ fake_fault = false;
+ gk20a_debug_dump(g->dev);
+ }
+
+	/* lock all runlists. Note that the locks are released in
+	 * gk20a_fifo_handle_mmu_fault_thread() */
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ mutex_lock(&g->fifo.runlist_info[i].mutex);
+
+ /* go through all faulted engines */
+ for_each_set_bit(engine_mmu_id, &fault_id, 32) {
+ /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
+ * engines. Convert engine_mmu_id to engine_id */
+ u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
+ struct fifo_runlist_info_gk20a *runlist = g->fifo.runlist_info;
+ struct fifo_mmu_fault_info_gk20a f;
+ struct channel_gk20a *ch = NULL;
+
+ get_exception_mmu_fault_info(g, engine_mmu_id, &f);
+ trace_gk20a_mmu_fault(f.fault_hi_v,
+ f.fault_lo_v,
+ f.fault_info_v,
+ f.inst_ptr,
+ engine_id,
+ f.engine_subid_desc,
+ f.client_desc,
+ f.fault_type_desc);
+ gk20a_err(dev_from_gk20a(g), "mmu fault on engine %d, "
+ "engine subid %d (%s), client %d (%s), "
+ "addr 0x%08x:0x%08x, type %d (%s), info 0x%08x,"
+ "inst_ptr 0x%llx\n",
+ engine_id,
+ f.engine_subid_v, f.engine_subid_desc,
+ f.client_v, f.client_desc,
+ f.fault_hi_v, f.fault_lo_v,
+ f.fault_type_v, f.fault_type_desc,
+ f.fault_info_v, f.inst_ptr);
+
+ /* get the channel */
+ if (fake_fault) {
+ /* read and parse engine status */
+ u32 status = gk20a_readl(g,
+ fifo_engine_status_r(engine_id));
+ u32 ctx_status =
+ fifo_engine_status_ctx_status_v(status);
+ bool type_ch = fifo_pbdma_status_id_type_v(status) ==
+ fifo_pbdma_status_id_type_chid_v();
+
+ /* use next_id if context load is failing */
+ u32 id = (ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+ fifo_engine_status_next_id_v(status) :
+ fifo_engine_status_id_v(status);
+
+ if (type_ch) {
+ ch = g->fifo.channel + id;
+ } else {
+ gk20a_err(dev_from_gk20a(g), "non-chid type not supported");
+ WARN_ON(1);
+ }
+ } else {
+ /* read channel based on instruction pointer */
+ ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
+ }
+
+ if (ch) {
+ if (ch->in_use) {
+ /* disable the channel from hw and increment
+ * syncpoints */
+ gk20a_disable_channel_no_update(ch);
+
+ /* remove the channel from runlist */
+ clear_bit(ch->hw_chid,
+ runlist->active_channels);
+ }
+
+ /* check if engine reset should be deferred */
+ if (gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) {
+ g->fifo.mmu_fault_engines = fault_id;
+
+ /* handled during channel free */
+ g->fifo.deferred_reset_pending = true;
+ } else
+ verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);
+
+ } else if (f.inst_ptr ==
+ g->mm.bar1.inst_block.cpu_pa) {
+ gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
+ } else if (f.inst_ptr ==
+ g->mm.pmu.inst_block.cpu_pa) {
+ gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
+ } else
+ gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
+ }
+
+ if (g->fifo.deferred_reset_pending) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "sm debugger attached,"
+ " deferring channel recovery to channel free");
+ /* clear interrupt */
+ gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
+ return verbose;
+ }
+
+ /* resetting the engines and clearing the runlists is done in
+ a separate function to allow deferred reset. */
+ fifo_gk20a_finish_mmu_fault_handling(g, fault_id);
+ return verbose;
+}
+
+static void gk20a_fifo_get_faulty_channel(struct gk20a *g, int engine_id,
+ u32 *chid, bool *type_ch)
+{
+ u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
+ u32 ctx_status = fifo_engine_status_ctx_status_v(status);
+
+ *type_ch = fifo_pbdma_status_id_type_v(status) ==
+ fifo_pbdma_status_id_type_chid_v();
+ /* use next_id if context load is failing */
+ *chid = (ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+ fifo_engine_status_next_id_v(status) :
+ fifo_engine_status_id_v(status);
+}
+
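+/* Recover the given engines by forcing an mmu fault on them; the fault is
+ * then serviced as a "fake" fault by gk20a_fifo_handle_mmu_fault(), which
+ * picks the engine set up from g->fifo.mmu_fault_engines. */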
+void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
+ bool verbose)
+{
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ unsigned long delay = GR_IDLE_CHECK_DEFAULT;
+ unsigned long engine_id, i;
+ unsigned long _engine_ids = __engine_ids;
+ unsigned long engine_ids = 0;
+ int ret;
+
+ if (verbose)
+ gk20a_debug_dump(g->dev);
+
+ /* store faulted engines in advance */
+ g->fifo.mmu_fault_engines = 0;
+ for_each_set_bit(engine_id, &_engine_ids, 32) {
+ bool ref_type_ch;
+ int ref_chid;
+ gk20a_fifo_get_faulty_channel(g, engine_id, &ref_chid,
+ &ref_type_ch);
+
+		/* Reset *all* engines that use the
+		 * same channel as the faulty engine */
+ for (i = 0; i < g->fifo.max_engines; i++) {
+ bool type_ch;
+ u32 chid;
+ gk20a_fifo_get_faulty_channel(g, i, &chid, &type_ch);
+ if (ref_type_ch == type_ch && ref_chid == chid) {
+ engine_ids |= BIT(i);
+ g->fifo.mmu_fault_engines |=
+ BIT(gk20a_engine_id_to_mmu_id(i));
+ }
+ }
+
+ }
+
+ /* trigger faults for all bad engines */
+ for_each_set_bit(engine_id, &engine_ids, 32) {
+ if (engine_id > g->fifo.max_engines) {
+ WARN_ON(true);
+ break;
+ }
+
+ gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id),
+ fifo_trigger_mmu_fault_id_f(
+ gk20a_engine_id_to_mmu_id(engine_id)) |
+ fifo_trigger_mmu_fault_enable_f(1));
+ }
+
+ /* Wait for MMU fault to trigger */
+ ret = -EBUSY;
+ do {
+ if (gk20a_readl(g, fifo_intr_0_r()) &
+ fifo_intr_0_mmu_fault_pending_f()) {
+ ret = 0;
+ break;
+ }
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+ } while (time_before(jiffies, end_jiffies) ||
+ !tegra_platform_is_silicon());
+
+ if (ret)
+ gk20a_err(dev_from_gk20a(g), "mmu fault timeout");
+
+ /* release mmu fault trigger */
+ for_each_set_bit(engine_id, &engine_ids, 32)
+ gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
+}
+
+
+static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
+{
+ u32 sched_error;
+ u32 engine_id;
+ int id = -1;
+ bool non_chid = false;
+
+ /* read and reset the scheduler error register */
+ sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
+ gk20a_writel(g, fifo_intr_0_r(), fifo_intr_0_sched_error_reset_f());
+
+ for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) {
+ u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
+ u32 ctx_status = fifo_engine_status_ctx_status_v(status);
+ bool failing_engine;
+
+ /* we are interested in busy engines */
+ failing_engine = fifo_engine_status_engine_v(status) ==
+ fifo_engine_status_engine_busy_v();
+
+ /* ..that are doing context switch */
+ failing_engine = failing_engine &&
+ (ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_switch_v()
+ || ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_save_v()
+ || ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_load_v());
+
+ if (failing_engine) {
+ id = (ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+ fifo_engine_status_next_id_v(status) :
+ fifo_engine_status_id_v(status);
+ non_chid = fifo_pbdma_status_id_type_v(status) !=
+ fifo_pbdma_status_id_type_chid_v();
+ break;
+ }
+ }
+
+ /* could not find the engine - should never happen */
+ if (unlikely(engine_id >= g->fifo.max_engines))
+ goto err;
+
+ if (fifo_intr_sched_error_code_f(sched_error) ==
+ fifo_intr_sched_error_code_ctxsw_timeout_v()) {
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[id];
+
+ if (non_chid) {
+ gk20a_fifo_recover(g, BIT(engine_id), true);
+ goto err;
+ }
+
+ if (gk20a_channel_update_and_check_timeout(ch,
+ GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+ gk20a_err(dev_from_gk20a(g),
+ "fifo sched ctxsw timeout error:"
+ "engine = %u, ch = %d", engine_id, id);
+ gk20a_fifo_recover(g, BIT(engine_id),
+ ch->timeout_debug_dump);
+ } else {
+ gk20a_warn(dev_from_gk20a(g),
+ "fifo is waiting for ctx switch for %d ms,"
+ "ch = %d\n",
+ ch->timeout_accumulated_ms,
+ id);
+ }
+ return ch->timeout_debug_dump;
+ }
+err:
+ gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, engine=%u, %s=%d",
+ sched_error, engine_id, non_chid ? "non-ch" : "ch", id);
+
+ return true;
+}
+
+static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
+{
+ bool print_channel_reset_log = false, reset_engine = false;
+ struct device *dev = dev_from_gk20a(g);
+ u32 handled = 0;
+
+ gk20a_dbg_fn("");
+
+ if (fifo_intr & fifo_intr_0_pio_error_pending_f()) {
+ /* pio mode is unused. this shouldn't happen, ever. */
+ /* should we clear it or just leave it pending? */
+ gk20a_err(dev, "fifo pio error!\n");
+ BUG_ON(1);
+ }
+
+ if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
+ u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
+ gk20a_err(dev, "fifo bind error: 0x%08x", bind_error);
+ print_channel_reset_log = true;
+ handled |= fifo_intr_0_bind_error_pending_f();
+ }
+
+ if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
+ print_channel_reset_log = gk20a_fifo_handle_sched_error(g);
+ handled |= fifo_intr_0_sched_error_pending_f();
+ }
+
+ if (fifo_intr & fifo_intr_0_chsw_error_pending_f()) {
+ gk20a_fifo_handle_chsw_fault(g);
+ handled |= fifo_intr_0_chsw_error_pending_f();
+ }
+
+ if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
+ print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g);
+ reset_engine = true;
+ handled |= fifo_intr_0_mmu_fault_pending_f();
+ }
+
+ if (fifo_intr & fifo_intr_0_dropped_mmu_fault_pending_f()) {
+ gk20a_fifo_handle_dropped_mmu_fault(g);
+ handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
+ }
+
+ print_channel_reset_log = !g->fifo.deferred_reset_pending
+ && print_channel_reset_log;
+
+ if (print_channel_reset_log) {
+ int engine_id;
+ gk20a_err(dev_from_gk20a(g),
+			"channel reset initiated from %s", __func__);
+ for (engine_id = 0;
+ engine_id < g->fifo.max_engines;
+ engine_id++) {
+ gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id,
+ g->fifo.engine_info[engine_id].engine_id);
+ fifo_pbdma_exception_status(g,
+ &g->fifo.engine_info[engine_id]);
+ fifo_engine_exception_status(g,
+ &g->fifo.engine_info[engine_id]);
+ }
+ }
+
+ return handled;
+}
+
+
+static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
+ struct gk20a *g,
+ struct fifo_gk20a *f,
+ u32 pbdma_id)
+{
+ u32 pbdma_intr_0 = gk20a_readl(g, pbdma_intr_0_r(pbdma_id));
+ u32 pbdma_intr_1 = gk20a_readl(g, pbdma_intr_1_r(pbdma_id));
+ u32 handled = 0;
+ bool reset_device = false;
+ bool reset_channel = false;
+
+ gk20a_dbg_fn("");
+
+ gk20a_dbg(gpu_dbg_intr, "pbdma id intr pending %d %08x %08x", pbdma_id,
+ pbdma_intr_0, pbdma_intr_1);
+ if (pbdma_intr_0) {
+ if (f->intr.pbdma.device_fatal_0 & pbdma_intr_0) {
+ dev_err(dev, "unrecoverable device error: "
+ "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
+ reset_device = true;
+ /* TODO: disable pbdma intrs */
+ handled |= f->intr.pbdma.device_fatal_0 & pbdma_intr_0;
+ }
+ if (f->intr.pbdma.channel_fatal_0 & pbdma_intr_0) {
+ dev_warn(dev, "channel error: "
+ "pbdma_intr_0(%d):0x%08x", pbdma_id, pbdma_intr_0);
+ reset_channel = true;
+ /* TODO: clear pbdma channel errors */
+ handled |= f->intr.pbdma.channel_fatal_0 & pbdma_intr_0;
+ }
+ if (f->intr.pbdma.restartable_0 & pbdma_intr_0) {
+ dev_warn(dev, "sw method: %08x %08x",
+ gk20a_readl(g, pbdma_method0_r(0)),
+ gk20a_readl(g, pbdma_method0_r(0)+4));
+ gk20a_writel(g, pbdma_method0_r(0), 0);
+ gk20a_writel(g, pbdma_method0_r(0)+4, 0);
+ handled |= f->intr.pbdma.restartable_0 & pbdma_intr_0;
+ }
+
+ gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
+ }
+
+ /* all intrs in _intr_1 are "host copy engine" related,
+ * which gk20a doesn't have. for now just make them channel fatal. */
+ if (pbdma_intr_1) {
+ dev_err(dev, "channel hce error: pbdma_intr_1(%d): 0x%08x",
+ pbdma_id, pbdma_intr_1);
+ reset_channel = true;
+ gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
+ }
+
+ return handled;
+}
+
+static u32 fifo_channel_isr(struct gk20a *g, u32 fifo_intr)
+{
+ gk20a_channel_semaphore_wakeup(g);
+ return fifo_intr_0_channel_intr_pending_f();
+}
+
+
+static u32 fifo_pbdma_isr(struct gk20a *g, u32 fifo_intr)
+{
+ struct device *dev = dev_from_gk20a(g);
+ struct fifo_gk20a *f = &g->fifo;
+ u32 clear_intr = 0, i;
+ u32 pbdma_pending = gk20a_readl(g, fifo_intr_pbdma_id_r());
+
+ for (i = 0; i < fifo_intr_pbdma_id_status__size_1_v(); i++) {
+ if (fifo_intr_pbdma_id_status_f(pbdma_pending, i)) {
+ gk20a_dbg(gpu_dbg_intr, "pbdma id %d intr pending", i);
+ clear_intr |=
+ gk20a_fifo_handle_pbdma_intr(dev, g, f, i);
+ }
+ }
+ return fifo_intr_0_pbdma_intr_pending_f();
+}
+
+void gk20a_fifo_isr(struct gk20a *g)
+{
+ u32 error_intr_mask =
+ fifo_intr_0_bind_error_pending_f() |
+ fifo_intr_0_sched_error_pending_f() |
+ fifo_intr_0_chsw_error_pending_f() |
+ fifo_intr_0_fb_flush_timeout_pending_f() |
+ fifo_intr_0_dropped_mmu_fault_pending_f() |
+ fifo_intr_0_mmu_fault_pending_f() |
+ fifo_intr_0_lb_error_pending_f() |
+ fifo_intr_0_pio_error_pending_f();
+
+ u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
+ u32 clear_intr = 0;
+
+ /* note we're not actually in an "isr", but rather
+ * in a threaded interrupt context... */
+ mutex_lock(&g->fifo.intr.isr.mutex);
+
+ gk20a_dbg(gpu_dbg_intr, "fifo isr %08x\n", fifo_intr);
+
+ /* handle runlist update */
+ if (fifo_intr & fifo_intr_0_runlist_event_pending_f()) {
+ gk20a_fifo_handle_runlist_event(g);
+ clear_intr |= fifo_intr_0_runlist_event_pending_f();
+ }
+ if (fifo_intr & fifo_intr_0_pbdma_intr_pending_f())
+ clear_intr |= fifo_pbdma_isr(g, fifo_intr);
+
+ if (unlikely(fifo_intr & error_intr_mask))
+ clear_intr = fifo_error_isr(g, fifo_intr);
+
+ gk20a_writel(g, fifo_intr_0_r(), clear_intr);
+
+ mutex_unlock(&g->fifo.intr.isr.mutex);
+
+ return;
+}
+
+void gk20a_fifo_nonstall_isr(struct gk20a *g)
+{
+ u32 fifo_intr = gk20a_readl(g, fifo_intr_0_r());
+ u32 clear_intr = 0;
+
+ gk20a_dbg(gpu_dbg_intr, "fifo nonstall isr %08x\n", fifo_intr);
+
+ if (fifo_intr & fifo_intr_0_channel_intr_pending_f())
+ clear_intr |= fifo_channel_isr(g, fifo_intr);
+
+ gk20a_writel(g, fifo_intr_0_r(), clear_intr);
+
+ return;
+}
+
+int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ unsigned long end_jiffies = jiffies
+ + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 delay = GR_IDLE_CHECK_DEFAULT;
+ u32 ret = 0;
+ u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+ u32 elpg_off = 0;
+ u32 i;
+
+ gk20a_dbg_fn("%d", hw_chid);
+
+ /* we have no idea which runlist we are using. lock all */
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ mutex_lock(&f->runlist_info[i].mutex);
+
+ /* disable elpg if failed to acquire pmu mutex */
+ elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+ if (elpg_off)
+ gk20a_pmu_disable_elpg(g);
+
+ /* issue preempt */
+ gk20a_writel(g, fifo_preempt_r(),
+ fifo_preempt_chid_f(hw_chid) |
+ fifo_preempt_type_channel_f());
+
+ /* wait for preempt */
+ ret = -EBUSY;
+ do {
+ if (!(gk20a_readl(g, fifo_preempt_r()) &
+ fifo_preempt_pending_true_f())) {
+ ret = 0;
+ break;
+ }
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+ } while (time_before(jiffies, end_jiffies) ||
+ !tegra_platform_is_silicon());
+
+ if (ret) {
+ int i;
+ u32 engines = 0;
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[hw_chid];
+
+ gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n",
+ hw_chid);
+
+ /* forcefully reset all busy engines using this channel */
+ for (i = 0; i < g->fifo.max_engines; i++) {
+ u32 status = gk20a_readl(g, fifo_engine_status_r(i));
+ u32 ctx_status =
+ fifo_engine_status_ctx_status_v(status);
+ bool type_ch = fifo_pbdma_status_id_type_v(status) ==
+ fifo_pbdma_status_id_type_chid_v();
+ bool busy = fifo_engine_status_engine_v(status) ==
+ fifo_engine_status_engine_busy_v();
+ u32 id = (ctx_status ==
+ fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+ fifo_engine_status_next_id_v(status) :
+ fifo_engine_status_id_v(status);
+
+ if (type_ch && busy && id == hw_chid)
+ engines |= BIT(i);
+ }
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+ gk20a_fifo_recover(g, engines, true);
+ }
+
+ /* re-enable elpg or release pmu mutex */
+ if (elpg_off)
+ gk20a_pmu_enable_elpg(g);
+ else
+ pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+ for (i = 0; i < g->fifo.max_runlists; i++)
+ mutex_unlock(&f->runlist_info[i].mutex);
+
+ return ret;
+}
+
+int gk20a_fifo_enable_engine_activity(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info)
+{
+ u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+ u32 elpg_off;
+ u32 enable;
+
+ gk20a_dbg_fn("");
+
+ /* disable elpg if failed to acquire pmu mutex */
+ elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+ if (elpg_off)
+ gk20a_pmu_disable_elpg(g);
+
+ enable = gk20a_readl(g, fifo_sched_disable_r());
+ enable &= ~(fifo_sched_disable_true_v() >> eng_info->runlist_id);
+ gk20a_writel(g, fifo_sched_disable_r(), enable);
+
+ /* re-enable elpg or release pmu mutex */
+ if (elpg_off)
+ gk20a_pmu_enable_elpg(g);
+ else
+ pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+int gk20a_fifo_disable_engine_activity(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info,
+ bool wait_for_idle)
+{
+ u32 gr_stat, pbdma_stat, chan_stat, eng_stat, ctx_stat;
+ u32 pbdma_chid = ~0, engine_chid = ~0, disable;
+ u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+ u32 elpg_off;
+ u32 err = 0;
+
+ gk20a_dbg_fn("");
+
+ gr_stat =
+ gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
+ if (fifo_engine_status_engine_v(gr_stat) ==
+ fifo_engine_status_engine_busy_v() && !wait_for_idle)
+ return -EBUSY;
+
+ /* disable elpg if failed to acquire pmu mutex */
+ elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+ if (elpg_off)
+ gk20a_pmu_disable_elpg(g);
+
+ disable = gk20a_readl(g, fifo_sched_disable_r());
+ disable = set_field(disable,
+ fifo_sched_disable_runlist_m(eng_info->runlist_id),
+ fifo_sched_disable_runlist_f(fifo_sched_disable_true_v(),
+ eng_info->runlist_id));
+ gk20a_writel(g, fifo_sched_disable_r(), disable);
+
+ /* chid from pbdma status */
+ pbdma_stat = gk20a_readl(g, fifo_pbdma_status_r(eng_info->pbdma_id));
+ chan_stat = fifo_pbdma_status_chan_status_v(pbdma_stat);
+ if (chan_stat == fifo_pbdma_status_chan_status_valid_v() ||
+ chan_stat == fifo_pbdma_status_chan_status_chsw_save_v())
+ pbdma_chid = fifo_pbdma_status_id_v(pbdma_stat);
+ else if (chan_stat == fifo_pbdma_status_chan_status_chsw_load_v() ||
+ chan_stat == fifo_pbdma_status_chan_status_chsw_switch_v())
+ pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
+
+ if (pbdma_chid != ~0) {
+ err = gk20a_fifo_preempt_channel(g, pbdma_chid);
+ if (err)
+ goto clean_up;
+ }
+
+ /* chid from engine status */
+ eng_stat = gk20a_readl(g, fifo_engine_status_r(eng_info->engine_id));
+ ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
+ if (ctx_stat == fifo_engine_status_ctx_status_valid_v() ||
+ ctx_stat == fifo_engine_status_ctx_status_ctxsw_save_v())
+ engine_chid = fifo_engine_status_id_v(eng_stat);
+ else if (ctx_stat == fifo_engine_status_ctx_status_ctxsw_load_v() ||
+ ctx_stat == fifo_engine_status_ctx_status_ctxsw_switch_v())
+ engine_chid = fifo_engine_status_next_id_v(eng_stat);
+
+ if (engine_chid != ~0 && engine_chid != pbdma_chid) {
+ err = gk20a_fifo_preempt_channel(g, engine_chid);
+ if (err)
+ goto clean_up;
+ }
+
+clean_up:
+ /* re-enable elpg or release pmu mutex */
+ if (elpg_off)
+ gk20a_pmu_enable_elpg(g);
+ else
+ pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+ if (err) {
+ gk20a_dbg_fn("failed");
+ if (gk20a_fifo_enable_engine_activity(g, eng_info))
+ gk20a_err(dev_from_gk20a(g),
+ "failed to enable gr engine activity\n");
+ } else {
+ gk20a_dbg_fn("done");
+ }
+ return err;
+}
+
+static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ u32 engines = 0;
+ int i;
+
+ for (i = 0; i < f->max_engines; i++) {
+ u32 status = gk20a_readl(g, fifo_engine_status_r(i));
+ bool engine_busy = fifo_engine_status_engine_v(status) ==
+ fifo_engine_status_engine_busy_v();
+
+ if (engine_busy &&
+ (f->engine_info[i].runlist_id == runlist_id))
+ engines |= BIT(i);
+ }
+ gk20a_fifo_recover(g, engines, true);
+}
+
+static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
+{
+ struct fifo_runlist_info_gk20a *runlist;
+ u32 remain;
+ bool pending;
+
+ runlist = &g->fifo.runlist_info[runlist_id];
+ remain = wait_event_timeout(runlist->runlist_wq,
+ ((pending = gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
+ fifo_eng_runlist_pending_true_f()) == 0),
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
+
+ if (remain == 0 && pending != 0)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+ u32 hw_chid, bool add,
+ bool wait_for_finish)
+{
+ u32 ret = 0;
+ struct device *d = dev_from_gk20a(g);
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_runlist_info_gk20a *runlist = NULL;
+ u32 *runlist_entry_base = NULL;
+ u32 *runlist_entry = NULL;
+ phys_addr_t runlist_pa;
+ u32 old_buf, new_buf;
+ u32 chid;
+ u32 count = 0;
+ runlist = &f->runlist_info[runlist_id];
+
+ /* valid channel, add/remove it from active list.
+ Otherwise, keep active list untouched for suspend/resume. */
+ if (hw_chid != ~0) {
+ if (add) {
+ if (test_and_set_bit(hw_chid,
+ runlist->active_channels) == 1)
+ return 0;
+ } else {
+ if (test_and_clear_bit(hw_chid,
+ runlist->active_channels) == 0)
+ return 0;
+ }
+ }
+
+ old_buf = runlist->cur_buffer;
+ new_buf = !runlist->cur_buffer;
+
+ gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
+ runlist_id, runlist->mem[new_buf].iova);
+
+ runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova);
+ if (!runlist_pa) {
+ ret = -EINVAL;
+ goto clean_up;
+ }
+
+ runlist_entry_base = runlist->mem[new_buf].cpuva;
+ if (!runlist_entry_base) {
+ ret = -ENOMEM;
+ goto clean_up;
+ }
+
+ if (hw_chid != ~0 || /* add/remove a valid channel */
+ add /* resume to add all channels back */) {
+ runlist_entry = runlist_entry_base;
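+		/* each runlist entry is two u32s: the channel id followed
+		 * by a zero word */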
+ for_each_set_bit(chid,
+ runlist->active_channels, f->num_channels) {
+ gk20a_dbg_info("add channel %d to runlist", chid);
+ runlist_entry[0] = chid;
+ runlist_entry[1] = 0;
+ runlist_entry += 2;
+ count++;
+ }
+ } else /* suspend to remove all channels */
+ count = 0;
+
+ if (count != 0) {
+ gk20a_writel(g, fifo_runlist_base_r(),
+ fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) |
+ fifo_runlist_base_target_vid_mem_f());
+ }
+
+ gk20a_writel(g, fifo_runlist_r(),
+ fifo_runlist_engine_f(runlist_id) |
+ fifo_eng_runlist_length_f(count));
+
+ if (wait_for_finish) {
+ ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
+
+ if (ret == -ETIMEDOUT) {
+ gk20a_err(dev_from_gk20a(g),
+ "runlist update timeout");
+
+ gk20a_fifo_runlist_reset_engines(g, runlist_id);
+
+ /* engine reset needs the lock. drop it */
+ mutex_unlock(&runlist->mutex);
+ /* wait until the runlist is active again */
+ ret = gk20a_fifo_runlist_wait_pending(g, runlist_id);
+			/* get the lock back. at this point everything
+			 * should be fine */
+ mutex_lock(&runlist->mutex);
+
+ if (ret)
+ gk20a_err(dev_from_gk20a(g),
+ "runlist update failed: %d", ret);
+ } else if (ret == -EINTR)
+ gk20a_err(dev_from_gk20a(g),
+ "runlist update interrupted");
+ }
+
+ runlist->cur_buffer = new_buf;
+
+clean_up:
+ return ret;
+}
+
+/* add/remove a channel from runlist
+ special cases below: runlist->active_channels will NOT be changed.
+ (hw_chid == ~0 && !add) means remove all active channels from runlist.
+ (hw_chid == ~0 && add) means restore all active channels on runlist. */
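+/* Illustrative call patterns (the runlist id 0 below is used purely for
+ * the example):
+ *   gk20a_fifo_update_runlist(g, 0, ch->hw_chid, true, true);   add a channel
+ *   gk20a_fifo_update_runlist(g, 0, ch->hw_chid, false, true);  remove a channel
+ *   gk20a_fifo_update_runlist(g, 0, ~0, false, false);          clear the runlist
+ *   gk20a_fifo_update_runlist(g, 0, ~0, true, true);            restore all channels */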
+int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
+ bool add, bool wait_for_finish)
+{
+ struct fifo_runlist_info_gk20a *runlist = NULL;
+ struct fifo_gk20a *f = &g->fifo;
+ u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+ u32 elpg_off;
+ u32 ret = 0;
+
+ runlist = &f->runlist_info[runlist_id];
+
+ mutex_lock(&runlist->mutex);
+
+ /* disable elpg if failed to acquire pmu mutex */
+ elpg_off = pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+ if (elpg_off)
+ gk20a_pmu_disable_elpg(g);
+
+ ret = gk20a_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
+ wait_for_finish);
+
+ /* re-enable elpg or release pmu mutex */
+ if (elpg_off)
+ gk20a_pmu_enable_elpg(g);
+ else
+ pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+ mutex_unlock(&runlist->mutex);
+ return ret;
+}
+
+int gk20a_fifo_suspend(struct gk20a *g)
+{
+ gk20a_dbg_fn("");
+
+ /* stop bar1 snooping */
+ gk20a_writel(g, fifo_bar1_base_r(),
+ fifo_bar1_base_valid_false_f());
+
+ /* disable fifo intr */
+ gk20a_writel(g, fifo_intr_en_0_r(), 0);
+ gk20a_writel(g, fifo_intr_en_1_r(), 0);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+bool gk20a_fifo_mmu_fault_pending(struct gk20a *g)
+{
+ if (gk20a_readl(g, fifo_intr_0_r()) &
+ fifo_intr_0_mmu_fault_pending_f())
+ return true;
+ else
+ return false;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
new file mode 100644
index 000000000000..051acda23bcb
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -0,0 +1,164 @@
+/*
+ * drivers/video/tegra/host/gk20a/fifo_gk20a.h
+ *
+ * GK20A graphics fifo (gr host)
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __FIFO_GK20A_H__
+#define __FIFO_GK20A_H__
+
+#include "channel_gk20a.h"
+
+#define MAX_RUNLIST_BUFFERS 2
+
+/* generally corresponds to the "pbdma" engine */
+
+struct fifo_runlist_info_gk20a {
+ unsigned long *active_channels;
+ /* Each engine has its own SW and HW runlist buffer.*/
+ struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS];
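+	/* index of the buffer currently handed to hw, or MAX_RUNLIST_BUFFERS
+	 * if none has been pinned yet */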
+ u32 cur_buffer;
+ u32 total_entries;
+ bool stopped;
+ bool support_tsg;
+	struct mutex mutex; /* protect channel preempt and runlist update */
+ wait_queue_head_t runlist_wq;
+};
+
+/* so far gk20a has two engines: gr and ce2(gr_copy) */
+enum {
+ ENGINE_GR_GK20A = 0,
+ ENGINE_CE2_GK20A = 1,
+ ENGINE_INVAL_GK20A
+};
+
+struct fifo_pbdma_exception_info_gk20a {
+ u32 status_r; /* raw register value from hardware */
+ u32 id, next_id;
+ u32 chan_status_v; /* raw value from hardware */
+ bool id_is_chid, next_id_is_chid;
+ bool chsw_in_progress;
+};
+
+struct fifo_engine_exception_info_gk20a {
+ u32 status_r; /* raw register value from hardware */
+ u32 id, next_id;
+ u32 ctx_status_v; /* raw value from hardware */
+ bool id_is_chid, next_id_is_chid;
+ bool faulted, idle, ctxsw_in_progress;
+};
+
+struct fifo_mmu_fault_info_gk20a {
+ u32 fault_info_v;
+ u32 fault_type_v;
+ u32 engine_subid_v;
+ u32 client_v;
+ u32 fault_hi_v;
+ u32 fault_lo_v;
+ u64 inst_ptr;
+ const char *fault_type_desc;
+ const char *engine_subid_desc;
+ const char *client_desc;
+};
+
+struct fifo_engine_info_gk20a {
+ u32 sw_id;
+ const char *name;
+ u32 dev_info_id;
+ u32 engine_id;
+ u32 runlist_id;
+ u32 pbdma_id;
+ u32 mmu_fault_id;
+ u32 rc_mask;
+ struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
+ struct fifo_engine_exception_info_gk20a engine_exception_info;
+ struct fifo_mmu_fault_info_gk20a mmu_fault_info;
+
+};
+
+struct fifo_gk20a {
+ struct gk20a *g;
+ int num_channels;
+
+ int num_pbdma;
+ u32 *pbdma_map;
+
+ struct fifo_engine_info_gk20a *engine_info;
+ u32 max_engines;
+ u32 num_engines;
+
+ struct fifo_runlist_info_gk20a *runlist_info;
+ u32 max_runlists;
+
+ struct userd_desc userd;
+ u32 userd_entry_size;
+ u32 userd_total_size;
+
+ struct channel_gk20a *channel;
+ struct mutex ch_inuse_mutex; /* protect unused chid look up */
+
+ void (*remove_support)(struct fifo_gk20a *);
+ bool sw_ready;
+ struct {
+ /* share info between isrs and non-isr code */
+ struct {
+ struct mutex mutex;
+ } isr;
+ struct {
+ u32 device_fatal_0;
+ u32 channel_fatal_0;
+ u32 restartable_0;
+ } pbdma;
+ struct {
+
+ } engine;
+
+
+ } intr;
+
+ u32 mmu_fault_engines;
+ bool deferred_reset_pending;
+ struct mutex deferred_reset_mutex;
+
+ struct work_struct fault_restore_thread;
+};
+
+int gk20a_init_fifo_support(struct gk20a *g);
+
+void gk20a_fifo_isr(struct gk20a *g);
+void gk20a_fifo_nonstall_isr(struct gk20a *g);
+
+int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);
+
+int gk20a_fifo_enable_engine_activity(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info);
+int gk20a_fifo_disable_engine_activity(struct gk20a *g,
+ struct fifo_engine_info_gk20a *eng_info,
+ bool wait_for_idle);
+
+int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
+ bool add, bool wait_for_finish);
+
+int gk20a_fifo_suspend(struct gk20a *g);
+
+bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
+void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose);
+int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
+
+void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
+ unsigned long fault_id);
+#endif /*__FIFO_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
new file mode 100644
index 000000000000..4cc500dee6f2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -0,0 +1,1681 @@
+/*
+ * drivers/video/tegra/host/gk20a/gk20a.c
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gk20a.h>
+
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/string.h>
+#include <linux/cdev.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/file.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/thermal.h>
+#include <asm/cacheflush.h>
+#include <linux/debugfs.h>
+#include <linux/spinlock.h>
+#include <linux/tegra-powergate.h>
+
+#include <linux/sched.h>
+#include <linux/input-cfboost.h>
+
+#include <mach/pm_domains.h>
+
+#include "gk20a.h"
+#include "debug_gk20a.h"
+#include "ctrl_gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_timer_gk20a.h"
+#include "hw_bus_gk20a.h"
+#include "hw_sim_gk20a.h"
+#include "hw_top_gk20a.h"
+#include "hw_ltc_gk20a.h"
+#include "gk20a_scale.h"
+#include "dbg_gpu_gk20a.h"
+#include "hal.h"
+
+#ifdef CONFIG_ARM64
+#define __cpuc_flush_dcache_area __flush_dcache_area
+#endif
+
+#define CLASS_NAME "nvidia-gpu"
+/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */
+#define INTERFACE_NAME "nvhost%s-gpu"
+
+#define GK20A_NUM_CDEVS 5
+
+#if defined(GK20A_DEBUG)
+u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
+u32 gk20a_dbg_ftrace;
+#endif
+
+static int gk20a_pm_finalize_poweron(struct device *dev);
+static int gk20a_pm_prepare_poweroff(struct device *dev);
+
+static inline void set_gk20a(struct platform_device *dev, struct gk20a *gk20a)
+{
+ gk20a_get_platform(dev)->g = gk20a;
+}
+
+static const struct file_operations gk20a_channel_ops = {
+ .owner = THIS_MODULE,
+ .release = gk20a_channel_release,
+ .open = gk20a_channel_open,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = gk20a_channel_ioctl,
+#endif
+ .unlocked_ioctl = gk20a_channel_ioctl,
+};
+
+static const struct file_operations gk20a_ctrl_ops = {
+ .owner = THIS_MODULE,
+ .release = gk20a_ctrl_dev_release,
+ .open = gk20a_ctrl_dev_open,
+ .unlocked_ioctl = gk20a_ctrl_dev_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = gk20a_ctrl_dev_ioctl,
+#endif
+};
+
+static const struct file_operations gk20a_dbg_ops = {
+ .owner = THIS_MODULE,
+ .release = gk20a_dbg_gpu_dev_release,
+ .open = gk20a_dbg_gpu_dev_open,
+ .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
+ .poll = gk20a_dbg_gpu_dev_poll,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
+#endif
+};
+
+static const struct file_operations gk20a_as_ops = {
+ .owner = THIS_MODULE,
+ .release = gk20a_as_dev_release,
+ .open = gk20a_as_dev_open,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = gk20a_as_dev_ioctl,
+#endif
+ .unlocked_ioctl = gk20a_as_dev_ioctl,
+};
+
+/*
+ * Note: We use a different 'open' to trigger handling of the profiler session.
+ * Most of the code is shared between them... Though, at some point if the
+ * code does get too tangled trying to handle each in the same path we can
+ * separate them cleanly.
+ */
+static const struct file_operations gk20a_prof_ops = {
+ .owner = THIS_MODULE,
+ .release = gk20a_dbg_gpu_dev_release,
+ .open = gk20a_prof_gpu_dev_open,
+ .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
+ /* .mmap = gk20a_prof_gpu_dev_mmap,*/
+ /*int (*mmap) (struct file *, struct vm_area_struct *);*/
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
+#endif
+};
+
+static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
+{
+ writel(v, g->sim.regs+r);
+}
+
+static inline u32 sim_readl(struct gk20a *g, u32 r)
+{
+ return readl(g->sim.regs+r);
+}
+
+static void kunmap_and_free_iopage(void **kvaddr, struct page **page)
+{
+ if (*kvaddr) {
+ kunmap(*kvaddr);
+ *kvaddr = 0;
+ }
+ if (*page) {
+ __free_page(*page);
+ *page = 0;
+ }
+}
+
+static void gk20a_free_sim_support(struct gk20a *g)
+{
+ /* free sim mappings, bfrs */
+ kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr,
+ &g->sim.send_bfr.page);
+
+ kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr,
+ &g->sim.recv_bfr.page);
+
+ kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr,
+ &g->sim.msg_bfr.page);
+}
+
+static void gk20a_remove_sim_support(struct sim_gk20a *s)
+{
+ struct gk20a *g = s->g;
+ if (g->sim.regs)
+ sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
+ gk20a_free_sim_support(g);
+}
+
+static int alloc_and_kmap_iopage(struct device *d,
+ void **kvaddr,
+ phys_addr_t *phys,
+ struct page **page)
+{
+ int err = 0;
+ *page = alloc_page(GFP_KERNEL);
+
+ if (!*page) {
+ err = -ENOMEM;
+ dev_err(d, "couldn't allocate io page\n");
+ goto fail;
+ }
+
+ *kvaddr = kmap(*page);
+ if (!*kvaddr) {
+ err = -ENOMEM;
+ dev_err(d, "couldn't kmap io page\n");
+ goto fail;
+ }
+ *phys = page_to_phys(*page);
+ return 0;
+
+ fail:
+ kunmap_and_free_iopage(kvaddr, page);
+ return err;
+
+}
+
+static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
+ struct resource **out)
+{
+ struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
+ if (!r)
+ return NULL;
+ if (out)
+ *out = r;
+ return devm_request_and_ioremap(&dev->dev, r);
+}
+
+/* TBD: strip from released */
+static int gk20a_init_sim_support(struct platform_device *dev)
+{
+ int err = 0;
+ struct gk20a *g = get_gk20a(dev);
+ struct device *d = &dev->dev;
+ phys_addr_t phys;
+
+ g->sim.g = g;
+ g->sim.regs = gk20a_ioremap_resource(dev, GK20A_SIM_IORESOURCE_MEM,
+ &g->sim.reg_mem);
+ if (!g->sim.regs) {
+ dev_err(d, "failed to remap gk20a sim regs\n");
+ err = -ENXIO;
+ goto fail;
+ }
+
+ /* allocate sim event/msg buffers */
+ err = alloc_and_kmap_iopage(d, &g->sim.send_bfr.kvaddr,
+ &g->sim.send_bfr.phys,
+ &g->sim.send_bfr.page);
+
+ err = err || alloc_and_kmap_iopage(d, &g->sim.recv_bfr.kvaddr,
+ &g->sim.recv_bfr.phys,
+ &g->sim.recv_bfr.page);
+
+ err = err || alloc_and_kmap_iopage(d, &g->sim.msg_bfr.kvaddr,
+ &g->sim.msg_bfr.phys,
+ &g->sim.msg_bfr.page);
+
+ if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr &&
+ g->sim.msg_bfr.kvaddr)) {
+ dev_err(d, "couldn't allocate all sim buffers\n");
+ goto fail;
+ }
+
+ /*mark send ring invalid*/
+ sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f());
+
+ /*read get pointer and make equal to put*/
+ g->sim.send_ring_put = sim_readl(g, sim_send_get_r());
+ sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
+
+ /*write send ring address and make it valid*/
+ /*TBD: work for >32b physmem*/
+ phys = g->sim.send_bfr.phys;
+ sim_writel(g, sim_send_ring_hi_r(), 0);
+ sim_writel(g, sim_send_ring_r(),
+ sim_send_ring_status_valid_f() |
+ sim_send_ring_target_phys_pci_coherent_f() |
+ sim_send_ring_size_4kb_f() |
+ sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT));
+
+ /*repeat for recv ring (but swap put,get as roles are opposite) */
+ sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f());
+
+ /*read put pointer and make equal to get*/
+ g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r());
+ sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
+
+	/*write recv ring address and make it valid*/
+ /*TBD: work for >32b physmem*/
+ phys = g->sim.recv_bfr.phys;
+ sim_writel(g, sim_recv_ring_hi_r(), 0);
+ sim_writel(g, sim_recv_ring_r(),
+ sim_recv_ring_status_valid_f() |
+ sim_recv_ring_target_phys_pci_coherent_f() |
+ sim_recv_ring_size_4kb_f() |
+ sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT));
+
+ g->sim.remove_support = gk20a_remove_sim_support;
+ return 0;
+
+ fail:
+ gk20a_free_sim_support(g);
+ return err;
+}
+
+static inline u32 sim_msg_header_size(void)
+{
+	return 24;/*TBD: fix the header to get this from NV_VGPU_MSG_HEADER*/
+}
+
+static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset)
+{
+ return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset);
+}
+
+static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
+{
+ return sim_msg_bfr(g, byte_offset); /*starts at 0*/
+}
+
+static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset)
+{
+ /*starts after msg header/cmn*/
+ return sim_msg_bfr(g, byte_offset + sim_msg_header_size());
+}
+
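+/* Fill in the common sim message header: signature, pending result, spare,
+ * function id and total length (header plus payload). */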
+static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
+{
+ /*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
+ *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
+ *sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
+ *sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
+ *sim_msg_hdr(g, sim_msg_function_r()) = func;
+ *sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size();
+}
+
+static inline u32 sim_escape_read_hdr_size(void)
+{
+ return 12; /*TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER*/
+}
+
+static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset)
+{
+ return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset);
+}
+
+static int rpc_send_message(struct gk20a *g)
+{
+ /* calculations done in units of u32s */
+ u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
+ u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
+ u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);
+
+ *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
+ sim_dma_target_phys_pci_coherent_f() |
+ sim_dma_status_valid_f() |
+ sim_dma_size_4kb_f() |
+ sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);
+
+ *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = 0; /*TBD >32b phys*/
+
+ *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;
+
+ g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
+ PAGE_SIZE;
+
+ __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
+ __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
+ __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
+
+ /* Update the put pointer. This will trap into the host. */
+ sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
+
+ return 0;
+}
+
+static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset)
+{
+ return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset);
+}
+
+static int rpc_recv_poll(struct gk20a *g)
+{
+ phys_addr_t recv_phys_addr;
+
+ /* XXX This read is not required (?) */
+ /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/
+
+ /* Poll the recv ring get pointer in an infinite loop*/
+ do {
+ g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
+ } while (g->sim.recv_ring_put == g->sim.recv_ring_get);
+
+ /* process all replies */
+ while (g->sim.recv_ring_put != g->sim.recv_ring_get) {
+ /* these are in u32 offsets*/
+ u32 dma_lo_offset =
+ sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0;
+ /*u32 dma_hi_offset = dma_lo_offset + 1;*/
+ u32 recv_phys_addr_lo = sim_dma_addr_lo_v(*sim_recv_ring_bfr(g, dma_lo_offset*4));
+
+ /*u32 recv_phys_addr_hi = sim_dma_hi_addr_v(
+ (phys_addr_t)sim_recv_ring_bfr(g,dma_hi_offset*4));*/
+
+ /*TBD >32b phys addr */
+ recv_phys_addr = recv_phys_addr_lo << PAGE_SHIFT;
+
+ if (recv_phys_addr != g->sim.msg_bfr.phys) {
+ dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n",
+ __func__);
+ return -1;
+ }
+
+ /* Update GET pointer */
+ g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) %
+ PAGE_SIZE;
+
+ __cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
+ __cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
+ __cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
+
+ sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
+
+ g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
+ }
+
+ return 0;
+}
+
+static int issue_rpc_and_wait(struct gk20a *g)
+{
+ int err;
+
+ err = rpc_send_message(g);
+ if (err) {
+ dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n",
+ __func__);
+ return err;
+ }
+
+ err = rpc_recv_poll(g);
+ if (err) {
+ dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n",
+ __func__);
+ return err;
+ }
+
+ /* Now check if RPC really succeeded */
+ if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) {
+ dev_err(dev_from_gk20a(g), "%s received failed status!\n",
+ __func__);
+ return -(*sim_msg_hdr(g, sim_msg_result_r()));
+ }
+ return 0;
+}
+
+int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count, u32 *data)
+{
+ int err;
+ size_t pathlen = strlen(path);
+ u32 data_offset;
+
+ sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
+ sim_escape_read_hdr_size());
+ *sim_msg_param(g, 0) = index;
+ *sim_msg_param(g, 4) = count;
+ data_offset = roundup(0xc + pathlen + 1, sizeof(u32));
+ *sim_msg_param(g, 8) = data_offset;
+ strcpy((char *)sim_msg_param(g, 0xc), path);
+
+ err = issue_rpc_and_wait(g);
+
+ if (!err)
+ memcpy(data, sim_msg_param(g, data_offset), count);
+ return err;
+}
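+/* For illustration only (the escape path string below is made up):
+ *
+ *	u32 val;
+ *	if (!gk20a_sim_esc_read(g, "sim/some_escape", 0, sizeof(val), &val))
+ *		... use val ...
+ */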
+
+static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
+{
+ struct gk20a *g = dev_id;
+ u32 mc_intr_0;
+
+ if (!g->power_on)
+ return IRQ_NONE;
+
+ /* not from gpu when sharing irq with others */
+ mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
+ if (unlikely(!mc_intr_0))
+ return IRQ_NONE;
+
+ gk20a_writel(g, mc_intr_en_0_r(),
+ mc_intr_en_0_inta_disabled_f());
+
+ /* flush previous write */
+ gk20a_readl(g, mc_intr_en_0_r());
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
+{
+ struct gk20a *g = dev_id;
+ u32 mc_intr_1;
+
+ if (!g->power_on)
+ return IRQ_NONE;
+
+ /* not from gpu when sharing irq with others */
+ mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
+ if (unlikely(!mc_intr_1))
+ return IRQ_NONE;
+
+ gk20a_writel(g, mc_intr_en_1_r(),
+ mc_intr_en_1_inta_disabled_f());
+
+ /* flush previous write */
+ gk20a_readl(g, mc_intr_en_1_r());
+
+ return IRQ_WAKE_THREAD;
+}
+
+static void gk20a_pbus_isr(struct gk20a *g)
+{
+ u32 val;
+ val = gk20a_readl(g, bus_intr_0_r());
+ if (val & (bus_intr_0_pri_squash_m() |
+ bus_intr_0_pri_fecserr_m() |
+ bus_intr_0_pri_timeout_m())) {
+ gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
+ gk20a_readl(g, top_fs_status_r()));
+ gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
+ gk20a_readl(g, mc_enable_r()));
+ gk20a_err(&g->dev->dev,
+ "NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
+ gk20a_readl(g, timer_pri_timeout_save_0_r()));
+ gk20a_err(&g->dev->dev,
+ "NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
+ gk20a_readl(g, timer_pri_timeout_save_1_r()));
+ gk20a_err(&g->dev->dev,
+ "NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
+ gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()));
+ }
+
+ if (val)
+ gk20a_err(&g->dev->dev,
+ "Unhandled pending pbus interrupt\n");
+
+ gk20a_writel(g, bus_intr_0_r(), val);
+}
+
+static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
+{
+ struct gk20a *g = dev_id;
+ u32 mc_intr_0;
+
+ gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
+
+ mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
+
+ gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
+
+ if (mc_intr_0 & mc_intr_0_pgraph_pending_f())
+ gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
+ if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
+ gk20a_fifo_isr(g);
+ if (mc_intr_0 & mc_intr_0_pmu_pending_f())
+ gk20a_pmu_isr(g);
+ if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
+ gk20a_priv_ring_isr(g);
+ if (mc_intr_0 & mc_intr_0_ltc_pending_f())
+ gk20a_mm_ltc_isr(g);
+ if (mc_intr_0 & mc_intr_0_pbus_pending_f())
+ gk20a_pbus_isr(g);
+
+ gk20a_writel(g, mc_intr_en_0_r(),
+ mc_intr_en_0_inta_hardware_f());
+
+ /* flush previous write */
+ gk20a_readl(g, mc_intr_en_0_r());
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
+{
+ struct gk20a *g = dev_id;
+ u32 mc_intr_1;
+
+ gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
+
+ mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
+
+ gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
+
+ if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
+ gk20a_fifo_nonstall_isr(g);
+ if (mc_intr_1 & mc_intr_0_pgraph_pending_f())
+ gk20a_gr_nonstall_isr(g);
+
+ gk20a_writel(g, mc_intr_en_1_r(),
+ mc_intr_en_1_inta_hardware_f());
+
+ /* flush previous write */
+ gk20a_readl(g, mc_intr_en_1_r());
+
+ return IRQ_HANDLED;
+}
+
+static void gk20a_remove_support(struct platform_device *dev)
+{
+ struct gk20a *g = get_gk20a(dev);
+
+ /* pmu support should already be removed when the driver turns off
+ the gpu power rail in prepare_poweroff */
+ if (g->gk20a_cdev.gk20a_cooling_dev)
+ thermal_cooling_device_unregister(g->gk20a_cdev.gk20a_cooling_dev);
+
+ if (g->gr.remove_support)
+ g->gr.remove_support(&g->gr);
+
+ if (g->fifo.remove_support)
+ g->fifo.remove_support(&g->fifo);
+
+ if (g->mm.remove_support)
+ g->mm.remove_support(&g->mm);
+
+ if (g->sim.remove_support)
+ g->sim.remove_support(&g->sim);
+
+ release_firmware(g->pmu_fw);
+
+ if (g->irq_requested) {
+ free_irq(g->irq_stall, g);
+ free_irq(g->irq_nonstall, g);
+ g->irq_requested = false;
+ }
+
+ /* free mappings to registers, etc. */
+
+ if (g->regs) {
+ iounmap(g->regs);
+ g->regs = 0;
+ }
+ if (g->bar1) {
+ iounmap(g->bar1);
+ g->bar1 = 0;
+ }
+}
+
+static int gk20a_init_support(struct platform_device *dev)
+{
+ int err = 0;
+ struct gk20a *g = get_gk20a(dev);
+
+ g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
+ &g->reg_mem);
+ if (!g->regs) {
+ dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
+ err = -ENXIO;
+ goto fail;
+ }
+
+ g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
+ &g->bar1_mem);
+ if (!g->bar1) {
+ dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
+ err = -ENXIO;
+ goto fail;
+ }
+
+ /* Get interrupt numbers */
+ g->irq_stall = platform_get_irq(dev, 0);
+ g->irq_nonstall = platform_get_irq(dev, 1);
+ if (g->irq_stall < 0 || g->irq_nonstall < 0) {
+ err = -ENXIO;
+ goto fail;
+ }
+
+ if (tegra_cpu_is_asim()) {
+ err = gk20a_init_sim_support(dev);
+ if (err)
+ goto fail;
+ }
+
+ mutex_init(&g->dbg_sessions_lock);
+ mutex_init(&g->client_lock);
+
+ g->remove_support = gk20a_remove_support;
+ return 0;
+
+ fail:
+ gk20a_remove_support(dev);
+ return err;
+}
+
+static int gk20a_init_client(struct platform_device *dev)
+{
+ struct gk20a *g = get_gk20a(dev);
+ int err;
+
+ gk20a_dbg_fn("");
+
+#ifndef CONFIG_PM_RUNTIME
+ gk20a_pm_finalize_poweron(&dev->dev);
+#endif
+
+ err = gk20a_init_mm_setup_sw(g);
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
+ gk20a_scale_hw_init(dev);
+ return 0;
+}
+
+static void gk20a_deinit_client(struct platform_device *dev)
+{
+ gk20a_dbg_fn("");
+#ifndef CONFIG_PM_RUNTIME
+ gk20a_pm_prepare_poweroff(&dev->dev);
+#endif
+}
+
+int gk20a_get_client(struct gk20a *g)
+{
+ int err = 0;
+
+ mutex_lock(&g->client_lock);
+ if (g->client_refcount == 0)
+ err = gk20a_init_client(g->dev);
+ if (!err)
+ g->client_refcount++;
+ mutex_unlock(&g->client_lock);
+ return err;
+}
+
+void gk20a_put_client(struct gk20a *g)
+{
+ mutex_lock(&g->client_lock);
+ if (g->client_refcount == 1)
+ gk20a_deinit_client(g->dev);
+ g->client_refcount--;
+ mutex_unlock(&g->client_lock);
+ WARN_ON(g->client_refcount < 0);
+}
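Callers are expected to bracket use of the device with a balanced get/put pair; the first get initialises the client state and the last put tears it down. A hypothetical caller might look like this (sketch only, not part of the patch):

/* Hypothetical caller: gk20a_get_client()/gk20a_put_client() must be
 * balanced; the work in the middle is whatever needs the client state. */
static int example_with_client(struct gk20a *g)
{
	int err = gk20a_get_client(g);

	if (err)
		return err;

	/* ... operate on the now-initialised client ... */

	gk20a_put_client(g);
	return 0;
}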
+
+static int gk20a_pm_prepare_poweroff(struct device *_dev)
+{
+ struct platform_device *dev = to_platform_device(_dev);
+ struct gk20a *g = get_gk20a(dev);
+ int ret = 0;
+
+ gk20a_dbg_fn("");
+
+ if (!g->power_on)
+ return 0;
+
+ ret |= gk20a_channel_suspend(g);
+
+ /* disable elpg before gr or fifo suspend */
+ ret |= gk20a_pmu_destroy(g);
+ ret |= gk20a_gr_suspend(g);
+ ret |= gk20a_mm_suspend(g);
+ ret |= gk20a_fifo_suspend(g);
+
+ /*
+ * After this point, gk20a interrupts should not get
+ * serviced.
+ */
+ if (g->irq_requested) {
+ free_irq(g->irq_stall, g);
+ free_irq(g->irq_nonstall, g);
+ g->irq_requested = false;
+ }
+
+ /* Disable GPCPLL */
+ ret |= gk20a_suspend_clk_support(g);
+ g->power_on = false;
+
+ return ret;
+}
+
+static void gk20a_detect_chip(struct gk20a *g)
+{
+ struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
+
+ u32 mc_boot_0_value = gk20a_readl(g, mc_boot_0_r());
+ gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
+ NVHOST_GPU_ARCHITECTURE_SHIFT;
+ gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
+ gpu->rev =
+ (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
+ mc_boot_0_minor_revision_v(mc_boot_0_value);
+
+ gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
+ g->gpu_characteristics.arch,
+ g->gpu_characteristics.impl,
+ g->gpu_characteristics.rev);
+}
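The revision byte packs the major revision into the high nibble and the minor into the low nibble. A small restatement of that packing with illustrative values only; the helper is not part of the patch:

/* Same packing as above: major in the high nibble, minor in the low
 * nibble, e.g. (0xA << 4) | 0x1 == 0xA1 (values illustrative). */
static inline u32 example_pack_rev(u32 major, u32 minor)
{
	return (major << 4) | minor;
}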
+
+static int gk20a_pm_finalize_poweron(struct device *_dev)
+{
+ struct platform_device *dev = to_platform_device(_dev);
+ struct gk20a *g = get_gk20a(dev);
+ int err, nice_value;
+
+ gk20a_dbg_fn("");
+
+ if (g->power_on)
+ return 0;
+
+ nice_value = task_nice(current);
+ set_user_nice(current, -20);
+
+ if (!g->irq_requested) {
+ err = request_threaded_irq(g->irq_stall,
+ gk20a_intr_isr_stall,
+ gk20a_intr_thread_stall,
+ 0, "gk20a_stall", g);
+ if (err) {
+ dev_err(dev_from_gk20a(g),
+ "failed to request stall intr irq @ %lld\n",
+ (u64)g->irq_stall);
+ goto done;
+ }
+ err = request_threaded_irq(g->irq_nonstall,
+ gk20a_intr_isr_nonstall,
+ gk20a_intr_thread_nonstall,
+ 0, "gk20a_nonstall", g);
+ if (err) {
+ dev_err(dev_from_gk20a(g),
+ "failed to request non-stall intr irq @ %lld\n",
+ (u64)g->irq_nonstall);
+ goto done;
+ }
+ g->irq_requested = true;
+ }
+
+ g->power_on = true;
+
+ gk20a_writel(g, mc_intr_mask_1_r(),
+ mc_intr_0_pfifo_pending_f()
+ | mc_intr_0_pgraph_pending_f());
+ gk20a_writel(g, mc_intr_en_1_r(),
+ mc_intr_en_1_inta_hardware_f());
+
+ gk20a_writel(g, mc_intr_mask_0_r(),
+ mc_intr_0_pgraph_pending_f()
+ | mc_intr_0_pfifo_pending_f()
+ | mc_intr_0_priv_ring_pending_f()
+ | mc_intr_0_ltc_pending_f()
+ | mc_intr_0_pbus_pending_f());
+ gk20a_writel(g, mc_intr_en_0_r(),
+ mc_intr_en_0_inta_hardware_f());
+
+ if (!tegra_platform_is_silicon())
+ gk20a_writel(g, bus_intr_en_0_r(), 0x0);
+ else
+ gk20a_writel(g, bus_intr_en_0_r(),
+ bus_intr_en_0_pri_squash_m() |
+ bus_intr_en_0_pri_fecserr_m() |
+ bus_intr_en_0_pri_timeout_m());
+ gk20a_reset_priv_ring(g);
+
+ gk20a_detect_chip(g);
+ err = gpu_init_hal(g);
+ if (err)
+ goto done;
+
+ /* TBD: move this after graphics init, in which blcg/slcg is enabled.
+ This function removes SlowdownOnBoot, which applies a 32x divider
+ on the gpcpll bypass path. The purpose of the slowdown is to save
+ power during boot, but it also significantly slows down gk20a init
+ on simulation and emulation. We should remove SOB after the graphics
+ power saving features (blcg/slcg) are enabled. For now, do it here. */
+ err = gk20a_init_clk_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a clk");
+ goto done;
+ }
+
+ /* enable pri timeout only on silicon */
+ if (tegra_platform_is_silicon()) {
+ gk20a_writel(g,
+ timer_pri_timeout_r(),
+ timer_pri_timeout_period_f(0x186A0) |
+ timer_pri_timeout_en_en_enabled_f());
+ } else {
+ gk20a_writel(g,
+ timer_pri_timeout_r(),
+ timer_pri_timeout_period_f(0x186A0) |
+ timer_pri_timeout_en_en_disabled_f());
+ }
+
+ err = gk20a_init_fifo_reset_enable_hw(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to reset gk20a fifo");
+ goto done;
+ }
+
+ err = gk20a_init_mm_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a mm");
+ goto done;
+ }
+
+ err = gk20a_init_pmu_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a pmu");
+ goto done;
+ }
+
+ err = gk20a_init_fifo_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a fifo");
+ goto done;
+ }
+
+ err = gk20a_init_gr_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a gr");
+ goto done;
+ }
+
+ err = gk20a_init_pmu_setup_hw2(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a pmu_hw2");
+ goto done;
+ }
+
+ err = gk20a_init_therm_support(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a therm");
+ goto done;
+ }
+
+ err = gk20a_init_gpu_characteristics(g);
+ if (err) {
+ gk20a_err(&dev->dev, "failed to init gk20a gpu characteristics");
+ goto done;
+ }
+
+ gk20a_channel_resume(g);
+ set_user_nice(current, nice_value);
+
+done:
+ return err;
+}
+
+static struct of_device_id tegra_gk20a_of_match[] = {
+#ifdef CONFIG_TEGRA_GK20A
+ { .compatible = "nvidia,tegra124-gk20a",
+ .data = &gk20a_tegra_platform },
+#endif
+ { .compatible = "nvidia,generic-gk20a",
+ .data = &gk20a_generic_platform },
+ { },
+};
+
+int tegra_gpu_get_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *max_state)
+{
+ struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;
+
+ *max_state = gk20a_gpufreq_device->gk20a_freq_table_size - 1;
+ return 0;
+}
+
+int tegra_gpu_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *cur_state)
+{
+ struct cooling_device_gk20a *gk20a_gpufreq_device = cdev->devdata;
+
+ *cur_state = gk20a_gpufreq_device->gk20a_freq_state;
+ return 0;
+}
+
+int tegra_gpu_set_cur_state(struct thermal_cooling_device *c_dev,
+ unsigned long cur_state)
+{
+ u32 target_freq;
+ struct gk20a *g;
+ struct gpufreq_table_data *gpu_cooling_table;
+ struct cooling_device_gk20a *gk20a_gpufreq_device = c_dev->devdata;
+
+ BUG_ON(cur_state >= gk20a_gpufreq_device->gk20a_freq_table_size);
+
+ g = container_of(gk20a_gpufreq_device, struct gk20a, gk20a_cdev);
+
+ gpu_cooling_table = tegra_gpufreq_table_get();
+ target_freq = gpu_cooling_table[cur_state].frequency;
+
+ /* ensure a query for state will get the proper value */
+ gk20a_gpufreq_device->gk20a_freq_state = cur_state;
+
+ gk20a_clk_set_rate(g, target_freq);
+
+ return 0;
+}
+
+static struct thermal_cooling_device_ops tegra_gpu_cooling_ops = {
+ .get_max_state = tegra_gpu_get_max_state,
+ .get_cur_state = tegra_gpu_get_cur_state,
+ .set_cur_state = tegra_gpu_set_cur_state,
+};
+
+static int gk20a_create_device(
+ struct platform_device *pdev, int devno, const char *cdev_name,
+ struct cdev *cdev, struct device **out,
+ const struct file_operations *ops)
+{
+ struct device *dev;
+ int err;
+ struct gk20a *g = get_gk20a(pdev);
+
+ gk20a_dbg_fn("");
+
+ cdev_init(cdev, ops);
+ cdev->owner = THIS_MODULE;
+
+ err = cdev_add(cdev, devno, 1);
+ if (err) {
+ dev_err(&pdev->dev,
+ "failed to add %s cdev\n", cdev_name);
+ return err;
+ }
+
+ dev = device_create(g->class, NULL, devno, NULL,
+ (pdev->id <= 0) ? INTERFACE_NAME : INTERFACE_NAME ".%d",
+ cdev_name, pdev->id);
+
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ cdev_del(cdev);
+ dev_err(&pdev->dev,
+ "failed to create %s device for %s\n",
+ cdev_name, pdev->name);
+ return err;
+ }
+
+ *out = dev;
+ return 0;
+}
+
+static void gk20a_user_deinit(struct platform_device *dev)
+{
+ struct gk20a *g = get_gk20a(dev);
+
+ if (g->channel.node) {
+ device_destroy(g->class, g->channel.cdev.dev);
+ cdev_del(&g->channel.cdev);
+ }
+
+ if (g->as.node) {
+ device_destroy(g->class, g->as.cdev.dev);
+ cdev_del(&g->as.cdev);
+ }
+
+ if (g->ctrl.node) {
+ device_destroy(g->class, g->ctrl.cdev.dev);
+ cdev_del(&g->ctrl.cdev);
+ }
+
+ if (g->dbg.node) {
+ device_destroy(g->class, g->dbg.cdev.dev);
+ cdev_del(&g->dbg.cdev);
+ }
+
+ if (g->prof.node) {
+ device_destroy(g->class, g->prof.cdev.dev);
+ cdev_del(&g->prof.cdev);
+ }
+
+ if (g->cdev_region)
+ unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
+
+ if (g->class)
+ class_destroy(g->class);
+}
+
+static int gk20a_user_init(struct platform_device *dev)
+{
+ int err;
+ dev_t devno;
+ struct gk20a *g = get_gk20a(dev);
+
+ g->class = class_create(THIS_MODULE, CLASS_NAME);
+ if (IS_ERR(g->class)) {
+ err = PTR_ERR(g->class);
+ g->class = NULL;
+ dev_err(&dev->dev,
+ "failed to create " CLASS_NAME " class\n");
+ goto fail;
+ }
+
+ err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, CLASS_NAME);
+ if (err) {
+ dev_err(&dev->dev, "failed to allocate devno\n");
+ goto fail;
+ }
+ g->cdev_region = devno;
+
+ err = gk20a_create_device(dev, devno++, "",
+ &g->channel.cdev, &g->channel.node,
+ &gk20a_channel_ops);
+ if (err)
+ goto fail;
+
+ err = gk20a_create_device(dev, devno++, "-as",
+ &g->as.cdev, &g->as.node,
+ &gk20a_as_ops);
+ if (err)
+ goto fail;
+
+ err = gk20a_create_device(dev, devno++, "-ctrl",
+ &g->ctrl.cdev, &g->ctrl.node,
+ &gk20a_ctrl_ops);
+ if (err)
+ goto fail;
+
+ err = gk20a_create_device(dev, devno++, "-dbg",
+ &g->dbg.cdev, &g->dbg.node,
+ &gk20a_dbg_ops);
+ if (err)
+ goto fail;
+
+ err = gk20a_create_device(dev, devno++, "-prof",
+ &g->prof.cdev, &g->prof.node,
+ &gk20a_prof_ops);
+ if (err)
+ goto fail;
+
+ return 0;
+fail:
+ gk20a_user_deinit(dev);
+ return err;
+}
+
+struct channel_gk20a *gk20a_get_channel_from_file(int fd)
+{
+ struct channel_gk20a *ch;
+ struct file *f = fget(fd);
+ if (!f)
+ return 0;
+
+ if (f->f_op != &gk20a_channel_ops) {
+ fput(f);
+ return 0;
+ }
+
+ ch = (struct channel_gk20a *)f->private_data;
+ fput(f);
+ return ch;
+}
+
+static int gk20a_pm_enable_clk(struct device *dev)
+{
+ int index = 0;
+ struct gk20a_platform *platform;
+
+ platform = dev_get_drvdata(dev);
+ if (!platform)
+ return -EINVAL;
+
+ for (index = 0; index < platform->num_clks; index++) {
+ int err = clk_prepare_enable(platform->clk[index]);
+ if (err)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gk20a_pm_disable_clk(struct device *dev)
+{
+ int index = 0;
+ struct gk20a_platform *platform;
+
+ platform = dev_get_drvdata(dev);
+ if (!platform)
+ return -EINVAL;
+
+ for (index = 0; index < platform->num_clks; index++)
+ clk_disable_unprepare(platform->clk[index]);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+const struct dev_pm_ops gk20a_pm_ops = {
+#if defined(CONFIG_PM_RUNTIME) && !defined(CONFIG_PM_GENERIC_DOMAINS)
+ .runtime_resume = gk20a_pm_enable_clk,
+ .runtime_suspend = gk20a_pm_disable_clk,
+#endif
+};
+#endif
+
+static int gk20a_pm_railgate(struct generic_pm_domain *domain)
+{
+ struct gk20a *g = container_of(domain, struct gk20a, pd);
+ struct gk20a_platform *platform = platform_get_drvdata(g->dev);
+ int ret = 0;
+
+ if (platform->railgate)
+ ret = platform->railgate(platform->g->dev);
+
+ return ret;
+}
+
+static int gk20a_pm_unrailgate(struct generic_pm_domain *domain)
+{
+ struct gk20a *g = container_of(domain, struct gk20a, pd);
+ struct gk20a_platform *platform = platform_get_drvdata(g->dev);
+ int ret = 0;
+
+ if (platform->unrailgate)
+ ret = platform->unrailgate(platform->g->dev);
+
+ return ret;
+}
+
+static int gk20a_pm_suspend(struct device *dev)
+{
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ int ret = 0;
+
+ if (atomic_read(&dev->power.usage_count) > 1)
+ return -EBUSY;
+
+ ret = gk20a_pm_prepare_poweroff(dev);
+ if (ret)
+ return ret;
+
+ gk20a_scale_suspend(to_platform_device(dev));
+
+ if (platform->suspend)
+ platform->suspend(dev);
+
+ return 0;
+}
+
+static int gk20a_pm_resume(struct device *dev)
+{
+ int ret = 0;
+
+ ret = gk20a_pm_finalize_poweron(dev);
+ if (ret)
+ return ret;
+
+ gk20a_scale_resume(to_platform_device(dev));
+
+ return 0;
+}
+
+static int gk20a_pm_initialise_domain(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ struct dev_power_governor *pm_domain_gov = NULL;
+ struct generic_pm_domain *domain = &platform->g->pd;
+ int ret = 0;
+
+ domain->name = kstrdup(pdev->name, GFP_KERNEL);
+
+ if (!platform->can_railgate)
+ pm_domain_gov = &pm_domain_always_on_gov;
+
+ pm_genpd_init(domain, pm_domain_gov, true);
+
+ domain->power_off = gk20a_pm_railgate;
+ domain->power_on = gk20a_pm_unrailgate;
+ domain->dev_ops.start = gk20a_pm_enable_clk;
+ domain->dev_ops.stop = gk20a_pm_disable_clk;
+ domain->dev_ops.save_state = gk20a_pm_prepare_poweroff;
+ domain->dev_ops.restore_state = gk20a_pm_finalize_poweron;
+ domain->dev_ops.suspend = gk20a_pm_suspend;
+ domain->dev_ops.resume = gk20a_pm_resume;
+
+ device_set_wakeup_capable(&pdev->dev, 0);
+ ret = pm_genpd_add_device(domain, &pdev->dev);
+
+ if (platform->railgate_delay)
+ pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);
+
+ return ret;
+}
+
+static int gk20a_pm_init(struct platform_device *dev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(dev);
+ int err = 0;
+
+ /* Initialise pm runtime */
+ if (platform->clockgate_delay) {
+ pm_runtime_set_autosuspend_delay(&dev->dev,
+ platform->clockgate_delay);
+ pm_runtime_use_autosuspend(&dev->dev);
+ }
+
+ pm_runtime_enable(&dev->dev);
+ if (!pm_runtime_enabled(&dev->dev))
+ gk20a_pm_enable_clk(&dev->dev);
+
+ /* Enable runtime railgating if possible. If not,
+ * turn on the rail now. */
+ if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
+ platform->railgate(dev);
+ else
+ platform->unrailgate(dev);
+
+ /* genpd will take care of runtime power management if it is enabled */
+ if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
+ err = gk20a_pm_initialise_domain(dev);
+
+ return err;
+}
+
+static int gk20a_probe(struct platform_device *dev)
+{
+ struct gk20a *gk20a;
+ int err;
+ struct gk20a_platform *platform = NULL;
+ struct cooling_device_gk20a *gpu_cdev = NULL;
+
+ if (dev->dev.of_node) {
+ const struct of_device_id *match;
+
+ match = of_match_device(tegra_gk20a_of_match, &dev->dev);
+ if (match)
+ platform = (struct gk20a_platform *)match->data;
+ } else
+ platform = (struct gk20a_platform *)dev->dev.platform_data;
+
+ if (!platform) {
+ dev_err(&dev->dev, "no platform data\n");
+ return -ENODATA;
+ }
+
+ gk20a_dbg_fn("");
+
+ platform_set_drvdata(dev, platform);
+
+ gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
+ if (!gk20a) {
+ dev_err(&dev->dev, "couldn't allocate gk20a support");
+ return -ENOMEM;
+ }
+
+ set_gk20a(dev, gk20a);
+ gk20a->dev = dev;
+
+ err = gk20a_user_init(dev);
+ if (err)
+ return err;
+
+ gk20a_init_support(dev);
+
+ spin_lock_init(&gk20a->mc_enable_lock);
+
+ /* Initialize the platform interface. */
+ err = platform->probe(dev);
+ if (err) {
+ dev_err(&dev->dev, "platform probe failed");
+ return err;
+ }
+
+ err = gk20a_pm_init(dev);
+ if (err) {
+ dev_err(&dev->dev, "pm init failed");
+ return err;
+ }
+
+ /* Initialise scaling */
+ if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
+ gk20a_scale_init(dev);
+
+ if (platform->late_probe) {
+ err = platform->late_probe(dev);
+ if (err) {
+ dev_err(&dev->dev, "late probe failed");
+ return err;
+ }
+ }
+
+ gk20a_debug_init(dev);
+
+ /* Set DMA parameters to allow larger sgt lists */
+ dev->dev.dma_parms = &gk20a->dma_parms;
+ dma_set_max_seg_size(&dev->dev, UINT_MAX);
+
+ gpu_cdev = &gk20a->gk20a_cdev;
+ gpu_cdev->gk20a_freq_table_size = tegra_gpufreq_table_size_get();
+ gpu_cdev->gk20a_freq_state = 0;
+ gpu_cdev->g = gk20a;
+ gpu_cdev->gk20a_cooling_dev = thermal_cooling_device_register("gk20a_cdev", gpu_cdev,
+ &tegra_gpu_cooling_ops);
+
+ gk20a->gr_idle_timeout_default =
+ CONFIG_GK20A_DEFAULT_TIMEOUT;
+ gk20a->timeouts_enabled = true;
+
+ /* Set up initial clock gating settings */
+ if (tegra_platform_is_silicon()) {
+ gk20a->slcg_enabled = true;
+ gk20a->blcg_enabled = true;
+ gk20a->elcg_enabled = true;
+ gk20a->elpg_enabled = true;
+ gk20a->aelpg_enabled = true;
+ }
+
+ gk20a_create_sysfs(dev);
+
+#ifdef CONFIG_DEBUG_FS
+ clk_gk20a_debugfs_init(dev);
+
+ spin_lock_init(&gk20a->debugfs_lock);
+ gk20a->mm.ltc_enabled = true;
+ gk20a->mm.ltc_enabled_debug = true;
+ gk20a->debugfs_ltc_enabled =
+ debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
+ platform->debugfs,
+ &gk20a->mm.ltc_enabled_debug);
+ gk20a->mm.ltc_enabled_debug = true;
+ gk20a->debugfs_gr_idle_timeout_default =
+ debugfs_create_u32("gr_idle_timeout_default_us",
+ S_IRUGO|S_IWUSR, platform->debugfs,
+ &gk20a->gr_idle_timeout_default);
+ gk20a->debugfs_timeouts_enabled =
+ debugfs_create_bool("timeouts_enabled",
+ S_IRUGO|S_IWUSR,
+ platform->debugfs,
+ &gk20a->timeouts_enabled);
+ gk20a_pmu_debugfs_init(dev);
+#endif
+
+#ifdef CONFIG_INPUT_CFBOOST
+ cfb_add_device(&dev->dev);
+#endif
+
+ return 0;
+}
+
+static int __exit gk20a_remove(struct platform_device *dev)
+{
+ struct gk20a *g = get_gk20a(dev);
+ gk20a_dbg_fn("");
+
+#ifdef CONFIG_INPUT_CFBOOST
+ cfb_remove_device(&dev->dev);
+#endif
+
+ if (g->remove_support)
+ g->remove_support(dev);
+
+ gk20a_user_deinit(dev);
+
+ set_gk20a(dev, 0);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove(g->debugfs_ltc_enabled);
+ debugfs_remove(g->debugfs_gr_idle_timeout_default);
+ debugfs_remove(g->debugfs_timeouts_enabled);
+#endif
+
+ kfree(g);
+
+#ifdef CONFIG_PM_RUNTIME
+ pm_runtime_put(&dev->dev);
+ pm_runtime_disable(&dev->dev);
+#else
+ nvhost_module_disable_clk(&dev->dev);
+#endif
+
+ return 0;
+}
+
+static struct platform_driver gk20a_driver = {
+ .probe = gk20a_probe,
+ .remove = __exit_p(gk20a_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "gk20a",
+#ifdef CONFIG_OF
+ .of_match_table = tegra_gk20a_of_match,
+#endif
+#ifdef CONFIG_PM
+ .pm = &gk20a_pm_ops,
+#endif
+ }
+};
+
+static int __init gk20a_init(void)
+{
+ return platform_driver_register(&gk20a_driver);
+}
+
+static void __exit gk20a_exit(void)
+{
+ platform_driver_unregister(&gk20a_driver);
+}
+
+bool is_gk20a_module(struct platform_device *dev)
+{
+ return &gk20a_driver.driver == dev->dev.driver;
+}
+
+void gk20a_busy_noresume(struct platform_device *pdev)
+{
+ pm_runtime_get_noresume(&pdev->dev);
+}
+
+int gk20a_channel_busy(struct platform_device *pdev)
+{
+ int ret = 0;
+
+ ret = gk20a_platform_channel_busy(pdev);
+ if (ret)
+ return ret;
+
+ ret = gk20a_busy(pdev);
+ if (ret)
+ gk20a_platform_channel_idle(pdev);
+
+ return ret;
+}
+
+void gk20a_channel_idle(struct platform_device *pdev)
+{
+ gk20a_idle(pdev);
+ gk20a_platform_channel_idle(pdev);
+}
+
+int gk20a_busy(struct platform_device *pdev)
+{
+ int ret = 0;
+
+#ifdef CONFIG_PM_RUNTIME
+ ret = pm_runtime_get_sync(&pdev->dev);
+#endif
+ gk20a_scale_notify_busy(pdev);
+
+ return ret < 0 ? ret : 0;
+}
+
+void gk20a_idle(struct platform_device *pdev)
+{
+#ifdef CONFIG_PM_RUNTIME
+ if (atomic_read(&pdev->dev.power.usage_count) == 1)
+ gk20a_scale_notify_idle(pdev);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_sync_autosuspend(&pdev->dev);
+#else
+ gk20a_scale_notify_idle(pdev);
+#endif
+}
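gk20a_busy()/gk20a_idle() bracket any access that needs the GPU powered: they take and drop the runtime PM reference and notify the scaling code. A hypothetical caller, sketch only (the register read is just an example):

/* Hypothetical caller: keep the GPU powered across a register access.
 * gk20a_busy() and gk20a_idle() must always be paired. */
static int example_read_boot_reg(struct gk20a *g, u32 *val)
{
	int err = gk20a_busy(g->dev);

	if (err)
		return err;

	*val = gk20a_readl(g, mc_boot_0_r());	/* any BAR0 access works here */

	gk20a_idle(g->dev);
	return 0;
}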
+
+void gk20a_disable(struct gk20a *g, u32 units)
+{
+ u32 pmc;
+
+ gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);
+
+ spin_lock(&g->mc_enable_lock);
+ pmc = gk20a_readl(g, mc_enable_r());
+ pmc &= ~units;
+ gk20a_writel(g, mc_enable_r(), pmc);
+ spin_unlock(&g->mc_enable_lock);
+}
+
+void gk20a_enable(struct gk20a *g, u32 units)
+{
+ u32 pmc;
+
+ gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);
+
+ spin_lock(&g->mc_enable_lock);
+ pmc = gk20a_readl(g, mc_enable_r());
+ pmc |= units;
+ gk20a_writel(g, mc_enable_r(), pmc);
+ spin_unlock(&g->mc_enable_lock);
+ gk20a_readl(g, mc_enable_r());
+
+ udelay(20);
+}
+
+void gk20a_reset(struct gk20a *g, u32 units)
+{
+ gk20a_disable(g, units);
+ udelay(20);
+ gk20a_enable(g, units);
+}
+
+int gk20a_init_gpu_characteristics(struct gk20a *g)
+{
+ struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
+
+ gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
+ gpu->on_board_video_memory_size = 0; /* integrated GPU */
+
+ gpu->num_gpc = g->gr.gpc_count;
+ gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
+
+ gpu->bus_type = NVHOST_GPU_BUS_TYPE_AXI; /* always AXI for now */
+
+ gpu->big_page_size = g->mm.big_page_size;
+ gpu->compression_page_size = g->mm.compression_page_size;
+
+ return 0;
+}
+
+int nvhost_vpr_info_fetch(void)
+{
+ struct gk20a *g = get_gk20a(to_platform_device(
+ bus_find_device_by_name(&platform_bus_type,
+ NULL, "gk20a.0")));
+
+ if (!g) {
+ pr_info("gk20a ins't ready yet\n");
+ return 0;
+ }
+
+ return gk20a_mm_mmu_vpr_info_fetch(g);
+}
+
+static const struct firmware *
+do_request_firmware(struct device *dev, const char *prefix, const char *fw_name)
+{
+ const struct firmware *fw;
+ char *fw_path = NULL;
+ int path_len, err;
+
+ if (prefix) {
+ path_len = strlen(prefix) + strlen(fw_name);
+ path_len += 2; /* for the path separator and the zero terminator */
+
+ fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL);
+ if (!fw_path)
+ return NULL;
+
+ sprintf(fw_path, "%s/%s", prefix, fw_name);
+ fw_name = fw_path;
+ }
+
+ err = request_firmware(&fw, fw_name, dev);
+ kfree(fw_path);
+ if (err)
+ return NULL;
+ return fw;
+}
+
+/* This is a simple wrapper around request_firmware that takes 'fw_name' and
+ * applies an IP-specific relative path prefix to it. The caller is
+ * responsible for calling release_firmware later. */
+const struct firmware *
+gk20a_request_firmware(struct gk20a *g, const char *fw_name)
+{
+ struct device *dev = &g->dev->dev;
+ const struct firmware *fw;
+
+ /* current->fs is NULL when called from SYS_EXIT.
+ Check for that here to prevent a crash in request_firmware. */
+ if (!current->fs || !fw_name)
+ return NULL;
+
+ BUG_ON(!g->ops.name);
+ fw = do_request_firmware(dev, g->ops.name, fw_name);
+
+#ifdef CONFIG_TEGRA_GK20A
+ /* TO BE REMOVED - Support loading from legacy SOC specific path. */
+ if (!fw)
+ fw = nvhost_client_request_firmware(g->dev, fw_name);
+#endif
+
+ if (!fw) {
+ dev_err(dev, "failed to get firmware\n");
+ return NULL;
+ }
+
+ return fw;
+}
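The caller owns the returned blob and must pair the request with release_firmware(). A sketch of a hypothetical caller; the firmware file name is illustrative, not a real blob shipped with this patch:

/* Hypothetical caller; "example_ucode.bin" is an illustrative name. */
static int example_load_blob(struct gk20a *g)
{
	const struct firmware *fw;

	fw = gk20a_request_firmware(g, "example_ucode.bin");
	if (!fw)
		return -ENOENT;

	/* ... consume fw->data (fw->size bytes) ... */

	release_firmware(fw);
	return 0;
}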
+
+module_init(gk20a_init);
+module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
new file mode 100644
index 000000000000..a9081a9dfb1c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -0,0 +1,559 @@
+/*
+ * drivers/video/tegra/host/gk20a/gk20a.h
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _NVHOST_GK20A_H_
+#define _NVHOST_GK20A_H_
+
+
+struct gk20a;
+struct fifo_gk20a;
+struct channel_gk20a;
+struct gr_gk20a;
+struct sim_gk20a;
+
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/nvhost_gpu_ioctl.h>
+#include <linux/tegra-soc.h>
+
+#include "../../../arch/arm/mach-tegra/iomap.h"
+
+#include "as_gk20a.h"
+#include "clk_gk20a.h"
+#include "fifo_gk20a.h"
+#include "gr_gk20a.h"
+#include "sim_gk20a.h"
+#include "pmu_gk20a.h"
+#include "priv_ring_gk20a.h"
+#include "therm_gk20a.h"
+#include "platform_gk20a.h"
+
+extern struct platform_device tegra_gk20a_device;
+
+bool is_gk20a_module(struct platform_device *dev);
+
+struct cooling_device_gk20a {
+ struct thermal_cooling_device *gk20a_cooling_dev;
+ unsigned int gk20a_freq_state;
+ unsigned int gk20a_freq_table_size;
+ struct gk20a *g;
+};
+
+struct gpu_ops {
+ struct {
+ int (*determine_L2_size_bytes)(struct gk20a *gk20a);
+ void (*set_max_ways_evict_last)(struct gk20a *g, u32 max_ways);
+ int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr);
+ int (*clear_comptags)(struct gk20a *g, u32 min, u32 max);
+ void (*set_zbc_color_entry)(struct gk20a *g,
+ struct zbc_entry *color_val,
+ u32 index);
+ void (*set_zbc_depth_entry)(struct gk20a *g,
+ struct zbc_entry *depth_val,
+ u32 index);
+ void (*clear_zbc_color_entry)(struct gk20a *g, u32 index);
+ void (*clear_zbc_depth_entry)(struct gk20a *g, u32 index);
+ int (*init_zbc)(struct gk20a *g, struct gr_gk20a *gr);
+ void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr);
+ void (*sync_debugfs)(struct gk20a *g);
+ void (*elpg_flush)(struct gk20a *g);
+ } ltc;
+ struct {
+ int (*init_fs_state)(struct gk20a *g);
+ void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
+ void (*bundle_cb_defaults)(struct gk20a *g);
+ void (*cb_size_default)(struct gk20a *g);
+ int (*calc_global_ctx_buffer_size)(struct gk20a *g);
+ void (*commit_global_attrib_cb)(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, bool patch);
+ void (*commit_global_bundle_cb)(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, u64 size, bool patch);
+ int (*commit_global_cb_manager)(struct gk20a *g,
+ struct channel_gk20a *ch,
+ bool patch);
+ void (*commit_global_pagepool)(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, u32 size, bool patch);
+ void (*init_gpc_mmu)(struct gk20a *g);
+ int (*handle_sw_method)(struct gk20a *g, u32 addr,
+ u32 class_num, u32 offset, u32 data);
+ void (*set_alpha_circular_buffer_size)(struct gk20a *g,
+ u32 data);
+ void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
+ void (*enable_hww_exceptions)(struct gk20a *g);
+ bool (*is_valid_class)(struct gk20a *g, u32 class_num);
+ void (*get_sm_dsm_perf_regs)(struct gk20a *g,
+ u32 *num_sm_dsm_perf_regs,
+ u32 **sm_dsm_perf_regs,
+ u32 *perf_register_stride);
+ void (*get_sm_dsm_perf_ctrl_regs)(struct gk20a *g,
+ u32 *num_sm_dsm_perf_regs,
+ u32 **sm_dsm_perf_regs,
+ u32 *perf_register_stride);
+ void (*set_hww_esr_report_mask)(struct gk20a *g);
+ int (*setup_alpha_beta_tables)(struct gk20a *g,
+ struct gr_gk20a *gr);
+ } gr;
+ const char *name;
+ struct {
+ void (*init_fs_state)(struct gk20a *g);
+ void (*reset)(struct gk20a *g);
+ void (*init_uncompressed_kind_map)(struct gk20a *g);
+ void (*init_kind_attr)(struct gk20a *g);
+ } fb;
+ struct {
+ void (*slcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
+ void (*slcg_perf_load_gating_prod)(struct gk20a *g, bool prod);
+ void (*blcg_gr_load_gating_prod)(struct gk20a *g, bool prod);
+ void (*pg_gr_load_gating_prod)(struct gk20a *g, bool prod);
+ void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod);
+ } clock_gating;
+ struct {
+ void (*bind_channel)(struct channel_gk20a *ch_gk20a);
+ } fifo;
+ struct pmu_v {
+ /* used when the zbc table update cmd id changed from ver 0 to ver 1 */
+ u32 cmd_id_zbc_table_update;
+ u32 (*get_pmu_cmdline_args_size)(struct pmu_gk20a *pmu);
+ void (*set_pmu_cmdline_args_cpu_freq)(struct pmu_gk20a *pmu,
+ u32 freq);
+ void * (*get_pmu_cmdline_args_ptr)(struct pmu_gk20a *pmu);
+ u32 (*get_pmu_allocation_struct_size)(struct pmu_gk20a *pmu);
+ void (*set_pmu_allocation_ptr)(struct pmu_gk20a *pmu,
+ void **pmu_alloc_ptr, void *assign_ptr);
+ void (*pmu_allocation_set_dmem_size)(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u16 size);
+ u16 (*pmu_allocation_get_dmem_size)(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr);
+ u32 (*pmu_allocation_get_dmem_offset)(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr);
+ u32 * (*pmu_allocation_get_dmem_offset_addr)(
+ struct pmu_gk20a *pmu, void *pmu_alloc_ptr);
+ void (*pmu_allocation_set_dmem_offset)(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u32 offset);
+ void (*get_pmu_init_msg_pmu_queue_params)(
+ struct pmu_queue *queue, u32 id,
+ void *pmu_init_msg);
+ void *(*get_pmu_msg_pmu_init_msg_ptr)(
+ struct pmu_init_msg *init);
+ u16 (*get_pmu_init_msg_pmu_sw_mg_off)(
+ union pmu_init_msg_pmu *init_msg);
+ u16 (*get_pmu_init_msg_pmu_sw_mg_size)(
+ union pmu_init_msg_pmu *init_msg);
+ u32 (*get_pmu_perfmon_cmd_start_size)(void);
+ int (*get_perfmon_cmd_start_offsetofvar)(
+ enum pmu_perfmon_cmd_start_fields field);
+ void (*perfmon_start_set_cmd_type)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ void (*perfmon_start_set_group_id)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ void (*perfmon_start_set_state_id)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ void (*perfmon_start_set_flags)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ u8 (*perfmon_start_get_flags)(struct pmu_perfmon_cmd *pc);
+ u32 (*get_pmu_perfmon_cmd_init_size)(void);
+ int (*get_perfmon_cmd_init_offsetofvar)(
+ enum pmu_perfmon_cmd_start_fields field);
+ void (*perfmon_cmd_init_set_sample_buffer)(
+ struct pmu_perfmon_cmd *pc, u16 value);
+ void (*perfmon_cmd_init_set_dec_cnt)(
+ struct pmu_perfmon_cmd *pc, u8 value);
+ void (*perfmon_cmd_init_set_base_cnt_id)(
+ struct pmu_perfmon_cmd *pc, u8 value);
+ void (*perfmon_cmd_init_set_samp_period_us)(
+ struct pmu_perfmon_cmd *pc, u32 value);
+ void (*perfmon_cmd_init_set_num_cnt)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ void (*perfmon_cmd_init_set_mov_avg)(struct pmu_perfmon_cmd *pc,
+ u8 value);
+ void *(*get_pmu_seq_in_a_ptr)(
+ struct pmu_sequence *seq);
+ void *(*get_pmu_seq_out_a_ptr)(
+ struct pmu_sequence *seq);
+ } pmu_ver;
+};
+
+struct gk20a {
+ struct platform_device *dev;
+
+ struct resource *reg_mem;
+ void __iomem *regs;
+
+ struct resource *bar1_mem;
+ void __iomem *bar1;
+
+ bool power_on;
+ bool irq_requested;
+
+ struct clk_gk20a clk;
+ struct fifo_gk20a fifo;
+ struct gr_gk20a gr;
+ struct sim_gk20a sim;
+ struct mm_gk20a mm;
+ struct pmu_gk20a pmu;
+ struct cooling_device_gk20a gk20a_cdev;
+
+ /* Save the pmu fw here so that it lives across suspend/resume;
+ pmu suspend destroys all pmu sw/hw state. Loading the pmu
+ fw on resume crashes when the resume is from sys_exit. */
+ const struct firmware *pmu_fw;
+
+ u32 gr_idle_timeout_default;
+ u32 timeouts_enabled;
+
+ bool slcg_enabled;
+ bool blcg_enabled;
+ bool elcg_enabled;
+ bool elpg_enabled;
+ bool aelpg_enabled;
+
+#ifdef CONFIG_DEBUG_FS
+ spinlock_t debugfs_lock;
+ struct dentry *debugfs_ltc_enabled;
+ struct dentry *debugfs_timeouts_enabled;
+ struct dentry *debugfs_gr_idle_timeout_default;
+#endif
+ struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
+
+ /* held while manipulating # of debug/profiler sessions present */
+ /* also prevents debug sessions from attaching until released */
+ struct mutex dbg_sessions_lock;
+ int dbg_sessions; /* number attached */
+ int dbg_powergating_disabled_refcount; /* refcount for pg disable */
+
+ void (*remove_support)(struct platform_device *);
+
+ u64 pg_ingating_time_us;
+ u64 pg_ungating_time_us;
+ u32 pg_gating_cnt;
+
+ spinlock_t mc_enable_lock;
+
+ struct nvhost_gpu_characteristics gpu_characteristics;
+
+ struct {
+ struct cdev cdev;
+ struct device *node;
+ } channel;
+
+ struct gk20a_as as;
+
+ struct {
+ struct cdev cdev;
+ struct device *node;
+ } ctrl;
+
+ struct {
+ struct cdev cdev;
+ struct device *node;
+ } dbg;
+
+ struct {
+ struct cdev cdev;
+ struct device *node;
+ } prof;
+
+ struct mutex client_lock;
+ int client_refcount; /* open channels and ctrl nodes */
+
+ dev_t cdev_region;
+ struct class *class;
+
+ struct gpu_ops ops;
+
+ int irq_stall;
+ int irq_nonstall;
+
+ struct generic_pm_domain pd;
+
+ struct devfreq *devfreq;
+
+ struct gk20a_scale_profile *scale_profile;
+
+ struct device_dma_parameters dma_parms;
+};
+
+static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
+{
+ return g->timeouts_enabled ?
+ g->gr_idle_timeout_default : MAX_SCHEDULE_TIMEOUT;
+}
+
+static inline struct gk20a *get_gk20a(struct platform_device *dev)
+{
+ return gk20a_get_platform(dev)->g;
+}
+
+enum BAR0_DEBUG_OPERATION {
+ BARO_ZERO_NOP = 0,
+ OP_END = 'DONE',
+ BAR0_READ32 = '0R32',
+ BAR0_WRITE32 = '0W32',
+};
+
+struct share_buffer_head {
+ enum BAR0_DEBUG_OPERATION operation;
+/* size of the operation item */
+ u32 size;
+ u32 completed;
+ u32 failed;
+ u64 context;
+ u64 completion_callback;
+};
+
+struct gk20a_cyclestate_buffer_elem {
+ struct share_buffer_head head;
+/* in */
+ u64 p_data;
+ u64 p_done;
+ u32 offset_bar0;
+ u16 first_bit;
+ u16 last_bit;
+/* out */
+/* keep 64 bits to be consistent */
+ u64 data;
+};
+
+/* debug accessories */
+
+#ifdef CONFIG_DEBUG_FS
+ /* debug info, default is compiled-in but effectively disabled (0 mask) */
+ #define GK20A_DEBUG
+ /* e.g.: echo 1 > /d/tegra_host/dbg_mask */
+ #define GK20A_DEFAULT_DBG_MASK 0
+#else
+ /* to enable, manually define the macro below and turn on the mask */
+ /*#define NVHOST_DEBUG*/
+ #define GK20A_DEFAULT_DBG_MASK (dbg_info)
+#endif
+
+enum gk20a_dbg_categories {
+ gpu_dbg_info = BIT(0), /* lightly verbose info */
+ gpu_dbg_fn = BIT(2), /* fn name tracing */
+ gpu_dbg_reg = BIT(3), /* register accesses, very verbose */
+ gpu_dbg_pte = BIT(4), /* gmmu ptes */
+ gpu_dbg_intr = BIT(5), /* interrupts */
+ gpu_dbg_pmu = BIT(6), /* gk20a pmu */
+ gpu_dbg_clk = BIT(7), /* gk20a clk */
+ gpu_dbg_map = BIT(8), /* mem mappings */
+ gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
+ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
+};
+
+#if defined(GK20A_DEBUG)
+extern u32 gk20a_dbg_mask;
+extern u32 gk20a_dbg_ftrace;
+#define gk20a_dbg(dbg_mask, format, arg...) \
+do { \
+ if (unlikely((dbg_mask) & gk20a_dbg_mask)) { \
+ if (gk20a_dbg_ftrace) \
+ trace_printk(format "\n", ##arg); \
+ else \
+ pr_info("gk20a %s: " format "\n", \
+ __func__, ##arg); \
+ } \
+} while (0)
+
+#else /* GK20A_DEBUG */
+#define gk20a_dbg(dbg_mask, format, arg...) \
+do { \
+ if (0) \
+ pr_info("gk20a %s: " format "\n", __func__, ##arg);\
+} while (0)
+
+#endif
+
+#define gk20a_err(d, fmt, arg...) \
+ dev_err(d, "%s: " fmt "\n", __func__, ##arg)
+
+#define gk20a_warn(d, fmt, arg...) \
+ dev_warn(d, "%s: " fmt "\n", __func__, ##arg)
+
+#define gk20a_dbg_fn(fmt, arg...) \
+ gk20a_dbg(gpu_dbg_fn, fmt, ##arg)
+
+#define gk20a_dbg_info(fmt, arg...) \
+ gk20a_dbg(gpu_dbg_info, fmt, ##arg)
+
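The categories above are plain bit flags, so the runtime mask is just the OR of the BIT() values; for instance gpu_dbg_intr is BIT(5) = 32, which can be written to the debugfs mask noted above. A tiny usage sketch (the helper is illustrative, not part of the patch):

/* Sketch: writing 32 (0x20) to the debugfs mask enables only the
 * gpu_dbg_intr category, making traces like this one visible. */
static inline void example_trace_intr(u32 mc_intr)
{
	gk20a_dbg(gpu_dbg_intr, "pending intr 0x%08x", mc_intr);
}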
+/* mem access with dbg_mem logging */
+static inline u8 gk20a_mem_rd08(void *ptr, int b)
+{
+ u8 _b = ((const u8 *)ptr)[b];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b);
+#endif
+ return _b;
+}
+static inline u16 gk20a_mem_rd16(void *ptr, int s)
+{
+ u16 _s = ((const u16 *)ptr)[s];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s);
+#endif
+ return _s;
+}
+static inline u32 gk20a_mem_rd32(void *ptr, int w)
+{
+ u32 _w = ((const u32 *)ptr)[w];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w);
+#endif
+ return _w;
+}
+static inline void gk20a_mem_wr08(void *ptr, int b, u8 data)
+{
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data);
+#endif
+ ((u8 *)ptr)[b] = data;
+}
+static inline void gk20a_mem_wr16(void *ptr, int s, u16 data)
+{
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data);
+#endif
+ ((u16 *)ptr)[s] = data;
+}
+static inline void gk20a_mem_wr32(void *ptr, int w, u32 data)
+{
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+ gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data);
+#endif
+ ((u32 *)ptr)[w] = data;
+}
+
+/* register accessors */
+static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v)
+{
+ gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v);
+ writel(v, g->regs + r);
+}
+static inline u32 gk20a_readl(struct gk20a *g, u32 r)
+{
+ u32 v = readl(g->regs + r);
+ gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v);
+ return v;
+}
+
+static inline void gk20a_bar1_writel(struct gk20a *g, u32 b, u32 v)
+{
+ gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v);
+ writel(v, g->bar1 + b);
+}
+
+static inline u32 gk20a_bar1_readl(struct gk20a *g, u32 b)
+{
+ u32 v = readl(g->bar1 + b);
+ gk20a_dbg(gpu_dbg_reg, " b=0x%x v=0x%x", b, v);
+ return v;
+}
+
+/* convenience */
+static inline struct device *dev_from_gk20a(struct gk20a *g)
+{
+ return &g->dev->dev;
+}
+static inline struct gk20a *gk20a_from_as(struct gk20a_as *as)
+{
+ return container_of(as, struct gk20a, as);
+}
+static inline u32 u64_hi32(u64 n)
+{
+ return (u32)((n >> 32) & ~(u32)0);
+}
+
+static inline u32 u64_lo32(u64 n)
+{
+ return (u32)(n & ~(u32)0);
+}
+
+static inline u32 set_field(u32 val, u32 mask, u32 field)
+{
+ return ((val & ~mask) | field);
+}
+
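set_field() is meant for read-modify-write of registers together with the generated _m() masks. A sketch, assuming the hw_bus accessors used earlier in this patch are in scope; setting a field to its full mask value enables it:

/* Sketch: enable the pri timeout interrupt with a read-modify-write.
 * Assumes bus_intr_en_0_r()/bus_intr_en_0_pri_timeout_m() are in scope. */
static void example_enable_pri_timeout_intr(struct gk20a *g)
{
	u32 v = gk20a_readl(g, bus_intr_en_0_r());

	/* setting the field to its full mask value turns it on */
	v = set_field(v, bus_intr_en_0_pri_timeout_m(),
		      bus_intr_en_0_pri_timeout_m());
	gk20a_writel(g, bus_intr_en_0_r(), v);
}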
+/* invalidate channel lookup tlb */
+static inline void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr)
+{
+ spin_lock(&gr->ch_tlb_lock);
+ memset(gr->chid_tlb, 0,
+ sizeof(struct gr_channel_map_tlb_entry) *
+ GR_CHANNEL_MAP_TLB_SIZE);
+ spin_unlock(&gr->ch_tlb_lock);
+}
+
+/* classes that the device supports */
+/* TBD: get these from an open-sourced SDK? */
+enum {
+ KEPLER_C = 0xA297,
+ FERMI_TWOD_A = 0x902D,
+ KEPLER_COMPUTE_A = 0xA0C0,
+ KEPLER_INLINE_TO_MEMORY_A = 0xA040,
+ KEPLER_DMA_COPY_A = 0xA0B5, /* not sure about this one */
+};
+
+#if defined(CONFIG_GK20A_PMU)
+static inline int support_gk20a_pmu(void)
+{
+ return 1;
+}
+#else
+static inline int support_gk20a_pmu(void){return 0;}
+#endif
+
+void gk20a_create_sysfs(struct platform_device *dev);
+
+#ifdef CONFIG_DEBUG_FS
+int clk_gk20a_debugfs_init(struct platform_device *dev);
+#endif
+
+#define GK20A_BAR0_IORESOURCE_MEM 0
+#define GK20A_BAR1_IORESOURCE_MEM 1
+#define GK20A_SIM_IORESOURCE_MEM 2
+
+void gk20a_busy_noresume(struct platform_device *pdev);
+int gk20a_busy(struct platform_device *pdev);
+void gk20a_idle(struct platform_device *pdev);
+int gk20a_channel_busy(struct platform_device *pdev);
+void gk20a_channel_idle(struct platform_device *pdev);
+void gk20a_disable(struct gk20a *g, u32 units);
+void gk20a_enable(struct gk20a *g, u32 units);
+void gk20a_reset(struct gk20a *g, u32 units);
+int gk20a_get_client(struct gk20a *g);
+void gk20a_put_client(struct gk20a *g);
+
+const struct firmware *
+gk20a_request_firmware(struct gk20a *g, const char *fw_name);
+
+#define NVHOST_GPU_ARCHITECTURE_SHIFT 4
+
+/* constructs unique and compact GPUID from nvhost_gpu_characteristics
+ * arch/impl fields */
+#define GK20A_GPUID(arch, impl) ((u32) ((arch) | (impl)))
+
+#define GK20A_GPUID_GK20A \
+ GK20A_GPUID(NVHOST_GPU_ARCH_GK100, NVHOST_GPU_IMPL_GK20A)
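GK20A_GPUID() simply ORs the already-shifted architecture field with the implementation field, matching how gk20a_detect_chip() fills gpu_characteristics. A small illustrative check; the helper name and the concrete 0xE0/0xA values are assumptions, not taken from this patch:

/* Illustrative only: if gpu->arch were 0xE0 (already shifted left by
 * NVHOST_GPU_ARCHITECTURE_SHIFT) and gpu->impl were 0xA, the composed
 * id would be 0xEA. */
static inline bool example_is_gk20a(struct nvhost_gpu_characteristics *gpu)
{
	return GK20A_GPUID(gpu->arch, gpu->impl) == GK20A_GPUID_GK20A;
}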
+
+int gk20a_init_gpu_characteristics(struct gk20a *g);
+
+#endif /* _NVHOST_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
new file mode 100644
index 000000000000..32c003b655a6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -0,0 +1,1247 @@
+/*
+ * gk20a allocator
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gk20a_allocator.h"
+
+static inline void link_block_list(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev,
+ struct rb_node *rb_parent);
+static inline void link_block_rb(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct rb_node **rb_link,
+ struct rb_node *rb_parent);
+static void link_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev, struct rb_node **rb_link,
+ struct rb_node *rb_parent);
+static void insert_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block);
+
+static void unlink_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev);
+static struct gk20a_alloc_block *unlink_blocks(
+ struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev, u32 end);
+
+static struct gk20a_alloc_block *find_block(
+ struct gk20a_allocator *allocator, u32 addr);
+static struct gk20a_alloc_block *find_block_prev(
+ struct gk20a_allocator *allocator, u32 addr,
+ struct gk20a_alloc_block **pprev);
+static struct gk20a_alloc_block *find_block_prepare(
+ struct gk20a_allocator *allocator, u32 addr,
+ struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
+ struct rb_node **rb_parent);
+
+static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align);
+static void update_free_addr_cache(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ u32 addr, u32 len, bool free);
+static int find_free_area(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len);
+static int find_free_area_nc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 *len);
+
+static void adjust_block(struct gk20a_alloc_block *block,
+ u32 start, u32 end,
+ struct gk20a_alloc_block *insert);
+static struct gk20a_alloc_block *merge_block(
+ struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block, u32 addr, u32 end);
+static int split_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ u32 addr, int new_below);
+
+static int block_alloc_single_locked(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len);
+static int block_alloc_list_locked(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len,
+ struct gk20a_alloc_block **pblock);
+static int block_free_locked(struct gk20a_allocator *allocator,
+ u32 addr, u32 len);
+static void block_free_list_locked(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *list);
+
+/* link a block into allocator block list */
+static inline void link_block_list(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev,
+ struct rb_node *rb_parent)
+{
+ struct gk20a_alloc_block *next;
+
+ block->prev = prev;
+ if (prev) {
+ next = prev->next;
+ prev->next = block;
+ } else {
+ allocator->block_first = block;
+ if (rb_parent)
+ next = rb_entry(rb_parent,
+ struct gk20a_alloc_block, rb);
+ else
+ next = NULL;
+ }
+ block->next = next;
+ if (next)
+ next->prev = block;
+}
+
+/* link a block into allocator rb tree */
+static inline void link_block_rb(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block, struct rb_node **rb_link,
+ struct rb_node *rb_parent)
+{
+ rb_link_node(&block->rb, rb_parent, rb_link);
+ rb_insert_color(&block->rb, &allocator->rb_root);
+}
+
+/* add a block to allocator with known location */
+static void link_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev, struct rb_node **rb_link,
+ struct rb_node *rb_parent)
+{
+ struct gk20a_alloc_block *next;
+
+ link_block_list(allocator, block, prev, rb_parent);
+ link_block_rb(allocator, block, rb_link, rb_parent);
+ allocator->block_count++;
+
+ next = block->next;
+ allocator_dbg(allocator, "link new block %d:%d between block %d:%d and block %d:%d",
+ block->start, block->end,
+ prev ? prev->start : -1, prev ? prev->end : -1,
+ next ? next->start : -1, next ? next->end : -1);
+}
+
+/* add a block to allocator */
+static void insert_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block)
+{
+ struct gk20a_alloc_block *prev;
+ struct rb_node **rb_link, *rb_parent;
+
+ find_block_prepare(allocator, block->start,
+ &prev, &rb_link, &rb_parent);
+ link_block(allocator, block, prev, rb_link, rb_parent);
+}
+
+/* remove a block from allocator */
+static void unlink_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev)
+{
+ struct gk20a_alloc_block *next = block->next;
+
+ allocator_dbg(allocator, "unlink block %d:%d between block %d:%d and block %d:%d",
+ block->start, block->end,
+ prev ? prev->start : -1, prev ? prev->end : -1,
+ next ? next->start : -1, next ? next->end : -1);
+
+ BUG_ON(block->start < allocator->base);
+ BUG_ON(block->end > allocator->limit);
+
+ if (prev)
+ prev->next = next;
+ else
+ allocator->block_first = next;
+
+ if (next)
+ next->prev = prev;
+ rb_erase(&block->rb, &allocator->rb_root);
+ if (allocator->block_recent == block)
+ allocator->block_recent = prev;
+
+ allocator->block_count--;
+}
+
+/* remove a list of blocks from the allocator. The list can contain both
+ regular blocks and non-contiguous blocks. Non-contiguous blocks are
+ skipped; regular blocks are moved onto a separate list whose head is
+ returned */
+static struct gk20a_alloc_block *
+unlink_blocks(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block,
+ struct gk20a_alloc_block *prev,
+ u32 end)
+{
+ struct gk20a_alloc_block **insertion_point;
+ struct gk20a_alloc_block *last_unfreed_block = prev;
+ struct gk20a_alloc_block *last_freed_block = NULL;
+ struct gk20a_alloc_block *first_freed_block = NULL;
+
+ insertion_point = (prev ? &prev->next : &allocator->block_first);
+ *insertion_point = NULL;
+
+ do {
+ if (!block->nc_block) {
+ allocator_dbg(allocator, "unlink block %d:%d",
+ block->start, block->end);
+ if (last_freed_block)
+ last_freed_block->next = block;
+ block->prev = last_freed_block;
+ rb_erase(&block->rb, &allocator->rb_root);
+ last_freed_block = block;
+ allocator->block_count--;
+ if (!first_freed_block)
+ first_freed_block = block;
+ } else {
+ allocator_dbg(allocator, "skip nc block %d:%d",
+ block->start, block->end);
+ if (!*insertion_point)
+ *insertion_point = block;
+ if (last_unfreed_block)
+ last_unfreed_block->next = block;
+ block->prev = last_unfreed_block;
+ last_unfreed_block = block;
+ }
+ block = block->next;
+ } while (block && block->start < end);
+
+ if (!*insertion_point)
+ *insertion_point = block;
+
+ if (block)
+ block->prev = last_unfreed_block;
+ if (last_unfreed_block)
+ last_unfreed_block->next = block;
+ if (last_freed_block)
+ last_freed_block->next = NULL;
+
+ allocator->block_recent = NULL;
+
+ return first_freed_block;
+}
+
+/* Look up the first block that satisfies addr < block->end;
+ return NULL if none */
+static struct gk20a_alloc_block *
+find_block(struct gk20a_allocator *allocator, u32 addr)
+{
+ struct gk20a_alloc_block *block = allocator->block_recent;
+
+ if (!(block && block->end > addr && block->start <= addr)) {
+ struct rb_node *rb_node;
+
+ rb_node = allocator->rb_root.rb_node;
+ block = NULL;
+
+ while (rb_node) {
+ struct gk20a_alloc_block *block_tmp;
+
+ block_tmp = rb_entry(rb_node,
+ struct gk20a_alloc_block, rb);
+
+ if (block_tmp->end > addr) {
+ block = block_tmp;
+ if (block_tmp->start <= addr)
+ break;
+ rb_node = rb_node->rb_left;
+ } else
+ rb_node = rb_node->rb_right;
+ if (block)
+ allocator->block_recent = block;
+ }
+ }
+ return block;
+}
+
+/* Same as find_block, but also return a pointer to the previous block */
+static struct gk20a_alloc_block *
+find_block_prev(struct gk20a_allocator *allocator, u32 addr,
+ struct gk20a_alloc_block **pprev)
+{
+ struct gk20a_alloc_block *block = NULL, *prev = NULL;
+ struct rb_node *rb_node;
+ if (!allocator)
+ goto out;
+
+ block = allocator->block_first;
+
+ rb_node = allocator->rb_root.rb_node;
+
+ while (rb_node) {
+ struct gk20a_alloc_block *block_tmp;
+ block_tmp = rb_entry(rb_node, struct gk20a_alloc_block, rb);
+
+ if (addr < block_tmp->end)
+ rb_node = rb_node->rb_left;
+ else {
+ prev = block_tmp;
+ if (!prev->next || addr < prev->next->end)
+ break;
+ rb_node = rb_node->rb_right;
+ }
+ }
+
+out:
+ *pprev = prev;
+ return prev ? prev->next : block;
+}
+
+/* Same as find_block, but also return a pointer to the previous block
+ and return rb_node to prepare for rbtree insertion */
+static struct gk20a_alloc_block *
+find_block_prepare(struct gk20a_allocator *allocator, u32 addr,
+ struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
+ struct rb_node **rb_parent)
+{
+ struct gk20a_alloc_block *block;
+ struct rb_node **__rb_link, *__rb_parent, *rb_prev;
+
+ __rb_link = &allocator->rb_root.rb_node;
+ rb_prev = __rb_parent = NULL;
+ block = NULL;
+
+ while (*__rb_link) {
+ struct gk20a_alloc_block *block_tmp;
+
+ __rb_parent = *__rb_link;
+ block_tmp = rb_entry(__rb_parent,
+ struct gk20a_alloc_block, rb);
+
+ if (block_tmp->end > addr) {
+ block = block_tmp;
+ if (block_tmp->start <= addr)
+ break;
+ __rb_link = &__rb_parent->rb_left;
+ } else {
+ rb_prev = __rb_parent;
+ __rb_link = &__rb_parent->rb_right;
+ }
+ }
+
+ *pprev = NULL;
+ if (rb_prev)
+ *pprev = rb_entry(rb_prev, struct gk20a_alloc_block, rb);
+ *rb_link = __rb_link;
+ *rb_parent = __rb_parent;
+ return block;
+}
+
+/* return available space */
+static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align)
+{
+ if (addr >= limit)
+ return 0;
+ if (addr + len <= limit)
+ return len;
+ return (limit - addr) & ~(align - 1);
+}
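A worked example of the three cases, restated as a small userspace program; the addresses and the alignment are illustrative values only:

#include <assert.h>

/* userspace restatement of check_free_space() above, for the example */
static unsigned int free_space(unsigned int addr, unsigned int limit,
			       unsigned int len, unsigned int align)
{
	if (addr >= limit)
		return 0;
	if (addr + len <= limit)
		return len;
	return (limit - addr) & ~(align - 1);
}

int main(void)
{
	assert(free_space(0x1000, 0x3000, 0x1000, 0x100) == 0x1000); /* fits entirely */
	assert(free_space(0x1000, 0x1800, 0x1000, 0x100) == 0x800);  /* clipped to the hole, aligned down */
	assert(free_space(0x3000, 0x3000, 0x1000, 0x100) == 0);      /* no room at all */
	return 0;
}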
+
+/* update first_free_addr/last_free_addr based on a newly freed or
+ allocated address range; called both when freeing and when
+ allocating block(s) */
+static void update_free_addr_cache(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *next,
+ u32 addr, u32 len, bool free)
+{
+ /* update from block free */
+ if (free) {
+ if (allocator->first_free_addr > addr)
+ allocator->first_free_addr = addr;
+ } else { /* update from block alloc */
+ if (allocator->last_free_addr < addr + len)
+ allocator->last_free_addr = addr + len;
+ if (allocator->first_free_addr == addr) {
+ if (!next || next->start > addr + len)
+ allocator->first_free_addr = addr + len;
+ else
+ allocator->first_free_addr = next->end;
+ }
+ }
+
+ if (allocator->first_free_addr > allocator->last_free_addr)
+ allocator->first_free_addr = allocator->last_free_addr;
+}
+
+/* find a free address range for a fixed len */
+static int find_free_area(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len)
+{
+ struct gk20a_alloc_block *block;
+ u32 start_addr, search_base, search_limit;
+
+ /* fixed addr allocation */
+ /* note: constraints for fixed are handled by caller */
+ if (*addr) {
+ block = find_block(allocator, *addr);
+ if (allocator->limit - len >= *addr &&
+ (!block || *addr + len <= block->start)) {
+ update_free_addr_cache(allocator, block,
+ *addr, len, false);
+ return 0;
+ } else
+ return -ENOMEM;
+ }
+
+ if (!allocator->constraint.enable) {
+ search_base = allocator->base;
+ search_limit = allocator->limit;
+ } else {
+ start_addr = *addr = allocator->constraint.base;
+ search_base = allocator->constraint.base;
+ search_limit = allocator->constraint.limit;
+ }
+
+ /* cached_hole_size has max free space up to last_free_addr */
+ if (len > allocator->cached_hole_size)
+ start_addr = *addr = allocator->last_free_addr;
+ else {
+ start_addr = *addr = allocator->base;
+ allocator->cached_hole_size = 0;
+ }
+
+ allocator_dbg(allocator, "start search addr : %d", start_addr);
+
+full_search:
+ for (block = find_block(allocator, *addr);; block = block->next) {
+ if (search_limit - len < *addr) {
+ /* start a new search in case we missed any hole */
+ if (start_addr != search_base) {
+ start_addr = *addr = search_base;
+ allocator->cached_hole_size = 0;
+ allocator_dbg(allocator, "start a new search from base");
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+ if (!block || *addr + len <= block->start) {
+ update_free_addr_cache(allocator, block,
+ *addr, len, false);
+ allocator_dbg(allocator, "free space from %d, len %d",
+ *addr, len);
+ allocator_dbg(allocator, "next free addr: %d",
+ allocator->last_free_addr);
+ return 0;
+ }
+ if (*addr + allocator->cached_hole_size < block->start)
+ allocator->cached_hole_size = block->start - *addr;
+ *addr = block->end;
+ }
+}
+
+/* find a free address range up to len; the returned length is aligned and
+   may be shorter than len if only a smaller hole is available */
+static int find_free_area_nc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 *len)
+{
+ struct gk20a_alloc_block *block;
+ u32 start_addr;
+ u32 avail_len;
+
+ /* fixed addr allocation */
+ if (*addr) {
+ block = find_block(allocator, *addr);
+ if (allocator->limit - *len >= *addr) {
+ if (!block)
+ return 0;
+
+ avail_len = check_free_space(*addr, block->start,
+ *len, allocator->align);
+ if (avail_len != 0) {
+ update_free_addr_cache(allocator, block,
+ *addr, avail_len, false);
+ allocator_dbg(allocator,
+ "free space between %d, %d, len %d",
+ *addr, block->start, avail_len);
+ allocator_dbg(allocator, "next free addr: %d",
+ allocator->last_free_addr);
+ *len = avail_len;
+ return 0;
+ } else
+ return -ENOMEM;
+ } else
+ return -ENOMEM;
+ }
+
+ start_addr = *addr = allocator->first_free_addr;
+
+ allocator_dbg(allocator, "start search addr : %d", start_addr);
+
+ for (block = find_block(allocator, *addr);; block = block->next) {
+ if (allocator->limit - *len < *addr)
+ return -ENOMEM;
+ if (!block) {
+ update_free_addr_cache(allocator, block,
+ *addr, *len, false);
+ allocator_dbg(allocator, "free space from %d, len %d",
+ *addr, *len);
+ allocator_dbg(allocator, "next free addr: %d",
+ allocator->first_free_addr);
+ return 0;
+ }
+
+ avail_len = check_free_space(*addr, block->start,
+ *len, allocator->align);
+ if (avail_len != 0) {
+ update_free_addr_cache(allocator, block,
+ *addr, avail_len, false);
+ allocator_dbg(allocator, "free space between %d, %d, len %d",
+ *addr, block->start, avail_len);
+ allocator_dbg(allocator, "next free addr: %d",
+ allocator->first_free_addr);
+ *len = avail_len;
+ return 0;
+ }
+ if (*addr + allocator->cached_hole_size < block->start)
+ allocator->cached_hole_size = block->start - *addr;
+ *addr = block->end;
+ }
+}
+
+/* expand/shrink a block to a new start and new end;
+   for the shrink case, split_block provides the block to insert */
+static void adjust_block(struct gk20a_alloc_block *block,
+ u32 start, u32 end, struct gk20a_alloc_block *insert)
+{
+ struct gk20a_allocator *allocator = block->allocator;
+
+ allocator_dbg(allocator, "curr block %d:%d, new start %d, new end %d",
+ block->start, block->end, start, end);
+
+ /* expand */
+ if (!insert) {
+ if (start == block->end) {
+ struct gk20a_alloc_block *next = block->next;
+
+ if (next && end == next->start) {
+ /* ....AAAA.... */
+ /* PPPP....NNNN */
+ /* PPPPPPPPPPPP */
+ unlink_block(allocator, next, block);
+ block->end = next->end;
+ kmem_cache_free(allocator->block_cache, next);
+ } else {
+ /* ....AAAA.... */
+ /* PPPP........ */
+ /* PPPPPPPP.... */
+ block->end = end;
+ }
+ }
+
+ if (end == block->start) {
+ /* ....AAAA.... */
+ /* ........NNNN */
+ /* PP..NNNNNNNN ....NNNNNNNN */
+ block->start = start;
+ }
+ } else { /* shrink */
+ /* BBBBBBBB -> BBBBIIII OR BBBBBBBB -> IIIIBBBB */
+ block->start = start;
+ block->end = end;
+ insert_block(allocator, insert);
+ }
+}
+
+/* given a range [addr, end], merge it with blocks before or after or both
+ if they can be combined into a contiguous block */
+static struct gk20a_alloc_block *
+merge_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *prev, u32 addr, u32 end)
+{
+ struct gk20a_alloc_block *next;
+
+ if (prev)
+ next = prev->next;
+ else
+ next = allocator->block_first;
+
+ allocator_dbg(allocator, "curr block %d:%d", addr, end);
+ if (prev)
+ allocator_dbg(allocator, "prev block %d:%d",
+ prev->start, prev->end);
+ if (next)
+ allocator_dbg(allocator, "next block %d:%d",
+ next->start, next->end);
+
+ /* don't merge with non-contiguous allocation block */
+ if (prev && prev->end == addr && !prev->nc_block) {
+ adjust_block(prev, addr, end, NULL);
+ return prev;
+ }
+
+ /* don't merge with non-contiguous allocation block */
+ if (next && end == next->start && !next->nc_block) {
+ adjust_block(next, addr, end, NULL);
+ return next;
+ }
+
+ return NULL;
+}
+
+/* split a block based on addr. addr must be within (start, end).
+ if new_below == 1, link new block before adjusted current block */
+static int split_block(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block, u32 addr, int new_below)
+{
+ struct gk20a_alloc_block *new_block;
+
+ allocator_dbg(allocator, "start %d, split %d, end %d, new_below %d",
+ block->start, addr, block->end, new_below);
+
+ BUG_ON(!(addr > block->start && addr < block->end));
+
+ new_block = kmem_cache_alloc(allocator->block_cache, GFP_KERNEL);
+ if (!new_block)
+ return -ENOMEM;
+
+ *new_block = *block;
+
+ if (new_below)
+ new_block->end = addr;
+ else
+ new_block->start = addr;
+
+ if (new_below)
+ adjust_block(block, addr, block->end, new_block);
+ else
+ adjust_block(block, block->start, addr, new_block);
+
+ return 0;
+}
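+
+/*
+ * Illustrative example: splitting a block [0, 10) at addr 4 with
+ * new_below == 1 inserts a new block [0, 4) before the adjusted block
+ * [4, 10); with new_below == 0 the new block becomes [4, 10) and the
+ * original block shrinks to [0, 4).
+ */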
+
+/* free a list of blocks */
+static void free_blocks(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block)
+{
+ struct gk20a_alloc_block *curr_block;
+ while (block) {
+ curr_block = block;
+ block = block->next;
+ kmem_cache_free(allocator->block_cache, curr_block);
+ }
+}
+
+/* called with rw_sema acquired */
+static int block_alloc_single_locked(struct gk20a_allocator *allocator,
+ u32 *addr_req, u32 len)
+{
+ struct gk20a_alloc_block *block, *prev;
+ struct rb_node **rb_link, *rb_parent;
+ u32 addr = *addr_req;
+ int err;
+
+ *addr_req = ~0;
+
+ err = find_free_area(allocator, &addr, len);
+ if (err)
+ return err;
+
+ find_block_prepare(allocator, addr, &prev, &rb_link, &rb_parent);
+
+ /* merge requested free space with existing block(s)
+ if they can be combined into one contiguous block */
+ block = merge_block(allocator, prev, addr, addr + len);
+ if (block) {
+ *addr_req = addr;
+ return 0;
+ }
+
+ /* create a new block if cannot merge */
+ block = kmem_cache_zalloc(allocator->block_cache, GFP_KERNEL);
+ if (!block)
+ return -ENOMEM;
+
+ block->allocator = allocator;
+ block->start = addr;
+ block->end = addr + len;
+
+ link_block(allocator, block, prev, rb_link, rb_parent);
+
+ *addr_req = addr;
+
+ return 0;
+}
+
+static int block_alloc_list_locked(struct gk20a_allocator *allocator,
+ u32 *addr_req, u32 nc_len, struct gk20a_alloc_block **pblock)
+{
+ struct gk20a_alloc_block *block;
+ struct gk20a_alloc_block *nc_head = NULL, *nc_prev = NULL;
+ u32 addr = *addr_req, len = nc_len;
+ int err = 0;
+
+ *addr_req = ~0;
+
+ while (nc_len > 0) {
+ err = find_free_area_nc(allocator, &addr, &len);
+ if (err) {
+ allocator_dbg(allocator, "not enough free space");
+ goto clean_up;
+ }
+
+ /* never merge non-contiguous allocation block,
+ just create a new block */
+ block = kmem_cache_zalloc(allocator->block_cache,
+ GFP_KERNEL);
+ if (!block) {
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ block->allocator = allocator;
+ block->start = addr;
+ block->end = addr + len;
+
+ insert_block(allocator, block);
+
+ block->nc_prev = nc_prev;
+ if (nc_prev)
+ nc_prev->nc_next = block;
+ nc_prev = block;
+ block->nc_block = true;
+
+ if (!nc_head)
+ nc_head = block;
+
+ if (*addr_req == ~0)
+ *addr_req = addr;
+
+ addr = 0;
+ nc_len -= len;
+ len = nc_len;
+ allocator_dbg(allocator, "remaining length %d", nc_len);
+ }
+
+clean_up:
+ if (err) {
+ while (nc_head) {
+ unlink_block(allocator, nc_head, nc_head->prev);
+ nc_prev = nc_head;
+ nc_head = nc_head->nc_next;
+ kmem_cache_free(allocator->block_cache, nc_prev);
+ }
+ *pblock = NULL;
+ *addr_req = ~0;
+ } else {
+ *pblock = nc_head;
+ }
+
+ return err;
+}
+
+/* called with rw_sema acquired */
+static int block_free_locked(struct gk20a_allocator *allocator,
+ u32 addr, u32 len)
+{
+ struct gk20a_alloc_block *block, *prev, *last;
+ u32 end;
+ int err;
+
+ /* no block has block->end > addr, already free */
+ block = find_block_prev(allocator, addr, &prev);
+ if (!block)
+ return 0;
+
+ allocator_dbg(allocator, "first block in free range %d:%d",
+ block->start, block->end);
+
+ end = addr + len;
+ /* not in any block, already free */
+ if (block->start >= end)
+ return 0;
+
+ /* don't touch nc_block in range free */
+ if (addr > block->start && !block->nc_block) {
+ int err = split_block(allocator, block, addr, 0);
+ if (err)
+ return err;
+ prev = block;
+ }
+
+ last = find_block(allocator, end);
+ if (last && end > last->start && !last->nc_block) {
+
+ allocator_dbg(allocator, "last block in free range %d:%d",
+ last->start, last->end);
+
+ err = split_block(allocator, last, end, 1);
+ if (err)
+ return err;
+ }
+
+ block = prev ? prev->next : allocator->block_first;
+
+ allocator_dbg(allocator, "first block for free %d:%d",
+ block->start, block->end);
+
+ /* remove blocks between [addr, addr + len) from rb tree
+ and put them in a list */
+ block = unlink_blocks(allocator, block, prev, end);
+ free_blocks(allocator, block);
+
+ update_free_addr_cache(allocator, NULL, addr, len, true);
+
+ return 0;
+}
+
+/* called with rw_sema acquired */
+static void block_free_list_locked(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *list)
+{
+ struct gk20a_alloc_block *block;
+ u32 len;
+
+ update_free_addr_cache(allocator, NULL,
+ list->start, list->end - list->start, true);
+
+ while (list) {
+ block = list;
+ unlink_block(allocator, block, block->prev);
+
+ len = block->end - block->start;
+ if (allocator->cached_hole_size < len)
+ allocator->cached_hole_size = len;
+
+ list = block->nc_next;
+ kmem_cache_free(allocator->block_cache, block);
+ }
+}
+
+static int
+gk20a_allocator_constrain(struct gk20a_allocator *a,
+ bool enable, u32 base, u32 limit)
+{
+ if (enable) {
+ a->constraint.enable = (base >= a->base &&
+ limit <= a->limit);
+ if (!a->constraint.enable)
+ return -EINVAL;
+ a->constraint.base = base;
+ a->constraint.limit = limit;
+ a->first_free_addr = a->last_free_addr = base;
+
+ } else {
+ a->constraint.enable = false;
+ a->first_free_addr = a->last_free_addr = a->base;
+ }
+
+ a->cached_hole_size = 0;
+
+ return 0;
+}
+
+/* init allocator struct */
+int gk20a_allocator_init(struct gk20a_allocator *allocator,
+ const char *name, u32 start, u32 len, u32 align)
+{
+ memset(allocator, 0, sizeof(struct gk20a_allocator));
+
+ strncpy(allocator->name, name, 32);
+
+ allocator->block_cache =
+ kmem_cache_create(allocator->name,
+ sizeof(struct gk20a_alloc_block), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!allocator->block_cache)
+ return -ENOMEM;
+
+ allocator->rb_root = RB_ROOT;
+
+ allocator->base = start;
+ allocator->limit = start + len - 1;
+ allocator->align = align;
+
+ allocator_dbg(allocator, "%s : base %d, limit %d, align %d",
+ allocator->name, allocator->base,
+ allocator->limit, allocator->align);
+
+ allocator->first_free_addr = allocator->last_free_addr = start;
+ allocator->cached_hole_size = len;
+
+ init_rwsem(&allocator->rw_sema);
+
+ allocator->alloc = gk20a_allocator_block_alloc;
+ allocator->alloc_nc = gk20a_allocator_block_alloc_nc;
+ allocator->free = gk20a_allocator_block_free;
+ allocator->free_nc = gk20a_allocator_block_free_nc;
+ allocator->constrain = gk20a_allocator_constrain;
+
+ return 0;
+}
+
+/* destroy allocator, free all remaining blocks if any */
+void gk20a_allocator_destroy(struct gk20a_allocator *allocator)
+{
+ struct gk20a_alloc_block *block, *next;
+ u32 free_count = 0;
+
+ down_write(&allocator->rw_sema);
+
+ for (block = allocator->block_first; block; ) {
+ allocator_dbg(allocator, "free remaining block %d:%d",
+ block->start, block->end);
+ next = block->next;
+ kmem_cache_free(allocator->block_cache, block);
+ free_count++;
+ block = next;
+ }
+
+ up_write(&allocator->rw_sema);
+
+	/* BUG if block_count doesn't match the real number of blocks */
+ BUG_ON(free_count != allocator->block_count);
+
+ kmem_cache_destroy(allocator->block_cache);
+
+ memset(allocator, 0, sizeof(struct gk20a_allocator));
+}
+
+/*
+ * A non-zero *addr requests a fixed address allocation. If *addr == 0, a
+ * suitable address is chosen and returned to the caller in *addr; on
+ * failure *addr is set to ~0.
+ *
+ * Contiguous allocation: allocates one block of contiguous addresses.
+ */
+int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len)
+{
+ int ret;
+#if defined(ALLOCATOR_DEBUG)
+ struct gk20a_alloc_block *block;
+ bool should_fail = false;
+#endif
+
+ allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
+
+ if (*addr + len > allocator->limit || /* check addr range */
+ *addr & (allocator->align - 1) || /* check addr alignment */
+ len == 0) /* check len */
+ return -EINVAL;
+
+ if (allocator->constraint.enable &&
+ (*addr + len > allocator->constraint.limit ||
+ *addr > allocator->constraint.base))
+ return -EINVAL;
+
+ len = ALIGN(len, allocator->align);
+ if (!len)
+ return -ENOMEM;
+
+ down_write(&allocator->rw_sema);
+
+#if defined(ALLOCATOR_DEBUG)
+ if (*addr) {
+ for (block = allocator->block_first;
+ block; block = block->next) {
+ if (block->end > *addr && block->start < *addr + len) {
+ should_fail = true;
+ break;
+ }
+ }
+ }
+#endif
+
+ ret = block_alloc_single_locked(allocator, addr, len);
+
+#if defined(ALLOCATOR_DEBUG)
+ if (!ret) {
+ bool allocated = false;
+ BUG_ON(should_fail);
+ BUG_ON(*addr < allocator->base);
+ BUG_ON(*addr + len > allocator->limit);
+ for (block = allocator->block_first;
+ block; block = block->next) {
+ if (!block->nc_block &&
+ block->start <= *addr &&
+ block->end >= *addr + len) {
+ allocated = true;
+ break;
+ }
+ }
+ BUG_ON(!allocated);
+ }
+#endif
+
+ up_write(&allocator->rw_sema);
+
+ allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
+
+ return ret;
+}
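+
+/*
+ * Usage sketch (illustrative only; "a" stands for some already initialised
+ * allocator and the length is arbitrary). Passing *addr == 0 asks for a
+ * dynamic allocation, a non-zero aligned *addr asks for that fixed range:
+ *
+ *	u32 addr = 0;
+ *
+ *	if (!gk20a_allocator_block_alloc(a, &addr, 128)) {
+ *		... use [addr, addr + 128) ...
+ *		gk20a_allocator_block_free(a, addr, 128);
+ *	}
+ */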
+
+/*
+ * A non-zero *addr requests a fixed address allocation. If *addr == 0, the
+ * start address of the first block is returned to the caller in *addr; on
+ * failure *addr is set to ~0.
+ *
+ * Non-contiguous allocation: returns a list of blocks whose aggregated
+ * size == len. Each individual block size meets the alignment requirement.
+ */
+int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len, struct gk20a_alloc_block **pblock)
+{
+ int ret;
+
+ allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
+
+ BUG_ON(pblock == NULL);
+ *pblock = NULL;
+
+ if (*addr + len > allocator->limit || /* check addr range */
+ *addr & (allocator->align - 1) || /* check addr alignment */
+ len == 0) /* check len */
+ return -EINVAL;
+
+ len = ALIGN(len, allocator->align);
+ if (!len)
+ return -ENOMEM;
+
+ down_write(&allocator->rw_sema);
+
+ ret = block_alloc_list_locked(allocator, addr, len, pblock);
+
+#if defined(ALLOCATOR_DEBUG)
+ if (!ret) {
+ struct gk20a_alloc_block *block = *pblock;
+ BUG_ON(!block);
+ BUG_ON(block->start < allocator->base);
+ while (block->nc_next) {
+ BUG_ON(block->end > block->nc_next->start);
+ block = block->nc_next;
+ }
+ BUG_ON(block->end > allocator->limit);
+ }
+#endif
+
+ up_write(&allocator->rw_sema);
+
+ allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
+
+ return ret;
+}
+
+/* free all blocks between start and end */
+int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
+ u32 addr, u32 len)
+{
+ int ret;
+
+ allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
+
+ if (addr + len > allocator->limit || /* check addr range */
+ addr < allocator->base ||
+ addr & (allocator->align - 1)) /* check addr alignment */
+ return -EINVAL;
+
+ len = ALIGN(len, allocator->align);
+ if (!len)
+ return -EINVAL;
+
+ down_write(&allocator->rw_sema);
+
+ ret = block_free_locked(allocator, addr, len);
+
+#if defined(ALLOCATOR_DEBUG)
+ if (!ret) {
+ struct gk20a_alloc_block *block;
+ for (block = allocator->block_first;
+ block; block = block->next) {
+ if (!block->nc_block)
+ BUG_ON(block->start >= addr &&
+ block->end <= addr + len);
+ }
+ }
+#endif
+ up_write(&allocator->rw_sema);
+
+ allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
+
+ return ret;
+}
+
+/* free non-contiguous allocation block list */
+void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block)
+{
+ /* nothing to free */
+ if (!block)
+ return;
+
+ down_write(&allocator->rw_sema);
+ block_free_list_locked(allocator, block);
+ up_write(&allocator->rw_sema);
+}
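+
+/*
+ * Usage sketch for the non-contiguous path (illustrative only; "a" is a
+ * hypothetical, already initialised allocator). The caller keeps the
+ * returned block list and passes it back here rather than freeing by
+ * address range:
+ *
+ *	struct gk20a_alloc_block *list;
+ *	u32 addr = 0;
+ *
+ *	if (!gk20a_allocator_block_alloc_nc(a, &addr, 4096, &list)) {
+ *		... walk each [block->start, block->end) via nc_next ...
+ *		gk20a_allocator_block_free_nc(a, list);
+ *	}
+ */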
+
+#if defined(ALLOCATOR_DEBUG)
+
+#include <linux/random.h>
+
+/* test suite */
+void gk20a_allocator_test(void)
+{
+ struct gk20a_allocator allocator;
+ struct gk20a_alloc_block *list[5];
+ u32 addr, len;
+ u32 count;
+ int n;
+
+ gk20a_allocator_init(&allocator, "test", 0, 10, 1);
+
+ /* alloc/free a single block in the beginning */
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_block_free(&allocator, addr, 2);
+ gk20a_allocator_dump(&allocator);
+ /* alloc/free a single block in the middle */
+ addr = 4;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_block_free(&allocator, addr, 2);
+ gk20a_allocator_dump(&allocator);
+ /* alloc/free a single block in the end */
+ addr = 8;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_block_free(&allocator, addr, 2);
+ gk20a_allocator_dump(&allocator);
+
+ /* allocate contiguous blocks */
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 4);
+ gk20a_allocator_dump(&allocator);
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 4);
+ gk20a_allocator_dump(&allocator);
+
+ /* no free space */
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+
+ /* free in the end */
+ gk20a_allocator_block_free(&allocator, 8, 2);
+ gk20a_allocator_dump(&allocator);
+ /* free in the beginning */
+ gk20a_allocator_block_free(&allocator, 0, 2);
+ gk20a_allocator_dump(&allocator);
+ /* free in the middle */
+ gk20a_allocator_block_free(&allocator, 4, 2);
+ gk20a_allocator_dump(&allocator);
+
+ /* merge case PPPPAAAANNNN */
+ addr = 4;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ /* merge case ....AAAANNNN */
+ addr = 0;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ /* merge case PPPPAAAA.... */
+ addr = 8;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+
+ /* test free across multiple blocks and split */
+ gk20a_allocator_block_free(&allocator, 2, 2);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_block_free(&allocator, 6, 2);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_block_free(&allocator, 1, 8);
+ gk20a_allocator_dump(&allocator);
+
+ /* test non-contiguous allocation */
+ addr = 4;
+ gk20a_allocator_block_alloc(&allocator, &addr, 2);
+ gk20a_allocator_dump(&allocator);
+ addr = 0;
+ gk20a_allocator_block_alloc_nc(&allocator, &addr, 5, &list[0]);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_dump_nc_list(&allocator, list[0]);
+
+	/* test free a range overlapping non-contiguous blocks */
+ gk20a_allocator_block_free(&allocator, 2, 6);
+ gk20a_allocator_dump(&allocator);
+
+ /* test non-contiguous free */
+ gk20a_allocator_block_free_nc(&allocator, list[0]);
+ gk20a_allocator_dump(&allocator);
+
+ gk20a_allocator_destroy(&allocator);
+
+ /* random stress test */
+ gk20a_allocator_init(&allocator, "test", 4096, 4096 * 1024, 4096);
+ for (;;) {
+ pr_debug("alloc tests...\n");
+ for (count = 0; count < 50; count++) {
+ addr = 0;
+ len = random32() % (4096 * 1024 / 16);
+ gk20a_allocator_block_alloc(&allocator, &addr, len);
+ gk20a_allocator_dump(&allocator);
+ }
+
+ pr_debug("free tests...\n");
+ for (count = 0; count < 30; count++) {
+ addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
+ len = random32() % (4096 * 1024 / 16);
+ gk20a_allocator_block_free(&allocator, addr, len);
+ gk20a_allocator_dump(&allocator);
+ }
+
+ pr_debug("non-contiguous alloc tests...\n");
+ for (n = 0; n < 5; n++) {
+ addr = 0;
+ len = random32() % (4096 * 1024 / 8);
+ gk20a_allocator_block_alloc_nc(&allocator, &addr,
+ len, &list[n]);
+ gk20a_allocator_dump(&allocator);
+ gk20a_allocator_dump_nc_list(&allocator, list[n]);
+ }
+
+ pr_debug("free tests...\n");
+ for (count = 0; count < 10; count++) {
+ addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
+ len = random32() % (4096 * 1024 / 16);
+ gk20a_allocator_block_free(&allocator, addr, len);
+ gk20a_allocator_dump(&allocator);
+ }
+
+ pr_debug("non-contiguous free tests...\n");
+ for (n = 4; n >= 0; n--) {
+ gk20a_allocator_dump_nc_list(&allocator, list[n]);
+ gk20a_allocator_block_free_nc(&allocator, list[n]);
+ gk20a_allocator_dump(&allocator);
+ }
+
+ pr_debug("fixed addr alloc tests...\n");
+ for (count = 0; count < 10; count++) {
+ addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
+ len = random32() % (4096 * 1024 / 32);
+ gk20a_allocator_block_alloc(&allocator, &addr, len);
+ gk20a_allocator_dump(&allocator);
+ }
+
+ pr_debug("free tests...\n");
+ for (count = 0; count < 10; count++) {
+ addr = (random32() % (4096 * 1024)) & ~(4096 - 1);
+ len = random32() % (4096 * 1024 / 16);
+ gk20a_allocator_block_free(&allocator, addr, len);
+ gk20a_allocator_dump(&allocator);
+ }
+ }
+ gk20a_allocator_destroy(&allocator);
+}
+
+#endif /* ALLOCATOR_DEBUG */
+
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
new file mode 100644
index 000000000000..dba397e2481c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -0,0 +1,177 @@
+/*
+ * gk20a allocator
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __NVHOST_ALLOCATOR_H__
+#define __NVHOST_ALLOCATOR_H__
+
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+/* #define ALLOCATOR_DEBUG */
+
+struct allocator_block;
+
+/* main struct */
+struct gk20a_allocator {
+
+ char name[32]; /* name for allocator */
+ struct rb_root rb_root; /* rb tree root for blocks */
+
+ u32 base; /* min value of this linear space */
+ u32 limit; /* max value = limit - 1 */
+ u32 align; /* alignment size, power of 2 */
+
+ struct gk20a_alloc_block *block_first; /* first block in list */
+ struct gk20a_alloc_block *block_recent; /* last visited block */
+
+	u32 first_free_addr;		/* first free addr, non-contiguous
+ allocation preferred start,
+ in order to pick up small holes */
+ u32 last_free_addr; /* last free addr, contiguous
+ allocation preferred start */
+ u32 cached_hole_size; /* max free hole size up to
+ last_free_addr */
+ u32 block_count; /* number of blocks */
+
+ struct rw_semaphore rw_sema; /* lock */
+ struct kmem_cache *block_cache; /* slab cache */
+
+ /* if enabled, constrain to [base, limit) */
+ struct {
+ bool enable;
+ u32 base;
+ u32 limit;
+ } constraint;
+
+ int (*alloc)(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len);
+ int (*alloc_nc)(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len,
+ struct gk20a_alloc_block **pblock);
+ int (*free)(struct gk20a_allocator *allocator,
+ u32 addr, u32 len);
+ void (*free_nc)(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block);
+
+ int (*constrain)(struct gk20a_allocator *a,
+ bool enable,
+ u32 base, u32 limit);
+};
+
+/* a block of linear space range [start, end) */
+struct gk20a_alloc_block {
+ struct gk20a_allocator *allocator; /* parent allocator */
+ struct rb_node rb; /* rb tree node */
+
+ u32 start; /* linear space range
+ [start, end) */
+ u32 end;
+
+ void *priv; /* backing structure for this
+ linear space block
+ page table, comp tag, etc */
+
+ struct gk20a_alloc_block *prev; /* prev block with lower address */
+ struct gk20a_alloc_block *next; /* next block with higher address */
+
+ bool nc_block;
+ struct gk20a_alloc_block *nc_prev; /* prev block for
+ non-contiguous allocation */
+ struct gk20a_alloc_block *nc_next; /* next block for
+ non-contiguous allocation */
+};
+
+int gk20a_allocator_init(struct gk20a_allocator *allocator,
+ const char *name, u32 base, u32 size, u32 align);
+void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
+
+int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len);
+int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len,
+ struct gk20a_alloc_block **pblock);
+
+int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
+ u32 addr, u32 len);
+void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block);
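+
+/*
+ * Example (sketch only; the window values below are made up, and addr/len
+ * stand for some u32 variables). The same operations are also reachable
+ * through the function pointers stored in the allocator, e.g. to
+ * temporarily constrain allocations to a sub-range:
+ *
+ *	allocator->constrain(allocator, true, 0x1000, 0x2000);
+ *	allocator->alloc(allocator, &addr, len);
+ *	allocator->constrain(allocator, false, 0, 0);
+ */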
+
+#if defined(ALLOCATOR_DEBUG)
+
+#define allocator_dbg(allocator, format, arg...) \
+do {								\
+	if (1)							\
+		pr_debug("gk20a_allocator (%s) %s: " format "\n",\
+			allocator->name, __func__, ##arg);\
+} while (0)
+
+static inline void
+gk20a_allocator_dump(struct gk20a_allocator *allocator) {
+ struct gk20a_alloc_block *block;
+ u32 count = 0;
+
+ down_read(&allocator->rw_sema);
+ for (block = allocator->block_first; block; block = block->next) {
+ allocator_dbg(allocator, "block %d - %d:%d, nc %d",
+ count++, block->start, block->end, block->nc_block);
+
+ if (block->prev)
+ BUG_ON(block->prev->end > block->start);
+ if (block->next)
+ BUG_ON(block->next->start < block->end);
+ }
+ allocator_dbg(allocator, "tracked count %d, actual count %d",
+ allocator->block_count, count);
+ allocator_dbg(allocator, "first block %d:%d",
+ allocator->block_first ? allocator->block_first->start : -1,
+ allocator->block_first ? allocator->block_first->end : -1);
+ allocator_dbg(allocator, "first free addr %d",
+ allocator->first_free_addr);
+ allocator_dbg(allocator, "last free addr %d",
+ allocator->last_free_addr);
+ allocator_dbg(allocator, "cached hole size %d",
+ allocator->cached_hole_size);
+ up_read(&allocator->rw_sema);
+
+ BUG_ON(count != allocator->block_count);
+}
+
+static inline void
+gk20a_allocator_dump_nc_list(
+ struct gk20a_allocator *allocator,
+ struct gk20a_alloc_block *block)
+{
+ down_read(&allocator->rw_sema);
+ while (block) {
+ pr_debug("non-contiguous block %d:%d\n",
+ block->start, block->end);
+ block = block->nc_next;
+ }
+ up_read(&allocator->rw_sema);
+}
+
+void gk20a_allocator_test(void);
+
+#else /* ALLOCATOR_DEBUG */
+
+#define allocator_dbg(format, arg...)
+
+#endif /* ALLOCATOR_DEBUG */
+
+#endif /*__NVHOST_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c
new file mode 100644
index 000000000000..c6478a5e1328
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * This file is autogenerated. Do not edit.
+ */
+
+#ifndef __gk20a_gating_reglist_h__
+#define __gk20a_gating_reglist_h__
+
+#include <linux/types.h>
+#include "gk20a_gating_reglist.h"
+
+struct gating_desc {
+ u32 addr;
+ u32 prod;
+ u32 disable;
+};
+/* slcg gr */
+const struct gating_desc gk20a_slcg_gr[] = {
+ {.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe},
+ {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe},
+ {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00405910, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
+ {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000001e},
+ {.addr = 0x0041a894, .prod = 0x00000000, .disable = 0x0003fffe},
+ {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
+ {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
+ {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
+ {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00418c74, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00418cf4, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00418d74, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00418f10, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00418e10, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00419024, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
+ {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
+ {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
+ {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419ad0, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x0041986c, .prod = 0x0000dfc0, .disable = 0x00fffffe},
+ {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
+ {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
+ {.addr = 0x00419c74, .prod = 0x00000000, .disable = 0x0000001e},
+ {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
+ {.addr = 0x00419fdc, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00419fe4, .prod = 0x00000000, .disable = 0x0000000e},
+ {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
+ {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x0041be2c, .prod = 0x020bbfc0, .disable = 0xfffffffe},
+ {.addr = 0x0041bfec, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x0041bed4, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
+ {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff},
+ {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0x00fffffe},
+ {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
+ {.addr = 0x0010e48c, .prod = 0x00000000, .disable = 0x0000003e},
+ {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000000fe},
+ {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
+ {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
+ {.addr = 0x0017ea98, .prod = 0x00000000, .disable = 0xfffffffe},
+ {.addr = 0x00106f28, .prod = 0x00000040, .disable = 0x000007fe},
+ {.addr = 0x00120048, .prod = 0x00000000, .disable = 0x00000049},
+};
+
+/* slcg perf */
+const struct gating_desc gk20a_slcg_perf[] = {
+ {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
+ {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
+ {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
+ {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
+};
+
+/* blcg gr */
+const struct gating_desc gk20a_blcg_gr[] = {
+ {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000},
+ {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
+ {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
+ {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000},
+ {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000},
+ {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
+ {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x00419a40, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419acc, .prod = 0x00004047, .disable = 0x00000000},
+ {.addr = 0x00419868, .prod = 0x00000043, .disable = 0x00000000},
+ {.addr = 0x00419cd4, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419cdc, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419c70, .prod = 0x00004045, .disable = 0x00000000},
+ {.addr = 0x00419fd0, .prod = 0x00004043, .disable = 0x00000000},
+ {.addr = 0x00419fd8, .prod = 0x00004045, .disable = 0x00000000},
+ {.addr = 0x00419fe0, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419fe8, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419ff0, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00419ff8, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00419f90, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000},
+ {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000},
+ {.addr = 0x0017ea60, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x0017ea68, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
+ {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
+ {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
+ {.addr = 0x0017ea78, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x00106f24, .prod = 0x0000c242, .disable = 0x00000000},
+ {.addr = 0x0041be00, .prod = 0x00000004, .disable = 0x00000007},
+ {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
+ {.addr = 0x0017ea70, .prod = 0x00000044, .disable = 0x00000000},
+ {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
+ {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000},
+ {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
+};
+
+/* pg gr */
+const struct gating_desc gk20a_pg_gr[] = {
+ {.addr = 0x004041f8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x004041fc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00409898, .prod = 0x10140000, .disable = 0x00000000},
+ {.addr = 0x0040989c, .prod = 0xff00000a, .disable = 0x00000000},
+ {.addr = 0x004078c8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x004078cc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00406008, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0040600c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00405868, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0040586c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00405914, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00405924, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408048, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0040804c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00407008, .prod = 0x10140000, .disable = 0x00000000},
+ {.addr = 0x0040700c, .prod = 0xff00000a, .disable = 0x00000000},
+ {.addr = 0x00405bf8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00405bfc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x0041a898, .prod = 0x10140000, .disable = 0x00000000},
+ {.addr = 0x0041a89c, .prod = 0xff00000a, .disable = 0x00000000},
+ {.addr = 0x00418510, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418514, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418610, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418614, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418690, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418694, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418720, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418724, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418840, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418844, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418bc4, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418bc8, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418978, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0041897c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418c78, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418c7c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418cf8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418cfc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418d78, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418d7c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418f14, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418f18, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00418e14, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00418e18, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419030, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419050, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419a88, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419a8c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419a90, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419a94, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419a98, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419a9c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419aa0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419aa4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419ad4, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419ad8, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419870, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419874, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419ce4, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419cf0, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419c78, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419c7c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fa0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fa4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fa8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fac, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fb0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fb4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fb8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fbc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fc0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fc4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00419fc8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00419fcc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x0041be30, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0041be34, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x0041bff0, .prod = 0x10747c00, .disable = 0x00000000},
+ {.addr = 0x0041bff4, .prod = 0xff00000a, .disable = 0x00000000},
+ {.addr = 0x0041bed8, .prod = 0x10240a00, .disable = 0x00000000},
+ {.addr = 0x0041bee0, .prod = 0xff00000a, .disable = 0x00000000},
+ {.addr = 0x00408820, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408824, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408828, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x0040882c, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ac0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408ac4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ac8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408acc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ad0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408ad4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ad8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408adc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ae0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408ae4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x00408ae8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x00408aec, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x004089c0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x004089c4, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x004089c8, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x004089cc, .prod = 0xff00a725, .disable = 0x00000000},
+ {.addr = 0x004089d0, .prod = 0x10940000, .disable = 0x00000000},
+ {.addr = 0x004089d4, .prod = 0xff00a725, .disable = 0x00000000},
+};
+
+/* therm gr */
+const struct gating_desc gk20a_slcg_therm[] = {
+ {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* gating register load functions */
+void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g,
+ bool prod)
+{
+ u32 i;
+ u32 size = sizeof(gk20a_slcg_gr) / sizeof(struct gating_desc);
+ for (i = 0; i < size; i++) {
+ if (prod)
+ gk20a_writel(g, gk20a_slcg_gr[i].addr,
+ gk20a_slcg_gr[i].prod);
+ else
+ gk20a_writel(g, gk20a_slcg_gr[i].addr,
+ gk20a_slcg_gr[i].disable);
+ }
+}
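+
+/*
+ * Sketch of a typical call site (hypothetical here; the real callers live
+ * elsewhere in the driver): passing prod == true writes the production
+ * values, false writes the disable values, e.g.
+ *
+ *	gr_gk20a_slcg_gr_load_gating_prod(g, true);
+ */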
+
+void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g,
+ bool prod)
+{
+ u32 i;
+ u32 size = sizeof(gk20a_slcg_perf) / sizeof(struct gating_desc);
+ for (i = 0; i < size; i++) {
+ if (prod)
+ gk20a_writel(g, gk20a_slcg_perf[i].addr,
+ gk20a_slcg_perf[i].prod);
+ else
+ gk20a_writel(g, gk20a_slcg_perf[i].addr,
+ gk20a_slcg_perf[i].disable);
+ }
+}
+
+void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g,
+ bool prod)
+{
+ u32 i;
+ u32 size = sizeof(gk20a_blcg_gr) / sizeof(struct gating_desc);
+ for (i = 0; i < size; i++) {
+ if (prod)
+ gk20a_writel(g, gk20a_blcg_gr[i].addr,
+ gk20a_blcg_gr[i].prod);
+ else
+ gk20a_writel(g, gk20a_blcg_gr[i].addr,
+ gk20a_blcg_gr[i].disable);
+ }
+}
+
+void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g,
+ bool prod)
+{
+ u32 i;
+ u32 size = sizeof(gk20a_pg_gr) / sizeof(struct gating_desc);
+ for (i = 0; i < size; i++) {
+ if (prod)
+ gk20a_writel(g, gk20a_pg_gr[i].addr,
+ gk20a_pg_gr[i].prod);
+ else
+ gk20a_writel(g, gk20a_pg_gr[i].addr,
+ gk20a_pg_gr[i].disable);
+ }
+}
+
+void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g,
+ bool prod)
+{
+ u32 i;
+ u32 size = sizeof(gk20a_slcg_therm) / sizeof(struct gating_desc);
+ for (i = 0; i < size; i++) {
+ if (prod)
+ gk20a_writel(g, gk20a_slcg_therm[i].addr,
+ gk20a_slcg_therm[i].prod);
+ else
+ gk20a_writel(g, gk20a_slcg_therm[i].addr,
+ gk20a_slcg_therm[i].disable);
+ }
+}
+
+#endif /* __gk20a_gating_reglist_h__ */
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h
new file mode 100644
index 000000000000..40a6c545cf39
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_gating_reglist.h
@@ -0,0 +1,39 @@
+/*
+ * drivers/video/tegra/host/gk20a/gk20a_gating_reglist.h
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * This file is autogenerated. Do not edit.
+ */
+
+#include "gk20a.h"
+
+void gr_gk20a_slcg_gr_load_gating_prod(struct gk20a *g,
+ bool prod);
+
+void gr_gk20a_slcg_perf_load_gating_prod(struct gk20a *g,
+ bool prod);
+
+void gr_gk20a_blcg_gr_load_gating_prod(struct gk20a *g,
+ bool prod);
+
+void gr_gk20a_pg_gr_load_gating_prod(struct gk20a *g,
+ bool prod);
+
+void gr_gk20a_slcg_therm_load_gating_prod(struct gk20a *g,
+ bool prod);
+
+
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
new file mode 100644
index 000000000000..d1fd71fe4e36
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -0,0 +1,358 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/devfreq.h>
+#include <linux/debugfs.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/clk/tegra.h>
+#include <linux/tegra-soc.h>
+#include <linux/platform_data/tegra_edp.h>
+#include <linux/pm_qos.h>
+
+#include <governor.h>
+
+#include "gk20a.h"
+#include "pmu_gk20a.h"
+#include "clk_gk20a.h"
+#include "gk20a_scale.h"
+
+static ssize_t gk20a_scale_load_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct gk20a *g = get_gk20a(pdev);
+ u32 busy_time;
+ ssize_t res;
+
+ if (!g->power_on) {
+ busy_time = 0;
+ } else {
+ gk20a_busy(g->dev);
+ gk20a_pmu_load_norm(g, &busy_time);
+ gk20a_idle(g->dev);
+ }
+
+ res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time);
+
+ return res;
+}
+
+static DEVICE_ATTR(load, S_IRUGO, gk20a_scale_load_show, NULL);
+
+/*
+ * gk20a_scale_qos_notify()
+ *
+ * This function is called when the minimum QoS requirement for the device
+ * has changed. The function calls the postscale callback if one is defined.
+ */
+
+static int gk20a_scale_qos_notify(struct notifier_block *nb,
+ unsigned long n, void *p)
+{
+ struct gk20a_scale_profile *profile =
+ container_of(nb, struct gk20a_scale_profile,
+ qos_notify_block);
+ struct gk20a_platform *platform = platform_get_drvdata(profile->pdev);
+ struct gk20a *g = get_gk20a(profile->pdev);
+ unsigned long freq;
+
+ if (!platform->postscale)
+ return NOTIFY_OK;
+
+ /* get the frequency requirement. if devfreq is enabled, check if it
+ * has higher demand than qos */
+ freq = gk20a_clk_round_rate(g, pm_qos_request(platform->qos_id));
+ if (g->devfreq)
+ freq = max(g->devfreq->previous_freq, freq);
+
+ platform->postscale(profile->pdev, freq);
+
+ return NOTIFY_OK;
+}
+
+/*
+ * gk20a_scale_make_freq_table(profile)
+ *
+ * This function initialises the frequency table for the given device profile
+ */
+
+static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
+{
+ struct gk20a *g = get_gk20a(profile->pdev);
+ unsigned long *freqs;
+ int num_freqs, err;
+
+ /* make sure the clock is available */
+ if (!gk20a_clk_get(g))
+ return -ENOSYS;
+
+ /* get gpu dvfs table */
+ err = tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
+ &freqs, &num_freqs);
+ if (err)
+ return -ENOSYS;
+
+ profile->devfreq_profile.freq_table = (unsigned long *)freqs;
+ profile->devfreq_profile.max_state = num_freqs;
+
+ return 0;
+}
+
+/*
+ * gk20a_scale_target(dev, *freq, flags)
+ *
+ * This function scales the clock
+ */
+
+static int gk20a_scale_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct gk20a *g = get_gk20a(to_platform_device(dev));
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ struct gk20a_scale_profile *profile = g->scale_profile;
+ unsigned long rounded_rate = gk20a_clk_round_rate(g, *freq);
+
+ if (gk20a_clk_get_rate(g) == rounded_rate) {
+ *freq = rounded_rate;
+ return 0;
+ }
+
+ gk20a_clk_set_rate(g, rounded_rate);
+ if (platform->postscale)
+ platform->postscale(profile->pdev, rounded_rate);
+ *freq = gk20a_clk_get_rate(g);
+
+ return 0;
+}
+
+/*
+ * update_load_estimate_gpmu(pdev)
+ *
+ * Update the load estimate using the gpmu. The gpmu value is normalised
+ * against the time elapsed since it was last queried.
+ */
+
+static void update_load_estimate_gpmu(struct platform_device *pdev)
+{
+ struct gk20a *g = get_gk20a(pdev);
+ struct gk20a_scale_profile *profile = g->scale_profile;
+ unsigned long dt;
+ u32 busy_time;
+ ktime_t t;
+
+ t = ktime_get();
+ dt = ktime_us_delta(t, profile->last_event_time);
+
+ profile->dev_stat.total_time = dt;
+ profile->last_event_time = t;
+ gk20a_pmu_load_norm(g, &busy_time);
+ profile->dev_stat.busy_time = (busy_time * dt) / 1000;
+}
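+
+/*
+ * Illustrative numbers (not measured; assumes the pmu reports a per-mille
+ * load, as the division by 1000 above suggests): with dt = 10000 us since
+ * the last event and busy_time = 500 from the pmu, the estimate becomes
+ * total_time = 10000 and busy_time = 500 * 10000 / 1000 = 5000, i.e. the
+ * device is treated as 50% busy over that window.
+ */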
+
+/*
+ * gk20a_scale_suspend(pdev)
+ *
+ * This function informs devfreq of suspend
+ */
+
+void gk20a_scale_suspend(struct platform_device *pdev)
+{
+ struct gk20a *g = get_gk20a(pdev);
+ struct devfreq *devfreq = g->devfreq;
+
+ if (!devfreq)
+ return;
+
+ devfreq_suspend_device(devfreq);
+}
+
+/*
+ * gk20a_scale_resume(pdev)
+ *
+ * This function informs devfreq of resume
+ */
+
+void gk20a_scale_resume(struct platform_device *pdev)
+{
+ struct gk20a *g = get_gk20a(pdev);
+ struct devfreq *devfreq = g->devfreq;
+
+ if (!devfreq)
+ return;
+
+ devfreq_resume_device(devfreq);
+}
+
+/*
+ * gk20a_scale_notify(pdev, busy)
+ *
+ * Calling this function signals that the device is idling (or busy). This
+ * data is used to estimate the current load.
+ */
+
+static void gk20a_scale_notify(struct platform_device *pdev, bool busy)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ struct gk20a *g = get_gk20a(pdev);
+ struct gk20a_scale_profile *profile = g->scale_profile;
+ struct devfreq *devfreq = g->devfreq;
+
+ /* inform edp about new constraint */
+ if (platform->prescale)
+ platform->prescale(pdev);
+
+ /* Is the device profile initialised? */
+ if (!(profile && devfreq))
+ return;
+
+ mutex_lock(&devfreq->lock);
+ profile->dev_stat.busy = busy;
+ update_devfreq(devfreq);
+ mutex_unlock(&devfreq->lock);
+}
+
+void gk20a_scale_notify_idle(struct platform_device *pdev)
+{
+ gk20a_scale_notify(pdev, false);
+}
+
+void gk20a_scale_notify_busy(struct platform_device *pdev)
+{
+ gk20a_scale_notify(pdev, true);
+}
+
+/*
+ * gk20a_scale_get_dev_status(dev, *stat)
+ *
+ * This function queries the current device status.
+ */
+
+static int gk20a_scale_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *stat)
+{
+ struct gk20a *g = get_gk20a(to_platform_device(dev));
+ struct gk20a_scale_profile *profile = g->scale_profile;
+
+ /* Make sure there are correct values for the current frequency */
+ profile->dev_stat.current_frequency = gk20a_clk_get_rate(g);
+
+ /* Update load estimate */
+ update_load_estimate_gpmu(to_platform_device(dev));
+
+ /* Copy the contents of the current device status */
+ *stat = profile->dev_stat;
+
+ /* Finally, clear out the local values */
+ profile->dev_stat.total_time = 0;
+ profile->dev_stat.busy_time = 0;
+
+ return 0;
+}
+
+/*
+ * gk20a_scale_init(pdev)
+ */
+
+void gk20a_scale_init(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ struct gk20a *g = platform->g;
+ struct gk20a_scale_profile *profile;
+ int err;
+
+ if (g->scale_profile)
+ return;
+
+	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
+	if (!profile)
+		return;
+
+ profile->pdev = pdev;
+ profile->dev_stat.busy = false;
+
+ /* Create frequency table */
+ err = gk20a_scale_make_freq_table(profile);
+ if (err || !profile->devfreq_profile.max_state)
+ goto err_get_freqs;
+
+ if (device_create_file(&pdev->dev, &dev_attr_load))
+ goto err_create_sysfs_entry;
+
+ /* Store device profile so we can access it if devfreq governor
+ * init needs that */
+ g->scale_profile = profile;
+
+ if (platform->devfreq_governor) {
+ struct devfreq *devfreq;
+
+ profile->devfreq_profile.initial_freq =
+ profile->devfreq_profile.freq_table[0];
+ profile->devfreq_profile.target = gk20a_scale_target;
+ profile->devfreq_profile.get_dev_status =
+ gk20a_scale_get_dev_status;
+
+ devfreq = devfreq_add_device(&pdev->dev,
+ &profile->devfreq_profile,
+ platform->devfreq_governor, NULL);
+
+ if (IS_ERR(devfreq))
+ devfreq = NULL;
+
+ g->devfreq = devfreq;
+ }
+
+ /* Should we register QoS callback for this device? */
+ if (platform->qos_id < PM_QOS_NUM_CLASSES &&
+ platform->qos_id != PM_QOS_RESERVED &&
+ platform->postscale) {
+ profile->qos_notify_block.notifier_call =
+ &gk20a_scale_qos_notify;
+ pm_qos_add_notifier(platform->qos_id,
+ &profile->qos_notify_block);
+ }
+
+ return;
+
+err_create_sysfs_entry:
+err_get_freqs:
+	kfree(profile);
+}
+
+/*
+ * gk20a_scale_hw_init(dev)
+ *
+ * Initialize hardware portion of the device
+ */
+
+void gk20a_scale_hw_init(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* make sure that scaling has been initialised */
+ if (!profile)
+ return;
+
+ profile->dev_stat.total_time = 0;
+ profile->last_event_time = ktime_get();
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.h b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h
new file mode 100644
index 000000000000..e76b16627105
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.h
@@ -0,0 +1,51 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GK20A_SCALE_H
+#define GK20A_SCALE_H
+
+#include <linux/nvhost.h>
+#include <linux/devfreq.h>
+
+struct platform_device;
+struct clk;
+
+struct gk20a_scale_profile {
+ struct platform_device *pdev;
+ ktime_t last_event_time;
+ struct devfreq_dev_profile devfreq_profile;
+ struct devfreq_dev_status dev_stat;
+ struct notifier_block qos_notify_block;
+ void *private_data;
+};
+
+/* Initialization and de-initialization for module */
+void gk20a_scale_init(struct platform_device *);
+void gk20a_scale_hw_init(struct platform_device *pdev);
+
+/*
+ * call when performing submit to notify scaling mechanism that the module is
+ * in use
+ */
+void gk20a_scale_notify_busy(struct platform_device *);
+void gk20a_scale_notify_idle(struct platform_device *);
+
+void gk20a_scale_suspend(struct platform_device *);
+void gk20a_scale_resume(struct platform_device *);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
new file mode 100644
index 000000000000..f6b43f506bd0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -0,0 +1,335 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/kernel.h>
+#include <linux/fb.h>
+
+#include <mach/clk.h>
+
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "fifo_gk20a.h"
+
+
+#define PTIMER_FP_FACTOR 1000000
+/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is
+ the resolution of ptimer. */
+#define PTIMER_REF_FREQ_HZ 31250000
+
+
+static ssize_t elcg_enable_store(struct device *device,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+ unsigned long val = 0;
+
+ if (kstrtoul(buf, 10, &val) < 0)
+ return -EINVAL;
+
+ gk20a_busy(g->dev);
+ if (val) {
+ g->elcg_enabled = true;
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
+ } else {
+ g->elcg_enabled = false;
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
+ }
+ gk20a_idle(g->dev);
+
+ dev_info(device, "ELCG is %s.\n", g->elcg_enabled ? "enabled" :
+ "disabled");
+
+ return count;
+}
+
+static ssize_t elcg_enable_read(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+
+ return sprintf(buf, "%d\n", g->elcg_enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(elcg_enable, S_IRWXUGO, elcg_enable_read, elcg_enable_store);
+
+static ssize_t blcg_enable_store(struct device *device,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+ unsigned long val = 0;
+
+ if (kstrtoul(buf, 10, &val) < 0)
+ return -EINVAL;
+
+ if (val)
+ g->blcg_enabled = true;
+ else
+ g->blcg_enabled = false;
+
+ gk20a_busy(g->dev);
+ g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
+ gk20a_idle(g->dev);
+
+ dev_info(device, "BLCG is %s.\n", g->blcg_enabled ? "enabled" :
+ "disabled");
+
+ return count;
+}
+
+static ssize_t blcg_enable_read(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+
+ return sprintf(buf, "%d\n", g->blcg_enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(blcg_enable, S_IRWXUGO, blcg_enable_read, blcg_enable_store);
+
+static ssize_t slcg_enable_store(struct device *device,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+ unsigned long val = 0;
+
+ if (kstrtoul(buf, 10, &val) < 0)
+ return -EINVAL;
+
+ if (val)
+ g->slcg_enabled = true;
+ else
+ g->slcg_enabled = false;
+
+ /*
+ * TODO: slcg_therm_load_gating is not enabled anywhere during
+ * init. Therefore, it would be incongruous to add it here. Once
+ * it is added to init, we should add it here too.
+ */
+ gk20a_busy(g->dev);
+ g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
+ g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
+ gk20a_idle(g->dev);
+
+ dev_info(device, "SLCG is %s.\n", g->slcg_enabled ? "enabled" :
+ "disabled");
+
+ return count;
+}
+
+static ssize_t slcg_enable_read(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+
+ return sprintf(buf, "%d\n", g->slcg_enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(slcg_enable, S_IRWXUGO, slcg_enable_read, slcg_enable_store);
+
+static ssize_t ptimer_scale_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u32 tsc_freq_hz = clk_get_rate(clk_get_sys(NULL, "clk_m"));
+ u32 scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) /
+ ((u32)(tsc_freq_hz) /
+ (u32)(PTIMER_FP_FACTOR));
+ ssize_t res = snprintf(buf,
+ PAGE_SIZE,
+ "%u.%u\n",
+ scaling_factor_fp / PTIMER_FP_FACTOR,
+ scaling_factor_fp % PTIMER_FP_FACTOR);
+
+ return res;
+}
+
+static DEVICE_ATTR(ptimer_scale_factor,
+ S_IRUGO,
+ ptimer_scale_factor_show,
+ NULL);
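+
+/*
+ * Worked example (illustrative, assuming a 19.2 MHz clk_m): the fixed-point
+ * factor above evaluates to
+ *
+ *	PTIMER_REF_FREQ_HZ / (tsc_freq_hz / PTIMER_FP_FACTOR)
+ *	  = 31250000 / (19200000 / 1000000)
+ *	  = 31250000 / 19 = 1644736
+ *
+ * which is shown to userspace as "1.644736".
+ */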
+
+static ssize_t railgate_delay_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ int railgate_delay = 0, ret = 0;
+
+ if (!platform->can_railgate) {
+ dev_info(dev, "does not support power-gating\n");
+ return count;
+ }
+
+ ret = sscanf(buf, "%d", &railgate_delay);
+ if (ret == 1 && railgate_delay >= 0) {
+ struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain);
+ platform->railgate_delay = railgate_delay;
+ pm_genpd_set_poweroff_delay(genpd, platform->railgate_delay);
+ } else
+ dev_err(dev, "Invalid powergate delay\n");
+
+ return count;
+}
+static ssize_t railgate_delay_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ return snprintf(buf, PAGE_SIZE, "%d\n", platform->railgate_delay);
+}
+static DEVICE_ATTR(railgate_delay, S_IRWXUGO, railgate_delay_show,
+ railgate_delay_store);
+
+static ssize_t clockgate_delay_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ int clockgate_delay = 0, ret = 0;
+
+ ret = sscanf(buf, "%d", &clockgate_delay);
+ if (ret == 1 && clockgate_delay >= 0) {
+ platform->clockgate_delay = clockgate_delay;
+ pm_runtime_set_autosuspend_delay(dev,
+ platform->clockgate_delay);
+ } else
+ dev_err(dev, "Invalid clockgate delay\n");
+
+ return count;
+}
+static ssize_t clockgate_delay_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gk20a_platform *platform = dev_get_drvdata(dev);
+ return snprintf(buf, PAGE_SIZE, "%d\n", platform->clockgate_delay);
+}
+static DEVICE_ATTR(clockgate_delay, S_IRWXUGO, clockgate_delay_show,
+ clockgate_delay_store);
+
+static ssize_t counters_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct gk20a *g = get_gk20a(pdev);
+ u32 busy_cycles, total_cycles;
+ ssize_t res;
+
+ gk20a_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
+
+ res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
+
+ return res;
+}
+
+static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL);
+static ssize_t counters_show_reset(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t res = counters_show(dev, attr, buf);
+ struct platform_device *pdev = to_platform_device(dev);
+ struct gk20a *g = get_gk20a(pdev);
+
+ gk20a_pmu_reset_load_counters(g);
+
+ return res;
+}
+
+static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL);
+
+static ssize_t elpg_enable_store(struct device *device,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+ unsigned long val = 0;
+
+ if (kstrtoul(buf, 10, &val) < 0)
+ return -EINVAL;
+
+ /*
+	 * Since ELPG is refcounted, avoid redundant enable/disable calls
+	 * when the requested state is already in effect.
+ */
+ gk20a_channel_busy(g->dev);
+ if (val && !g->elpg_enabled) {
+ g->elpg_enabled = true;
+ gk20a_pmu_enable_elpg(g);
+ } else if (!val && g->elpg_enabled) {
+ g->elpg_enabled = false;
+ gk20a_pmu_disable_elpg(g);
+ }
+ gk20a_channel_idle(g->dev);
+
+ dev_info(device, "ELPG is %s.\n", g->elpg_enabled ? "enabled" :
+ "disabled");
+
+ return count;
+}
+
+static ssize_t elpg_enable_read(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ndev = to_platform_device(device);
+ struct gk20a *g = get_gk20a(ndev);
+
+ return sprintf(buf, "%d\n", g->elpg_enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(elpg_enable, S_IRWXUGO, elpg_enable_read, elpg_enable_store);
+
+void gk20a_remove_sysfs(struct device *dev)
+{
+ device_remove_file(dev, &dev_attr_elcg_enable);
+ device_remove_file(dev, &dev_attr_blcg_enable);
+ device_remove_file(dev, &dev_attr_slcg_enable);
+ device_remove_file(dev, &dev_attr_ptimer_scale_factor);
+ device_remove_file(dev, &dev_attr_elpg_enable);
+ device_remove_file(dev, &dev_attr_counters);
+ device_remove_file(dev, &dev_attr_counters_reset);
+ device_remove_file(dev, &dev_attr_railgate_delay);
+ device_remove_file(dev, &dev_attr_clockgate_delay);
+}
+
+void gk20a_create_sysfs(struct platform_device *dev)
+{
+ int error = 0;
+
+ error |= device_create_file(&dev->dev, &dev_attr_elcg_enable);
+ error |= device_create_file(&dev->dev, &dev_attr_blcg_enable);
+ error |= device_create_file(&dev->dev, &dev_attr_slcg_enable);
+ error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor);
+ error |= device_create_file(&dev->dev, &dev_attr_elpg_enable);
+ error |= device_create_file(&dev->dev, &dev_attr_counters);
+ error |= device_create_file(&dev->dev, &dev_attr_counters_reset);
+ error |= device_create_file(&dev->dev, &dev_attr_railgate_delay);
+ error |= device_create_file(&dev->dev, &dev_attr_clockgate_delay);
+
+ if (error)
+ dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
new file mode 100644
index 000000000000..59404f1d8868
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
@@ -0,0 +1,333 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
+ *
+ * GK20A Graphics Context
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/firmware.h>
+
+#include "gk20a.h"
+#include "gr_ctx_gk20a.h"
+#include "hw_gr_gk20a.h"
+
+static int gr_gk20a_alloc_load_netlist_u32(u32 *src, u32 len,
+ struct u32_list_gk20a *u32_list)
+{
+ u32_list->count = (len + sizeof(u32) - 1) / sizeof(u32);
+ if (!alloc_u32_list_gk20a(u32_list))
+ return -ENOMEM;
+
+ memcpy(u32_list->l, src, len);
+
+ return 0;
+}
+
+static int gr_gk20a_alloc_load_netlist_av(u32 *src, u32 len,
+ struct av_list_gk20a *av_list)
+{
+ av_list->count = len / sizeof(struct av_gk20a);
+ if (!alloc_av_list_gk20a(av_list))
+ return -ENOMEM;
+
+ memcpy(av_list->l, src, len);
+
+ return 0;
+}
+
+static int gr_gk20a_alloc_load_netlist_aiv(u32 *src, u32 len,
+ struct aiv_list_gk20a *aiv_list)
+{
+ aiv_list->count = len / sizeof(struct aiv_gk20a);
+ if (!alloc_aiv_list_gk20a(aiv_list))
+ return -ENOMEM;
+
+ memcpy(aiv_list->l, src, len);
+
+ return 0;
+}
+
+static int gr_gk20a_get_netlist_name(int index, char *name)
+{
+ switch (index) {
+#ifdef GK20A_NETLIST_IMAGE_FW_NAME
+ case NETLIST_FINAL:
+ sprintf(name, GK20A_NETLIST_IMAGE_FW_NAME);
+ return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_A
+ case NETLIST_SLOT_A:
+ sprintf(name, GK20A_NETLIST_IMAGE_A);
+ return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_B
+ case NETLIST_SLOT_B:
+ sprintf(name, GK20A_NETLIST_IMAGE_B);
+ return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_C
+ case NETLIST_SLOT_C:
+ sprintf(name, GK20A_NETLIST_IMAGE_C);
+ return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_D
+ case NETLIST_SLOT_D:
+ sprintf(name, GK20A_NETLIST_IMAGE_D);
+ return 0;
+#endif
+ default:
+ return -1;
+ }
+
+ return -1;
+}
+
+static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct device *d = dev_from_gk20a(g);
+ const struct firmware *netlist_fw;
+ struct netlist_image *netlist = NULL;
+ char name[MAX_NETLIST_NAME];
+ u32 i, major_v = ~0, major_v_hw, netlist_num;
+ int net, max, err = -ENOENT;
+
+ gk20a_dbg_fn("");
+
+#ifdef GK20A_NETLIST_IMAGE_FW_NAME
+ net = NETLIST_FINAL;
+ max = 0;
+ major_v_hw = ~0;
+ g->gr.ctx_vars.dynamic = false;
+#else
+ net = NETLIST_SLOT_A;
+ max = MAX_NETLIST;
+ major_v_hw = gk20a_readl(g, gr_fecs_ctx_state_store_major_rev_id_r());
+ g->gr.ctx_vars.dynamic = true;
+#endif
+
+ for (; net < max; net++) {
+
+ if (gr_gk20a_get_netlist_name(net, name) != 0) {
+ gk20a_warn(d, "invalid netlist index %d", net);
+ continue;
+ }
+
+ netlist_fw = gk20a_request_firmware(g, name);
+ if (!netlist_fw) {
+ gk20a_warn(d, "failed to load netlist %s", name);
+ continue;
+ }
+
+ netlist = (struct netlist_image *)netlist_fw->data;
+
+ for (i = 0; i < netlist->header.regions; i++) {
+ u32 *src = (u32 *)((u8 *)netlist + netlist->regions[i].data_offset);
+ u32 size = netlist->regions[i].data_size;
+
+ switch (netlist->regions[i].region_id) {
+ case NETLIST_REGIONID_FECS_UCODE_DATA:
+ gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_DATA");
+ err = gr_gk20a_alloc_load_netlist_u32(
+ src, size, &g->gr.ctx_vars.ucode.fecs.data);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_FECS_UCODE_INST:
+ gk20a_dbg_info("NETLIST_REGIONID_FECS_UCODE_INST");
+ err = gr_gk20a_alloc_load_netlist_u32(
+ src, size, &g->gr.ctx_vars.ucode.fecs.inst);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_GPCCS_UCODE_DATA:
+ gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_DATA");
+ err = gr_gk20a_alloc_load_netlist_u32(
+ src, size, &g->gr.ctx_vars.ucode.gpccs.data);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_GPCCS_UCODE_INST:
+ gk20a_dbg_info("NETLIST_REGIONID_GPCCS_UCODE_INST");
+ err = gr_gk20a_alloc_load_netlist_u32(
+ src, size, &g->gr.ctx_vars.ucode.gpccs.inst);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_SW_BUNDLE_INIT:
+ gk20a_dbg_info("NETLIST_REGIONID_SW_BUNDLE_INIT");
+ err = gr_gk20a_alloc_load_netlist_av(
+ src, size, &g->gr.ctx_vars.sw_bundle_init);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_SW_METHOD_INIT:
+ gk20a_dbg_info("NETLIST_REGIONID_SW_METHOD_INIT");
+ err = gr_gk20a_alloc_load_netlist_av(
+ src, size, &g->gr.ctx_vars.sw_method_init);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_SW_CTX_LOAD:
+ gk20a_dbg_info("NETLIST_REGIONID_SW_CTX_LOAD");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.sw_ctx_load);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_SW_NON_CTX_LOAD:
+ gk20a_dbg_info("NETLIST_REGIONID_SW_NON_CTX_LOAD");
+ err = gr_gk20a_alloc_load_netlist_av(
+ src, size, &g->gr.ctx_vars.sw_non_ctx_load);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_SYS:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_SYS");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.sys);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_GPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_GPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.gpc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_TPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_TPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.tpc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_ZCULL_GPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_ZCULL_GPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_PPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.ppc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_PM_SYS:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_SYS");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.pm_sys);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_PM_GPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_GPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.pm_gpc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_CTXREG_PM_TPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PM_TPC");
+ err = gr_gk20a_alloc_load_netlist_aiv(
+ src, size, &g->gr.ctx_vars.ctxsw_regs.pm_tpc);
+ if (err)
+ goto clean_up;
+ break;
+ case NETLIST_REGIONID_BUFFER_SIZE:
+ g->gr.ctx_vars.buffer_size = *src;
+ gk20a_dbg_info("NETLIST_REGIONID_BUFFER_SIZE : %d",
+ g->gr.ctx_vars.buffer_size);
+ break;
+ case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX:
+ g->gr.ctx_vars.regs_base_index = *src;
+ gk20a_dbg_info("NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %d",
+ g->gr.ctx_vars.regs_base_index);
+ break;
+ case NETLIST_REGIONID_MAJORV:
+ major_v = *src;
+ gk20a_dbg_info("NETLIST_REGIONID_MAJORV : %d",
+ major_v);
+ break;
+ case NETLIST_REGIONID_NETLIST_NUM:
+ netlist_num = *src;
+ gk20a_dbg_info("NETLIST_REGIONID_NETLIST_NUM : %d",
+ netlist_num);
+ break;
+ case NETLIST_REGIONID_CTXREG_PMPPC:
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped");
+ break;
+ default:
+ gk20a_warn(d, "unrecognized region %d skipped", i);
+ break;
+ }
+ }
+
+ if (net != NETLIST_FINAL && major_v != major_v_hw) {
+ gk20a_dbg_info("skip %s: major_v 0x%08x doesn't match hw 0x%08x",
+ name, major_v, major_v_hw);
+ goto clean_up;
+ }
+
+ g->gr.ctx_vars.valid = true;
+ g->gr.netlist = net;
+
+ release_firmware(netlist_fw);
+ gk20a_dbg_fn("done");
+ goto done;
+
+clean_up:
+ kfree(g->gr.ctx_vars.ucode.fecs.inst.l);
+ kfree(g->gr.ctx_vars.ucode.fecs.data.l);
+ kfree(g->gr.ctx_vars.ucode.gpccs.inst.l);
+ kfree(g->gr.ctx_vars.ucode.gpccs.data.l);
+ kfree(g->gr.ctx_vars.sw_bundle_init.l);
+ kfree(g->gr.ctx_vars.sw_method_init.l);
+ kfree(g->gr.ctx_vars.sw_ctx_load.l);
+ kfree(g->gr.ctx_vars.sw_non_ctx_load.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.sys.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.gpc.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.tpc.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.ppc.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
+ kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
+ release_firmware(netlist_fw);
+ err = -ENOENT;
+ }
+
+done:
+ if (g->gr.ctx_vars.valid) {
+ gk20a_dbg_info("netlist image %s loaded", name);
+ return 0;
+ } else {
+ gk20a_err(d, "failed to load netlist image!!");
+ return err;
+ }
+}
+
+int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr)
+{
+ if (tegra_platform_is_linsim())
+ return gr_gk20a_init_ctx_vars_sim(g, gr);
+ else
+ return gr_gk20a_init_ctx_vars_fw(g, gr);
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
new file mode 100644
index 000000000000..909a166ae9c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
@@ -0,0 +1,149 @@
+/*
+ * GK20A Graphics Context
+ *
+ * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GR_CTX_GK20A_H__
+#define __GR_CTX_GK20A_H__
+
+
+/* production netlist, one and only one from below */
+/*#undef GK20A_NETLIST_IMAGE_FW_NAME*/
+#define GK20A_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B
+/* emulation netlists, match majorV with HW */
+#define GK20A_NETLIST_IMAGE_A "NETA_img.bin"
+#define GK20A_NETLIST_IMAGE_B "NETB_img.bin"
+#define GK20A_NETLIST_IMAGE_C "NETC_img.bin"
+#define GK20A_NETLIST_IMAGE_D "NETD_img.bin"
+
+union __max_name {
+#ifdef GK20A_NETLIST_IMAGE_A
+ char __name_a[sizeof(GK20A_NETLIST_IMAGE_A)];
+#endif
+#ifdef GK20A_NETLIST_IMAGE_B
+ char __name_b[sizeof(GK20A_NETLIST_IMAGE_B)];
+#endif
+#ifdef GK20A_NETLIST_IMAGE_C
+ char __name_c[sizeof(GK20A_NETLIST_IMAGE_C)];
+#endif
+#ifdef GK20A_NETLIST_IMAGE_D
+ char __name_d[sizeof(GK20A_NETLIST_IMAGE_D)];
+#endif
+};
+
+#define MAX_NETLIST_NAME sizeof(union __max_name)
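+
+/*
+ * MAX_NETLIST_NAME resolves to the size of the longest configured image
+ * name (including the terminating NUL): a union is as large as its largest
+ * member, so with the names above it evaluates to sizeof("NETA_img.bin"),
+ * i.e. 13 bytes, since all four strings are the same length.
+ */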
+
+/* index for emulation netlists */
+#define NETLIST_FINAL -1
+#define NETLIST_SLOT_A 0
+#define NETLIST_SLOT_B 1
+#define NETLIST_SLOT_C 2
+#define NETLIST_SLOT_D 3
+#define MAX_NETLIST 4
+
+/* netlist regions */
+#define NETLIST_REGIONID_FECS_UCODE_DATA 0
+#define NETLIST_REGIONID_FECS_UCODE_INST 1
+#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2
+#define NETLIST_REGIONID_GPCCS_UCODE_INST 3
+#define NETLIST_REGIONID_SW_BUNDLE_INIT 4
+#define NETLIST_REGIONID_SW_CTX_LOAD 5
+#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6
+#define NETLIST_REGIONID_SW_METHOD_INIT 7
+#define NETLIST_REGIONID_CTXREG_SYS 8
+#define NETLIST_REGIONID_CTXREG_GPC 9
+#define NETLIST_REGIONID_CTXREG_TPC 10
+#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11
+#define NETLIST_REGIONID_CTXREG_PM_SYS 12
+#define NETLIST_REGIONID_CTXREG_PM_GPC 13
+#define NETLIST_REGIONID_CTXREG_PM_TPC 14
+#define NETLIST_REGIONID_MAJORV 15
+#define NETLIST_REGIONID_BUFFER_SIZE 16
+#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17
+#define NETLIST_REGIONID_NETLIST_NUM 18
+#define NETLIST_REGIONID_CTXREG_PPC 19
+#define NETLIST_REGIONID_CTXREG_PMPPC 20
+
+struct netlist_region {
+ u32 region_id;
+ u32 data_size;
+ u32 data_offset;
+};
+
+struct netlist_image_header {
+ u32 version;
+ u32 regions;
+};
+
+struct netlist_image {
+ struct netlist_image_header header;
+ struct netlist_region regions[1];
+};
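+
+/*
+ * Layout note (illustrative, mirroring the parsing code in gr_ctx_gk20a.c):
+ * each region's data_offset is a byte offset from the start of the image,
+ * so a region payload is located as
+ *
+ *	struct netlist_image *img = (struct netlist_image *)fw->data;
+ *	u32 *payload = (u32 *)((u8 *)img + img->regions[i].data_offset);
+ */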
+
+struct av_gk20a {
+ u32 addr;
+ u32 value;
+};
+struct aiv_gk20a {
+ u32 addr;
+ u32 index;
+ u32 value;
+};
+struct aiv_list_gk20a {
+ struct aiv_gk20a *l;
+ u32 count;
+};
+struct av_list_gk20a {
+ struct av_gk20a *l;
+ u32 count;
+};
+struct u32_list_gk20a {
+ u32 *l;
+ u32 count;
+};
+
+static inline
+struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl)
+{
+ avl->l = kzalloc(avl->count * sizeof(*avl->l), GFP_KERNEL);
+ return avl->l;
+}
+
+static inline
+struct aiv_gk20a *alloc_aiv_list_gk20a(struct aiv_list_gk20a *aivl)
+{
+ aivl->l = kzalloc(aivl->count * sizeof(*aivl->l), GFP_KERNEL);
+ return aivl->l;
+}
+
+static inline
+u32 *alloc_u32_list_gk20a(struct u32_list_gk20a *u32l)
+{
+ u32l->l = kzalloc(u32l->count * sizeof(*u32l->l), GFP_KERNEL);
+ return u32l->l;
+}
+
+struct gr_ucode_gk20a {
+ struct {
+ struct u32_list_gk20a inst;
+ struct u32_list_gk20a data;
+ } gpccs, fecs;
+};
+
+/* main entry for grctx loading */
+int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
+int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr);
+
+#endif /*__GR_CTX_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
new file mode 100644
index 000000000000..12bba1fd7249
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
@@ -0,0 +1,256 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
+ *
+ * GK20A Graphics Context for Simulation
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "gk20a.h"
+#include "gr_ctx_gk20a.h"
+
+int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
+{
+ int err = 0;
+ u32 i, temp;
+ char *size_path = NULL;
+ char *reg_path = NULL;
+ char *value_path = NULL;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
+ "querying grctx info from chiplib");
+
+ g->gr.ctx_vars.dynamic = true;
+ g->gr.netlist = GR_NETLIST_DYNAMIC;
+
+ /* query sizes and counts */
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
+ &g->gr.ctx_vars.ucode.fecs.inst.count);
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
+ &g->gr.ctx_vars.ucode.fecs.data.count);
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
+ &g->gr.ctx_vars.ucode.gpccs.inst.count);
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
+ &g->gr.ctx_vars.ucode.gpccs.data.count);
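+	/* total context size is reported in 32-bit words; convert to bytes */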
+ gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
+ g->gr.ctx_vars.buffer_size = temp << 2;
+ gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
+ &g->gr.ctx_vars.sw_bundle_init.count);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
+ &g->gr.ctx_vars.sw_method_init.count);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
+ &g->gr.ctx_vars.sw_ctx_load.count);
+
+	switch (0) { /* g->gr.ctx_vars.reg_init_override */
+#if 0
+ case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF:
+ sizePath = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE";
+ regPath = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG";
+ valuePath = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE";
+ break;
+#endif
+ default:
+ size_path = "GRCTX_NONCTXSW_REG_SIZE";
+ reg_path = "GRCTX_NONCTXSW_REG:REG";
+ value_path = "GRCTX_NONCTXSW_REG:VALUE";
+ break;
+ }
+
+ gk20a_sim_esc_readl(g, size_path, 0,
+ &g->gr.ctx_vars.sw_non_ctx_load.count);
+
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.sys.count);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.gpc.count);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.tpc.count);
+#if 0
+ /* looks to be unused, actually chokes the sim */
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.ppc.count);
+#endif
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
+ &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);
+
+ err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst);
+ err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data);
+ err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst);
+ err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data);
+ err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init);
+ err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load);
+ err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc);
+ err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc);
+
+ if (err)
+ goto fail;
+
+ for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++)
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS",
+ i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);
+
+ for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++)
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS",
+ i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);
+
+ for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++)
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
+ i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);
+
+ for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++)
+ gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
+ i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);
+
+ for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
+ struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
+ gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
+ struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
+ gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
+ gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
+ struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
+ gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr);
+ gk20a_sim_esc_readl(g, value_path, i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
+ i, &l[i].value);
+ }
+
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
+ struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
+ i, &l[i].addr);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
+ i, &l[i].index);
+ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
+ i, &l[i].value);
+ }
+
+ g->gr.ctx_vars.valid = true;
+
+ gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
+ &g->gr.ctx_vars.regs_base_index);
+
+ gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
+ return 0;
+fail:
+ gk20a_err(dev_from_gk20a(g),
+ "failed querying grctx info from chiplib");
+ return err;
+
+}
+
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
new file mode 100644
index 000000000000..0f93940b402f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -0,0 +1,6747 @@
+/*
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/delay.h> /* for udelay */
+#include <linux/mm.h> /* for totalram_pages */
+#include <linux/scatterlist.h>
+#include <linux/tegra-soc.h>
+#include <linux/nvhost_dbg_gpu_ioctl.h>
+#include <linux/vmalloc.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/nvhost.h>
+
+#include "gk20a.h"
+#include "kind_gk20a.h"
+#include "gr_ctx_gk20a.h"
+
+#include "hw_ccsr_gk20a.h"
+#include "hw_ctxsw_prog_gk20a.h"
+#include "hw_fifo_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_gmmu_gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_ram_gk20a.h"
+#include "hw_pri_ringmaster_gk20a.h"
+#include "hw_pri_ringstation_sys_gk20a.h"
+#include "hw_pri_ringstation_gpc_gk20a.h"
+#include "hw_pri_ringstation_fbp_gk20a.h"
+#include "hw_proj_gk20a.h"
+#include "hw_top_gk20a.h"
+#include "hw_ltc_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_therm_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+#include "gr_pri_gk20a.h"
+#include "regops_gk20a.h"
+#include "dbg_gpu_gk20a.h"
+
+#define BLK_SIZE (256)
+
+static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
+
+/* global ctx buffer */
+static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
+static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
+static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
+ struct channel_gk20a *c);
+static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
+
+/* channel gr ctx buffer */
+static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
+ struct channel_gk20a *c);
+static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
+
+/* channel patch ctx buffer */
+static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
+ struct channel_gk20a *c);
+static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
+
+/* golden ctx image */
+static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
+ struct channel_gk20a *c);
+static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
+ struct channel_gk20a *c);
+
+void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
+{
+ int i;
+
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_os_r : %d",
+ gk20a_readl(g, gr_fecs_os_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_cpuctl_r : 0x%x",
+ gk20a_readl(g, gr_fecs_cpuctl_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_idlestate_r : 0x%x",
+ gk20a_readl(g, gr_fecs_idlestate_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox0_r : 0x%x",
+ gk20a_readl(g, gr_fecs_mailbox0_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox1_r : 0x%x",
+ gk20a_readl(g, gr_fecs_mailbox1_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_irqstat_r : 0x%x",
+ gk20a_readl(g, gr_fecs_irqstat_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmode_r : 0x%x",
+ gk20a_readl(g, gr_fecs_irqmode_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmask_r : 0x%x",
+ gk20a_readl(g, gr_fecs_irqmask_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_irqdest_r : 0x%x",
+ gk20a_readl(g, gr_fecs_irqdest_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_debug1_r : 0x%x",
+ gk20a_readl(g, gr_fecs_debug1_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_debuginfo_r : 0x%x",
+ gk20a_readl(g, gr_fecs_debuginfo_r()));
+
+ for (i = 0; i < gr_fecs_ctxsw_mailbox__size_1_v(); i++)
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
+ i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));
+
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_engctl_r : 0x%x",
+ gk20a_readl(g, gr_fecs_engctl_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_curctx_r : 0x%x",
+ gk20a_readl(g, gr_fecs_curctx_r()));
+ gk20a_err(dev_from_gk20a(g), "gr_fecs_nxtctx_r : 0x%x",
+ gk20a_readl(g, gr_fecs_nxtctx_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_IMB : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_DMB : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CSW : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CTX : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_EXCI : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ for (i = 0; i < 4; i++) {
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_PC : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+
+ gk20a_writel(g, gr_fecs_icd_cmd_r(),
+ gr_fecs_icd_cmd_opc_rreg_f() |
+ gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
+ gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_SP : 0x%x",
+ gk20a_readl(g, gr_fecs_icd_rdata_r()));
+ }
+}
+
+static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
+{
+ u32 i, ucode_u32_size;
+ const u32 *ucode_u32_data;
+ u32 checksum;
+
+ gk20a_dbg_fn("");
+
+ gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
+ gr_gpccs_dmemc_blk_f(0) |
+ gr_gpccs_dmemc_aincw_f(1)));
+
+ ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
+ ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
+
+ for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
+ gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
+ checksum += ucode_u32_data[i];
+ }
+
+ gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
+ gr_fecs_dmemc_blk_f(0) |
+ gr_fecs_dmemc_aincw_f(1)));
+
+ ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
+ ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
+
+ for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
+ gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
+ checksum += ucode_u32_data[i];
+ }
+ gk20a_dbg_fn("done");
+}
+
+static void gr_gk20a_load_falcon_imem(struct gk20a *g)
+{
+ u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
+ const u32 *ucode_u32_data;
+ u32 tag, i, pad_start, pad_end;
+ u32 checksum;
+
+ gk20a_dbg_fn("");
+
+ cfg = gk20a_readl(g, gr_fecs_cfg_r());
+ fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
+
+ cfg = gk20a_readl(g, gr_gpc0_cfg_r());
+ gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
+
+ /* Use the broadcast address to access all of the GPCCS units. */
+ gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
+ gr_gpccs_imemc_blk_f(0) |
+ gr_gpccs_imemc_aincw_f(1)));
+
+ /* Setup the tags for the instruction memory. */
+ tag = 0;
+ gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
+
+ ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
+ ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
+
+ for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
+ if (i && ((i % (256/sizeof(u32))) == 0)) {
+ tag++;
+ gk20a_writel(g, gr_gpccs_imemt_r(0),
+ gr_gpccs_imemt_tag_f(tag));
+ }
+ gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
+ checksum += ucode_u32_data[i];
+ }
+
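+	/* zero-fill the remainder of the current 256-byte IMEM block plus one
+	 * extra block, bumping the tag at each block boundary */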
+ pad_start = i*4;
+ pad_end = pad_start+(256-pad_start%256)+256;
+ for (i = pad_start;
+ (i < gpccs_imem_size * 256) && (i < pad_end);
+ i += 4) {
+ if (i && ((i % 256) == 0)) {
+ tag++;
+ gk20a_writel(g, gr_gpccs_imemt_r(0),
+ gr_gpccs_imemt_tag_f(tag));
+ }
+ gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
+ }
+
+ gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
+ gr_fecs_imemc_blk_f(0) |
+ gr_fecs_imemc_aincw_f(1)));
+
+ /* Setup the tags for the instruction memory. */
+ tag = 0;
+ gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
+
+ ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
+ ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
+
+ for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
+ if (i && ((i % (256/sizeof(u32))) == 0)) {
+ tag++;
+ gk20a_writel(g, gr_fecs_imemt_r(0),
+ gr_fecs_imemt_tag_f(tag));
+ }
+ gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
+ checksum += ucode_u32_data[i];
+ }
+
+ pad_start = i*4;
+ pad_end = pad_start+(256-pad_start%256)+256;
+ for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
+ if (i && ((i % 256) == 0)) {
+ tag++;
+ gk20a_writel(g, gr_fecs_imemt_r(0),
+ gr_fecs_imemt_tag_f(tag));
+ }
+ gk20a_writel(g, gr_fecs_imemd_r(0), 0);
+ }
+}
+
+static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
+ u32 expect_delay)
+{
+ u32 delay = expect_delay;
+ bool gr_enabled;
+ bool ctxsw_active;
+ bool gr_busy;
+
+ gk20a_dbg_fn("");
+
+ do {
+ /* fmodel: host gets fifo_engine_status(gr) from gr
+ only when gr_status is read */
+ gk20a_readl(g, gr_status_r());
+
+ gr_enabled = gk20a_readl(g, mc_enable_r()) &
+ mc_enable_pgraph_enabled_f();
+
+ ctxsw_active = gk20a_readl(g,
+ fifo_engine_status_r(ENGINE_GR_GK20A)) &
+ fifo_engine_status_ctxsw_in_progress_f();
+
+ gr_busy = gk20a_readl(g, gr_engine_status_r()) &
+ gr_engine_status_value_busy_f();
+
+ if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
+ gk20a_dbg_fn("done");
+ return 0;
+ }
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+ } while (time_before(jiffies, end_jiffies)
+ || !tegra_platform_is_silicon());
+
+ gk20a_err(dev_from_gk20a(g),
+ "timeout, ctxsw busy : %d, gr busy : %d",
+ ctxsw_active, gr_busy);
+
+ return -EAGAIN;
+}
+
+static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
+{
+ u32 delay = GR_IDLE_CHECK_DEFAULT;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 reg;
+
+ gk20a_dbg_fn("");
+
+ if (!tegra_platform_is_linsim()) {
+ /* Force clocks on */
+ gk20a_writel(g, gr_fe_pwr_mode_r(),
+ gr_fe_pwr_mode_req_send_f() |
+ gr_fe_pwr_mode_mode_force_on_f());
+
+ /* Wait for the clocks to indicate that they are on */
+ do {
+ reg = gk20a_readl(g, gr_fe_pwr_mode_r());
+
+ if (gr_fe_pwr_mode_req_v(reg) ==
+ gr_fe_pwr_mode_req_done_v())
+ break;
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+ } while (time_before(jiffies, end_jiffies));
+
+ if (!time_before(jiffies, end_jiffies)) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to force the clocks on\n");
+ WARN_ON(1);
+ }
+ }
+ if (rst_mask) {
+ gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
+ } else {
+ gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
+ gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
+ }
+
+ /* we need to read the reset register *and* wait for a moment to ensure
+ * reset propagation */
+
+ gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
+ udelay(20);
+
+ gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
+ gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
+ gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
+
+	/* read back the reset register and wait briefly for it to propagate */
+ gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
+ udelay(20);
+
+ if (!tegra_platform_is_linsim()) {
+ /* Set power mode back to auto */
+ gk20a_writel(g, gr_fe_pwr_mode_r(),
+ gr_fe_pwr_mode_req_send_f() |
+ gr_fe_pwr_mode_mode_auto_f());
+
+ /* Wait for the request to complete */
+ end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ do {
+ reg = gk20a_readl(g, gr_fe_pwr_mode_r());
+
+ if (gr_fe_pwr_mode_req_v(reg) ==
+ gr_fe_pwr_mode_req_done_v())
+ break;
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+ } while (time_before(jiffies, end_jiffies));
+
+ if (!time_before(jiffies, end_jiffies))
+ gk20a_warn(dev_from_gk20a(g),
+ "failed to set power mode to auto\n");
+ }
+
+ return 0;
+}
+
+static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
+ u32 *mailbox_ret, u32 opc_success,
+ u32 mailbox_ok, u32 opc_fail,
+ u32 mailbox_fail)
+{
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 delay = GR_IDLE_CHECK_DEFAULT;
+ u32 check = WAIT_UCODE_LOOP;
+ u32 reg;
+
+ gk20a_dbg_fn("");
+
+ while (check == WAIT_UCODE_LOOP) {
+ if (!time_before(jiffies, end_jiffies) &&
+ tegra_platform_is_silicon())
+ check = WAIT_UCODE_TIMEOUT;
+
+ reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
+
+ if (mailbox_ret)
+ *mailbox_ret = reg;
+
+ switch (opc_success) {
+ case GR_IS_UCODE_OP_EQUAL:
+ if (reg == mailbox_ok)
+ check = WAIT_UCODE_OK;
+ break;
+ case GR_IS_UCODE_OP_NOT_EQUAL:
+ if (reg != mailbox_ok)
+ check = WAIT_UCODE_OK;
+ break;
+ case GR_IS_UCODE_OP_AND:
+ if (reg & mailbox_ok)
+ check = WAIT_UCODE_OK;
+ break;
+ case GR_IS_UCODE_OP_LESSER:
+ if (reg < mailbox_ok)
+ check = WAIT_UCODE_OK;
+ break;
+ case GR_IS_UCODE_OP_LESSER_EQUAL:
+ if (reg <= mailbox_ok)
+ check = WAIT_UCODE_OK;
+ break;
+ case GR_IS_UCODE_OP_SKIP:
+ /* do no success check */
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid success opcode 0x%x", opc_success);
+
+ check = WAIT_UCODE_ERROR;
+ break;
+ }
+
+ switch (opc_fail) {
+ case GR_IS_UCODE_OP_EQUAL:
+ if (reg == mailbox_fail)
+ check = WAIT_UCODE_ERROR;
+ break;
+ case GR_IS_UCODE_OP_NOT_EQUAL:
+ if (reg != mailbox_fail)
+ check = WAIT_UCODE_ERROR;
+ break;
+ case GR_IS_UCODE_OP_AND:
+ if (reg & mailbox_fail)
+ check = WAIT_UCODE_ERROR;
+ break;
+ case GR_IS_UCODE_OP_LESSER:
+ if (reg < mailbox_fail)
+ check = WAIT_UCODE_ERROR;
+ break;
+ case GR_IS_UCODE_OP_LESSER_EQUAL:
+ if (reg <= mailbox_fail)
+ check = WAIT_UCODE_ERROR;
+ break;
+ case GR_IS_UCODE_OP_SKIP:
+ /* do no check on fail*/
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid fail opcode 0x%x", opc_fail);
+ check = WAIT_UCODE_ERROR;
+ break;
+ }
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+ }
+
+ if (check == WAIT_UCODE_TIMEOUT) {
+ gk20a_err(dev_from_gk20a(g),
+ "timeout waiting on ucode response");
+ gk20a_fecs_dump_falcon_stats(g);
+ return -1;
+ } else if (check == WAIT_UCODE_ERROR) {
+ gk20a_err(dev_from_gk20a(g),
+ "ucode method failed on mailbox=%d value=0x%08x",
+ mailbox_id, reg);
+ gk20a_fecs_dump_falcon_stats(g);
+ return -1;
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+/* The following provides a less brittle way to submit FECS methods.
+ * Most, if not all, FECS method calls should be converted to use it. */
+struct fecs_method_op_gk20a {
+ struct {
+ u32 addr;
+ u32 data;
+ } method;
+
+ struct {
+ u32 id;
+ u32 data;
+ u32 clr;
+ u32 *ret;
+ u32 ok;
+ u32 fail;
+ } mailbox;
+
+ struct {
+ u32 ok;
+ u32 fail;
+ } cond;
+
+};
+
+int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
+ struct fecs_method_op_gk20a op)
+{
+ struct gr_gk20a *gr = &g->gr;
+ int ret;
+
+ mutex_lock(&gr->fecs_mutex);
+
+ if (op.mailbox.id != 0)
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
+ op.mailbox.data);
+
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
+ gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
+
+ gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
+ gk20a_writel(g, gr_fecs_method_push_r(),
+ gr_fecs_method_push_adr_f(op.method.addr));
+
+	/* op.mailbox.id == 4 cases require waiting for completion on
+	 * mailbox 0 */
+ if (op.mailbox.id == 4)
+ op.mailbox.id = 0;
+
+ ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
+ op.cond.ok, op.mailbox.ok,
+ op.cond.fail, op.mailbox.fail);
+
+ mutex_unlock(&gr->fecs_mutex);
+
+ return ret;
+}
+
+int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
+{
+ return gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .method.addr = fecs_method,
+ .method.data = ~0,
+ .mailbox = { .id = 1, /*sideband?*/
+ .data = ~0, .clr = ~0, .ret = ret,
+ .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+ .fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
+ .cond.ok = GR_IS_UCODE_OP_EQUAL,
+ .cond.fail = GR_IS_UCODE_OP_EQUAL });
+}
+
+/* Stop processing (stall) context switches at FECS.
+ * The caller must hold the dbg_sessions_lock; otherwise, if multiple stop
+ * methods are sent to the ucode in sequence, it can get into an undefined
+ * state. */
+int gr_gk20a_disable_ctxsw(struct gk20a *g)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+ return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_stop_ctxsw_v(), 0);
+}
+
+/* Start processing (continue) context switches at FECS */
+int gr_gk20a_enable_ctxsw(struct gk20a *g)
+{
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+ return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_start_ctxsw_v(), 0);
+}
+
+
+static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
+{
+ u32 addr_lo;
+ u32 addr_hi;
+ void *inst_ptr = NULL;
+
+ gk20a_dbg_fn("");
+
+ /* flush gpu_va before commit */
+ gk20a_mm_fb_flush(c->g);
+ gk20a_mm_l2_flush(c->g, true);
+
+ inst_ptr = c->inst_block.cpuva;
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ addr_lo = u64_lo32(gpu_va) >> 12;
+ addr_hi = u64_hi32(gpu_va);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
+ ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
+ ram_in_gr_wfi_ptr_lo_f(addr_lo));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
+ ram_in_gr_wfi_ptr_hi_f(addr_hi));
+
+ gk20a_mm_l2_invalidate(c->g);
+
+ return 0;
+}
+
+/*
+ * Context state can be written directly, or "patched" at times. So that
+ * code can be used in either situation it is written using a series of
+ * _ctx_patch_write(..., patch) statements. However, any necessary cpu
+ * map/unmap and gpu l2 invalidates should be minimized (i.e. not done
+ * once per patch write). Bracket a sequence of these writes with
+ * "_ctx_patch_write_begin" and "_ctx_patch_write_end" (see the usage
+ * sketch after gr_gk20a_ctx_patch_write() below).
+ */
+int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx)
+{
+ /* being defensive still... */
+ if (ch_ctx->patch_ctx.cpu_va) {
+ gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
+ return -EBUSY;
+ }
+
+ ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
+ PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+
+ if (!ch_ctx->patch_ctx.cpu_va)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx)
+{
+ /* being defensive still... */
+ if (!ch_ctx->patch_ctx.cpu_va) {
+ gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
+ return -EINVAL;
+ }
+
+ vunmap(ch_ctx->patch_ctx.cpu_va);
+ ch_ctx->patch_ctx.cpu_va = NULL;
+
+ gk20a_mm_l2_invalidate(g);
+ return 0;
+}
+
+int gr_gk20a_ctx_patch_write(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u32 addr, u32 data, bool patch)
+{
+ u32 patch_slot = 0;
+ void *patch_ptr = NULL;
+ bool mapped_here = false;
+
+ BUG_ON(patch != 0 && ch_ctx == NULL);
+
+ if (patch) {
+ if (!ch_ctx)
+ return -EINVAL;
+		/* an optimization prolog/epilog (the begin/end helpers above)
+		 * avoids unnecessary maps and l2 invalidates, but be
+		 * defensive in case a caller skipped it... */
+ if (!ch_ctx->patch_ctx.cpu_va) {
+ int err;
+ gk20a_err(dev_from_gk20a(g),
+ "per-write ctx patch begin?");
+ /* yes, gr_gk20a_ctx_patch_smpc causes this one */
+ err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+ if (err)
+ return err;
+ mapped_here = true;
+ } else
+ mapped_here = false;
+
+ patch_ptr = ch_ctx->patch_ctx.cpu_va;
+ patch_slot = ch_ctx->patch_ctx.data_count * 2;
+
+ gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
+ gk20a_mem_wr32(patch_ptr, patch_slot++, data);
+
+ ch_ctx->patch_ctx.data_count++;
+
+ if (mapped_here)
+ gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+
+ } else
+ gk20a_writel(g, addr, data);
+
+ return 0;
+}
+
+static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa
+ >> ram_in_base_shift_v());
+ u32 ret;
+
+ gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
+ c->hw_chid, inst_base_ptr);
+
+ ret = gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
+ .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
+ gr_fecs_current_ctx_target_vid_mem_f() |
+ gr_fecs_current_ctx_valid_f(1)),
+ .mailbox = { .id = 0, .data = 0,
+ .clr = 0x30,
+ .ret = NULL,
+ .ok = 0x10,
+ .fail = 0x20, },
+ .cond.ok = GR_IS_UCODE_OP_AND,
+ .cond.fail = GR_IS_UCODE_OP_AND});
+ if (ret)
+ gk20a_err(dev_from_gk20a(g),
+ "bind channel instance failed");
+
+ return ret;
+}
+
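+/* Write the zcull ctxsw mode and the packed zcull buffer pointer into the
+ * channel's gr ctx image, optionally stalling GR engine activity while the
+ * gpu-cacheable image is updated. */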
+static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
+ bool disable_fifo)
+{
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+ u32 va_lo, va_hi, va;
+ int ret = 0;
+ void *ctx_ptr = NULL;
+
+ gk20a_dbg_fn("");
+
+ ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+ PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!ctx_ptr)
+ return -ENOMEM;
+
+ if (ch_ctx->zcull_ctx.gpu_va == 0 &&
+ ch_ctx->zcull_ctx.ctx_sw_mode ==
+ ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
+ ret = -EINVAL;
+ goto clean_up;
+ }
+
+ va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
+ va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
+ va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
+
+ if (disable_fifo) {
+ ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to disable gr engine activity\n");
+ goto clean_up;
+ }
+ }
+
+ /* Channel gr_ctx buffer is gpu cacheable.
+ Flush and invalidate before cpu update. */
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, true);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
+ ch_ctx->zcull_ctx.ctx_sw_mode);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va);
+
+ if (disable_fifo) {
+ ret = gk20a_fifo_enable_engine_activity(g, gr_info);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to enable gr engine activity\n");
+ goto clean_up;
+ }
+ }
+ gk20a_mm_l2_invalidate(g);
+
+clean_up:
+ vunmap(ctx_ptr);
+
+ return ret;
+}
+
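+/* Commit the circular-buffer manager state: beta (attribute) and alpha CB
+ * default sizes, plus per-PPC start offsets and sizes within the global
+ * attribute buffer, written directly or into the patch context depending on
+ * 'patch'. */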
+static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
+ struct channel_gk20a *c, bool patch)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct channel_ctx_gk20a *ch_ctx = NULL;
+ u32 attrib_offset_in_chunk = 0;
+ u32 alpha_offset_in_chunk = 0;
+ u32 pd_ab_max_output;
+ u32 gpc_index, ppc_index;
+ u32 temp;
+ u32 cbm_cfg_size1, cbm_cfg_size2;
+
+ gk20a_dbg_fn("");
+
+ if (patch) {
+ int err;
+ ch_ctx = &c->ch_ctx;
+ err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+ if (err)
+ return err;
+ }
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
+ gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
+ gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
+ patch);
+
+ pd_ab_max_output = (gr->alpha_cb_default_size *
+ gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
+ gr_pd_ab_dist_cfg1_max_output_granularity_v();
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+ gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+ gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+
+ alpha_offset_in_chunk = attrib_offset_in_chunk +
+ gr->tpc_count * gr->attrib_cb_size;
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ temp = proj_gpc_stride_v() * gpc_index;
+ for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+ ppc_index++) {
+ cbm_cfg_size1 = gr->attrib_cb_default_size *
+ gr->pes_tpc_count[ppc_index][gpc_index];
+ cbm_cfg_size2 = gr->alpha_cb_default_size *
+ gr->pes_tpc_count[ppc_index][gpc_index];
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx,
+ gr_gpc0_ppc0_cbm_cfg_r() + temp +
+ proj_ppc_in_gpc_stride_v() * ppc_index,
+ gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
+ gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
+ gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);
+
+ attrib_offset_in_chunk += gr->attrib_cb_size *
+ gr->pes_tpc_count[ppc_index][gpc_index];
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx,
+ gr_gpc0_ppc0_cbm_cfg2_r() + temp +
+ proj_ppc_in_gpc_stride_v() * ppc_index,
+ gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
+ gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);
+
+ alpha_offset_in_chunk += gr->alpha_cb_size *
+ gr->pes_tpc_count[ppc_index][gpc_index];
+ }
+ }
+
+ if (patch)
+ gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+
+ return 0;
+}
+
+static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
+ struct channel_gk20a *c, bool patch)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ u64 addr;
+ u32 size;
+
+ gk20a_dbg_fn("");
+ if (patch) {
+ int err;
+ err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+ if (err)
+ return err;
+ }
+
+ /* global pagepool buffer */
+ addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
+ gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+ (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
+ (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+
+ size = gr->global_ctx_buffer[PAGEPOOL].size /
+ gr_scc_pagepool_total_pages_byte_granularity_v();
+
+ if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
+ size = gr_scc_pagepool_total_pages_hwmax_v();
+
+ gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
+ addr, size);
+
+ g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch);
+
+ /* global bundle cb */
+ addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
+ gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
+ (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
+ (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
+
+ size = gr->bundle_cb_default_size;
+
+ gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
+ addr, size);
+
+ g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch);
+
+ /* global attrib cb */
+ addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
+ gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+ (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
+ (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+ gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);
+ g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch);
+
+ if (patch)
+ gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+
+ return 0;
+}
+
+static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, bool patch)
+{
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
+ gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
+ gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
+ gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
+ gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
+}
+
+static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, u64 size, bool patch)
+{
+ u32 data;
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
+ gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
+ gr_scc_bundle_cb_size_div_256b_f(size) |
+ gr_scc_bundle_cb_size_valid_true_f(), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(),
+ gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(),
+ gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
+ gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch);
+
+ /* data for state_limit */
+ data = (g->gr.bundle_cb_default_size *
+ gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+ gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
+
+ data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
+
+ gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
+ g->gr.bundle_cb_token_limit, data);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
+ gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
+ gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
+
+}
+
+static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct channel_ctx_gk20a *ch_ctx = NULL;
+ u32 gpm_pd_cfg;
+ u32 pd_ab_dist_cfg0;
+ u32 ds_debug;
+ u32 mpc_vtg_debug;
+ u32 pe_vaf;
+ u32 pe_vsc_vpc;
+
+ gk20a_dbg_fn("");
+
+ gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
+ pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
+ ds_debug = gk20a_readl(g, gr_ds_debug_r());
+ mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
+
+ if (patch) {
+ int err;
+ ch_ctx = &c->ch_ctx;
+ err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+ if (err)
+ return err;
+ }
+
+ if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
+ pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
+ pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
+
+ gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
+ pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
+ pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
+ pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
+ ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
+ mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+ } else {
+ gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
+ pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
+ ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
+ mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
+ }
+
+ if (patch)
+ gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+
+ return 0;
+}
+
+int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 norm_entries, norm_shift;
+ u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
+ u32 map0, map1, map2, map3, map4, map5;
+
+ if (!gr->map_tiles)
+ return -1;
+
+ gk20a_dbg_fn("");
+
+ gk20a_writel(g, gr_crstr_map_table_cfg_r(),
+ gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
+ gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
+
+ map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
+ gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
+ gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
+ gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
+ gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
+ gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);
+
+ map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
+ gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
+ gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
+ gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
+ gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
+ gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);
+
+ map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
+ gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
+ gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
+ gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
+ gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
+ gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);
+
+ map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
+ gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
+ gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
+ gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
+ gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
+ gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);
+
+ map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
+ gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
+ gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
+ gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
+ gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
+ gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);
+
+ map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
+ gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
+ gr_crstr_gpc_map5_tile32_f(0) |
+ gr_crstr_gpc_map5_tile33_f(0) |
+ gr_crstr_gpc_map5_tile34_f(0) |
+ gr_crstr_gpc_map5_tile35_f(0);
+
+ gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
+ gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
+ gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
+ gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
+ gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
+ gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
+
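+	/* norm_shift scales the entry count so that norm_entries =
+	 * tpc_count << norm_shift lands in a normalized 16..31 range for the
+	 * TPC counts handled here; the coeffN_mod values below are then
+	 * (1 << N) % norm_entries. */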
+ switch (gr->tpc_count) {
+ case 1:
+ norm_shift = 4;
+ break;
+ case 2:
+ case 3:
+ norm_shift = 3;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ norm_shift = 2;
+ break;
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ norm_shift = 1;
+ break;
+ default:
+ norm_shift = 0;
+ break;
+ }
+
+ norm_entries = gr->tpc_count << norm_shift;
+ coeff5_mod = (1 << 5) % norm_entries;
+ coeff6_mod = (1 << 6) % norm_entries;
+ coeff7_mod = (1 << 7) % norm_entries;
+ coeff8_mod = (1 << 8) % norm_entries;
+ coeff9_mod = (1 << 9) % norm_entries;
+ coeff10_mod = (1 << 10) % norm_entries;
+ coeff11_mod = (1 << 11) % norm_entries;
+
+ gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
+ gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
+ gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
+ gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
+ gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
+ gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
+
+ gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
+ gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
+ gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
+ gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
+ gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
+ gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
+ gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
+
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
+ gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
+
+ gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
+ gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
+ gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
+
+ gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
+ gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
+ gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
+ gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
+ gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
+ gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
+
+ return 0;
+}
+
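+/* count_bits(): population count via Kernighan's trick (each iteration
+ * clears the lowest set bit).  clear_count_bits() below uses the same trick
+ * to clear the lowest 'clear_count' set bits of 'num'. */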
+static inline u32 count_bits(u32 mask)
+{
+ u32 temp = mask;
+ u32 count;
+ for (count = 0; temp != 0; count++)
+ temp &= temp - 1;
+
+ return count;
+}
+
+static inline u32 clear_count_bits(u32 num, u32 clear_count)
+{
+ u32 count = clear_count;
+ for (; (num != 0) && (count != 0); count--)
+ num &= num - 1;
+
+ return num;
+}
+
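+/* Build the PD alpha/beta ratio tables: for each of the 32 table rows the
+ * TPCs are split between an "alpha" set and a "beta" set (the alpha share
+ * grows with the row index), and the resulting per-GPC TPC masks are packed
+ * four GPCs per register into gr_pd_alpha/beta_ratio_table. */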
+static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
+ struct gr_gk20a *gr)
+{
+ u32 table_index_bits = 5;
+ u32 rows = (1 << table_index_bits);
+ u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
+
+ u32 row;
+ u32 index;
+ u32 gpc_index;
+ u32 gpcs_per_reg = 4;
+ u32 pes_index;
+ u32 tpc_count_pes;
+ u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
+
+ u32 alpha_target, beta_target;
+ u32 alpha_bits, beta_bits;
+ u32 alpha_mask, beta_mask, partial_mask;
+ u32 reg_offset;
+ bool assign_alpha;
+
+ u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
+ u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
+ u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];
+
+ gk20a_dbg_fn("");
+
+ memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
+ memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
+ memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
+
+ for (row = 0; row < rows; ++row) {
+ alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
+ beta_target = gr->tpc_count - alpha_target;
+
+ assign_alpha = (alpha_target < beta_target);
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
+ alpha_mask = beta_mask = 0;
+
+ for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
+ tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];
+
+ if (assign_alpha) {
+ alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
+ beta_bits = tpc_count_pes - alpha_bits;
+ } else {
+ beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
+ alpha_bits = tpc_count_pes - beta_bits;
+ }
+
+ partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
+ partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
+ alpha_mask |= partial_mask;
+
+ partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
+ beta_mask |= partial_mask;
+
+ alpha_target -= min(alpha_bits, alpha_target);
+ beta_target -= min(beta_bits, beta_target);
+
+ if ((alpha_bits > 0) || (beta_bits > 0))
+ assign_alpha = !assign_alpha;
+ }
+
+ switch (gpc_index % gpcs_per_reg) {
+ case 0:
+ map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
+ map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
+ break;
+ case 1:
+ map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
+ map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
+ break;
+ case 2:
+ map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
+ map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
+ break;
+ case 3:
+ map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
+ map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
+ break;
+ }
+ map_reg_used[reg_offset] = true;
+ }
+ }
+
+ for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
+ if (map_reg_used[index]) {
+ gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
+ gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
+ }
+ }
+
+ return 0;
+}
+
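+/* Program floorsweeping-dependent state: per-TPC SM ids, active TPC counts
+ * per GPC, the ROP/crossbar mapping tables, the PD skip table, active FBP
+ * counts, and an L2 max-ways-evict limit for single-FBP configurations. */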
+static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ u32 tpc_index, gpc_index;
+ u32 tpc_offset, gpc_offset;
+ u32 sm_id = 0, gpc_id = 0;
+ u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
+ u32 tpc_per_gpc;
+ u32 max_ways_evict = INVALID_MAX_WAYS;
+ u32 l1c_dbg_reg_val;
+
+ gk20a_dbg_fn("");
+
+ for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ gpc_offset = proj_gpc_stride_v() * gpc_index;
+ if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
+ tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
+
+ gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
+ gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
+ gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
+ gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
+ gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
+ gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
+ gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+ gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
+
+ sm_id_to_gpc_id[sm_id] = gpc_index;
+ sm_id++;
+ }
+
+ gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
+ gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
+ gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
+ gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
+ }
+ }
+
+ for (tpc_index = 0, gpc_id = 0;
+ tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
+ tpc_index++, gpc_id += 8) {
+
+ if (gpc_id >= gr->gpc_count)
+ gpc_id = 0;
+
+ tpc_per_gpc =
+ gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
+ gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
+ gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
+ gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
+ gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
+ gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
+ gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
+ gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
+
+ gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
+ gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
+ }
+
+ /* gr__setup_pd_mapping stubbed for gk20a */
+ gr_gk20a_setup_rop_mapping(g, gr);
+ if (g->ops.gr.setup_alpha_beta_tables)
+ g->ops.gr.setup_alpha_beta_tables(g, gr);
+
+ if (gr->num_fbps == 1)
+ max_ways_evict = 9;
+
+ if (max_ways_evict != INVALID_MAX_WAYS)
+ g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict);
+
+ for (gpc_index = 0;
+ gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
+ gpc_index += 4) {
+
+ gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
+			gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
+			gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
+			gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
+			gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
+ }
+
+ gk20a_writel(g, gr_cwd_fs_r(),
+ gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
+ gr_cwd_fs_num_tpcs_f(gr->tpc_count));
+
+ gk20a_writel(g, gr_bes_zrop_settings_r(),
+ gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
+ gk20a_writel(g, gr_bes_crop_settings_r(),
+ gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
+
+ /* turn on cya15 bit for a default val that missed the cut */
+ l1c_dbg_reg_val = gk20a_readl(g, gr_gpc0_tpc0_l1c_dbg_r());
+ l1c_dbg_reg_val |= gr_gpc0_tpc0_l1c_dbg_cya15_en_f();
+ gk20a_writel(g, gr_gpc0_tpc0_l1c_dbg_r(), l1c_dbg_reg_val);
+
+ return 0;
+}
+
+static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
+{
+ struct gk20a *g = c->g;
+ int ret;
+
+ u32 inst_base_ptr =
+ u64_lo32(c->inst_block.cpu_pa
+ >> ram_in_base_shift_v());
+
+
+ gk20a_dbg_fn("");
+
+ ret = gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .method.addr = save_type,
+ .method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
+ gr_fecs_current_ctx_target_vid_mem_f() |
+ gr_fecs_current_ctx_valid_f(1)),
+ .mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
+ .ok = 1, .fail = 2,
+ },
+ .cond.ok = GR_IS_UCODE_OP_AND,
+ .cond.fail = GR_IS_UCODE_OP_AND,
+ });
+
+ if (ret)
+ gk20a_err(dev_from_gk20a(g), "save context image failed");
+
+ return ret;
+}
+
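+/* Feed the software bundle init list through the pipe with pipe-mode
+ * override enabled, skipping redundant data writes and waiting for idle
+ * whenever a GO_IDLE bundle is issued. */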
+static u32 gk20a_init_sw_bundle(struct gk20a *g)
+{
+ struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
+ u32 last_bundle_data = 0;
+ u32 err = 0;
+ int i;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+
+ /* enable pipe mode override */
+ gk20a_writel(g, gr_pipe_bundle_config_r(),
+ gr_pipe_bundle_config_override_pipe_mode_enabled_f());
+
+ /* load bundle init */
+ for (i = 0; i < sw_bundle_init->count; i++) {
+
+ if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
+ gk20a_writel(g, gr_pipe_bundle_data_r(),
+ sw_bundle_init->l[i].value);
+ last_bundle_data = sw_bundle_init->l[i].value;
+ }
+
+ gk20a_writel(g, gr_pipe_bundle_address_r(),
+ sw_bundle_init->l[i].addr);
+
+ if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
+ GR_GO_IDLE_BUNDLE)
+ err |= gr_gk20a_wait_idle(g, end_jiffies,
+ GR_IDLE_CHECK_DEFAULT);
+ }
+
+ /* disable pipe mode override */
+ gk20a_writel(g, gr_pipe_bundle_config_r(),
+ gr_pipe_bundle_config_override_pipe_mode_disabled_f());
+
+ return err;
+}
+
+/* init global golden image from a fresh gr_ctx in channel ctx.
+ save a copy in local_golden_image in ctx_vars */
+static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
+ u32 ctx_header_words;
+ u32 i;
+ u32 data;
+ void *ctx_ptr = NULL;
+ void *gold_ptr = NULL;
+ u32 err = 0;
+
+ gk20a_dbg_fn("");
+
+	/* golden ctx is global to all channels. Although only the first
+	   channel initializes the golden image, the driver needs to prevent
+	   multiple channels from initializing the golden ctx at the same time */
+ mutex_lock(&gr->ctx_mutex);
+
+ if (gr->ctx_vars.golden_image_initialized)
+ goto clean_up;
+
+ err = gr_gk20a_fecs_ctx_bind_channel(g, c);
+ if (err)
+ goto clean_up;
+
+ err = gk20a_init_sw_bundle(g);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_elpg_protected_call(g,
+ gr_gk20a_commit_global_ctx_buffers(g, c, false));
+ if (err)
+ goto clean_up;
+
+ gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].pages,
+ PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].size) >>
+ PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!gold_ptr)
+ goto clean_up;
+
+ ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+ PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!ctx_ptr)
+ goto clean_up;
+
+ ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
+ ctx_header_words >>= 2;
+
+ /* Channel gr_ctx buffer is gpu cacheable.
+ Flush before cpu read. */
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, false);
+
+ for (i = 0; i < ctx_header_words; i++) {
+ data = gk20a_mem_rd32(ctx_ptr, i);
+ gk20a_mem_wr32(gold_ptr, i, data);
+ }
+
+ gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0,
+ ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
+
+ gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0);
+
+ gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
+
+ gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
+
+ if (gr->ctx_vars.local_golden_image == NULL) {
+
+ gr->ctx_vars.local_golden_image =
+ kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);
+
+ if (gr->ctx_vars.local_golden_image == NULL) {
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
+ gr->ctx_vars.local_golden_image[i] =
+ gk20a_mem_rd32(gold_ptr, i);
+ }
+
+ gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+
+ gr->ctx_vars.golden_image_initialized = true;
+
+ gk20a_mm_l2_invalidate(g);
+
+ gk20a_writel(g, gr_fecs_current_ctx_r(),
+ gr_fecs_current_ctx_valid_false_f());
+
+clean_up:
+ if (err)
+ gk20a_err(dev_from_gk20a(g), "fail");
+ else
+ gk20a_dbg_fn("done");
+
+ if (gold_ptr)
+ vunmap(gold_ptr);
+ if (ctx_ptr)
+ vunmap(ctx_ptr);
+
+ mutex_unlock(&gr->ctx_mutex);
+ return err;
+}
+
+int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
+ struct channel_gk20a *c,
+ bool enable_smpc_ctxsw)
+{
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ void *ctx_ptr = NULL;
+ u32 data;
+
+ /*XXX caller responsible for making sure the channel is quiesced? */
+
+ /* Channel gr_ctx buffer is gpu cacheable.
+ Flush and invalidate before cpu update. */
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, true);
+
+ ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+ PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!ctx_ptr)
+ return -ENOMEM;
+
+ data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+ data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
+ data |= enable_smpc_ctxsw ?
+ ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
+ ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
+ data);
+
+ vunmap(ctx_ptr);
+
+ gk20a_mm_l2_invalidate(g);
+
+ return 0;
+}
+
+/* load the saved copy of the golden image into the channel gr_ctx */
+static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ u32 virt_addr_lo;
+ u32 virt_addr_hi;
+ u32 i, v, data;
+ int ret = 0;
+ void *ctx_ptr = NULL;
+
+ gk20a_dbg_fn("");
+
+ if (gr->ctx_vars.local_golden_image == NULL)
+ return -1;
+
+ /* Channel gr_ctx buffer is gpu cacheable.
+ Flush and invalidate before cpu update. */
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, true);
+
+ ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+ PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!ctx_ptr)
+ return -ENOMEM;
+
+ for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
+ gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
+
+ virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
+ virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
+ ch_ctx->patch_ctx.data_count);
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0,
+ virt_addr_lo);
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
+ virt_addr_hi);
+
+ /* no user for client managed performance counter ctx */
+ ch_ctx->pm_ctx.ctx_sw_mode =
+ ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+ data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+ data = data & ~ctxsw_prog_main_image_pm_mode_m();
+ data |= ch_ctx->pm_ctx.ctx_sw_mode;
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
+ data);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
+
+ /* set priv access map */
+ virt_addr_lo =
+ u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+ virt_addr_hi =
+ u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0,
+ ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f());
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0,
+ virt_addr_lo);
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0,
+ virt_addr_hi);
+ /* disable verif features */
+ v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0);
+ v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
+ v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
+ gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);
+
+
+ vunmap(ctx_ptr);
+
+ gk20a_mm_l2_invalidate(g);
+
+ if (tegra_platform_is_linsim()) {
+ u32 inst_base_ptr =
+ u64_lo32(c->inst_block.cpu_pa
+ >> ram_in_base_shift_v());
+
+ ret = gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .method.data =
+ (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
+ gr_fecs_current_ctx_target_vid_mem_f() |
+ gr_fecs_current_ctx_valid_f(1)),
+ .method.addr =
+ gr_fecs_method_push_adr_restore_golden_v(),
+ .mailbox = {
+ .id = 0, .data = 0,
+ .clr = ~0, .ret = NULL,
+ .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+ .fail = 0},
+ .cond.ok = GR_IS_UCODE_OP_EQUAL,
+ .cond.fail = GR_IS_UCODE_OP_SKIP});
+
+ if (ret)
+ gk20a_err(dev_from_gk20a(g),
+ "restore context image failed");
+ }
+
+ return ret;
+}
+
+static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
+{
+ gk20a_dbg_fn("");
+
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
+ gr_fecs_ctxsw_mailbox_clear_value_f(~0));
+
+ gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
+ gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
+
+ gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
+ gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
+
+ gk20a_dbg_fn("done");
+}
+
+static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
+{
+ struct mm_gk20a *mm = &g->mm;
+ struct vm_gk20a *vm = &mm->pmu.vm;
+ struct device *d = dev_from_gk20a(g);
+ struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+ void *inst_ptr;
+ u32 pde_addr_lo;
+ u32 pde_addr_hi;
+ u64 pde_addr;
+ dma_addr_t iova;
+
+ /* Alloc mem of inst block */
+ ucode_info->inst_blk_desc.size = ram_in_alloc_size_v();
+ ucode_info->inst_blk_desc.cpuva = dma_alloc_coherent(d,
+ ucode_info->inst_blk_desc.size,
+ &iova,
+ GFP_KERNEL);
+ if (!ucode_info->inst_blk_desc.cpuva) {
+ gk20a_err(d, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ ucode_info->inst_blk_desc.iova = iova;
+ ucode_info->inst_blk_desc.cpu_pa = gk20a_get_phys_from_iova(d,
+ ucode_info->inst_blk_desc.iova);
+
+ inst_ptr = ucode_info->inst_blk_desc.cpuva;
+
+ /* Set inst block */
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+ u64_lo32(vm->va_limit) | 0xFFF);
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+ ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
+
+ pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+ pde_addr_lo = u64_lo32(pde_addr >> 12);
+ pde_addr_hi = u64_hi32(pde_addr);
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+ ram_in_page_dir_base_target_vid_mem_f() |
+ ram_in_page_dir_base_vol_true_f() |
+ ram_in_page_dir_base_lo_f(pde_addr_lo));
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+ ram_in_page_dir_base_hi_f(pde_addr_hi));
+
+ /* Map ucode surface to GMMU */
+ ucode_info->ucode_gpuva = gk20a_gmmu_map(vm,
+ &ucode_info->surface_desc.sgt,
+ ucode_info->surface_desc.size,
+ 0, /* flags */
+ gk20a_mem_flag_read_only);
+ if (!ucode_info->ucode_gpuva) {
+ gk20a_err(d, "failed to update gmmu ptes\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void gr_gk20a_init_ctxsw_ucode_segment(
+ struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
+{
+ p_seg->offset = *offset;
+ p_seg->size = size;
+ *offset = ALIGN(*offset + size, BLK_SIZE);
+}
+
+static void gr_gk20a_init_ctxsw_ucode_segments(
+ struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
+ struct gk20a_ctxsw_bootloader_desc *bootdesc,
+ u32 code_size, u32 data_size)
+{
+ u32 boot_size = ALIGN(bootdesc->size, sizeof(u32));
+ segments->boot_entry = bootdesc->entry_point;
+ segments->boot_imem_offset = bootdesc->imem_offset;
+ gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_size);
+ gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size);
+ gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size);
+}
+
+static int gr_gk20a_copy_ctxsw_ucode_segments(
+ u8 *buf,
+ struct gk20a_ctxsw_ucode_segments *segments,
+ u32 *bootimage,
+ u32 *code, u32 *data)
+{
+ memcpy(buf + segments->boot.offset, bootimage, segments->boot.size);
+ memcpy(buf + segments->code.offset, code, segments->code.size);
+ memcpy(buf + segments->data.offset, data, segments->data.size);
+ return 0;
+}
+
+static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
+{
+ struct device *d = dev_from_gk20a(g);
+ struct mm_gk20a *mm = &g->mm;
+ struct vm_gk20a *vm = &mm->pmu.vm;
+ struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
+ struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
+ const struct firmware *fecs_fw;
+ const struct firmware *gpccs_fw;
+ u32 *fecs_boot_image;
+ u32 *gpccs_boot_image;
+ struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+ u8 *buf;
+ u32 ucode_size;
+ int err = 0;
+ dma_addr_t iova;
+ DEFINE_DMA_ATTRS(attrs);
+
+ fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
+ if (!fecs_fw) {
+ gk20a_err(d, "failed to load fecs ucode!!");
+ return -ENOENT;
+ }
+
+ fecs_boot_desc = (void *)fecs_fw->data;
+ fecs_boot_image = (void *)(fecs_fw->data +
+ sizeof(struct gk20a_ctxsw_bootloader_desc));
+
+ gpccs_fw = gk20a_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE);
+ if (!gpccs_fw) {
+ release_firmware(fecs_fw);
+ gk20a_err(d, "failed to load gpccs ucode!!");
+ return -ENOENT;
+ }
+
+ gpccs_boot_desc = (void *)gpccs_fw->data;
+ gpccs_boot_image = (void *)(gpccs_fw->data +
+ sizeof(struct gk20a_ctxsw_bootloader_desc));
+
+ ucode_size = 0;
+ gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
+ fecs_boot_desc,
+ g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
+ g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
+ gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
+ gpccs_boot_desc,
+ g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
+ g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
+
+ ucode_info->surface_desc.size = ucode_size;
+ dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
+ ucode_info->surface_desc.cpuva = dma_alloc_attrs(d,
+ ucode_info->surface_desc.size,
+ &iova,
+ GFP_KERNEL,
+ &attrs);
+ if (!ucode_info->surface_desc.cpuva) {
+ gk20a_err(d, "memory allocation failed\n");
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ ucode_info->surface_desc.iova = iova;
+ err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
+ ucode_info->surface_desc.cpuva,
+ ucode_info->surface_desc.iova,
+ ucode_info->surface_desc.size);
+ if (err) {
+ gk20a_err(d, "failed to create sg table\n");
+ goto clean_up;
+ }
+
+ buf = (u8 *)ucode_info->surface_desc.cpuva;
+ if (!buf) {
+ gk20a_err(d, "failed to map surface desc buffer");
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
+ fecs_boot_image,
+ g->gr.ctx_vars.ucode.fecs.inst.l,
+ g->gr.ctx_vars.ucode.fecs.data.l);
+
+ release_firmware(fecs_fw);
+ fecs_fw = NULL;
+
+ gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs,
+ gpccs_boot_image,
+ g->gr.ctx_vars.ucode.gpccs.inst.l,
+ g->gr.ctx_vars.ucode.gpccs.data.l);
+
+ release_firmware(gpccs_fw);
+ gpccs_fw = NULL;
+
+ err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
+ if (err)
+ goto clean_up;
+
+ gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
+
+ return 0;
+
+ clean_up:
+ if (ucode_info->ucode_gpuva)
+ gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva,
+ ucode_info->surface_desc.size, gk20a_mem_flag_none);
+ if (ucode_info->surface_desc.sgt)
+ gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
+ if (ucode_info->surface_desc.cpuva)
+ dma_free_attrs(d, ucode_info->surface_desc.size,
+ ucode_info->surface_desc.cpuva,
+ ucode_info->surface_desc.iova,
+ &attrs);
+ ucode_info->surface_desc.cpuva = NULL;
+ ucode_info->surface_desc.iova = 0;
+
+ release_firmware(gpccs_fw);
+ gpccs_fw = NULL;
+ release_firmware(fecs_fw);
+ fecs_fw = NULL;
+
+ return err;
+}
+
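+/* Bind the ctxsw ucode instance block to the FECS arbiter: wait for the
+ * arbiter to go idle, program the new/current context pointers, and issue
+ * arbiter commands, polling for completion after each one. */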
+static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
+{
+ struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+ int retries = 20;
+ phys_addr_t inst_ptr;
+ u32 val;
+
+ while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
+ gr_fecs_ctxsw_status_1_arb_busy_m()) && retries) {
+ udelay(2);
+ retries--;
+ }
+ if (!retries)
+ gk20a_err(dev_from_gk20a(g), "arbiter idle timeout");
+
+ gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
+
+ inst_ptr = ucode_info->inst_blk_desc.cpu_pa;
+ gk20a_writel(g, gr_fecs_new_ctx_r(),
+ gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
+ gr_fecs_new_ctx_target_m() |
+ gr_fecs_new_ctx_valid_m());
+
+ gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
+ gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
+ gr_fecs_arb_ctx_ptr_target_m());
+
+ gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
+
+ /* Wait for arbiter command to complete */
+ retries = 20;
+ val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
+ while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
+ udelay(2);
+ retries--;
+ val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
+ }
+ if (!retries)
+ gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
+
+ gk20a_writel(g, gr_fecs_current_ctx_r(),
+ gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
+ gr_fecs_current_ctx_target_m() |
+ gr_fecs_current_ctx_valid_m());
+ /* Send command to arbiter to flush */
+ gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
+
+ retries = 20;
+ val = (gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
+ while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
+ udelay(2);
+ retries--;
+ val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
+ }
+ if (!retries)
+ gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
+}
+
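+/* Load one falcon (FECS, or GPCCS when reg_offset selects the gpccs
+ * aperture): write the bootloader header into DMEM, DMA the boot image into
+ * IMEM in 256-byte blocks, then set the boot vector and start the CPU. */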
+static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
+ struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
+{
+ u32 addr_code32;
+ u32 addr_data32;
+ u32 addr_load32;
+ u32 dst = 0;
+ u32 blocks;
+ u32 b;
+
+ addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
+ addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
+ addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
+
+ gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
+ gr_fecs_dmactl_require_ctx_f(0));
+
+ /*
+ * Copy falcon bootloader header into dmem at offset 0.
+ * Configure dmem port 0 for auto-incrementing writes starting at dmem
+ * offset 0.
+ */
+ gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
+ gr_fecs_dmemc_offs_f(0) |
+ gr_fecs_dmemc_blk_f(0) |
+ gr_fecs_dmemc_aincw_f(1));
+
+ /* Write out the actual data */
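+	/* The ten words written below form the DMEM header consumed by the
+	 * bootloader: the code DMA address and size, the data DMA address and
+	 * size, plus zero padding.  The exact field layout is dictated by the
+	 * boot image, not by this driver. */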
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
+ gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
+
+ blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
+
+ /*
+ * Set the base FB address for the DMA transfer. Subtract off the 256
+ * byte IMEM block offset such that the relative FB and IMEM offsets
+ * match, allowing the IMEM tags to be properly created.
+ */
+
+ dst = segments->boot_imem_offset;
+ gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
+ (addr_load32 - (dst >> 8)));
+
+ for (b = 0; b < blocks; b++) {
+ /* Setup destination IMEM offset */
+ gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
+ dst + (b << 8));
+
+ /* Setup source offset (relative to BASE) */
+ gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
+ dst + (b << 8));
+
+ gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
+ gr_fecs_dmatrfcmd_imem_f(0x01) |
+ gr_fecs_dmatrfcmd_write_f(0x00) |
+ gr_fecs_dmatrfcmd_size_f(0x06) |
+ gr_fecs_dmatrfcmd_ctxdma_f(0));
+ }
+
+ /* Specify the falcon boot vector */
+ gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
+ gr_fecs_bootvec_vec_f(segments->boot_entry));
+
+ /* Write to CPUCTL to start the falcon */
+ gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
+ gr_fecs_cpuctl_startcpu_f(0x01));
+
+ return 0;
+}
+
+static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
+{
+ struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+ u64 addr_base = ucode_info->ucode_gpuva;
+
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
+
+ gr_gk20a_load_falcon_bind_instblk(g);
+
+ gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
+ &g->ctxsw_ucode_info.fecs, 0);
+
+ gr_gk20a_load_ctxsw_ucode_segments(g, addr_base,
+ &g->ctxsw_ucode_info.gpccs,
+ gr_gpcs_gpccs_falcon_hwcfg_r() -
+ gr_fecs_falcon_hwcfg_r());
+}
+
+static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 ret;
+
+ gk20a_dbg_fn("");
+
+ if (tegra_platform_is_linsim()) {
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
+ gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
+ gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
+ gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
+ }
+
+	/*
+	 * In case the PMU falcon is not being used, revert to the old way of
+	 * loading the gr ucode, without the faster bootstrap routine.
+	 */
+ if (!support_gk20a_pmu()) {
+ gr_gk20a_load_falcon_dmem(g);
+ gr_gk20a_load_falcon_imem(g);
+ gr_gk20a_start_falcon_ucode(g);
+ } else {
+ if (!gr->skip_ucode_init)
+ gr_gk20a_init_ctxsw_ucode(g);
+ gr_gk20a_load_falcon_with_bootloader(g);
+ gr->skip_ucode_init = true;
+ }
+
+ ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
+ GR_IS_UCODE_OP_EQUAL,
+ eUcodeHandshakeInitComplete,
+ GR_IS_UCODE_OP_SKIP, 0);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout");
+ return ret;
+ }
+
+ if (support_gk20a_pmu())
+ gk20a_writel(g, gr_fecs_current_ctx_r(),
+ gr_fecs_current_ctx_valid_false_f());
+
+ gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
+ gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
+ gk20a_writel(g, gr_fecs_method_push_r(),
+ gr_fecs_method_push_adr_set_watchdog_timeout_f());
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 golden_ctx_image_size = 0;
+ u32 zcull_ctx_image_size = 0;
+ u32 pm_ctx_image_size = 0;
+ u32 ret;
+ struct fecs_method_op_gk20a op = {
+ .mailbox = { .id = 0, .data = 0,
+ .clr = ~0, .ok = 0, .fail = 0},
+ .method.data = 0,
+ .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+ .cond.fail = GR_IS_UCODE_OP_SKIP,
+ };
+
+ gk20a_dbg_fn("");
+ op.method.addr = gr_fecs_method_push_adr_discover_image_size_v();
+ op.mailbox.ret = &golden_ctx_image_size;
+ ret = gr_gk20a_submit_fecs_method_op(g, op);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "query golden image size failed");
+ return ret;
+ }
+ op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v();
+ op.mailbox.ret = &zcull_ctx_image_size;
+ ret = gr_gk20a_submit_fecs_method_op(g, op);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "query zcull ctx image size failed");
+ return ret;
+ }
+ op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v();
+ op.mailbox.ret = &pm_ctx_image_size;
+ ret = gr_gk20a_submit_fecs_method_op(g, op);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "query pm ctx image size failed");
+ return ret;
+ }
+
+ if (!g->gr.ctx_vars.golden_image_size &&
+ !g->gr.ctx_vars.zcull_ctxsw_image_size) {
+ g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
+ g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
+ } else {
+ /* hw is different after railgating? */
+ BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size);
+ BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size);
+ }
+
+ g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+static void gk20a_gr_destroy_ctx_buffer(struct platform_device *pdev,
+ struct gr_ctx_buffer_desc *desc)
+{
+ struct device *dev = &pdev->dev;
+ gk20a_free_sgtable(&desc->sgt);
+ dma_free_attrs(dev, desc->size, desc->pages,
+ desc->iova, &desc->attrs);
+}
+
+static int gk20a_gr_alloc_ctx_buffer(struct platform_device *pdev,
+ struct gr_ctx_buffer_desc *desc,
+ size_t size)
+{
+ struct device *dev = &pdev->dev;
+ DEFINE_DMA_ATTRS(attrs);
+ dma_addr_t iova;
+ int err = 0;
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+
+ desc->pages = dma_alloc_attrs(&pdev->dev, size, &iova,
+ GFP_KERNEL, &attrs);
+ if (!desc->pages)
+ return -ENOMEM;
+
+ desc->iova = iova;
+ desc->size = size;
+ desc->attrs = attrs;
+ desc->destroy = gk20a_gr_destroy_ctx_buffer;
+ err = gk20a_get_sgtable_from_pages(&pdev->dev, &desc->sgt, desc->pages,
+ desc->iova, desc->size);
+ if (err) {
+ dma_free_attrs(dev, desc->size, desc->pages,
+ desc->iova, &desc->attrs);
+ memset(desc, 0, sizeof(*desc));
+ }
+
+ return err;
+}
+
+static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(g->dev);
+ struct gr_gk20a *gr = &g->gr;
+ int i, attr_buffer_size, err;
+ struct platform_device *pdev = g->dev;
+
+ u32 cb_buffer_size = gr->bundle_cb_default_size *
+ gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+
+ u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
+ gr_scc_pagepool_total_pages_byte_granularity_v();
+
+ gk20a_dbg_fn("");
+
+ attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
+
+ gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
+
+ err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[CIRCULAR],
+ cb_buffer_size);
+ if (err)
+ goto clean_up;
+
+ if (platform->secure_alloc)
+ platform->secure_alloc(pdev,
+ &gr->global_ctx_buffer[CIRCULAR_VPR],
+ cb_buffer_size);
+
+ gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
+
+ err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[PAGEPOOL],
+ pagepool_buffer_size);
+ if (err)
+ goto clean_up;
+
+ if (platform->secure_alloc)
+ platform->secure_alloc(pdev,
+ &gr->global_ctx_buffer[PAGEPOOL_VPR],
+ pagepool_buffer_size);
+
+ gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
+
+ err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[ATTRIBUTE],
+ attr_buffer_size);
+ if (err)
+ goto clean_up;
+
+ if (platform->secure_alloc)
+ platform->secure_alloc(pdev,
+ &gr->global_ctx_buffer[ATTRIBUTE_VPR],
+ attr_buffer_size);
+
+ gk20a_dbg_info("golden_image_size : %d",
+ gr->ctx_vars.golden_image_size);
+
+ err = gk20a_gr_alloc_ctx_buffer(pdev,
+ &gr->global_ctx_buffer[GOLDEN_CTX],
+ gr->ctx_vars.golden_image_size);
+ if (err)
+ goto clean_up;
+
+ gk20a_dbg_info("priv_access_map_size : %d",
+ gr->ctx_vars.priv_access_map_size);
+
+ err = gk20a_gr_alloc_ctx_buffer(pdev,
+ &gr->global_ctx_buffer[PRIV_ACCESS_MAP],
+ gr->ctx_vars.priv_access_map_size);
+
+ if (err)
+ goto clean_up;
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+ clean_up:
+ gk20a_err(dev_from_gk20a(g), "fail");
+ for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
+ if (gr->global_ctx_buffer[i].destroy) {
+ gr->global_ctx_buffer[i].destroy(pdev,
+ &gr->global_ctx_buffer[i]);
+ }
+ }
+ return -ENOMEM;
+}
+
+static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
+{
+ struct platform_device *pdev = g->dev;
+ struct gr_gk20a *gr = &g->gr;
+ DEFINE_DMA_ATTRS(attrs);
+ u32 i;
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+
+ for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
+ gr->global_ctx_buffer[i].destroy(pdev,
+ &gr->global_ctx_buffer[i]);
+ }
+
+ gk20a_dbg_fn("done");
+}
+
+static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ struct vm_gk20a *ch_vm = c->vm;
+ u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
+ u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+ struct gr_gk20a *gr = &g->gr;
+ struct sg_table *sgt;
+ u64 size;
+ u64 gpu_va;
+ u32 i;
+ gk20a_dbg_fn("");
+
+ /* Circular Buffer */
+ if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].sgt == NULL)) {
+ sgt = gr->global_ctx_buffer[CIRCULAR].sgt;
+ size = gr->global_ctx_buffer[CIRCULAR].size;
+ } else {
+ sgt = gr->global_ctx_buffer[CIRCULAR_VPR].sgt;
+ size = gr->global_ctx_buffer[CIRCULAR_VPR].size;
+ }
+
+ gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+ NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+ gk20a_mem_flag_none);
+ if (!gpu_va)
+ goto clean_up;
+ g_bfr_va[CIRCULAR_VA] = gpu_va;
+ g_bfr_size[CIRCULAR_VA] = size;
+
+ /* Attribute Buffer */
+ if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt == NULL)) {
+ sgt = gr->global_ctx_buffer[ATTRIBUTE].sgt;
+ size = gr->global_ctx_buffer[ATTRIBUTE].size;
+ } else {
+ sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt;
+ size = gr->global_ctx_buffer[ATTRIBUTE_VPR].size;
+ }
+
+ gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+ NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+ gk20a_mem_flag_none);
+ if (!gpu_va)
+ goto clean_up;
+ g_bfr_va[ATTRIBUTE_VA] = gpu_va;
+ g_bfr_size[ATTRIBUTE_VA] = size;
+
+ /* Page Pool */
+ if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].sgt == NULL)) {
+ sgt = gr->global_ctx_buffer[PAGEPOOL].sgt;
+ size = gr->global_ctx_buffer[PAGEPOOL].size;
+ } else {
+ sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].sgt;
+ size = gr->global_ctx_buffer[PAGEPOOL_VPR].size;
+ }
+
+ gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+ NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+ gk20a_mem_flag_none);
+ if (!gpu_va)
+ goto clean_up;
+ g_bfr_va[PAGEPOOL_VA] = gpu_va;
+ g_bfr_size[PAGEPOOL_VA] = size;
+
+ /* Golden Image */
+ sgt = gr->global_ctx_buffer[GOLDEN_CTX].sgt;
+ size = gr->global_ctx_buffer[GOLDEN_CTX].size;
+ gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
+ gk20a_mem_flag_none);
+ if (!gpu_va)
+ goto clean_up;
+ g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
+ g_bfr_size[GOLDEN_CTX_VA] = size;
+
+ /* Priv register Access Map */
+ sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].sgt;
+ size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
+ gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
+ gk20a_mem_flag_none);
+ if (!gpu_va)
+ goto clean_up;
+ g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
+ g_bfr_size[PRIV_ACCESS_MAP_VA] = size;
+
+ c->ch_ctx.global_ctx_buffer_mapped = true;
+ return 0;
+
+ clean_up:
+ for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+ if (g_bfr_va[i]) {
+ gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
+ gr->global_ctx_buffer[i].size,
+ gk20a_mem_flag_none);
+ g_bfr_va[i] = 0;
+ }
+ }
+ return -ENOMEM;
+}
+
+static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
+{
+ struct vm_gk20a *ch_vm = c->vm;
+ u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
+ u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+ u32 i;
+
+ gk20a_dbg_fn("");
+
+ for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+ if (g_bfr_va[i]) {
+ gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
+ g_bfr_size[i],
+ gk20a_mem_flag_none);
+ g_bfr_va[i] = 0;
+ g_bfr_size[i] = 0;
+ }
+ }
+ c->ch_ctx.global_ctx_buffer_mapped = false;
+}
+
+static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
+ struct vm_gk20a *ch_vm = c->vm;
+ struct device *d = dev_from_gk20a(g);
+ struct sg_table *sgt;
+ DEFINE_DMA_ATTRS(attrs);
+ int err = 0;
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ if (gr->ctx_vars.buffer_size == 0)
+ return 0;
+
+ /* alloc channel gr ctx buffer */
+ gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
+ gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
+
+ gr_ctx->size = gr->ctx_vars.buffer_total_size;
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
+ &iova, GFP_KERNEL, &attrs);
+ if (!gr_ctx->pages)
+ return -ENOMEM;
+
+ gr_ctx->iova = iova;
+ err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
+ gr_ctx->iova, gr_ctx->size);
+ if (err)
+ goto err_free;
+
+ gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
+ NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+ gk20a_mem_flag_none);
+ if (!gr_ctx->gpu_va)
+ goto err_free_sgt;
+
+ gk20a_free_sgtable(&sgt);
+
+ return 0;
+
+ err_free_sgt:
+ gk20a_free_sgtable(&sgt);
+ err_free:
+ dma_free_attrs(d, gr_ctx->size,
+ gr_ctx->pages, gr_ctx->iova, &attrs);
+ gr_ctx->pages = NULL;
+ gr_ctx->iova = 0;
+
+ return err;
+}
+
+static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+{
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ struct vm_gk20a *ch_vm = c->vm;
+ struct gk20a *g = c->g;
+ struct device *d = dev_from_gk20a(g);
+ DEFINE_DMA_ATTRS(attrs);
+
+ gk20a_dbg_fn("");
+
+ if (!ch_ctx->gr_ctx.gpu_va)
+ return;
+
+ gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
+ ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ dma_free_attrs(d, ch_ctx->gr_ctx.size,
+ ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
+ ch_ctx->gr_ctx.pages = NULL;
+ ch_ctx->gr_ctx.iova = 0;
+}
+
+static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
+ struct channel_gk20a *c)
+{
+ struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+ struct device *d = dev_from_gk20a(g);
+ struct vm_gk20a *ch_vm = c->vm;
+ DEFINE_DMA_ATTRS(attrs);
+ struct sg_table *sgt;
+ int err = 0;
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ patch_ctx->size = 128 * sizeof(u32);
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
+ &iova, GFP_KERNEL,
+ &attrs);
+ if (!patch_ctx->pages)
+ return -ENOMEM;
+
+ patch_ctx->iova = iova;
+ err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
+ patch_ctx->iova, patch_ctx->size);
+ if (err)
+ goto err_free;
+
+ patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
+ 0, gk20a_mem_flag_none);
+ if (!patch_ctx->gpu_va)
+ goto err_free_sgtable;
+
+ gk20a_free_sgtable(&sgt);
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+ err_free_sgtable:
+ gk20a_free_sgtable(&sgt);
+ err_free:
+ dma_free_attrs(d, patch_ctx->size,
+ patch_ctx->pages, patch_ctx->iova, &attrs);
+ patch_ctx->pages = NULL;
+ patch_ctx->iova = 0;
+ gk20a_err(dev_from_gk20a(g), "fail");
+ return err;
+}
+
+static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
+{
+ struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+ struct vm_gk20a *ch_vm = c->vm;
+
+ gk20a_dbg_fn("");
+
+ if (patch_ctx->gpu_va)
+ gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
+ patch_ctx->size, gk20a_mem_flag_none);
+ patch_ctx->gpu_va = 0;
+ patch_ctx->data_count = 0;
+}
+
+static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
+{
+ struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+ struct gk20a *g = c->g;
+ struct device *d = dev_from_gk20a(g);
+ DEFINE_DMA_ATTRS(attrs);
+
+ gk20a_dbg_fn("");
+
+ gr_gk20a_unmap_channel_patch_ctx(c);
+
+ if (patch_ctx->pages) {
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ dma_free_attrs(d, patch_ctx->size,
+ patch_ctx->pages, patch_ctx->iova, &attrs);
+ patch_ctx->pages = NULL;
+ patch_ctx->iova = 0;
+ }
+}
+
+void gk20a_free_channel_ctx(struct channel_gk20a *c)
+{
+ gr_gk20a_unmap_global_ctx_buffers(c);
+ gr_gk20a_free_channel_patch_ctx(c);
+ gr_gk20a_free_channel_gr_ctx(c);
+
+ /* zcull_ctx, pm_ctx */
+
+ memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
+
+ c->num_objects = 0;
+ c->first_init = false;
+}
+
+static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num)
+{
+ bool valid = false;
+
+ switch (class_num) {
+ case KEPLER_COMPUTE_A:
+ case KEPLER_C:
+ case FERMI_TWOD_A:
+ case KEPLER_DMA_COPY_A:
+ valid = true;
+ break;
+
+ default:
+ break;
+ }
+
+ return valid;
+}
+
+int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
+ struct nvhost_alloc_obj_ctx_args *args)
+{
+ struct gk20a *g = c->g;
+ struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+ int err = 0;
+
+ gk20a_dbg_fn("");
+
+ /* an address space needs to have been bound at this point.*/
+ if (!gk20a_channel_as_bound(c)) {
+ gk20a_err(dev_from_gk20a(g),
+ "not bound to address space at time"
+ " of grctx allocation");
+ return -EINVAL;
+ }
+
+ if (!g->ops.gr.is_valid_class(g, args->class_num)) {
+ gk20a_err(dev_from_gk20a(g),
+ "invalid obj class 0x%x", args->class_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* allocate gr ctx buffer */
+ if (ch_ctx->gr_ctx.pages == NULL) {
+ err = gr_gk20a_alloc_channel_gr_ctx(g, c);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to allocate gr ctx buffer");
+ goto out;
+ }
+ c->obj_class = args->class_num;
+ } else {
+		/* TBD: needs to be more subtle about which class is being
+		 * allocated, as some are allowed to be allocated along the
+		 * same channel */
+ gk20a_err(dev_from_gk20a(g),
+ "too many classes alloc'd on same channel");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* commit gr ctx buffer */
+ err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to commit gr ctx buffer");
+ goto out;
+ }
+
+ /* allocate patch buffer */
+ if (ch_ctx->patch_ctx.pages == NULL) {
+ err = gr_gk20a_alloc_channel_patch_ctx(g, c);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to allocate patch buffer");
+ goto out;
+ }
+ }
+
+ /* map global buffer to channel gpu_va and commit */
+ if (!ch_ctx->global_ctx_buffer_mapped) {
+ err = gr_gk20a_map_global_ctx_buffers(g, c);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to map global ctx buffer");
+ goto out;
+ }
+ gr_gk20a_elpg_protected_call(g,
+ gr_gk20a_commit_global_ctx_buffers(g, c, true));
+ }
+
+ /* init golden image, ELPG enabled after this is done */
+ err = gr_gk20a_init_golden_ctx_image(g, c);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to init golden ctx image");
+ goto out;
+ }
+
+ /* load golden image */
+ if (!c->first_init) {
+ err = gr_gk20a_elpg_protected_call(g,
+ gr_gk20a_load_golden_ctx_image(g, c));
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to load golden ctx image");
+ goto out;
+ }
+ c->first_init = true;
+ }
+ gk20a_mm_l2_invalidate(g);
+
+ c->num_objects++;
+
+ gk20a_dbg_fn("done");
+ return 0;
+out:
+ /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
+ can be reused so no need to release them.
+ 2. golden image init and load is a one time thing so if
+ they pass, no need to undo. */
+ gk20a_err(dev_from_gk20a(g), "fail");
+ return err;
+}
+
+int gk20a_free_obj_ctx(struct channel_gk20a *c,
+ struct nvhost_free_obj_ctx_args *args)
+{
+ unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
+
+ gk20a_dbg_fn("");
+
+ if (c->num_objects == 0)
+ return 0;
+
+ c->num_objects--;
+
+ if (c->num_objects == 0) {
+ c->first_init = false;
+ gk20a_disable_channel(c,
+ !c->has_timedout,
+ timeout);
+ gr_gk20a_unmap_channel_patch_ctx(c);
+ }
+
+ return 0;
+}
+
+static void gk20a_remove_gr_support(struct gr_gk20a *gr)
+{
+ struct gk20a *g = gr->g;
+ struct device *d = dev_from_gk20a(g);
+ DEFINE_DMA_ATTRS(attrs);
+
+ gk20a_dbg_fn("");
+
+ gr_gk20a_free_global_ctx_buffers(g);
+
+ dma_free_coherent(d, gr->mmu_wr_mem.size,
+ gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
+ gr->mmu_wr_mem.cpuva = NULL;
+ gr->mmu_wr_mem.iova = 0;
+ dma_free_coherent(d, gr->mmu_rd_mem.size,
+ gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
+ gr->mmu_rd_mem.cpuva = NULL;
+ gr->mmu_rd_mem.iova = 0;
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
+ gr->compbit_store.base_iova, &attrs);
+
+ memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
+ memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
+ memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
+
+ kfree(gr->gpc_tpc_count);
+ kfree(gr->gpc_zcb_count);
+ kfree(gr->gpc_ppc_count);
+ kfree(gr->pes_tpc_count[0]);
+ kfree(gr->pes_tpc_count[1]);
+ kfree(gr->pes_tpc_mask[0]);
+ kfree(gr->pes_tpc_mask[1]);
+ kfree(gr->gpc_skip_mask);
+ kfree(gr->map_tiles);
+ gr->gpc_tpc_count = NULL;
+ gr->gpc_zcb_count = NULL;
+ gr->gpc_ppc_count = NULL;
+ gr->pes_tpc_count[0] = NULL;
+ gr->pes_tpc_count[1] = NULL;
+ gr->pes_tpc_mask[0] = NULL;
+ gr->pes_tpc_mask[1] = NULL;
+ gr->gpc_skip_mask = NULL;
+ gr->map_tiles = NULL;
+
+ kfree(gr->ctx_vars.ucode.fecs.inst.l);
+ kfree(gr->ctx_vars.ucode.fecs.data.l);
+ kfree(gr->ctx_vars.ucode.gpccs.inst.l);
+ kfree(gr->ctx_vars.ucode.gpccs.data.l);
+ kfree(gr->ctx_vars.sw_bundle_init.l);
+ kfree(gr->ctx_vars.sw_method_init.l);
+ kfree(gr->ctx_vars.sw_ctx_load.l);
+ kfree(gr->ctx_vars.sw_non_ctx_load.l);
+ kfree(gr->ctx_vars.ctxsw_regs.sys.l);
+ kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
+ kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
+ kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
+ kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
+ kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
+ kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
+ kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
+
+ kfree(gr->ctx_vars.local_golden_image);
+ gr->ctx_vars.local_golden_image = NULL;
+
+ gk20a_allocator_destroy(&gr->comp_tags);
+}
+
+static void gr_gk20a_bundle_cb_defaults(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+
+ gr->bundle_cb_default_size =
+ gr_scc_bundle_cb_size_div_256b__prod_v();
+ gr->min_gpm_fifo_depth =
+ gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
+ gr->bundle_cb_token_limit =
+ gr_pd_ab_dist_cfg2_token_limit_init_v();
+}
+
+static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 gpc_index, pes_index;
+ u32 pes_tpc_mask;
+ u32 pes_tpc_count;
+ u32 pes_heavy_index;
+ u32 gpc_new_skip_mask;
+ u32 tmp;
+
+ tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
+ gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
+
+ tmp = gk20a_readl(g, top_num_gpcs_r());
+ gr->max_gpc_count = top_num_gpcs_value_v(tmp);
+
+ tmp = gk20a_readl(g, top_num_fbps_r());
+ gr->max_fbps_count = top_num_fbps_value_v(tmp);
+
+ tmp = gk20a_readl(g, top_tpc_per_gpc_r());
+ gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
+
+ gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
+
+ tmp = gk20a_readl(g, top_num_fbps_r());
+ gr->sys_count = top_num_fbps_value_v(tmp);
+
+ tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
+ gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
+
+ gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
+ gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
+
+ if (!gr->gpc_count) {
+ gk20a_err(dev_from_gk20a(g), "gpc_count==0!");
+ goto clean_up;
+ }
+
+ gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+ gr->gpc_skip_mask =
+ kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
+ GFP_KERNEL);
+
+ if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
+ !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
+ !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
+ goto clean_up;
+
+ gr->ppc_count = 0;
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
+
+ gr->gpc_tpc_count[gpc_index] =
+ gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
+ gr->tpc_count += gr->gpc_tpc_count[gpc_index];
+
+ gr->gpc_zcb_count[gpc_index] =
+ gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
+ gr->zcb_count += gr->gpc_zcb_count[gpc_index];
+
+ gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
+ gr->ppc_count += gr->gpc_ppc_count[gpc_index];
+ for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
+
+ tmp = gk20a_readl(g,
+ gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
+ gpc_index * proj_gpc_stride_v());
+
+ pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
+ pes_tpc_count = count_bits(pes_tpc_mask);
+
+ gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
+ gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
+ }
+
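+		/* If a GPC carries 5 TPCs, or 4 TPCs split unevenly across its
+		 * two PESs, skip one TPC from the heavier PES.  m & (m - 1)
+		 * clears the lowest set bit of the mask, so XORing the result
+		 * with the original mask isolates that bit. */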
+ gpc_new_skip_mask = 0;
+ if (gr->pes_tpc_count[0][gpc_index] +
+ gr->pes_tpc_count[1][gpc_index] == 5) {
+ pes_heavy_index =
+ gr->pes_tpc_count[0][gpc_index] >
+ gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
+
+ gpc_new_skip_mask =
+ gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
+ (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
+ (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
+
+ } else if ((gr->pes_tpc_count[0][gpc_index] +
+ gr->pes_tpc_count[1][gpc_index] == 4) &&
+ (gr->pes_tpc_count[0][gpc_index] !=
+ gr->pes_tpc_count[1][gpc_index])) {
+ pes_heavy_index =
+ gr->pes_tpc_count[0][gpc_index] >
+ gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
+
+ gpc_new_skip_mask =
+ gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
+ (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
+ (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
+ }
+ gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
+ }
+
+ gk20a_dbg_info("fbps: %d", gr->num_fbps);
+ gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
+ gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
+ gk20a_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
+ gk20a_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
+ gk20a_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
+ gk20a_dbg_info("sys_count: %d", gr->sys_count);
+ gk20a_dbg_info("gpc_count: %d", gr->gpc_count);
+ gk20a_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
+ gk20a_dbg_info("tpc_count: %d", gr->tpc_count);
+ gk20a_dbg_info("ppc_count: %d", gr->ppc_count);
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ gk20a_dbg_info("gpc_tpc_count[%d] : %d",
+ gpc_index, gr->gpc_tpc_count[gpc_index]);
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ gk20a_dbg_info("gpc_zcb_count[%d] : %d",
+ gpc_index, gr->gpc_zcb_count[gpc_index]);
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ gk20a_dbg_info("gpc_ppc_count[%d] : %d",
+ gpc_index, gr->gpc_ppc_count[gpc_index]);
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ gk20a_dbg_info("gpc_skip_mask[%d] : %d",
+ gpc_index, gr->gpc_skip_mask[gpc_index]);
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ for (pes_index = 0;
+ pes_index < gr->pe_count_per_gpc;
+ pes_index++)
+ gk20a_dbg_info("pes_tpc_count[%d][%d] : %d",
+ pes_index, gpc_index,
+ gr->pes_tpc_count[pes_index][gpc_index]);
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ for (pes_index = 0;
+ pes_index < gr->pe_count_per_gpc;
+ pes_index++)
+ gk20a_dbg_info("pes_tpc_mask[%d][%d] : %d",
+ pes_index, gpc_index,
+ gr->pes_tpc_mask[pes_index][gpc_index]);
+
+ g->ops.gr.bundle_cb_defaults(g);
+ g->ops.gr.cb_size_default(g);
+ g->ops.gr.calc_global_ctx_buffer_size(g);
+ gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
+
+ gk20a_dbg_info("bundle_cb_default_size: %d",
+ gr->bundle_cb_default_size);
+ gk20a_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
+ gk20a_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
+ gk20a_dbg_info("attrib_cb_default_size: %d",
+ gr->attrib_cb_default_size);
+ gk20a_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
+ gk20a_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
+ gk20a_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
+ gk20a_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
+
+ return 0;
+
+clean_up:
+ return -ENOMEM;
+}
+
+static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct device *d = dev_from_gk20a(g);
+ dma_addr_t iova;
+
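+	/* one page each for the MMU debug write and read buffers that are
+	 * programmed into fb_mmu_debug_wr/rd during gr hw setup */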
+ gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
+
+ gr->mmu_wr_mem.size = gr->mmu_wr_mem_size;
+ gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size,
+ &iova, GFP_KERNEL);
+ if (!gr->mmu_wr_mem.cpuva)
+ goto err;
+
+ gr->mmu_wr_mem.iova = iova;
+
+ gr->mmu_rd_mem.size = gr->mmu_rd_mem_size;
+ gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size,
+ &iova, GFP_KERNEL);
+ if (!gr->mmu_rd_mem.cpuva)
+ goto err_free_wr_mem;
+
+ gr->mmu_rd_mem.iova = iova;
+ return 0;
+
+ err_free_wr_mem:
+ dma_free_coherent(d, gr->mmu_wr_mem.size,
+ gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
+ gr->mmu_wr_mem.cpuva = NULL;
+ gr->mmu_wr_mem.iova = 0;
+ err:
+ return -ENOMEM;
+}
+
+static u32 prime_set[18] = {
+ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
+
+static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
+{
+ s32 comm_denom;
+ s32 mul_factor;
+ s32 *init_frac = NULL;
+ s32 *init_err = NULL;
+ s32 *run_err = NULL;
+ s32 *sorted_num_tpcs = NULL;
+ s32 *sorted_to_unsorted_gpc_map = NULL;
+ u32 gpc_index;
+ u32 gpc_mark = 0;
+ u32 num_tpc;
+ u32 max_tpc_count = 0;
+ u32 swap;
+ u32 tile_count;
+ u32 index;
+ bool delete_map = false;
+ bool gpc_sorted;
+ int ret = 0;
+
+ init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+ init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+ run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+ sorted_num_tpcs =
+ kzalloc(proj_scal_max_gpcs_v() *
+ proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
+ GFP_KERNEL);
+ sorted_to_unsorted_gpc_map =
+ kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+
+ if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
+ sorted_to_unsorted_gpc_map)) {
+ ret = -ENOMEM;
+ goto clean_up;
+ }
+
+ gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
+
+ if (gr->tpc_count == 3)
+ gr->map_row_offset = 2;
+ else if (gr->tpc_count < 3)
+ gr->map_row_offset = 1;
+ else {
+ gr->map_row_offset = 3;
+
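+		/* pick the smallest prime (>= 3) that does not evenly divide
+		 * the tpc count */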
+ for (index = 1; index < 18; index++) {
+ u32 prime = prime_set[index];
+ if ((gr->tpc_count % prime) != 0) {
+ gr->map_row_offset = prime;
+ break;
+ }
+ }
+ }
+
+ switch (gr->tpc_count) {
+ case 15:
+ gr->map_row_offset = 6;
+ break;
+ case 14:
+ gr->map_row_offset = 5;
+ break;
+ case 13:
+ gr->map_row_offset = 2;
+ break;
+ case 11:
+ gr->map_row_offset = 7;
+ break;
+ case 10:
+ gr->map_row_offset = 6;
+ break;
+ case 7:
+ case 5:
+ gr->map_row_offset = 1;
+ break;
+ default:
+ break;
+ }
+
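+	/* discard a previously built map if it no longer matches the current
+	 * tpc configuration */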
+ if (gr->map_tiles) {
+ if (gr->map_tile_count != gr->tpc_count)
+ delete_map = true;
+
+ for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
+ if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
+ delete_map = true;
+ }
+
+ if (delete_map) {
+ kfree(gr->map_tiles);
+ gr->map_tiles = NULL;
+ gr->map_tile_count = 0;
+ }
+ }
+
+ if (gr->map_tiles == NULL) {
+ gr->map_tile_count = proj_scal_max_gpcs_v();
+
+ gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
+ if (gr->map_tiles == NULL) {
+ ret = -ENOMEM;
+ goto clean_up;
+ }
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
+ sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
+ }
+
+ gpc_sorted = false;
+ while (!gpc_sorted) {
+ gpc_sorted = true;
+ for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
+ if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
+ gpc_sorted = false;
+ swap = sorted_num_tpcs[gpc_index];
+ sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
+ sorted_num_tpcs[gpc_index + 1] = swap;
+ swap = sorted_to_unsorted_gpc_map[gpc_index];
+ sorted_to_unsorted_gpc_map[gpc_index] =
+ sorted_to_unsorted_gpc_map[gpc_index + 1];
+ sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
+ }
+ }
+ }
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
+ if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
+ max_tpc_count = gr->gpc_tpc_count[gpc_index];
+
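+		/* Distribute screen tiles across GPCs in proportion to their
+		 * TPC counts by integer error diffusion: every pass each GPC
+		 * accumulates init_frac and emits a tile once the running
+		 * error reaches comm_denom/2.  mul_factor keeps comm_denom
+		 * even so that comm_denom/2 is exact. */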
+ mul_factor = gr->gpc_count * max_tpc_count;
+ if (mul_factor & 0x1)
+ mul_factor = 2;
+ else
+ mul_factor = 1;
+
+ comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ num_tpc = sorted_num_tpcs[gpc_index];
+
+ init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
+
+ if (num_tpc != 0)
+ init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
+ else
+ init_err[gpc_index] = 0;
+
+ run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
+ }
+
+ while (gpc_mark < gr->tpc_count) {
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ if ((run_err[gpc_index] * 2) >= comm_denom) {
+ gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
+ run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
+ } else
+ run_err[gpc_index] += init_frac[gpc_index];
+ }
+ }
+ }
+
+clean_up:
+ kfree(init_frac);
+ kfree(init_err);
+ kfree(run_err);
+ kfree(sorted_num_tpcs);
+ kfree(sorted_to_unsorted_gpc_map);
+
+ if (ret)
+ gk20a_err(dev_from_gk20a(g), "fail");
+ else
+ gk20a_dbg_fn("done");
+
+ return ret;
+}
+
+static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct gr_zcull_gk20a *zcull = &gr->zcull;
+
+ zcull->aliquot_width = gr->tpc_count * 16;
+ zcull->aliquot_height = 16;
+
+ zcull->width_align_pixels = gr->tpc_count * 16;
+ zcull->height_align_pixels = 32;
+
+ zcull->aliquot_size =
+ zcull->aliquot_width * zcull->aliquot_height;
+
+ /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
+ zcull->pixel_squares_by_aliquots =
+ gr->zcb_count * 16 * 16 * gr->tpc_count /
+ (gr->gpc_count * gr->gpc_tpc_count[0]);
+
+ zcull->total_aliquots =
+ gr_gpc0_zcull_total_ram_size_num_aliquots_f(
+ gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
+
+ return 0;
+}
+
+u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
+{
+ /* assuming gr has already been initialized */
+ return gr->ctx_vars.zcull_ctxsw_image_size;
+}
+
+int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
+ struct channel_gk20a *c, u64 zcull_va, u32 mode)
+{
+ struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx;
+
+ zcull_ctx->ctx_sw_mode = mode;
+ zcull_ctx->gpu_va = zcull_va;
+
+ /* TBD: don't disable channel in sw method processing */
+ return gr_gk20a_ctx_zcull_setup(g, c, true);
+}
+
+int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
+ struct gr_zcull_info *zcull_params)
+{
+ struct gr_zcull_gk20a *zcull = &gr->zcull;
+
+ zcull_params->width_align_pixels = zcull->width_align_pixels;
+ zcull_params->height_align_pixels = zcull->height_align_pixels;
+ zcull_params->pixel_squares_by_aliquots =
+ zcull->pixel_squares_by_aliquots;
+ zcull_params->aliquot_total = zcull->total_aliquots;
+
+ zcull_params->region_byte_multiplier =
+ gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
+ zcull_params->region_header_size =
+ proj_scal_litter_num_gpcs_v() *
+ gr_zcull_save_restore_header_bytes_per_gpc_v();
+
+ zcull_params->subregion_header_size =
+ proj_scal_litter_num_gpcs_v() *
+ gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
+
+ zcull_params->subregion_width_align_pixels =
+ gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
+ zcull_params->subregion_height_align_pixels =
+ gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
+ zcull_params->subregion_count = gr_zcull_subregion_qty_v();
+
+ return 0;
+}
+
+static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *color_val, u32 index)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+ u32 i;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 ret;
+
+ ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to disable gr engine activity\n");
+ return ret;
+ }
+
+ ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to idle graphics\n");
+ goto clean_up;
+ }
+
+ /* update l2 table */
+ g->ops.ltc.set_zbc_color_entry(g, color_val, index);
+
+ /* update ds table */
+ gk20a_writel(g, gr_ds_zbc_color_r_r(),
+ gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
+ gk20a_writel(g, gr_ds_zbc_color_g_r(),
+ gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
+ gk20a_writel(g, gr_ds_zbc_color_b_r(),
+ gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
+ gk20a_writel(g, gr_ds_zbc_color_a_r(),
+ gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
+
+ gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
+ gr_ds_zbc_color_fmt_val_f(color_val->format));
+
+ gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+ gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
+
+ /* trigger the write */
+ gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+ gr_ds_zbc_tbl_ld_select_c_f() |
+ gr_ds_zbc_tbl_ld_action_write_f() |
+ gr_ds_zbc_tbl_ld_trigger_active_f());
+
+ /* update local copy */
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
+ gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
+ }
+ gr->zbc_col_tbl[index].format = color_val->format;
+ gr->zbc_col_tbl[index].ref_cnt++;
+
+clean_up:
+ ret = gk20a_fifo_enable_engine_activity(g, gr_info);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to enable gr engine activity\n");
+ }
+
+ return ret;
+}
+
+static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *depth_val, u32 index)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 ret;
+
+ ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to disable gr engine activity\n");
+ return ret;
+ }
+
+ ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to idle graphics\n");
+ goto clean_up;
+ }
+
+ /* update l2 table */
+ g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
+
+ /* update ds table */
+ gk20a_writel(g, gr_ds_zbc_z_r(),
+ gr_ds_zbc_z_val_f(depth_val->depth));
+
+ gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
+ gr_ds_zbc_z_fmt_val_f(depth_val->format));
+
+ gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+ gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
+
+ /* trigger the write */
+ gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+ gr_ds_zbc_tbl_ld_select_z_f() |
+ gr_ds_zbc_tbl_ld_action_write_f() |
+ gr_ds_zbc_tbl_ld_trigger_active_f());
+
+ /* update local copy */
+ gr->zbc_dep_tbl[index].depth = depth_val->depth;
+ gr->zbc_dep_tbl[index].format = depth_val->format;
+ gr->zbc_dep_tbl[index].ref_cnt++;
+
+clean_up:
+ ret = gk20a_fifo_enable_engine_activity(g, gr_info);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to enable gr engine activity\n");
+ }
+
+ return ret;
+}
+
+int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *zbc_val)
+{
+ struct zbc_color_table *c_tbl;
+ struct zbc_depth_table *d_tbl;
+	u32 i;
+	int ret = -ENOMEM;
+ bool added = false;
+ u32 entries;
+
+ /* no endian swap ? */
+
+ switch (zbc_val->type) {
+ case GK20A_ZBC_TYPE_COLOR:
+ /* search existing tables */
+ for (i = 0; i < gr->max_used_color_index; i++) {
+
+ c_tbl = &gr->zbc_col_tbl[i];
+
+ if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
+ memcmp(c_tbl->color_ds, zbc_val->color_ds,
+ sizeof(zbc_val->color_ds)) == 0) {
+
+ if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
+ sizeof(zbc_val->color_l2))) {
+ gk20a_err(dev_from_gk20a(g),
+ "zbc l2 and ds color don't match with existing entries");
+ return -EINVAL;
+ }
+ added = true;
+ c_tbl->ref_cnt++;
+ ret = 0;
+ break;
+ }
+ }
+ /* add new table */
+ if (!added &&
+ gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
+
+ c_tbl =
+ &gr->zbc_col_tbl[gr->max_used_color_index];
+ WARN_ON(c_tbl->ref_cnt != 0);
+
+ ret = gr_gk20a_add_zbc_color(g, gr,
+ zbc_val, gr->max_used_color_index);
+
+ if (!ret)
+ gr->max_used_color_index++;
+ }
+ break;
+ case GK20A_ZBC_TYPE_DEPTH:
+ /* search existing tables */
+ for (i = 0; i < gr->max_used_depth_index; i++) {
+
+ d_tbl = &gr->zbc_dep_tbl[i];
+
+ if (d_tbl->ref_cnt &&
+ d_tbl->depth == zbc_val->depth &&
+ d_tbl->format == zbc_val->format) {
+ added = true;
+ d_tbl->ref_cnt++;
+ ret = 0;
+ break;
+ }
+ }
+ /* add new table */
+ if (!added &&
+ gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
+
+ d_tbl =
+ &gr->zbc_dep_tbl[gr->max_used_depth_index];
+ WARN_ON(d_tbl->ref_cnt != 0);
+
+ ret = gr_gk20a_add_zbc_depth(g, gr,
+ zbc_val, gr->max_used_depth_index);
+
+ if (!ret)
+ gr->max_used_depth_index++;
+ }
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid zbc table type %d", zbc_val->type);
+ return -EINVAL;
+ }
+
+ if (!added && ret == 0) {
+ /* update zbc for elpg only when new entry is added */
+ entries = max(gr->max_used_color_index,
+ gr->max_used_depth_index);
+ gk20a_pmu_save_zbc(g, entries);
+ }
+
+ return ret;
+}
+
+int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+ u32 i, j;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 ret;
+
+ ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to disable gr engine activity\n");
+ return ret;
+ }
+
+ ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to idle graphics\n");
+ goto clean_up;
+ }
+
+ for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
+ gr->zbc_col_tbl[i].format = 0;
+ gr->zbc_col_tbl[i].ref_cnt = 0;
+
+ gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
+ gr_ds_zbc_color_fmt_val_invalid_f());
+ gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+ gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
+
+ /* trigger the write */
+ gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+ gr_ds_zbc_tbl_ld_select_c_f() |
+ gr_ds_zbc_tbl_ld_action_write_f() |
+ gr_ds_zbc_tbl_ld_trigger_active_f());
+
+ /* clear l2 table */
+ g->ops.ltc.clear_zbc_color_entry(g, i);
+
+ for (j = 0; j < GK20A_ZBC_COLOR_VALUE_SIZE; j++) {
+ gr->zbc_col_tbl[i].color_l2[j] = 0;
+ gr->zbc_col_tbl[i].color_ds[j] = 0;
+ }
+ }
+ gr->max_used_color_index = 0;
+ gr->max_default_color_index = 0;
+
+ for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
+ gr->zbc_dep_tbl[i].depth = 0;
+ gr->zbc_dep_tbl[i].format = 0;
+ gr->zbc_dep_tbl[i].ref_cnt = 0;
+
+ gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
+ gr_ds_zbc_z_fmt_val_invalid_f());
+ gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+ gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
+
+ /* trigger the write */
+ gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+ gr_ds_zbc_tbl_ld_select_z_f() |
+ gr_ds_zbc_tbl_ld_action_write_f() |
+ gr_ds_zbc_tbl_ld_trigger_active_f());
+
+ /* clear l2 table */
+ g->ops.ltc.clear_zbc_depth_entry(g, i);
+ }
+ gr->max_used_depth_index = 0;
+ gr->max_default_depth_index = 0;
+
+clean_up:
+ ret = gk20a_fifo_enable_engine_activity(g, gr_info);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to enable gr engine activity\n");
+ }
+
+ /* elpg stuff */
+
+ return ret;
+}
+
+/* get a zbc table entry specified by index
+ * return table size when type is invalid */
+int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_query_params *query_params)
+{
+ u32 index = query_params->index_size;
+ u32 i;
+
+ switch (query_params->type) {
+ case GK20A_ZBC_TYPE_INVALID:
+ query_params->index_size = GK20A_ZBC_TABLE_SIZE;
+ break;
+ case GK20A_ZBC_TYPE_COLOR:
+ if (index >= GK20A_ZBC_TABLE_SIZE) {
+ gk20a_err(dev_from_gk20a(g),
+ "invalid zbc color table index\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ query_params->color_l2[i] =
+ gr->zbc_col_tbl[index].color_l2[i];
+ query_params->color_ds[i] =
+ gr->zbc_col_tbl[index].color_ds[i];
+ }
+ query_params->format = gr->zbc_col_tbl[index].format;
+ query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
+ break;
+ case GK20A_ZBC_TYPE_DEPTH:
+ if (index >= GK20A_ZBC_TABLE_SIZE) {
+ gk20a_err(dev_from_gk20a(g),
+ "invalid zbc depth table index\n");
+ return -EINVAL;
+ }
+ query_params->depth = gr->zbc_dep_tbl[index].depth;
+ query_params->format = gr->zbc_dep_tbl[index].format;
+ query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid zbc table type\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct zbc_entry zbc_val;
+ u32 i, err;
+
+ /* load default color table */
+ zbc_val.type = GK20A_ZBC_TYPE_COLOR;
+
+ zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ zbc_val.color_ds[i] = 0;
+ zbc_val.color_l2[i] = 0;
+ }
+ err = gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+ zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ zbc_val.color_ds[i] = 0xffffffff;
+ zbc_val.color_l2[i] = 0x3f800000;
+ }
+ err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+ zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ zbc_val.color_ds[i] = 0;
+ zbc_val.color_l2[i] = 0;
+ }
+ err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+ zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
+ for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+ zbc_val.color_ds[i] = 0x3f800000;
+ zbc_val.color_l2[i] = 0x3f800000;
+ }
+ err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+	if (!err) {
+		gr->max_default_color_index = 4;
+	} else {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to load default zbc color table\n");
+ return err;
+ }
+
+ /* load default depth table */
+ zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
+
+ zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
+ zbc_val.depth = 0;
+ err = gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+ zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
+ zbc_val.depth = 0x3f800000;
+ err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
+
+	if (!err) {
+		gr->max_default_depth_index = 2;
+	} else {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to load default zbc depth table\n");
+ return err;
+ }
+
+ return 0;
+}
+
+int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *zbc_val)
+{
+ gk20a_dbg_fn("");
+
+ return gr_gk20a_elpg_protected_call(g,
+ gr_gk20a_add_zbc(g, gr, zbc_val));
+}
+
+void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine)
+{
+ u32 gate_ctrl;
+
+ gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
+
+ switch (mode) {
+ case BLCG_RUN:
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_blk_clk_m(),
+ therm_gate_ctrl_blk_clk_run_f());
+ break;
+ case BLCG_AUTO:
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_blk_clk_m(),
+ therm_gate_ctrl_blk_clk_auto_f());
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid blcg mode %d", mode);
+ return;
+ }
+
+ gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
+}
+
+void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
+{
+ u32 gate_ctrl, idle_filter;
+
+ gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
+
+ switch (mode) {
+ case ELCG_RUN:
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_clk_m(),
+ therm_gate_ctrl_eng_clk_run_f());
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_pwr_m(),
+ /* set elpg to auto to meet hw expectation */
+ therm_gate_ctrl_eng_pwr_auto_f());
+ break;
+ case ELCG_STOP:
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_clk_m(),
+ therm_gate_ctrl_eng_clk_stop_f());
+ break;
+ case ELCG_AUTO:
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_clk_m(),
+ therm_gate_ctrl_eng_clk_auto_f());
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "invalid elcg mode %d", mode);
+ }
+
+ if (tegra_platform_is_linsim()) {
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_delay_after_m(),
+ therm_gate_ctrl_eng_delay_after_f(4));
+ }
+
+ /* 2 * (1 << 9) = 1024 clks */
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_idle_filt_exp_m(),
+ therm_gate_ctrl_eng_idle_filt_exp_f(9));
+ gate_ctrl = set_field(gate_ctrl,
+ therm_gate_ctrl_eng_idle_filt_mant_m(),
+ therm_gate_ctrl_eng_idle_filt_mant_f(2));
+ gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
+
+ /* default fecs_idle_filter to 0 */
+ idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
+ idle_filter &= ~therm_fecs_idle_filter_value_m();
+ gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
+ /* default hubmmu_idle_filter to 0 */
+ idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
+ idle_filter &= ~therm_hubmmu_idle_filter_value_m();
+ gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
+}
+
+static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
+ u32 *zcull_map_tiles, *zcull_bank_counters;
+ u32 map_counter;
+ u32 rcp_conserv;
+ u32 offset;
+ bool floorsweep = false;
+
+ if (!gr->map_tiles)
+		return -EINVAL;
+
+ zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
+ proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
+ if (!zcull_map_tiles) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to allocate zcull temp buffers");
+ return -ENOMEM;
+ }
+ zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
+ proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
+
+ if (!zcull_bank_counters) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to allocate zcull temp buffers");
+ kfree(zcull_map_tiles);
+ return -ENOMEM;
+ }
+
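+	/* for each tile in map order, record its sequence number within its
+	 * GPC; zcull_bank_counters[] counts how many tiles each GPC has been
+	 * assigned so far */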
+ for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
+ zcull_map_tiles[map_counter] =
+ zcull_bank_counters[gr->map_tiles[map_counter]];
+ zcull_bank_counters[gr->map_tiles[map_counter]]++;
+ }
+
+ gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
+
+ gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
+
+ gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
+
+ gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
+ gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
+
+ kfree(zcull_map_tiles);
+ kfree(zcull_bank_counters);
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
+ gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
+
+ if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
+ gpc_zcull_count < gpc_tpc_count) {
+ gk20a_err(dev_from_gk20a(g),
+ "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
+ gpc_zcull_count, gpc_tpc_count, gpc_index);
+ return -EINVAL;
+ }
+ if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
+ gpc_zcull_count != 0)
+ floorsweep = true;
+ }
+
+ /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
+ rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ offset = gpc_index * proj_gpc_stride_v();
+
+ if (floorsweep) {
+ gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
+ gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
+ gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
+ gr->max_zcull_per_gpc_count));
+ } else {
+ gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
+ gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
+ gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
+ gr->gpc_tpc_count[gpc_index]));
+ }
+
+ gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
+ gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
+ gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
+
+ gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
+ gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
+ }
+
+ gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
+ gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
+
+ return 0;
+}
+
+static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
+{
+ /* enable tpc exception forwarding */
+ gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(),
+ gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f());
+
+ /* enable gpc exception forwarding */
+ gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(),
+ gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
+}
+
+void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
+{
+ /* enable exceptions */
+ gk20a_writel(g, gr_fe_hww_esr_r(),
+ gr_fe_hww_esr_en_enable_f() |
+ gr_fe_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_memfmt_hww_esr_r(),
+ gr_memfmt_hww_esr_en_enable_f() |
+ gr_memfmt_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_scc_hww_esr_r(),
+ gr_scc_hww_esr_en_enable_f() |
+ gr_scc_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_mme_hww_esr_r(),
+ gr_mme_hww_esr_en_enable_f() |
+ gr_mme_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_pd_hww_esr_r(),
+ gr_pd_hww_esr_en_enable_f() |
+ gr_pd_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
+ gr_sked_hww_esr_reset_active_f());
+ gk20a_writel(g, gr_ds_hww_esr_r(),
+ gr_ds_hww_esr_en_enabled_f() |
+ gr_ds_hww_esr_reset_task_f());
+ gk20a_writel(g, gr_ds_hww_report_mask_r(),
+ gr_ds_hww_report_mask_sph0_err_report_f() |
+ gr_ds_hww_report_mask_sph1_err_report_f() |
+ gr_ds_hww_report_mask_sph2_err_report_f() |
+ gr_ds_hww_report_mask_sph3_err_report_f() |
+ gr_ds_hww_report_mask_sph4_err_report_f() |
+ gr_ds_hww_report_mask_sph5_err_report_f() |
+ gr_ds_hww_report_mask_sph6_err_report_f() |
+ gr_ds_hww_report_mask_sph7_err_report_f() |
+ gr_ds_hww_report_mask_sph8_err_report_f() |
+ gr_ds_hww_report_mask_sph9_err_report_f() |
+ gr_ds_hww_report_mask_sph10_err_report_f() |
+ gr_ds_hww_report_mask_sph11_err_report_f() |
+ gr_ds_hww_report_mask_sph12_err_report_f() |
+ gr_ds_hww_report_mask_sph13_err_report_f() |
+ gr_ds_hww_report_mask_sph14_err_report_f() |
+ gr_ds_hww_report_mask_sph15_err_report_f() |
+ gr_ds_hww_report_mask_sph16_err_report_f() |
+ gr_ds_hww_report_mask_sph17_err_report_f() |
+ gr_ds_hww_report_mask_sph18_err_report_f() |
+ gr_ds_hww_report_mask_sph19_err_report_f() |
+ gr_ds_hww_report_mask_sph20_err_report_f() |
+ gr_ds_hww_report_mask_sph21_err_report_f() |
+ gr_ds_hww_report_mask_sph22_err_report_f() |
+ gr_ds_hww_report_mask_sph23_err_report_f());
+}
+
+static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g)
+{
+ /* setup sm warp esr report masks */
+ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
+
+ /* setup sm global esr report mask */
+ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
+}
+
+static int gk20a_init_gr_setup_hw(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
+ struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
+ u32 data;
+ u32 addr_lo, addr_hi;
+ u64 addr;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 fe_go_idle_timeout_save;
+ u32 last_method_data = 0;
+ u32 i, err;
+
+ gk20a_dbg_fn("");
+
+ /* slcg prod values */
+ g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
+ g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
+
+ /* init mmu debug buffer */
+ addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova);
+ addr_lo = u64_lo32(addr);
+ addr_hi = u64_hi32(addr);
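+	/* the debug buffer registers take the buffer address right-shifted by
+	 * the alignment, packed into a 32-bit field */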
+ addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
+ (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
+
+ gk20a_writel(g, fb_mmu_debug_wr_r(),
+ fb_mmu_debug_wr_aperture_vid_mem_f() |
+ fb_mmu_debug_wr_vol_false_f() |
+ fb_mmu_debug_wr_addr_v(addr));
+
+ addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova);
+ addr_lo = u64_lo32(addr);
+ addr_hi = u64_hi32(addr);
+ addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
+ (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
+
+ gk20a_writel(g, fb_mmu_debug_rd_r(),
+ fb_mmu_debug_rd_aperture_vid_mem_f() |
+ fb_mmu_debug_rd_vol_false_f() |
+ fb_mmu_debug_rd_addr_v(addr));
+
+ /* load gr floorsweeping registers */
+ data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
+ data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
+ gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
+ gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
+
+ gr_gk20a_zcull_init_hw(g, gr);
+
+ g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
+ g->ops.clock_gating.pg_gr_load_gating_prod(g, true);
+
+ if (g->elcg_enabled) {
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
+ } else {
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
+ gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
+ }
+
+ /* Bug 1340570: increase the clock timeout to avoid potential
+ * operation failure at high gpcclk rate. Default values are 0x400.
+ */
+ gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800);
+ gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800);
+ gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800);
+
+ /* enable fifo access */
+ gk20a_writel(g, gr_gpfifo_ctl_r(),
+ gr_gpfifo_ctl_access_enabled_f() |
+ gr_gpfifo_ctl_semaphore_access_enabled_f());
+
+ /* TBD: reload gr ucode when needed */
+
+ /* enable interrupts */
+ gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
+
+ /* enable fecs error interrupts */
+ gk20a_writel(g, gr_fecs_host_int_enable_r(),
+ gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
+ gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
+ gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
+ gr_fecs_host_int_enable_watchdog_enable_f());
+
+ g->ops.gr.enable_hww_exceptions(g);
+ g->ops.gr.set_hww_esr_report_mask(g);
+
+ /* enable per GPC exceptions */
+ gk20a_gr_enable_gpc_exceptions(g);
+
+ /* TBD: ECC for L1/SM */
+ /* TBD: enable per BE exceptions */
+
+ /* reset and enable all exceptions */
+ gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
+ gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
+
+ /* ignore status from some units */
+ data = gk20a_readl(g, gr_status_mask_r());
+ gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
+
+ g->ops.ltc.init_zbc(g, gr);
+ g->ops.ltc.init_cbc(g, gr);
+
+ /* load ctx init */
+ for (i = 0; i < sw_ctx_load->count; i++)
+ gk20a_writel(g, sw_ctx_load->l[i].addr,
+ sw_ctx_load->l[i].value);
+
+ err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (err)
+ goto out;
+
+ /* save and disable fe_go_idle */
+ fe_go_idle_timeout_save =
+ gk20a_readl(g, gr_fe_go_idle_timeout_r());
+ gk20a_writel(g, gr_fe_go_idle_timeout_r(),
+ (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
+ gr_fe_go_idle_timeout_count_disabled_f());
+
+ /* override a few ctx state registers */
+ g->ops.gr.commit_global_cb_manager(g, NULL, false);
+ gr_gk20a_commit_global_timeslice(g, NULL, false);
+
+ /* floorsweep anything left */
+ g->ops.gr.init_fs_state(g);
+
+ err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (err)
+ goto restore_fe_go_idle;
+
+restore_fe_go_idle:
+ /* restore fe_go_idle */
+ gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
+
+ if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT))
+ goto out;
+
+ /* load method init */
+ if (sw_method_init->count) {
+ gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
+ sw_method_init->l[0].value);
+ gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
+ gr_pri_mme_shadow_raw_index_write_trigger_f() |
+ sw_method_init->l[0].addr);
+ last_method_data = sw_method_init->l[0].value;
+ }
+ for (i = 1; i < sw_method_init->count; i++) {
+ if (sw_method_init->l[i].value != last_method_data) {
+ gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
+ sw_method_init->l[i].value);
+ last_method_data = sw_method_init->l[i].value;
+ }
+ gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
+ gr_pri_mme_shadow_raw_index_write_trigger_f() |
+ sw_method_init->l[i].addr);
+ }
+
+ gk20a_mm_l2_invalidate(g);
+
+ err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (err)
+ goto out;
+
+out:
+ gk20a_dbg_fn("done");
+	return err;
+}
+
+static int gk20a_init_gr_prepare(struct gk20a *g)
+{
+ u32 gpfifo_ctrl, pmc_en;
+ u32 err = 0;
+
+ /* disable fifo access */
+ pmc_en = gk20a_readl(g, mc_enable_r());
+ if (pmc_en & mc_enable_pgraph_enabled_f()) {
+ gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
+ gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
+ gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
+ }
+
+ /* reset gr engine */
+ gk20a_reset(g, mc_enable_pgraph_enabled_f()
+ | mc_enable_blg_enabled_f()
+ | mc_enable_perfmon_enabled_f());
+
+ /* enable fifo access */
+ gk20a_writel(g, gr_gpfifo_ctl_r(),
+ gr_gpfifo_ctl_access_enabled_f() |
+ gr_gpfifo_ctl_semaphore_access_enabled_f());
+
+ if (!g->gr.ctx_vars.valid) {
+ err = gr_gk20a_init_ctx_vars(g, &g->gr);
+ if (err)
+ gk20a_err(dev_from_gk20a(g),
+ "fail to load gr init ctx");
+ }
+ return err;
+}
+
+static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
+{
+ int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
+ bool fecs_scrubbing;
+ bool gpccs_scrubbing;
+
+ gk20a_dbg_fn("");
+
+ do {
+ fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) &
+ (gr_fecs_dmactl_imem_scrubbing_m() |
+ gr_fecs_dmactl_dmem_scrubbing_m());
+
+ gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) &
+ (gr_gpccs_dmactl_imem_scrubbing_m() |
+					 gr_gpccs_dmactl_dmem_scrubbing_m());
+
+ if (!fecs_scrubbing && !gpccs_scrubbing) {
+ gk20a_dbg_fn("done");
+ return 0;
+ }
+
+ udelay(GR_IDLE_CHECK_DEFAULT);
+ } while (--retries || !tegra_platform_is_silicon());
+
+ gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
+ return -ETIMEDOUT;
+}
+
+static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 i, err = 0;
+
+ gk20a_dbg_fn("");
+
+ /* enable interrupts */
+ gk20a_writel(g, gr_intr_r(), ~0);
+ gk20a_writel(g, gr_intr_en_r(), ~0);
+
+ /* reset ctx switch state */
+ gr_gk20a_ctx_reset(g, 0);
+
+ /* clear scc ram */
+ gk20a_writel(g, gr_scc_init_r(),
+ gr_scc_init_ram_trigger_f());
+
+ /* load non_ctx init */
+ for (i = 0; i < sw_non_ctx_load->count; i++)
+ gk20a_writel(g, sw_non_ctx_load->l[i].addr,
+ sw_non_ctx_load->l[i].value);
+
+ err = gr_gk20a_wait_mem_scrubbing(g);
+ if (err)
+ goto out;
+
+ err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (err)
+ goto out;
+
+ err = gr_gk20a_load_ctxsw_ucode(g, gr);
+ if (err)
+ goto out;
+
+	/* this appears to query sw state, but fecs actually inits the
+	   ramchain, etc., so this is hw init */
+ err = gr_gk20a_init_ctx_state(g, gr);
+ if (err)
+ goto out;
+
+out:
+ if (err)
+ gk20a_err(dev_from_gk20a(g), "fail");
+ else
+ gk20a_dbg_fn("done");
+
+	return err;
+}
+
+/*
+ * XXX Merge this list with the debugger/profiler
+ * session regops whitelists?
+ */
+static u32 wl_addr_gk20a[] = {
+ /* this list must be sorted (low to high) */
+ 0x404468, /* gr_pri_mme_max_instructions */
+ 0x418800, /* gr_pri_gpcs_setup_debug */
+ 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
+ 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
+ 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
+ 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
+};
+
+static int gr_gk20a_init_access_map(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ void *data;
+ int err = 0;
+ u32 w, nr_pages =
+ DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
+ PAGE_SIZE);
+
+ data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].pages,
+ PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].size) >>
+ PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!data) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to map priv access map memory");
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ memset(data, 0x0, PAGE_SIZE * nr_pages);
+
+ for (w = 0; w < ARRAY_SIZE(wl_addr_gk20a); w++) {
+ u32 map_bit, map_byte, map_shift;
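+		/* one access-map bit per 32-bit register: bit index is the
+		 * register offset divided by 4 */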
+ map_bit = wl_addr_gk20a[w] >> 2;
+ map_byte = map_bit >> 3;
+ map_shift = map_bit & 0x7; /* i.e. 0-7 */
+ gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
+ wl_addr_gk20a[w], map_byte, map_shift);
+ ((u8 *)data)[map_byte] |= 1 << map_shift;
+ }
+
+clean_up:
+ if (data)
+ vunmap(data);
+	return err;
+}
+
+static int gk20a_init_gr_setup_sw(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ if (gr->sw_ready) {
+ gk20a_dbg_fn("skip init");
+ return 0;
+ }
+
+ gr->g = g;
+
+ err = gr_gk20a_init_gr_config(g, gr);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_init_mmu_sw(g, gr);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_init_map_tiles(g, gr);
+ if (err)
+ goto clean_up;
+
+	if (tegra_cpu_is_asim()) {
+		gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
+	} else {
+ gk20a_dbg_info("total ram pages : %lu", totalram_pages);
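+		/* totalram_pages >> (20 - PAGE_SHIFT) == total system RAM
+		 * in MB */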
+ gr->max_comptag_mem = totalram_pages
+ >> (10 - (PAGE_SHIFT - 10));
+ }
+ err = g->ops.ltc.init_comptags(g, gr);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_init_zcull(g, gr);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_alloc_global_ctx_buffers(g);
+ if (err)
+ goto clean_up;
+
+ err = gr_gk20a_init_access_map(g);
+ if (err)
+ goto clean_up;
+
+ mutex_init(&gr->ctx_mutex);
+ spin_lock_init(&gr->ch_tlb_lock);
+
+ gr->remove_support = gk20a_remove_gr_support;
+ gr->sw_ready = true;
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+clean_up:
+ gk20a_err(dev_from_gk20a(g), "fail");
+ gk20a_remove_gr_support(gr);
+ return err;
+}
+
+int gk20a_init_gr_support(struct gk20a *g)
+{
+ u32 err;
+
+ gk20a_dbg_fn("");
+
+ err = gk20a_init_gr_prepare(g);
+ if (err)
+ return err;
+
+ /* this is required before gr_gk20a_init_ctx_state */
+ mutex_init(&g->gr.fecs_mutex);
+
+ err = gk20a_init_gr_reset_enable_hw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_gr_setup_sw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_gr_setup_hw(g);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
+#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
+#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
+#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
+
+#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
+
+struct gr_isr_data {
+ u32 addr;
+ u32 data_lo;
+ u32 data_hi;
+ u32 curr_ctx;
+ u32 chid;
+ u32 offset;
+ u32 sub_chan;
+ u32 class_num;
+};
+
+void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
+{
+ gk20a_dbg_fn("");
+
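+	/* a zero mask disables warp/global error reporting entirely;
+	 * otherwise report every warp and global error class */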
+ if (data == NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
+ gk20a_writel(g,
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), 0);
+ gk20a_writel(g,
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), 0);
+ } else {
+ /* setup sm warp esr report masks */
+ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
+
+ /* setup sm global esr report mask */
+ gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
+ gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
+ }
+}
+
+static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
+{
+ struct gr_gk20a *gr = &g->gr;
+ u32 gpc_index, ppc_index, stride, val, offset;
+ u32 cb_size = data * 4;
+
+ gk20a_dbg_fn("");
+
+ if (cb_size > gr->attrib_cb_size)
+ cb_size = gr->attrib_cb_size;
+
+ gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
+ (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
+ ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
+ gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
+
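+	/* program the per-PPC circular buffer config, scaling the size by
+	 * the number of TPCs served by each PES */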
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ stride = proj_gpc_stride_v() * gpc_index;
+
+ for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+ ppc_index++) {
+
+ val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
+ stride +
+ proj_ppc_in_gpc_stride_v() * ppc_index);
+
+ offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
+
+ val = set_field(val,
+ gr_gpc0_ppc0_cbm_cfg_size_m(),
+ gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
+ gr->pes_tpc_count[ppc_index][gpc_index]));
+ val = set_field(val,
+ gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
+ (offset + 1));
+
+ gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
+ stride +
+ proj_ppc_in_gpc_stride_v() * ppc_index, val);
+
+ val = set_field(val,
+ gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
+ offset);
+
+ gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
+ stride +
+ proj_ppc_in_gpc_stride_v() * ppc_index, val);
+ }
+ }
+}
+
+static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
+{
+ struct gr_gk20a *gr = &g->gr;
+ u32 gpc_index, ppc_index, stride, val;
+ u32 pd_ab_max_output;
+ u32 alpha_cb_size = data * 4;
+
+ gk20a_dbg_fn("");
+ /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
+ return; */
+
+ if (alpha_cb_size > gr->alpha_cb_size)
+ alpha_cb_size = gr->alpha_cb_size;
+
+ gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
+ (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
+ ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
+ gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
+
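+	/* convert the alpha CB size from CBM size-granularity units into
+	 * the PD max-output granularity */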
+ pd_ab_max_output = alpha_cb_size *
+ gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
+ gr_pd_ab_dist_cfg1_max_output_granularity_v();
+
+ gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+ gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
+
+ for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+ stride = proj_gpc_stride_v() * gpc_index;
+
+ for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+ ppc_index++) {
+
+ val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
+ stride +
+ proj_ppc_in_gpc_stride_v() * ppc_index);
+
+ val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
+ gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
+ gr->pes_tpc_count[ppc_index][gpc_index]));
+
+ gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
+ stride +
+ proj_ppc_in_gpc_stride_v() * ppc_index, val);
+ }
+ }
+}
+
+void gk20a_gr_reset(struct gk20a *g)
+{
+ int err;
+ err = gk20a_init_gr_prepare(g);
+ BUG_ON(err);
+ err = gk20a_init_gr_reset_enable_hw(g);
+ BUG_ON(err);
+ err = gk20a_init_gr_setup_hw(g);
+ BUG_ON(err);
+}
+
+static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr,
+ u32 class_num, u32 offset, u32 data)
+{
+ gk20a_dbg_fn("");
+
+ if (class_num == KEPLER_COMPUTE_A) {
+ switch (offset << 2) {
+ case NVA0C0_SET_SHADER_EXCEPTIONS:
+ gk20a_gr_set_shader_exceptions(g, data);
+ break;
+ default:
+ goto fail;
+ }
+ }
+
+ if (class_num == KEPLER_C) {
+ switch (offset << 2) {
+ case NVA297_SET_SHADER_EXCEPTIONS:
+ gk20a_gr_set_shader_exceptions(g, data);
+ break;
+ case NVA297_SET_CIRCULAR_BUFFER_SIZE:
+ g->ops.gr.set_circular_buffer_size(g, data);
+ break;
+ case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
+ g->ops.gr.set_alpha_circular_buffer_size(g, data);
+ break;
+ default:
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ return -EINVAL;
+}
+
+static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+ gk20a_dbg_fn("");
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+ gk20a_err(dev_from_gk20a(g),
+ "gr semaphore timeout\n");
+ return -EINVAL;
+}
+
+static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+ gk20a_dbg_fn("");
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_GR_ILLEGAL_NOTIFY);
+ /* This is an unrecoverable error, reset is needed */
+ gk20a_err(dev_from_gk20a(g),
+		   "gr illegal notify pending\n");
+ return -EINVAL;
+}
+
+static int gk20a_gr_handle_illegal_method(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
+ isr_data->class_num, isr_data->offset,
+ isr_data->data_lo);
+ if (ret)
+ gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x"
+ ", offset 0x%08x address 0x%08x\n",
+ isr_data->class_num, isr_data->offset, isr_data->addr);
+
+ return ret;
+}
+
+static int gk20a_gr_handle_illegal_class(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+ gk20a_dbg_fn("");
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
+ gk20a_err(dev_from_gk20a(g),
+ "invalid class 0x%08x, offset 0x%08x",
+ isr_data->class_num, isr_data->offset);
+ return -EINVAL;
+}
+
+static int gk20a_gr_handle_class_error(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+ gk20a_dbg_fn("");
+
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
+ gk20a_err(dev_from_gk20a(g),
+ "class error 0x%08x, offset 0x%08x",
+ isr_data->class_num, isr_data->offset);
+ return -EINVAL;
+}
+
+static int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+
+ wake_up(&ch->semaphore_wq);
+
+ return 0;
+}
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
+ u32 offset)
+{
+ /* support only 24-bit 4-byte aligned offsets */
+ bool valid = !(offset & 0xFF000003);
+ /* whitelist check */
+ valid = valid &&
+ is_bar0_global_offset_whitelisted_gk20a(offset);
+ /* resource size check in case there was a problem
+ * with allocating the assumed size of bar0 */
+ valid = valid &&
+ offset < resource_size(g->reg_mem);
+ return valid;
+}
+#endif
+
+static int gk20a_gr_handle_notify_pending(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data->chid];
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+ void *virtual_address;
+ u32 buffer_size;
+ u32 offset;
+ u32 new_offset;
+ bool exit;
+ struct share_buffer_head *sh_hdr;
+ u32 raw_reg;
+ u64 mask_orig;
+ u64 v = 0;
+ struct gk20a_cyclestate_buffer_elem *op_elem;
+ /* GL will never use payload 0 for cycle state */
+ if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
+ return 0;
+
+ mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
+
+ virtual_address = ch->cyclestate.cyclestate_buffer;
+ buffer_size = ch->cyclestate.cyclestate_buffer_size;
+ offset = isr_data->data_lo;
+ exit = false;
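+	/* walk the shared cyclestats buffer, executing each whitelisted BAR0
+	 * read/write request until an OP_END marker or the end of the buffer
+	 * is reached */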
+ while (!exit) {
+ if (offset >= buffer_size) {
+ WARN_ON(1);
+ break;
+ }
+
+ sh_hdr = (struct share_buffer_head *)
+ ((char *)virtual_address + offset);
+
+ if (sh_hdr->size < sizeof(struct share_buffer_head)) {
+ WARN_ON(1);
+ break;
+ }
+ new_offset = offset + sh_hdr->size;
+
+ switch (sh_hdr->operation) {
+ case OP_END:
+ exit = true;
+ break;
+
+ case BAR0_READ32:
+ case BAR0_WRITE32:
+ {
+ bool valid;
+ op_elem =
+ (struct gk20a_cyclestate_buffer_elem *)
+ sh_hdr;
+ valid = is_valid_cyclestats_bar0_offset_gk20a(g,
+ op_elem->offset_bar0);
+ if (!valid) {
+ gk20a_err(dev_from_gk20a(g),
+				"invalid cyclestats op offset: 0x%x\n",
+ op_elem->offset_bar0);
+
+ sh_hdr->failed = exit = true;
+ break;
+ }
+
+
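+				/* build a mask covering bits
+				 * first_bit..last_bit inclusive */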
+ mask_orig =
+ ((1ULL <<
+ (op_elem->last_bit + 1))
+ -1)&~((1ULL <<
+ op_elem->first_bit)-1);
+
+ raw_reg =
+ gk20a_readl(g,
+ op_elem->offset_bar0);
+
+ switch (sh_hdr->operation) {
+ case BAR0_READ32:
+ op_elem->data =
+ (raw_reg & mask_orig)
+ >> op_elem->first_bit;
+ break;
+
+ case BAR0_WRITE32:
+ v = 0;
+ if ((unsigned int)mask_orig !=
+ (unsigned int)~0) {
+ v = (unsigned int)
+ (raw_reg & ~mask_orig);
+ }
+
+ v |= ((op_elem->data
+ << op_elem->first_bit)
+ & mask_orig);
+
+ gk20a_writel(g,
+ op_elem->offset_bar0,
+ (unsigned int)v);
+ break;
+ default:
+ /* nop ok?*/
+ break;
+ }
+ }
+ break;
+
+ default:
+ /* no operation content case */
+ exit = true;
+ break;
+ }
+ sh_hdr->completed = true;
+ offset = new_offset;
+ }
+ mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
+#endif
+ gk20a_dbg_fn("");
+ wake_up(&ch->notifier_wq);
+ return 0;
+}
+
+/* Used by sw interrupt thread to translate current ctx to chid.
+ * For performance, we don't want to go through 128 channels every time.
+ * A small tlb is used here to cache translation */
+static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
+{
+ struct fifo_gk20a *f = &g->fifo;
+ struct gr_gk20a *gr = &g->gr;
+ u32 chid = -1;
+ u32 i;
+
+ spin_lock(&gr->ch_tlb_lock);
+
+ /* check cache first */
+ for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
+ if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
+ chid = gr->chid_tlb[i].hw_chid;
+ goto unlock;
+ }
+ }
+
+ /* slow path */
+ for (chid = 0; chid < f->num_channels; chid++)
+ if (f->channel[chid].in_use) {
+ if ((u32)(f->channel[chid].inst_block.cpu_pa >>
+ ram_in_base_shift_v()) ==
+ gr_fecs_current_ctx_ptr_v(curr_ctx))
+ break;
+ }
+
+ if (chid >= f->num_channels) {
+ chid = -1;
+ goto unlock;
+ }
+
+ /* add to free tlb entry */
+ for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
+ if (gr->chid_tlb[i].curr_ctx == 0) {
+ gr->chid_tlb[i].curr_ctx = curr_ctx;
+ gr->chid_tlb[i].hw_chid = chid;
+ goto unlock;
+ }
+ }
+
+ /* no free entry, flush one */
+ gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
+ gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
+
+ gr->channel_tlb_flush_index =
+ (gr->channel_tlb_flush_index + 1) &
+ (GR_CHANNEL_MAP_TLB_SIZE - 1);
+
+unlock:
+ spin_unlock(&gr->ch_tlb_lock);
+ return chid;
+}
+
+static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
+{
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 delay = GR_IDLE_CHECK_DEFAULT;
+ bool mmu_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled(g);
+ u32 dbgr_control0;
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM");
+
+ /* assert stop trigger */
+ dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+ dbgr_control0 |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
+ gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
+
+ /* wait for the sm to lock down */
+ do {
+ u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
+ u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
+ u32 dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r());
+ bool locked_down =
+ (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
+ gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
+ bool error_pending =
+ (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
+ gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
+ ((global_esr & ~global_esr_mask) != 0);
+
+ if (locked_down || !error_pending) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM");
+
+ /* de-assert stop trigger */
+ dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
+ gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), dbgr_control0);
+
+ return 0;
+ }
+
+ /* if an mmu fault is pending and mmu debug mode is not
+ * enabled, the sm will never lock down. */
+ if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
+ gk20a_err(dev_from_gk20a(g), "mmu fault pending, sm will"
+ " never lock down!");
+ return -EFAULT;
+ }
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+ } while (time_before(jiffies, end_jiffies)
+ || !tegra_platform_is_silicon());
+
+ gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM");
+
+ return -EAGAIN;
+}
+
+bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
+{
+ u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+
+ /* check if an sm debugger is attached */
+ if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
+ gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v())
+ return true;
+
+ return false;
+}
+
+static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr)
+{
+ gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr);
+
+ /* clear the warp hww */
+ gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(),
+ gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f());
+}
+
+static struct channel_gk20a *
+channel_from_hw_chid(struct gk20a *g, u32 hw_chid)
+{
+ return g->fifo.channel+hw_chid;
+}
+
+static int gk20a_gr_handle_sm_exception(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ int ret = 0;
+ bool do_warp_sync = false;
+ /* these three interrupts don't require locking down the SM. They can
+ * be handled by usermode clients as they aren't fatal. Additionally,
+ * usermode clients may wish to allow some warps to execute while others
+ * are at breakpoints, as opposed to fatal errors where all warps should
+ * halt. */
+ u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+ gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+ gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+ u32 global_esr, warp_esr;
+ bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g);
+ struct channel_gk20a *fault_ch;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
+ warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
+
+ /* if an sm debugger is attached, disable forwarding of tpc exceptions.
+ * the debugger will reenable exceptions after servicing them. */
+ if (sm_debugger_attached) {
+ u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
+ tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
+ gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en);
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached");
+ }
+
+ /* if a debugger is present and an error has occurred, do a warp sync */
+ if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
+ gk20a_dbg(gpu_dbg_intr, "warp sync needed");
+ do_warp_sync = true;
+ }
+
+ if (do_warp_sync) {
+ ret = gk20a_gr_lock_down_sm(g, global_mask);
+ if (ret) {
+ gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+ return ret;
+ }
+ }
+
+ /* finally, signal any client waiting on an event */
+ fault_ch = channel_from_hw_chid(g, isr_data->chid);
+ if (fault_ch)
+ gk20a_dbg_gpu_post_events(fault_ch);
+
+ return ret;
+}
+
+static int gk20a_gr_handle_tpc_exception(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ int ret = 0;
+ u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r());
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
+
+	/* check if an sm exception is pending */
+ if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) ==
+ gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending");
+ ret = gk20a_gr_handle_sm_exception(g, isr_data);
+ }
+
+ return ret;
+}
+
+static int gk20a_gr_handle_gpc_exception(struct gk20a *g,
+ struct gr_isr_data *isr_data)
+{
+ int ret = 0;
+ u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r());
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
+
+ /* check if tpc 0 has an exception */
+ if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) ==
+ gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending");
+ ret = gk20a_gr_handle_tpc_exception(g, isr_data);
+ }
+
+ return ret;
+}
+
+int gk20a_gr_isr(struct gk20a *g)
+{
+ struct gr_isr_data isr_data;
+ u32 grfifo_ctl;
+ u32 obj_table;
+ int need_reset = 0;
+ u32 gr_intr = gk20a_readl(g, gr_intr_r());
+
+ gk20a_dbg_fn("");
+ gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
+
+ if (!gr_intr)
+ return 0;
+
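+	/* stall gpfifo access to gr while the interrupt is serviced;
+	 * access is restored at clean_up */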
+ grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
+ grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
+ grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
+
+ gk20a_writel(g, gr_gpfifo_ctl_r(),
+ grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
+ gr_gpfifo_ctl_semaphore_access_f(0));
+
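+	/* latch the trapped method state (address, data, current context,
+	 * subchannel and class) for the individual handlers below */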
+ isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
+ isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
+ isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
+ isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
+ isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
+ isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
+ obj_table = gk20a_readl(g,
+ gr_fe_object_table_r(isr_data.sub_chan));
+ isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
+
+ isr_data.chid =
+ gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
+ if (isr_data.chid == -1) {
+ gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
+ isr_data.curr_ctx);
+ goto clean_up;
+ }
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+ "channel %d: addr 0x%08x, "
+ "data 0x%08x 0x%08x,"
+ "ctx 0x%08x, offset 0x%08x, "
+ "subchannel 0x%08x, class 0x%08x",
+ isr_data.chid, isr_data.addr,
+ isr_data.data_hi, isr_data.data_lo,
+ isr_data.curr_ctx, isr_data.offset,
+ isr_data.sub_chan, isr_data.class_num);
+
+ if (gr_intr & gr_intr_notify_pending_f()) {
+ gk20a_gr_handle_notify_pending(g, &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_notify_reset_f());
+ gr_intr &= ~gr_intr_notify_pending_f();
+ }
+
+ if (gr_intr & gr_intr_semaphore_pending_f()) {
+ gk20a_gr_handle_semaphore_pending(g, &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_semaphore_reset_f());
+ gr_intr &= ~gr_intr_semaphore_pending_f();
+ }
+
+ if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
+ need_reset |= gk20a_gr_handle_semaphore_timeout_pending(g,
+ &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_semaphore_reset_f());
+		gr_intr &= ~gr_intr_semaphore_timeout_pending_f();
+ }
+
+ if (gr_intr & gr_intr_illegal_notify_pending_f()) {
+ need_reset |= gk20a_gr_intr_illegal_notify_pending(g,
+ &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_illegal_notify_reset_f());
+ gr_intr &= ~gr_intr_illegal_notify_pending_f();
+ }
+
+ if (gr_intr & gr_intr_illegal_method_pending_f()) {
+ need_reset |= gk20a_gr_handle_illegal_method(g, &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_illegal_method_reset_f());
+ gr_intr &= ~gr_intr_illegal_method_pending_f();
+ }
+
+ if (gr_intr & gr_intr_illegal_class_pending_f()) {
+ need_reset |= gk20a_gr_handle_illegal_class(g, &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_illegal_class_reset_f());
+ gr_intr &= ~gr_intr_illegal_class_pending_f();
+ }
+
+ if (gr_intr & gr_intr_class_error_pending_f()) {
+ need_reset |= gk20a_gr_handle_class_error(g, &isr_data);
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_class_error_reset_f());
+ gr_intr &= ~gr_intr_class_error_pending_f();
+ }
+
+ /* this one happens if someone tries to hit a non-whitelisted
+ * register using set_falcon[4] */
+ if (gr_intr & gr_intr_firmware_method_pending_f()) {
+ need_reset |= true;
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
+ gk20a_writel(g, gr_intr_r(),
+ gr_intr_firmware_method_reset_f());
+ gr_intr &= ~gr_intr_firmware_method_pending_f();
+ }
+
+ if (gr_intr & gr_intr_exception_pending_f()) {
+ u32 exception = gk20a_readl(g, gr_exception_r());
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[isr_data.chid];
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
+
+ if (exception & gr_exception_fe_m()) {
+ u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
+ gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe);
+ gk20a_writel(g, gr_fe_hww_esr_r(), fe);
+ }
+
+ /* check if a gpc exception has occurred */
+ if (exception & gr_exception_gpc_m() && need_reset == 0) {
+ u32 exception1 = gk20a_readl(g, gr_exception1_r());
+ u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
+
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");
+
+ /* if no sm debugger is present, clean up the channel */
+ if (!gk20a_gr_sm_debugger_attached(g)) {
+ gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+ "SM debugger not attached, clearing interrupt");
+ need_reset |= -EFAULT;
+ } else {
+ /* check if gpc 0 has an exception */
+ if (exception1 & gr_exception1_gpc_0_pending_f())
+ need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data);
+ /* clear the hwws, also causes tpc and gpc
+ * exceptions to be cleared */
+ gk20a_gr_clear_sm_hww(g, global_esr);
+ }
+
+ if (need_reset)
+ gk20a_set_error_notifier(ch,
+ NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
+ }
+
+ gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
+ gr_intr &= ~gr_intr_exception_pending_f();
+ }
+
+ if (need_reset)
+ gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);
+
+clean_up:
+ gk20a_writel(g, gr_gpfifo_ctl_r(),
+ grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
+ gr_gpfifo_ctl_semaphore_access_f(1));
+
+ if (gr_intr)
+ gk20a_err(dev_from_gk20a(g),
+ "unhandled gr interrupt 0x%08x", gr_intr);
+
+ return 0;
+}
+
+int gk20a_gr_nonstall_isr(struct gk20a *g)
+{
+ u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
+ u32 clear_intr = 0;
+
+ gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
+
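+	/* wake channel semaphore waiters on the nonstall trap interrupt */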
+ if (gr_intr & gr_intr_nonstall_trap_pending_f()) {
+ gk20a_channel_semaphore_wakeup(g);
+ clear_intr |= gr_intr_nonstall_trap_pending_f();
+ }
+
+ gk20a_writel(g, gr_intr_nonstall_r(), clear_intr);
+
+ return 0;
+}
+
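+/* query the FECS ucode for the size of the reglist save image; the size is
+ * returned through the FECS mailbox */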
+int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
+{
+ BUG_ON(size == NULL);
+ return gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .mailbox.id = 0,
+ .mailbox.data = 0,
+ .mailbox.clr = ~0,
+ .method.data = 1,
+ .method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(),
+ .mailbox.ret = size,
+ .cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+ .mailbox.ok = 0,
+ .cond.fail = GR_IS_UCODE_OP_SKIP,
+ .mailbox.fail = 0});
+}
+
+int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
+{
+ return gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a){
+ .mailbox.id = 4,
+ .mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) |
+ gr_fecs_current_ctx_valid_f(1) |
+ gr_fecs_current_ctx_target_vid_mem_f()),
+ .mailbox.clr = ~0,
+ .method.data = 1,
+ .method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
+ .mailbox.ret = NULL,
+ .cond.ok = GR_IS_UCODE_OP_EQUAL,
+ .mailbox.ok = 1,
+ .cond.fail = GR_IS_UCODE_OP_SKIP,
+ .mailbox.fail = 0});
+}
+
+int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va)
+{
+ return gr_gk20a_submit_fecs_method_op(g,
+ (struct fecs_method_op_gk20a) {
+ .mailbox.id = 4,
+ .mailbox.data = u64_lo32(pmu_va >> 8),
+ .mailbox.clr = ~0,
+ .method.data = 1,
+ .method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(),
+ .mailbox.ret = NULL,
+ .cond.ok = GR_IS_UCODE_OP_EQUAL,
+ .mailbox.ok = 1,
+ .cond.fail = GR_IS_UCODE_OP_SKIP,
+ .mailbox.fail = 0});
+}
+
+int gk20a_gr_suspend(struct gk20a *g)
+{
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 ret = 0;
+
+ gk20a_dbg_fn("");
+
+ ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+ if (ret)
+ return ret;
+
+ gk20a_writel(g, gr_gpfifo_ctl_r(),
+ gr_gpfifo_ctl_access_disabled_f());
+
+ /* disable gr intr */
+ gk20a_writel(g, gr_intr_r(), 0);
+ gk20a_writel(g, gr_intr_en_r(), 0);
+
+ /* disable all exceptions */
+ gk20a_writel(g, gr_exception_r(), 0);
+ gk20a_writel(g, gr_exception_en_r(), 0);
+ gk20a_writel(g, gr_exception1_r(), 0);
+ gk20a_writel(g, gr_exception1_en_r(), 0);
+ gk20a_writel(g, gr_exception2_r(), 0);
+ gk20a_writel(g, gr_exception2_en_r(), 0);
+
+ gk20a_gr_flush_channel_tlb(&g->gr);
+
+ gk20a_dbg_fn("done");
+ return ret;
+}
+
+static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
+ u32 addr,
+ bool is_quad, u32 quad,
+ u32 *context_buffer,
+ u32 context_buffer_size,
+ u32 *priv_offset);
+
+/* This function will decode a priv address and return the partition type and numbers. */
+int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
+ int *addr_type, /* enum ctxsw_addr_type */
+ u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
+ u32 *broadcast_flags)
+{
+ u32 gpc_addr;
+ u32 ppc_address;
+ u32 ppc_broadcast_addr;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+ /* setup defaults */
+ ppc_address = 0;
+ ppc_broadcast_addr = 0;
+ *addr_type = CTXSW_ADDR_TYPE_SYS;
+ *broadcast_flags = PRI_BROADCAST_FLAGS_NONE;
+ *gpc_num = 0;
+ *tpc_num = 0;
+ *ppc_num = 0;
+ *be_num = 0;
+
+ if (pri_is_gpc_addr(addr)) {
+ *addr_type = CTXSW_ADDR_TYPE_GPC;
+ gpc_addr = pri_gpccs_addr_mask(addr);
+ if (pri_is_gpc_addr_shared(addr)) {
+ *addr_type = CTXSW_ADDR_TYPE_GPC;
+ *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
+ } else
+ *gpc_num = pri_get_gpc_num(addr);
+
+ if (pri_is_tpc_addr(gpc_addr)) {
+ *addr_type = CTXSW_ADDR_TYPE_TPC;
+ if (pri_is_tpc_addr_shared(gpc_addr)) {
+ *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
+ return 0;
+ }
+ *tpc_num = pri_get_tpc_num(gpc_addr);
+ }
+ return 0;
+ } else if (pri_is_be_addr(addr)) {
+ *addr_type = CTXSW_ADDR_TYPE_BE;
+ if (pri_is_be_addr_shared(addr)) {
+ *broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
+ return 0;
+ }
+ *be_num = pri_get_be_num(addr);
+ return 0;
+ } else {
+ *addr_type = CTXSW_ADDR_TYPE_SYS;
+ return 0;
+ }
+ /* PPC!?!?!?! */
+
+ /*NOTREACHED*/
+ return -EINVAL;
+}
+
+static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
+ u32 gpc_num,
+ u32 *priv_addr_table, u32 *t)
+{
+ u32 ppc_num;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+ for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++)
+ priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr),
+ gpc_num, ppc_num);
+
+ return 0;
+}
+
+/*
+ * The context buffer is indexed using BE broadcast addresses and GPC/TPC
+ * unicast addresses. This function will convert a BE unicast address to a BE
+ * broadcast address and split a GPC/TPC broadcast address into a table of
+ * GPC/TPC addresses. The addresses generated by this function can be
+ * successfully processed by gr_gk20a_find_priv_offset_in_buffer
+ */
+static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
+ u32 addr,
+ u32 *priv_addr_table,
+ u32 *num_registers)
+{
+ int addr_type; /*enum ctxsw_addr_type */
+ u32 gpc_num, tpc_num, ppc_num, be_num;
+ u32 broadcast_flags;
+ u32 t;
+ int err;
+
+ t = 0;
+ *num_registers = 0;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+ err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
+ &gpc_num, &tpc_num, &ppc_num, &be_num,
+ &broadcast_flags);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
+ if (err)
+ return err;
+
+ if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
+ (addr_type == CTXSW_ADDR_TYPE_BE)) {
+ /* The BE broadcast registers are included in the compressed PRI
+ * table. Convert a BE unicast address to a broadcast address
+ * so that we can look up the offset. */
+ if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
+ !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
+ priv_addr_table[t++] = pri_be_shared_addr(addr);
+ else
+ priv_addr_table[t++] = addr;
+
+ *num_registers = t;
+ return 0;
+ }
+
+ /* The GPC/TPC unicast registers are included in the compressed PRI
+ * tables. Convert a GPC/TPC broadcast address to unicast addresses so
+ * that we can look up the offsets. */
+ if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
+ for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+
+ if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
+ for (tpc_num = 0;
+ tpc_num < g->gr.gpc_tpc_count[gpc_num];
+ tpc_num++)
+ priv_addr_table[t++] =
+ pri_tpc_addr(pri_tpccs_addr_mask(addr),
+ gpc_num, tpc_num);
+
+ else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
+ err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
+ priv_addr_table, &t);
+ if (err)
+ return err;
+ } else
+ priv_addr_table[t++] =
+ pri_gpc_addr(pri_gpccs_addr_mask(addr),
+ gpc_num);
+ }
+ } else {
+ if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
+ for (tpc_num = 0;
+ tpc_num < g->gr.gpc_tpc_count[gpc_num];
+ tpc_num++)
+ priv_addr_table[t++] =
+ pri_tpc_addr(pri_tpccs_addr_mask(addr),
+ gpc_num, tpc_num);
+ else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
+ err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
+ priv_addr_table, &t);
+ else
+ priv_addr_table[t++] = addr;
+ }
+
+ *num_registers = t;
+ return 0;
+}
+
+int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
+ u32 addr,
+ u32 max_offsets,
+ u32 *offsets, u32 *offset_addrs,
+ u32 *num_offsets,
+ bool is_quad, u32 quad)
+{
+ u32 i;
+ u32 priv_offset = 0;
+ u32 *priv_registers;
+ u32 num_registers = 0;
+ int err = 0;
+ u32 potential_offsets = proj_scal_litter_num_gpcs_v() *
+ proj_scal_litter_num_tpc_per_gpc_v();
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+ /* implementation is crossed-up if either of these happen */
+ if (max_offsets > potential_offsets)
+ return -EINVAL;
+
+ if (!g->gr.ctx_vars.golden_image_initialized)
+ return -ENODEV;
+
+ priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
+	if (!priv_registers) {
+		gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
+		err = -ENOMEM;
+ goto cleanup;
+ }
+ memset(offsets, 0, sizeof(u32) * max_offsets);
+ memset(offset_addrs, 0, sizeof(u32) * max_offsets);
+ *num_offsets = 0;
+
+ gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
+
+ if ((max_offsets > 1) && (num_registers > max_offsets)) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if ((max_offsets == 1) && (num_registers > 1))
+ num_registers = 1;
+
+ if (!g->gr.ctx_vars.local_golden_image) {
+ gk20a_dbg_fn("no context switch header info to work with");
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ for (i = 0; i < num_registers; i++) {
+ err = gr_gk20a_find_priv_offset_in_buffer(g,
+ priv_registers[i],
+ is_quad, quad,
+ g->gr.ctx_vars.local_golden_image,
+ g->gr.ctx_vars.golden_image_size,
+ &priv_offset);
+ if (err) {
+ gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x",
+ addr); /*, grPriRegStr(addr)));*/
+ goto cleanup;
+ }
+
+ offsets[i] = priv_offset;
+ offset_addrs[i] = priv_registers[i];
+ }
+
+ *num_offsets = num_registers;
+
+ cleanup:
+
+	kfree(priv_registers);
+
+ return err;
+}
+
+/* Setup some register tables. This looks hacky; our
+ * register/offset functions are just that, functions.
+ * So they can't be used as initializers... TBD: fix to
+ * generate consts at least on an as-needed basis.
+ */
+static const u32 _num_ovr_perf_regs = 17;
+static u32 _ovr_perf_regs[17] = { 0, };
+/* Following are the blocks of registers that the ucode
+ stores in the extended region.*/
+/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
+static const u32 _num_sm_dsm_perf_regs = 5;
+/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
+static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
+static u32 _sm_dsm_perf_regs[5];
+static u32 _sm_dsm_perf_ctrl_regs[4];
+
+static void init_sm_dsm_reg_info(void)
+{
+ if (_ovr_perf_regs[0] != 0)
+ return;
+
+ _ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
+ _ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
+ _ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
+ _ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
+ _ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
+ _ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
+ _ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
+ _ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
+ _ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
+ _ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
+ _ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
+ _ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
+ _ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
+ _ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
+ _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
+ _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
+ _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
+
+
+ _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
+ _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
+ _sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r();
+ _sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r();
+ _sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r();
+
+ _sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r();
+ _sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r();
+ _sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r();
+ _sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r();
+
+}
+
+/* TBD: would like to handle this elsewhere, at a higher level.
+ * these are currently constructed in a "test-then-write" style
+ * which makes it impossible to know externally whether a ctx
+ * write will actually occur. so later we should put a lazy,
+ * map-and-hold system in the patch write state */
+int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u32 addr, u32 data,
+ u8 *context)
+{
+ u32 num_gpc = g->gr.gpc_count;
+ u32 num_tpc;
+ u32 tpc, gpc, reg;
+ u32 chk_addr;
+ u32 vaddr_lo;
+ u32 vaddr_hi;
+ u32 tmp;
+
+ init_sm_dsm_reg_info();
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
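+	/* if addr matches one of the per-TPC SM DSM perf override registers,
+	 * patch the value into the context image and refresh the patch
+	 * count/address words in the main context header */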
+ for (reg = 0; reg < _num_ovr_perf_regs; reg++) {
+ for (gpc = 0; gpc < num_gpc; gpc++) {
+ num_tpc = g->gr.gpc_tpc_count[gpc];
+ for (tpc = 0; tpc < num_tpc; tpc++) {
+ chk_addr = ((proj_gpc_stride_v() * gpc) +
+ (proj_tpc_in_gpc_stride_v() * tpc) +
+ _ovr_perf_regs[reg]);
+ if (chk_addr != addr)
+ continue;
+				/* reset the patch count from previous
+				   runs, if ucode has already processed
+				   it */
+ tmp = gk20a_mem_rd32(context +
+ ctxsw_prog_main_image_patch_count_o(), 0);
+
+ if (!tmp)
+ ch_ctx->patch_ctx.data_count = 0;
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx,
+ addr, data, true);
+
+ vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
+ vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+
+ gk20a_mem_wr32(context +
+ ctxsw_prog_main_image_patch_count_o(),
+ 0, ch_ctx->patch_ctx.data_count);
+ gk20a_mem_wr32(context +
+ ctxsw_prog_main_image_patch_adr_lo_o(),
+ 0, vaddr_lo);
+ gk20a_mem_wr32(context +
+ ctxsw_prog_main_image_patch_adr_hi_o(),
+ 0, vaddr_hi);
+
+ /* we're not caching these on cpu side,
+ but later watch for it */
+
+ /* the l2 invalidate in the patch_write
+ * would be too early for this? */
+ gk20a_mm_l2_invalidate(g);
+ return 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
+{
+ u32 reg;
+ u32 quad_ctrl;
+ u32 half_ctrl;
+ u32 tpc, gpc;
+ u32 gpc_tpc_addr;
+ u32 gpc_tpc_stride;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset);
+
+ gpc = pri_get_gpc_num(offset);
+ gpc_tpc_addr = pri_gpccs_addr_mask(offset);
+ tpc = pri_get_tpc_num(gpc_tpc_addr);
+
+ quad_ctrl = quad & 0x1; /* first bit tells us quad */
+ half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */
+
+ gpc_tpc_stride = gpc * proj_gpc_stride_v() +
+ tpc * proj_tpc_in_gpc_stride_v();
+ gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride;
+
+ reg = gk20a_readl(g, gpc_tpc_addr);
+ reg = set_field(reg,
+ gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(),
+ gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl));
+
+ gk20a_writel(g, gpc_tpc_addr, reg);
+
+ gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride;
+ reg = gk20a_readl(g, gpc_tpc_addr);
+ reg = set_field(reg,
+ gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(),
+ gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl));
+ gk20a_writel(g, gpc_tpc_addr, reg);
+}
+
+#define ILLEGAL_ID (~0)
+
+static inline bool check_main_image_header_magic(void *context)
+{
+ u32 magic = gk20a_mem_rd32(context +
+ ctxsw_prog_main_image_magic_value_o(), 0);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
+ return magic == ctxsw_prog_main_image_magic_value_v_value_v();
+}
+static inline bool check_local_header_magic(void *context)
+{
+ u32 magic = gk20a_mem_rd32(context +
+ ctxsw_prog_local_magic_value_o(), 0);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
+ return magic == ctxsw_prog_local_magic_value_v_value_v();
+
+}
+
+/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
+static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
+{
+ return 256;
+}
+
+void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
+ u32 *num_sm_dsm_perf_regs,
+ u32 **sm_dsm_perf_regs,
+ u32 *perf_register_stride)
+{
+ *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
+ *sm_dsm_perf_regs = _sm_dsm_perf_regs;
+ *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
+}
+
+void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
+ u32 *num_sm_dsm_perf_ctrl_regs,
+ u32 **sm_dsm_perf_ctrl_regs,
+ u32 *ctrl_register_stride)
+{
+ *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
+ *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
+ *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
+}
+
+static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
+ u32 addr,
+ bool is_quad, u32 quad,
+ u32 *context_buffer,
+ u32 context_buffer_size,
+ u32 *priv_offset)
+{
+ u32 i, data32;
+ u32 gpc_num, tpc_num;
+ u32 num_gpcs, num_tpcs;
+ u32 chk_addr;
+ u32 ext_priv_offset, ext_priv_size;
+ void *context;
+ u32 offset_to_segment, offset_to_segment_end;
+ u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
+ u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
+ u32 num_ext_gpccs_ext_buffer_segments;
+ u32 inter_seg_offset;
+ u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1);
+ u32 max_tpc_count;
+ u32 *sm_dsm_perf_ctrl_regs = NULL;
+ u32 num_sm_dsm_perf_ctrl_regs = 0;
+ u32 *sm_dsm_perf_regs = NULL;
+ u32 num_sm_dsm_perf_regs = 0;
+ u32 buffer_segments_size = 0;
+ u32 marker_size = 0;
+ u32 control_register_stride = 0;
+ u32 perf_register_stride = 0;
+
+	/* Only TPC registers are in the extended region, so if this is not a
+	   TPC reg, return an error so the caller can look elsewhere. */
+ if (pri_is_gpc_addr(addr)) {
+ u32 gpc_addr = 0;
+ gpc_num = pri_get_gpc_num(addr);
+ gpc_addr = pri_gpccs_addr_mask(addr);
+ if (pri_is_tpc_addr(gpc_addr))
+ tpc_num = pri_get_tpc_num(gpc_addr);
+ else
+ return -EINVAL;
+
+ gk20a_dbg_info(" gpc = %d tpc = %d",
+ gpc_num, tpc_num);
+ } else
+ return -EINVAL;
+
+ buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
+ /* note below is in words/num_registers */
+ marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
+
+ context = context_buffer;
+ /* sanity check main header */
+ if (!check_main_image_header_magic(context)) {
+ gk20a_err(dev_from_gk20a(g),
+ "Invalid main header: magic value");
+ return -EINVAL;
+ }
+ num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
+ if (gpc_num >= num_gpcs) {
+ gk20a_err(dev_from_gk20a(g),
+ "GPC 0x%08x is greater than total count 0x%08x!\n",
+ gpc_num, num_gpcs);
+ return -EINVAL;
+ }
+
+ data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0);
+ ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
+ if (0 == ext_priv_size) {
+ gk20a_dbg_info(" No extended memory in context buffer");
+ return -EINVAL;
+ }
+ ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
+
+ offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
+ offset_to_segment_end = offset_to_segment +
+ (ext_priv_size * buffer_segments_size);
+
+ /* check local header magic */
+ context += ctxsw_prog_ucode_header_size_in_bytes();
+ if (!check_local_header_magic(context)) {
+ gk20a_err(dev_from_gk20a(g),
+ "Invalid local header: magic value\n");
+ return -EINVAL;
+ }
+
+ /*
+ * See if the incoming register address is in the first table of
+ * registers. We check this by decoding only the TPC addr portion.
+ * If we get a hit on the TPC bit, we then double check the address
+ * by computing it from the base gpc/tpc strides. Then make sure
+ * it is a real match.
+ */
+ g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
+ &sm_dsm_perf_regs,
+ &perf_register_stride);
+
+ init_sm_dsm_reg_info();
+
+ for (i = 0; i < num_sm_dsm_perf_regs; i++) {
+ if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
+ sm_dsm_perf_reg_id = i;
+
+ gk20a_dbg_info("register match: 0x%08x",
+ sm_dsm_perf_regs[i]);
+
+ chk_addr = (proj_gpc_base_v() +
+ (proj_gpc_stride_v() * gpc_num) +
+ proj_tpc_in_gpc_base_v() +
+ (proj_tpc_in_gpc_stride_v() * tpc_num) +
+ (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask));
+
+ if (chk_addr != addr) {
+ gk20a_err(dev_from_gk20a(g),
+					"address mismatch: 0x%08x != 0x%08x\n",
+ addr, chk_addr);
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
+	/* Didn't find the reg in supported group 1,
+	 * so try the second group now */
+ g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
+ &sm_dsm_perf_ctrl_regs,
+ &control_register_stride);
+
+ if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
+ for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
+ if ((addr & tpc_gpc_mask) ==
+ (sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
+ sm_dsm_perf_ctrl_reg_id = i;
+
+ gk20a_dbg_info("register match: 0x%08x",
+ sm_dsm_perf_ctrl_regs[i]);
+
+ chk_addr = (proj_gpc_base_v() +
+ (proj_gpc_stride_v() * gpc_num) +
+ proj_tpc_in_gpc_base_v() +
+ (proj_tpc_in_gpc_stride_v() * tpc_num) +
+ (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
+ tpc_gpc_mask));
+
+ if (chk_addr != addr) {
+ gk20a_err(dev_from_gk20a(g),
+						"address mismatch: 0x%08x != 0x%08x\n",
+ addr, chk_addr);
+ return -EINVAL;
+
+ }
+
+ break;
+ }
+ }
+ }
+
+ if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
+ (ILLEGAL_ID == sm_dsm_perf_reg_id))
+ return -EINVAL;
+
+ /* Skip the FECS extended header, nothing there for us now. */
+ offset_to_segment += buffer_segments_size;
+
+ /* skip through the GPCCS extended headers until we get to the data for
+ * our GPC. The size of each gpc extended segment is enough to hold the
+	 * max tpc count for the gpcs, in 256B chunks.
+ */
+
+ max_tpc_count = proj_scal_litter_num_tpc_per_gpc_v();
+
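+	/* each gpccs extended buffer segment covers two TPCs, hence the
+	 * round-up */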
+ num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);
+
+ offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
+ buffer_segments_size * gpc_num);
+
+ num_tpcs = g->gr.gpc_tpc_count[gpc_num];
+
+ /* skip the head marker to start with */
+ inter_seg_offset = marker_size;
+
+ if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
+		/* skip over the control regs of the TPCs before the one we
+		 * want, then skip to the register in this tpc */
+ inter_seg_offset = inter_seg_offset +
+ (tpc_num * control_register_stride) +
+ sm_dsm_perf_ctrl_reg_id;
+ } else {
+ /* skip all the control registers */
+ inter_seg_offset = inter_seg_offset +
+ (num_tpcs * control_register_stride);
+
+ /* skip the marker between control and counter segments */
+ inter_seg_offset += marker_size;
+
+ /* skip over counter regs of TPCs before the one we want */
+ inter_seg_offset = inter_seg_offset +
+ (tpc_num * perf_register_stride) *
+ ctxsw_prog_extended_num_smpc_quadrants_v();
+
+ /* skip over the register for the quadrants we do not want.
+ * then skip to the register in this tpc */
+ inter_seg_offset = inter_seg_offset +
+ (perf_register_stride * quad) +
+ sm_dsm_perf_reg_id;
+ }
+
+ /* set the offset to the segment offset plus the inter segment offset to
+ * our register */
+ offset_to_segment += (inter_seg_offset * 4);
+
+ /* last sanity check: did we somehow compute an offset outside the
+ * extended buffer? */
+ if (offset_to_segment > offset_to_segment_end) {
+ gk20a_err(dev_from_gk20a(g),
+ "Overflow ctxsw buffer! 0x%08x > 0x%08x\n",
+ offset_to_segment, offset_to_segment_end);
+ return -EINVAL;
+ }
+
+ *priv_offset = offset_to_segment;
+
+ return 0;
+}
+
+
+static int
+gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
+ int addr_type,/* enum ctxsw_addr_type */
+ u32 pri_addr,
+ u32 gpc_num, u32 num_tpcs,
+ u32 num_ppcs, u32 ppc_mask,
+ u32 *priv_offset)
+{
+ u32 i;
+ u32 address, base_address;
+ u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
+ u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
+ struct aiv_gk20a *reg;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
+
+ if (!g->gr.ctx_vars.valid)
+ return -EINVAL;
+
+ /* Process the SYS/BE segment. */
+ if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
+ (addr_type == CTXSW_ADDR_TYPE_BE)) {
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
+ reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
+ address = reg->addr;
+ sys_offset = reg->index;
+
+ if (pri_addr == address) {
+ *priv_offset = sys_offset;
+ return 0;
+ }
+ }
+ }
+
+ /* Process the TPC segment. */
+ if (addr_type == CTXSW_ADDR_TYPE_TPC) {
+ for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
+ reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
+ address = reg->addr;
+ tpc_addr = pri_tpccs_addr_mask(address);
+ base_address = proj_gpc_base_v() +
+ (gpc_num * proj_gpc_stride_v()) +
+ proj_tpc_in_gpc_base_v() +
+ (tpc_num * proj_tpc_in_gpc_stride_v());
+ address = base_address + tpc_addr;
+ /*
+ * The data for the TPCs is interleaved in the context buffer.
+ * Example with num_tpcs = 2
+ * 0 1 2 3 4 5 6 7 8 9 10 11 ...
+ * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
+ */
+ tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
+
+ if (pri_addr == address) {
+ *priv_offset = tpc_offset;
+ return 0;
+ }
+ }
+ }
+ }
+
+ /* Process the PPC segment. */
+ if (addr_type == CTXSW_ADDR_TYPE_PPC) {
+ for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
+ reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
+ address = reg->addr;
+ ppc_addr = pri_ppccs_addr_mask(address);
+ base_address = proj_gpc_base_v() +
+ (gpc_num * proj_gpc_stride_v()) +
+ proj_ppc_in_gpc_base_v() +
+ (ppc_num * proj_ppc_in_gpc_stride_v());
+ address = base_address + ppc_addr;
+ /*
+ * The data for the PPCs is interleaved in the context buffer.
+ * Example with numPpcs = 2
+ * 0 1 2 3 4 5 6 7 8 9 10 11 ...
+ * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
+ */
+ ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);
+
+ if (pri_addr == address) {
+ *priv_offset = ppc_offset;
+ return 0;
+ }
+ }
+ }
+ }
+
+
+ /* Process the GPC segment. */
+ if (addr_type == CTXSW_ADDR_TYPE_GPC) {
+ for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
+ reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];
+
+ address = reg->addr;
+ gpc_addr = pri_gpccs_addr_mask(address);
+ gpc_offset = reg->index;
+
+ base_address = proj_gpc_base_v() +
+ (gpc_num * proj_gpc_stride_v());
+ address = base_address + gpc_addr;
+
+ if (pri_addr == address) {
+ *priv_offset = gpc_offset;
+ return 0;
+ }
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
+ void *context,
+ u32 *num_ppcs, u32 *ppc_mask,
+ u32 *reg_ppc_count)
+{
+ u32 data32;
+ u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
+
+ /*
+ * if there is only 1 PES_PER_GPC, then we put the PES registers
+ * in the GPC reglist, so we can't error out if ppc.count == 0
+ */
+ if ((!g->gr.ctx_vars.valid) ||
+ ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
+ (litter_num_pes_per_gpc > 1)))
+ return -EINVAL;
+
+ data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0);
+
+ *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
+ *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
+
+ *reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count;
+
+ return 0;
+}
+
+
+
+/*
+ * This function will return the 32 bit offset for a priv register if it is
+ * present in the context buffer.
+ */
+static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
+ u32 addr,
+ bool is_quad, u32 quad,
+ u32 *context_buffer,
+ u32 context_buffer_size,
+ u32 *priv_offset)
+{
+ struct gr_gk20a *gr = &g->gr;
+ u32 i, data32;
+ int err;
+ int addr_type; /*enum ctxsw_addr_type */
+ u32 broadcast_flags;
+ u32 gpc_num, tpc_num, ppc_num, be_num;
+ u32 num_gpcs, num_tpcs, num_ppcs;
+ u32 offset;
+ u32 sys_priv_offset, gpc_priv_offset;
+ u32 ppc_mask, reg_list_ppc_count;
+ void *context;
+ u32 offset_to_segment;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+ err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
+ &gpc_num, &tpc_num, &ppc_num, &be_num,
+ &broadcast_flags);
+ if (err)
+ return err;
+
+ context = context_buffer;
+ if (!check_main_image_header_magic(context)) {
+ gk20a_err(dev_from_gk20a(g),
+ "Invalid main header: magic value");
+ return -EINVAL;
+ }
+ num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
+
+ /* Parse the FECS local header. */
+ context += ctxsw_prog_ucode_header_size_in_bytes();
+ if (!check_local_header_magic(context)) {
+ gk20a_err(dev_from_gk20a(g),
+ "Invalid FECS local header: magic value\n");
+ return -EINVAL;
+ }
+ data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
+ sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
+
+ /* If found in Ext buffer, ok.
+ * If it failed and we expected to find it there (quad offset)
+ * then return the error. Otherwise continue on.
+ */
+ err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
+ addr, is_quad, quad, context_buffer,
+ context_buffer_size, priv_offset);
+	if (!err || is_quad)
+ return err;
+
+ if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
+ (addr_type == CTXSW_ADDR_TYPE_BE)) {
+ /* Find the offset in the FECS segment. */
+ offset_to_segment = sys_priv_offset *
+ ctxsw_prog_ucode_header_size_in_bytes();
+
+ err = gr_gk20a_process_context_buffer_priv_segment(g,
+ addr_type, addr,
+ 0, 0, 0, 0,
+ &offset);
+ if (err)
+ return err;
+
+ *priv_offset = (offset_to_segment + offset);
+ return 0;
+ }
+
+ if ((gpc_num + 1) > num_gpcs) {
+ gk20a_err(dev_from_gk20a(g),
+ "GPC %d not in this context buffer.\n",
+ gpc_num);
+ return -EINVAL;
+ }
+
+ /* Parse the GPCCS local header(s).*/
+ for (i = 0; i < num_gpcs; i++) {
+ context += ctxsw_prog_ucode_header_size_in_bytes();
+ if (!check_local_header_magic(context)) {
+ gk20a_err(dev_from_gk20a(g),
+ "Invalid GPCCS local header: magic value\n");
+ return -EINVAL;
+
+ }
+ data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
+ gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
+
+ err = gr_gk20a_determine_ppc_configuration(g, context,
+ &num_ppcs, &ppc_mask,
+ &reg_list_ppc_count);
+ if (err)
+ return err;
+
+ num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0);
+
+ if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
+ gk20a_err(dev_from_gk20a(g),
+ "GPC %d TPC %d not in this context buffer.\n",
+ gpc_num, tpc_num);
+ return -EINVAL;
+ }
+
+ /* Find the offset in the GPCCS segment.*/
+ if (i == gpc_num) {
+ offset_to_segment = gpc_priv_offset *
+ ctxsw_prog_ucode_header_size_in_bytes();
+
+ if (addr_type == CTXSW_ADDR_TYPE_TPC) {
+ /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
+ } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
+ /* The ucode stores TPC data before PPC data.
+ * Advance offset past TPC data to PPC data. */
+ offset_to_segment +=
+ ((gr->ctx_vars.ctxsw_regs.tpc.count *
+ num_tpcs) << 2);
+ } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
+ /* The ucode stores TPC/PPC data before GPC data.
+ * Advance offset past TPC/PPC data to GPC data. */
+ /* note 1 PES_PER_GPC case */
+ u32 litter_num_pes_per_gpc =
+ proj_scal_litter_num_pes_per_gpc_v();
+ if (litter_num_pes_per_gpc > 1) {
+ offset_to_segment +=
+ (((gr->ctx_vars.ctxsw_regs.tpc.count *
+ num_tpcs) << 2) +
+ ((reg_list_ppc_count * num_ppcs) << 2));
+ } else {
+ offset_to_segment +=
+ ((gr->ctx_vars.ctxsw_regs.tpc.count *
+ num_tpcs) << 2);
+ }
+ } else {
+ gk20a_err(dev_from_gk20a(g),
+ " Unknown address type.\n");
+ return -EINVAL;
+ }
+ err = gr_gk20a_process_context_buffer_priv_segment(g,
+ addr_type, addr,
+ i, num_tpcs,
+ num_ppcs, ppc_mask,
+ &offset);
+ if (err)
+ return -EINVAL;
+
+ *priv_offset = offset_to_segment + offset;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+ struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+ u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+ struct gk20a *g = ch->g;
+ struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+ void *ctx_ptr = NULL;
+ int curr_gr_chid, curr_gr_ctx;
+ bool ch_is_curr_ctx, restart_gr_ctxsw = false;
+ u32 i, j, offset, v;
+ u32 max_offsets = proj_scal_litter_num_gpcs_v() *
+ proj_scal_litter_num_tpc_per_gpc_v();
+ u32 *offsets = NULL;
+ u32 *offset_addrs = NULL;
+ u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
+ int err, pass;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
+ num_ctx_wr_ops, num_ctx_rd_ops);
+
+ /* disable channel switching.
+ * at that point the hardware state can be inspected to
+ * determine if the context we're interested in is current.
+ */
+ err = gr_gk20a_disable_ctxsw(g);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
+ /* this should probably be ctx-fatal... */
+ goto cleanup;
+ }
+
+ restart_gr_ctxsw = true;
+
+ curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
+ curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx);
+ ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid);
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
+ if (ch_is_curr_ctx) {
+ for (pass = 0; pass < 2; pass++) {
+ ctx_op_nr = 0;
+ for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
+ /* only do ctx ops and only on the right pass */
+ if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
+ (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
+ ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
+ continue;
+
+ /* if this is a quad access, set up for special access */
+ if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)
+ && g->ops.gr.access_smpc_reg)
+ g->ops.gr.access_smpc_reg(g,
+ ctx_ops[i].quad,
+ ctx_ops[i].offset);
+ offset = ctx_ops[i].offset;
+
+ if (pass == 0) { /* write pass */
+ v = gk20a_readl(g, offset);
+ v &= ~ctx_ops[i].and_n_mask_lo;
+ v |= ctx_ops[i].value_lo;
+ gk20a_writel(g, offset, v);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "direct wr: offset=0x%x v=0x%x",
+ offset, v);
+
+ if (ctx_ops[i].op == REGOP(WRITE_64)) {
+ v = gk20a_readl(g, offset + 4);
+ v &= ~ctx_ops[i].and_n_mask_hi;
+ v |= ctx_ops[i].value_hi;
+ gk20a_writel(g, offset + 4, v);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "direct wr: offset=0x%x v=0x%x",
+ offset + 4, v);
+ }
+
+ } else { /* read pass */
+ ctx_ops[i].value_lo =
+ gk20a_readl(g, offset);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "direct rd: offset=0x%x v=0x%x",
+ offset, ctx_ops[i].value_lo);
+
+ if (ctx_ops[i].op == REGOP(READ_64)) {
+ ctx_ops[i].value_hi =
+ gk20a_readl(g, offset + 4);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "direct rd: offset=0x%x v=0x%x",
+ offset + 4, ctx_ops[i].value_hi);
+ } else
+ ctx_ops[i].value_hi = 0;
+ }
+ ctx_op_nr++;
+ }
+ }
+ goto cleanup;
+ }
+
+ /* they're the same size, so just use one alloc for both */
+ offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL);
+ if (!offsets) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ offset_addrs = offsets + max_offsets;
+
+ /* This would have been a variant of gr_gk20a_apply_instmem_overrides,
+ * but it was recoded in-place instead. */
+ ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+ PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+ 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!ctx_ptr) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ /* The channel gr_ctx buffer is gpu cacheable, so flush and invalidate.
+ * There should be no on-going/in-flight references by the gpu now. */
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, true);
+
+ /* Write to the appropriate place in the context image;
+ * first we have to figure out where that really is. */
+
+ /* first pass is writes, second reads */
+ for (pass = 0; pass < 2; pass++) {
+ ctx_op_nr = 0;
+ for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
+ u32 num_offsets;
+
+ /* only do ctx ops and only on the right pass */
+ if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
+ (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
+ ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
+ continue;
+
+ err = gr_gk20a_get_ctx_buffer_offsets(g,
+ ctx_ops[i].offset,
+ max_offsets,
+ offsets, offset_addrs,
+ &num_offsets,
+ ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
+ ctx_ops[i].quad);
+ if (err) {
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "ctx op invalid offset: offset=0x%x",
+ ctx_ops[i].offset);
+ ctx_ops[i].status =
+ NVHOST_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
+ continue;
+ }
+
+ /* if this is a quad access, set up for special access */
+ if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) &&
+ g->ops.gr.access_smpc_reg)
+ g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
+ ctx_ops[i].offset);
+
+ for (j = 0; j < num_offsets; j++) {
+ /* sanity check: don't write outside the golden context image */
+ if (offsets[j] >= g->gr.ctx_vars.golden_image_size)
+ continue;
+ if (pass == 0) { /* write pass */
+ v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0);
+ v &= ~ctx_ops[i].and_n_mask_lo;
+ v |= ctx_ops[i].value_lo;
+ gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "context wr: offset=0x%x v=0x%x",
+ offsets[j], v);
+
+ if (ctx_ops[i].op == REGOP(WRITE_64)) {
+ v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0);
+ v &= ~ctx_ops[i].and_n_mask_hi;
+ v |= ctx_ops[i].value_hi;
+ gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "context wr: offset=0x%x v=0x%x",
+ offsets[j] + 4, v);
+ }
+
+ /* check to see if we need to add a special WAR
+ for some of the SMPC perf regs */
+ gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
+ v, ctx_ptr);
+
+ } else { /* read pass */
+ ctx_ops[i].value_lo =
+ gk20a_mem_rd32(ctx_ptr + offsets[0], 0);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
+ offsets[0], ctx_ops[i].value_lo);
+
+ if (ctx_ops[i].op == REGOP(READ_64)) {
+ ctx_ops[i].value_hi =
+ gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg,
+ "context rd: offset=0x%x v=0x%x",
+ offsets[0] + 4, ctx_ops[i].value_hi);
+ } else
+ ctx_ops[i].value_hi = 0;
+ }
+ }
+ ctx_op_nr++;
+ }
+ }
+#if 0
+ /* flush cpu caches for the ctx buffer? only if cpu cached, of course.
+ * they aren't, yet */
+ if (cached) {
+ FLUSH_CPU_DCACHE(ctx_ptr,
+ sg_phys(ch_ctx->gr_ctx.mem.ref), size);
+ }
+#endif
+
+ cleanup:
+ if (offsets)
+ kfree(offsets);
+
+ if (ctx_ptr)
+ vunmap(ctx_ptr);
+
+ if (restart_gr_ctxsw) {
+ int tmp_err = gr_gk20a_enable_ctxsw(g);
+ if (tmp_err) {
+ gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n");
+ err = tmp_err;
+ }
+ }
+
+ return err;
+}
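A minimal usage sketch may help here (editorial, not part of the patch): it shows how a debugger-side caller could drive gr_gk20a_exec_ctx_ops() with a single 64-bit context read. The field names follow the usage in the function above; REGOP(TYPE_GR_CTX) is an assumed name for the non-global, non-quad context op type, and `ch` is assumed to be a channel already bound to a graphics context.

/* Sketch only: read one 64-bit register from a channel's gr context via the
 * reg-op interface implemented above.  REGOP(TYPE_GR_CTX) is an assumed name. */
static int example_read_ctx_reg64(struct channel_gk20a *ch, u32 reg_offset, u64 *out)
{
	struct nvhost_dbg_gpu_reg_op op = {
		.op     = REGOP(READ_64),
		.type   = REGOP(TYPE_GR_CTX),	/* assumed constant */
		.offset = reg_offset,
	};
	int err;

	/* one op total: zero context writes, one context read */
	err = gr_gk20a_exec_ctx_ops(ch, &op, 1, 0, 1);
	if (err)
		return err;

	*out = ((u64)op.value_hi << 32) | op.value_lo;
	return 0;
}

On success, value_lo and value_hi hold the register contents whether the op was serviced directly against the hardware (channel currently resident) or patched out of the context image.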
+
+static void gr_gk20a_cb_size_default(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+
+ gr->attrib_cb_default_size =
+ gr_gpc0_ppc0_cbm_cfg_size_default_v();
+ gr->alpha_cb_default_size =
+ gr_gpc0_ppc0_cbm_cfg2_size_default_v();
+}
+
+static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g)
+{
+ struct gr_gk20a *gr = &g->gr;
+ int size;
+
+ gr->attrib_cb_size = gr->attrib_cb_default_size;
+ gr->alpha_cb_size = gr->alpha_cb_default_size
+ + (gr->alpha_cb_default_size >> 1);
+
+ size = gr->attrib_cb_size *
+ gr_gpc0_ppc0_cbm_cfg_size_granularity_v() *
+ gr->max_tpc_count;
+
+ size += gr->alpha_cb_size *
+ gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() *
+ gr->max_tpc_count;
+
+ return size;
+}
+
+void gr_gk20a_commit_global_pagepool(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, u32 size, bool patch)
+{
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
+ gr_scc_pagepool_base_addr_39_8_f(addr), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
+ gr_scc_pagepool_total_pages_f(size) |
+ gr_scc_pagepool_valid_true_f(), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
+ gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
+ gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
+
+ gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(),
+ gr_pd_pagepool_total_pages_f(size) |
+ gr_pd_pagepool_valid_true_f(), patch);
+}
+
+void gk20a_init_gr(struct gpu_ops *gops)
+{
+ gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
+ gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults;
+ gops->gr.cb_size_default = gr_gk20a_cb_size_default;
+ gops->gr.calc_global_ctx_buffer_size =
+ gr_gk20a_calc_global_ctx_buffer_size;
+ gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb;
+ gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb;
+ gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager;
+ gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool;
+ gops->gr.handle_sw_method = gr_gk20a_handle_sw_method;
+ gops->gr.set_alpha_circular_buffer_size =
+ gk20a_gr_set_alpha_circular_buffer_size;
+ gops->gr.set_circular_buffer_size =
+ gk20a_gr_set_circular_buffer_size;
+ gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
+ gops->gr.is_valid_class = gr_gk20a_is_valid_class;
+ gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
+ gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
+ gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
+ gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
+ gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
+}
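For orientation, a hedged sketch (not part of the patch) of how these per-chip hooks are consumed at runtime: callers dispatch through g->ops.gr and NULL-check optional entries, exactly as gr_gk20a_exec_ctx_ops() does with access_smpc_reg above.

/* Illustrative only: indirect dispatch through the ops table populated by
 * gk20a_init_gr().  Optional hooks are NULL-checked before the call. */
static void example_smpc_quad_setup(struct gk20a *g, u32 quad, u32 offset)
{
	if (g->ops.gr.access_smpc_reg)
		g->ops.gr.access_smpc_reg(g, quad, offset);
}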
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
new file mode 100644
index 000000000000..7eb2923ab2c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -0,0 +1,406 @@
+/*
+ * GK20A Graphics Engine
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GR_GK20A_H__
+#define __GR_GK20A_H__
+
+#include <linux/slab.h>
+
+#include "gr_ctx_gk20a.h"
+
+#define GR_IDLE_CHECK_DEFAULT 100 /* usec */
+#define GR_IDLE_CHECK_MAX 5000 /* usec */
+
+#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
+#define INVALID_MAX_WAYS 0xFFFFFFFF
+
+#define GK20A_FECS_UCODE_IMAGE "fecs.bin"
+#define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin"
+
+enum /* global_ctx_buffer */ {
+ CIRCULAR = 0,
+ PAGEPOOL = 1,
+ ATTRIBUTE = 2,
+ CIRCULAR_VPR = 3,
+ PAGEPOOL_VPR = 4,
+ ATTRIBUTE_VPR = 5,
+ GOLDEN_CTX = 6,
+ PRIV_ACCESS_MAP = 7,
+ NR_GLOBAL_CTX_BUF = 8
+};
+
+/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
+enum /*global_ctx_buffer_va */ {
+ CIRCULAR_VA = 0,
+ PAGEPOOL_VA = 1,
+ ATTRIBUTE_VA = 2,
+ GOLDEN_CTX_VA = 3,
+ PRIV_ACCESS_MAP_VA = 4,
+ NR_GLOBAL_CTX_BUF_VA = 5
+};
+
+enum {
+ WAIT_UCODE_LOOP,
+ WAIT_UCODE_TIMEOUT,
+ WAIT_UCODE_ERROR,
+ WAIT_UCODE_OK
+};
+
+enum {
+ GR_IS_UCODE_OP_EQUAL,
+ GR_IS_UCODE_OP_NOT_EQUAL,
+ GR_IS_UCODE_OP_AND,
+ GR_IS_UCODE_OP_LESSER,
+ GR_IS_UCODE_OP_LESSER_EQUAL,
+ GR_IS_UCODE_OP_SKIP
+};
+
+enum {
+ eUcodeHandshakeInitComplete = 1,
+ eUcodeHandshakeMethodFinished
+};
+
+enum {
+ ELCG_RUN, /* clk always run, i.e. disable elcg */
+ ELCG_STOP, /* clk is stopped */
+ ELCG_AUTO /* clk will run when non-idle, standard elcg mode */
+};
+
+enum {
+ BLCG_RUN, /* clk always run, i.e. disable blcg */
+ BLCG_AUTO /* clk will run when non-idle, standard blcg mode */
+};
+
+#ifndef GR_GO_IDLE_BUNDLE
+#define GR_GO_IDLE_BUNDLE 0x0000e100 /* --V-B */
+#endif
+
+struct gr_channel_map_tlb_entry {
+ u32 curr_ctx;
+ u32 hw_chid;
+};
+
+struct gr_zcull_gk20a {
+ u32 aliquot_width;
+ u32 aliquot_height;
+ u32 aliquot_size;
+ u32 total_aliquots;
+
+ u32 width_align_pixels;
+ u32 height_align_pixels;
+ u32 pixel_squares_by_aliquots;
+};
+
+struct gr_zcull_info {
+ u32 width_align_pixels;
+ u32 height_align_pixels;
+ u32 pixel_squares_by_aliquots;
+ u32 aliquot_total;
+ u32 region_byte_multiplier;
+ u32 region_header_size;
+ u32 subregion_header_size;
+ u32 subregion_width_align_pixels;
+ u32 subregion_height_align_pixels;
+ u32 subregion_count;
+};
+
+#define GK20A_ZBC_COLOR_VALUE_SIZE 4 /* RGBA */
+
+#define GK20A_STARTOF_ZBC_TABLE 1 /* index zero reserved to indicate "not ZBCd" */
+#define GK20A_SIZEOF_ZBC_TABLE 16 /* match ltcs_ltss_dstg_zbc_index_address width (4) */
+#define GK20A_ZBC_TABLE_SIZE (16 - 1)
+
+#define GK20A_ZBC_TYPE_INVALID 0
+#define GK20A_ZBC_TYPE_COLOR 1
+#define GK20A_ZBC_TYPE_DEPTH 2
+
+struct zbc_color_table {
+ u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 format;
+ u32 ref_cnt;
+};
+
+struct zbc_depth_table {
+ u32 depth;
+ u32 format;
+ u32 ref_cnt;
+};
+
+struct zbc_entry {
+ u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 depth;
+ u32 type; /* color or depth */
+ u32 format;
+};
+
+struct zbc_query_params {
+ u32 color_ds[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 color_l2[GK20A_ZBC_COLOR_VALUE_SIZE];
+ u32 depth;
+ u32 ref_cnt;
+ u32 format;
+ u32 type; /* color or depth */
+ u32 index_size; /* [out] size, [in] index */
+};
+
+struct gr_gk20a {
+ struct gk20a *g;
+ struct {
+ bool dynamic;
+
+ u32 buffer_size;
+ u32 buffer_total_size;
+
+ bool golden_image_initialized;
+ u32 golden_image_size;
+ u32 *local_golden_image;
+
+ u32 zcull_ctxsw_image_size;
+
+ u32 buffer_header_size;
+
+ u32 priv_access_map_size;
+
+ struct gr_ucode_gk20a ucode;
+
+ struct av_list_gk20a sw_bundle_init;
+ struct av_list_gk20a sw_method_init;
+ struct aiv_list_gk20a sw_ctx_load;
+ struct av_list_gk20a sw_non_ctx_load;
+ struct {
+ struct aiv_list_gk20a sys;
+ struct aiv_list_gk20a gpc;
+ struct aiv_list_gk20a tpc;
+ struct aiv_list_gk20a zcull_gpc;
+ struct aiv_list_gk20a ppc;
+ struct aiv_list_gk20a pm_sys;
+ struct aiv_list_gk20a pm_gpc;
+ struct aiv_list_gk20a pm_tpc;
+ } ctxsw_regs;
+ int regs_base_index;
+ bool valid;
+ } ctx_vars;
+
+ struct mutex ctx_mutex; /* protect golden ctx init */
+ struct mutex fecs_mutex; /* protect fecs method */
+
+#define GR_NETLIST_DYNAMIC -1
+#define GR_NETLIST_STATIC_A 'A'
+ int netlist;
+
+ int initialized;
+ u32 num_fbps;
+
+ u32 max_gpc_count;
+ u32 max_fbps_count;
+ u32 max_tpc_per_gpc_count;
+ u32 max_zcull_per_gpc_count;
+ u32 max_tpc_count;
+
+ u32 sys_count;
+ u32 gpc_count;
+ u32 pe_count_per_gpc;
+ u32 ppc_count;
+ u32 *gpc_ppc_count;
+ u32 tpc_count;
+ u32 *gpc_tpc_count;
+ u32 zcb_count;
+ u32 *gpc_zcb_count;
+ u32 *pes_tpc_count[2];
+ u32 *pes_tpc_mask[2];
+ u32 *gpc_skip_mask;
+
+ u32 bundle_cb_default_size;
+ u32 min_gpm_fifo_depth;
+ u32 bundle_cb_token_limit;
+ u32 attrib_cb_default_size;
+ u32 attrib_cb_size;
+ u32 alpha_cb_default_size;
+ u32 alpha_cb_size;
+ u32 timeslice_mode;
+
+ struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
+
+ struct mmu_desc mmu_wr_mem;
+ u32 mmu_wr_mem_size;
+ struct mmu_desc mmu_rd_mem;
+ u32 mmu_rd_mem_size;
+
+ u8 *map_tiles;
+ u32 map_tile_count;
+ u32 map_row_offset;
+
+#define COMP_TAG_LINE_SIZE_SHIFT (17) /* one tag covers 128K */
+#define COMP_TAG_LINE_SIZE (1 << COMP_TAG_LINE_SIZE_SHIFT)
+
+ u32 max_comptag_mem; /* max memory size (MB) for comptag */
+ struct compbit_store_desc compbit_store;
+ struct gk20a_allocator comp_tags;
+
+ struct gr_zcull_gk20a zcull;
+
+ struct zbc_color_table zbc_col_tbl[GK20A_ZBC_TABLE_SIZE];
+ struct zbc_depth_table zbc_dep_tbl[GK20A_ZBC_TABLE_SIZE];
+
+ s32 max_default_color_index;
+ s32 max_default_depth_index;
+
+ s32 max_used_color_index;
+ s32 max_used_depth_index;
+
+ u32 status_disable_mask;
+
+#define GR_CHANNEL_MAP_TLB_SIZE 2 /* must be a power of 2 */
+ struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
+ u32 channel_tlb_flush_index;
+ spinlock_t ch_tlb_lock;
+
+ void (*remove_support)(struct gr_gk20a *gr);
+ bool sw_ready;
+ bool skip_ucode_init;
+};
+
+void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
+
+struct gk20a_ctxsw_ucode_segment {
+ u32 offset;
+ u32 size;
+};
+
+struct gk20a_ctxsw_ucode_segments {
+ u32 boot_entry;
+ u32 boot_imem_offset;
+ struct gk20a_ctxsw_ucode_segment boot;
+ struct gk20a_ctxsw_ucode_segment code;
+ struct gk20a_ctxsw_ucode_segment data;
+};
+
+struct gk20a_ctxsw_ucode_info {
+ u64 *p_va;
+ struct inst_desc inst_blk_desc;
+ struct surface_mem_desc surface_desc;
+ u64 ucode_gpuva;
+ struct gk20a_ctxsw_ucode_segments fecs;
+ struct gk20a_ctxsw_ucode_segments gpccs;
+};
+
+struct gk20a_ctxsw_bootloader_desc {
+ u32 start_offset;
+ u32 size;
+ u32 imem_offset;
+ u32 entry_point;
+};
+
+struct gpu_ops;
+void gk20a_init_gr(struct gpu_ops *gops);
+int gk20a_init_gr_support(struct gk20a *g);
+void gk20a_gr_reset(struct gk20a *g);
+
+int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
+
+int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
+
+struct nvhost_alloc_obj_ctx_args;
+struct nvhost_free_obj_ctx_args;
+
+int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
+ struct nvhost_alloc_obj_ctx_args *args);
+int gk20a_free_obj_ctx(struct channel_gk20a *c,
+ struct nvhost_free_obj_ctx_args *args);
+void gk20a_free_channel_ctx(struct channel_gk20a *c);
+
+int gk20a_gr_isr(struct gk20a *g);
+int gk20a_gr_nonstall_isr(struct gk20a *g);
+
+/* zcull */
+u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr);
+int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
+ struct channel_gk20a *c, u64 zcull_va, u32 mode);
+int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
+ struct gr_zcull_info *zcull_params);
+/* zbc */
+int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *zbc_val);
+int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_query_params *query_params);
+int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
+ struct zbc_entry *zbc_val);
+int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr);
+int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr);
+
+/* pmu */
+int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size);
+int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr);
+int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va);
+
+void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine);
+void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine);
+
+/* sm */
+bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
+
+#define gr_gk20a_elpg_protected_call(g, func) \
+ ({ \
+ int err; \
+ if (support_gk20a_pmu()) \
+ gk20a_pmu_disable_elpg(g); \
+ err = func; \
+ if (support_gk20a_pmu()) \
+ gk20a_pmu_enable_elpg(g); \
+ err; \
+ })
+
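A short usage sketch (editorial, not from the patch): the macro is a GCC statement expression, so it yields the wrapped call's return value while ELPG is held off around it. `g` and `size` are assumed locals in the caller.

/* Sketch: query the FECS reglist image size with ELPG disabled for the call. */
u32 size;
int ret = gr_gk20a_elpg_protected_call(g,
		gr_gk20a_fecs_get_reglist_img_size(g, &size));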
+int gk20a_gr_suspend(struct gk20a *g);
+
+struct nvhost_dbg_gpu_reg_op;
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+ struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+ u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
+ u32 addr,
+ u32 max_offsets,
+ u32 *offsets, u32 *offset_addrs,
+ u32 *num_offsets,
+ bool is_quad, u32 quad);
+int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
+ struct channel_gk20a *c,
+ bool enable_smpc_ctxsw);
+
+struct channel_ctx_gk20a;
+int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
+ u32 addr, u32 data, bool patch);
+int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx);
+int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx);
+void gr_gk20a_commit_global_pagepool(struct gk20a *g,
+ struct channel_ctx_gk20a *ch_ctx,
+ u64 addr, u32 size, bool patch);
+void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
+void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
+void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
+ u32 *num_sm_dsm_perf_regs,
+ u32 **sm_dsm_perf_regs,
+ u32 *perf_register_stride);
+void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
+ u32 *num_sm_dsm_perf_regs,
+ u32 **sm_dsm_perf_regs,
+ u32 *perf_register_stride);
+int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
+#endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
new file mode 100644
index 000000000000..a82a1ee7caa8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -0,0 +1,179 @@
+/*
+ * GK20A Graphics Context Pri Register Addressing
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _NVHOST_GR_PRI_GK20A_H_
+#define _NVHOST_GR_PRI_GK20A_H_
+
+/*
+ * These convenience macros are generally for use in the management/modification
+ * of the context state store for gr/compute contexts.
+ */
+
+/*
+ * GPC pri addressing
+ */
+static inline u32 pri_gpccs_addr_width(void)
+{
+ return 15; /*from where?*/
+}
+static inline u32 pri_gpccs_addr_mask(u32 addr)
+{
+ return addr & ((1 << pri_gpccs_addr_width()) - 1);
+}
+static inline u32 pri_gpc_addr(u32 addr, u32 gpc)
+{
+ return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr;
+}
+static inline bool pri_is_gpc_addr_shared(u32 addr)
+{
+ return (addr >= proj_gpc_shared_base_v()) &&
+ (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v());
+}
+static inline bool pri_is_gpc_addr(u32 addr)
+{
+ return ((addr >= proj_gpc_base_v()) &&
+ (addr < proj_gpc_base_v() +
+ proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) ||
+ pri_is_gpc_addr_shared(addr);
+}
+static inline u32 pri_get_gpc_num(u32 addr)
+{
+ u32 i, start;
+ u32 num_gpcs = proj_scal_litter_num_gpcs_v();
+
+ for (i = 0; i < num_gpcs; i++) {
+ start = proj_gpc_base_v() + (i * proj_gpc_stride_v());
+ if ((addr >= start) && (addr < (start + proj_gpc_stride_v())))
+ return i;
+ }
+ return 0;
+}
+/*
+ * TPC pri addressing
+ */
+static inline u32 pri_tpccs_addr_width(void)
+{
+ return 11; /* from where? */
+}
+static inline u32 pri_tpccs_addr_mask(u32 addr)
+{
+ return addr & ((1 << pri_tpccs_addr_width()) - 1);
+}
+static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc)
+{
+ return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) +
+ proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) +
+ addr;
+}
+static inline bool pri_is_tpc_addr_shared(u32 addr)
+{
+ return (addr >= proj_tpc_in_gpc_shared_base_v()) &&
+ (addr < (proj_tpc_in_gpc_shared_base_v() +
+ proj_tpc_in_gpc_stride_v()));
+}
+static inline bool pri_is_tpc_addr(u32 addr)
+{
+ return ((addr >= proj_tpc_in_gpc_base_v()) &&
+ (addr < proj_tpc_in_gpc_base_v() + (proj_scal_litter_num_tpc_per_gpc_v() *
+ proj_tpc_in_gpc_stride_v())))
+ ||
+ pri_is_tpc_addr_shared(addr);
+}
+static inline u32 pri_get_tpc_num(u32 addr)
+{
+ u32 i, start;
+ u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v();
+
+ for (i = 0; i < num_tpcs; i++) {
+ start = proj_tpc_in_gpc_base_v() + (i * proj_tpc_in_gpc_stride_v());
+ if ((addr >= start) && (addr < (start + proj_tpc_in_gpc_stride_v())))
+ return i;
+ }
+ return 0;
+}
+
+/*
+ * BE pri addressing
+ */
+static inline u32 pri_becs_addr_width(void)
+{
+ return 10;/* from where? */
+}
+static inline u32 pri_becs_addr_mask(u32 addr)
+{
+ return addr & ((1 << pri_becs_addr_width()) - 1);
+}
+static inline bool pri_is_be_addr_shared(u32 addr)
+{
+ return (addr >= proj_rop_shared_base_v()) &&
+ (addr < proj_rop_shared_base_v() + proj_rop_stride_v());
+}
+static inline u32 pri_be_shared_addr(u32 addr)
+{
+ return proj_rop_shared_base_v() + pri_becs_addr_mask(addr);
+}
+static inline bool pri_is_be_addr(u32 addr)
+{
+ return ((addr >= proj_rop_base_v()) &&
+ (addr < proj_rop_base_v()+proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) ||
+ pri_is_be_addr_shared(addr);
+}
+
+static inline u32 pri_get_be_num(u32 addr)
+{
+ u32 i, start;
+ u32 num_fbps = proj_scal_litter_num_fbps_v();
+ for (i = 0; i < num_fbps; i++) {
+ start = proj_rop_base_v() + (i * proj_rop_stride_v());
+ if ((addr >= start) && (addr < (start + proj_rop_stride_v())))
+ return i;
+ }
+ return 0;
+}
+
+/*
+ * PPC pri addressing
+ */
+static inline u32 pri_ppccs_addr_width(void)
+{
+ return 9; /* from where? */
+}
+static inline u32 pri_ppccs_addr_mask(u32 addr)
+{
+ return addr & ((1 << pri_ppccs_addr_width()) - 1);
+}
+static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc)
+{
+ return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) +
+ proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr;
+}
+
+enum ctxsw_addr_type {
+ CTXSW_ADDR_TYPE_SYS = 0,
+ CTXSW_ADDR_TYPE_GPC = 1,
+ CTXSW_ADDR_TYPE_TPC = 2,
+ CTXSW_ADDR_TYPE_BE = 3,
+ CTXSW_ADDR_TYPE_PPC = 4
+};
+
+#define PRI_BROADCAST_FLAGS_NONE 0
+#define PRI_BROADCAST_FLAGS_GPC BIT(0)
+#define PRI_BROADCAST_FLAGS_TPC BIT(1)
+#define PRI_BROADCAST_FLAGS_BE BIT(2)
+#define PRI_BROADCAST_FLAGS_PPC BIT(3)
+
+#endif /*_NVHOST_GR_PRI_GK20A_H_ */
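As an illustration of the helpers above (editorial sketch, not part of the patch), this composes the absolute pri address of a TPC-relative register for a given (gpc, tpc) pair; `tpc_local_reg` stands in for any TPC-relative register offset.

/* Sketch only: unicast TPC pri address for (gpc, tpc). */
static inline u32 example_tpc_unicast_addr(u32 tpc_local_reg, u32 gpc, u32 tpc)
{
	u32 addr = pri_tpc_addr(pri_tpccs_addr_mask(tpc_local_reg), gpc, tpc);

	/* pri_get_gpc_num(addr) recovers `gpc`; masking with
	 * pri_gpccs_addr_mask(addr) yields the GPC-relative part that
	 * pri_is_tpc_addr()/pri_get_tpc_num() operate on. */
	return addr;
}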
diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c
new file mode 100644
index 000000000000..dea740c2da1a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal.c
@@ -0,0 +1,33 @@
+/*
+ * NVIDIA GPU HAL interface.
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a.h"
+#include "hal_gk20a.h"
+
+int gpu_init_hal(struct gk20a *g)
+{
+ u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
+ switch (ver) {
+ case GK20A_GPUID_GK20A:
+ gk20a_dbg_info("gk20a detected");
+ gk20a_init_hal(&g->ops);
+ break;
+ default:
+ gk20a_err(&g->dev->dev, "no support for %x", ver);
+ return -ENODEV;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/hal.h b/drivers/gpu/nvgpu/gk20a/hal.h
new file mode 100644
index 000000000000..da02cf5f69d7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal.h
@@ -0,0 +1,25 @@
+/*
+ * NVIDIA GPU Hardware Abstraction Layer function definitions.
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __HAL_GPU__
+#define __HAL_GPU__
+
+#include <linux/kernel.h>
+
+struct gk20a;
+
+int gpu_init_hal(struct gk20a *g);
+
+#endif /* __HAL_GPU__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
new file mode 100644
index 000000000000..b3e9b0e6ee42
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -0,0 +1,50 @@
+/*
+ * drivers/video/tegra/host/gk20a/hal_gk20a.c
+ *
+ * GK20A Tegra HAL interface.
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "hal_gk20a.h"
+#include "ltc_gk20a.h"
+#include "fb_gk20a.h"
+#include "gk20a.h"
+#include "gk20a_gating_reglist.h"
+#include "channel_gk20a.h"
+
+struct gpu_ops gk20a_ops = {
+ .clock_gating = {
+ .slcg_gr_load_gating_prod =
+ gr_gk20a_slcg_gr_load_gating_prod,
+ .slcg_perf_load_gating_prod =
+ gr_gk20a_slcg_perf_load_gating_prod,
+ .blcg_gr_load_gating_prod =
+ gr_gk20a_blcg_gr_load_gating_prod,
+ .pg_gr_load_gating_prod =
+ gr_gk20a_pg_gr_load_gating_prod,
+ .slcg_therm_load_gating_prod =
+ gr_gk20a_slcg_therm_load_gating_prod,
+ }
+};
+
+int gk20a_init_hal(struct gpu_ops *gops)
+{
+ *gops = gk20a_ops;
+ gk20a_init_ltc(gops);
+ gk20a_init_gr(gops);
+ gk20a_init_fb(gops);
+ gk20a_init_fifo(gops);
+ gops->name = "gk20a";
+
+ return 0;
+}
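A hedged sketch of the intended extension pattern (the chip name and the override below are hypothetical, not from this patch): a later chip's HAL init can start from the gk20a defaults assembled here and override only the hooks that differ.

/* Hypothetical example only. */
int gxxxb_init_hal(struct gpu_ops *gops)
{
	gk20a_init_hal(gops);			/* inherit gk20a defaults */
	gops->gr.cb_size_default = gxxxb_gr_cb_size_default;	/* hypothetical override */
	gops->name = "gxxxb";
	return 0;
}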
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h
new file mode 100644
index 000000000000..db77a4a75320
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h
@@ -0,0 +1,28 @@
+/*
+ * drivers/video/tegra/host/gk20a/hal_gk20a.h
+ *
+ * GK20A Hardware Abstraction Layer function definitions.
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __HAL_GK20A__
+#define __HAL_GK20A__
+
+#include <linux/kernel.h>
+
+struct gpu_ops;
+struct gk20a;
+
+int gk20a_init_hal(struct gpu_ops *gops);
+
+#endif /* __HAL_GK20A__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
new file mode 100644
index 000000000000..ebf8a873e2cf
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_bus_gk20a_h_
+#define _hw_bus_gk20a_h_
+
+static inline u32 bus_bar1_block_r(void)
+{
+ return 0x00001704;
+}
+static inline u32 bus_bar1_block_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 bus_bar1_block_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 bus_bar1_block_mode_virtual_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 bus_bar1_block_ptr_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 bus_intr_0_r(void)
+{
+ return 0x00001100;
+}
+static inline u32 bus_intr_0_pri_squash_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 bus_intr_0_pri_fecserr_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 bus_intr_0_pri_timeout_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 bus_intr_en_0_r(void)
+{
+ return 0x00001140;
+}
+static inline u32 bus_intr_en_0_pri_squash_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 bus_intr_en_0_pri_fecserr_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 bus_intr_en_0_pri_timeout_m(void)
+{
+ return 0x1 << 3;
+}
+#endif
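To make the naming convention documented above concrete, here is an editorial sketch (not part of the patch) that composes a BAR1 block write from the _r()/_f()/_v() helpers; gk20a_writel() is the register-write helper used elsewhere in this patch, and `inst_pa` is a hypothetical instance-block physical address.

/* Sketch: program the BAR1 block pointer using the accessors above. */
static void example_program_bar1_block(struct gk20a *g, u64 inst_pa)
{
	u32 ptr = (u32)(inst_pa >> bus_bar1_block_ptr_shift_v());

	gk20a_writel(g, bus_bar1_block_r(),
		     bus_bar1_block_target_vid_mem_f() |
		     bus_bar1_block_mode_virtual_f() |
		     bus_bar1_block_ptr_f(ptr));
}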
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h
new file mode 100644
index 000000000000..573329f1fc2c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ccsr_gk20a.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_ccsr_gk20a_h_
+#define _hw_ccsr_gk20a_h_
+
+static inline u32 ccsr_channel_inst_r(u32 i)
+{
+ return 0x00800000 + i*8;
+}
+static inline u32 ccsr_channel_inst__size_1_v(void)
+{
+ return 0x00000080;
+}
+static inline u32 ccsr_channel_inst_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 ccsr_channel_inst_bind_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 ccsr_channel_inst_bind_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 ccsr_channel_r(u32 i)
+{
+ return 0x00800004 + i*8;
+}
+static inline u32 ccsr_channel__size_1_v(void)
+{
+ return 0x00000080;
+}
+static inline u32 ccsr_channel_enable_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 ccsr_channel_enable_set_f(u32 v)
+{
+ return (v & 0x1) << 10;
+}
+static inline u32 ccsr_channel_enable_set_true_f(void)
+{
+ return 0x400;
+}
+static inline u32 ccsr_channel_enable_clr_true_f(void)
+{
+ return 0x800;
+}
+static inline u32 ccsr_channel_runlist_f(u32 v)
+{
+ return (v & 0xf) << 16;
+}
+static inline u32 ccsr_channel_status_v(u32 r)
+{
+ return (r >> 24) & 0xf;
+}
+static inline u32 ccsr_channel_busy_v(u32 r)
+{
+ return (r >> 28) & 0x1;
+}
+#endif
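And the read direction of the same convention (editorial sketch, not part of the patch): gk20a_readl() is the register-read helper used elsewhere in this patch, and `chid` is a hardware channel id.

/* Sketch: check whether a channel is currently enabled. */
static bool example_channel_enabled(struct gk20a *g, u32 chid)
{
	u32 reg = gk20a_readl(g, ccsr_channel_r(chid));

	return ccsr_channel_enable_v(reg) != 0;
}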
diff --git a/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h
new file mode 100644
index 000000000000..66bf01b0e2d1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_chiplet_pwr_gk20a.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_chiplet_pwr_gk20a_h_
+#define _hw_chiplet_pwr_gk20a_h_
+
+static inline u32 chiplet_pwr_gpcs_weight_6_r(void)
+{
+ return 0x0010e018;
+}
+static inline u32 chiplet_pwr_gpcs_weight_7_r(void)
+{
+ return 0x0010e01c;
+}
+static inline u32 chiplet_pwr_gpcs_config_1_r(void)
+{
+ return 0x0010e03c;
+}
+static inline u32 chiplet_pwr_gpcs_config_1_ba_enable_yes_f(void)
+{
+ return 0x1;
+}
+static inline u32 chiplet_pwr_fbps_weight_0_r(void)
+{
+ return 0x0010e100;
+}
+static inline u32 chiplet_pwr_fbps_weight_1_r(void)
+{
+ return 0x0010e104;
+}
+static inline u32 chiplet_pwr_fbps_config_1_r(void)
+{
+ return 0x0010e13c;
+}
+static inline u32 chiplet_pwr_fbps_config_1_ba_enable_yes_f(void)
+{
+ return 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
new file mode 100644
index 000000000000..e2a4f2f21651
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_ctxsw_prog_gk20a_h_
+#define _hw_ctxsw_prog_gk20a_h_
+
+static inline u32 ctxsw_prog_fecs_header_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
+{
+ return 0x00000008;
+}
+static inline u32 ctxsw_prog_main_image_patch_count_o(void)
+{
+ return 0x00000010;
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
+{
+ return 0x00000014;
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
+{
+ return 0x00000018;
+}
+static inline u32 ctxsw_prog_main_image_zcull_o(void)
+{
+ return 0x0000001c;
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
+{
+ return 0x00000020;
+}
+static inline u32 ctxsw_prog_main_image_pm_o(void)
+{
+ return 0x00000028;
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
+{
+ return 0x7 << 0;
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
+{
+ return (r >> 0) & 0x7;
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
+{
+ return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
+{
+ return 0x7 << 3;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_v(u32 r)
+{
+ return (r >> 3) & 0x7;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
+{
+ return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
+{
+ return 0x8;
+}
+static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
+{
+ return 0x0000002c;
+}
+static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
+{
+ return 0x000000f4;
+}
+static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
+{
+ return 0x000000f8;
+}
+static inline u32 ctxsw_prog_main_image_magic_value_o(void)
+{
+ return 0x000000fc;
+}
+static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
+{
+ return 0x600dc0de;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
+{
+ return 0x000000a0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
+{
+ return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_none_f(void)
+{
+ return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
+{
+ return 0x2;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
+{
+ return 0x000000a4;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
+{
+ return 0x000000a8;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_o(void)
+{
+ return 0x0000003c;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_enabled_f(void)
+{
+ return 0x8;
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
+{
+ return 0x0000000c;
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
+{
+ return 0x000000f4;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
+{
+ return (r >> 16) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
+{
+ return 0x000000f8;
+}
+static inline u32 ctxsw_prog_local_magic_value_o(void)
+{
+ return 0x000000fc;
+}
+static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
+{
+ return 0xad0becab;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
+{
+ return 0x000000ec;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
+{
+ return (r >> 16) & 0xff;
+}
+static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
+{
+ return 0x00000005;
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 ctxsw_prog_extended_num_smpc_quadrants_v(void)
+{
+ return 0x00000004;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
new file mode 100644
index 000000000000..b7edc29d8d7e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_fb_gk20a_h_
+#define _hw_fb_gk20a_h_
+
+static inline u32 fb_mmu_ctrl_r(void)
+{
+ return 0x00100c80;
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
+{
+ return (r >> 15) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
+{
+ return (r >> 16) & 0xff;
+}
+static inline u32 fb_mmu_invalidate_pdb_r(void)
+{
+ return 0x00100cb8;
+}
+static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
+{
+ return (v & 0xfffffff) << 4;
+}
+static inline u32 fb_mmu_invalidate_r(void)
+{
+ return 0x00100cbc;
+}
+static inline u32 fb_mmu_invalidate_all_va_true_f(void)
+{
+ return 0x1;
+}
+static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
+{
+ return 0x2;
+}
+static inline u32 fb_mmu_invalidate_trigger_s(void)
+{
+ return 1;
+}
+static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 fb_mmu_invalidate_trigger_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_trigger_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 fb_mmu_debug_wr_r(void)
+{
+ return 0x00100cc8;
+}
+static inline u32 fb_mmu_debug_wr_aperture_s(void)
+{
+ return 2;
+}
+static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 fb_mmu_debug_wr_aperture_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_debug_wr_vol_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_debug_wr_vol_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fb_mmu_debug_wr_vol_true_f(void)
+{
+ return 0x4;
+}
+static inline u32 fb_mmu_debug_wr_addr_v(u32 r)
+{
+ return (r >> 4) & 0xfffffff;
+}
+static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 fb_mmu_debug_rd_r(void)
+{
+ return 0x00100ccc;
+}
+static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_debug_rd_vol_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 fb_mmu_debug_rd_addr_v(u32 r)
+{
+ return (r >> 4) & 0xfffffff;
+}
+static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 fb_mmu_debug_ctrl_r(void)
+{
+ return 0x00100cc4;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
+{
+ return (r >> 16) & 0x1;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fb_mmu_vpr_info_r(void)
+{
+ return 0x00100cd0;
+}
+static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
+{
+ return 0x00000001;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h
new file mode 100644
index 000000000000..a39d3c51e1ea
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_fifo_gk20a_h_
+#define _hw_fifo_gk20a_h_
+
+static inline u32 fifo_bar1_base_r(void)
+{
+ return 0x00002254;
+}
+static inline u32 fifo_bar1_base_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 fifo_bar1_base_ptr_align_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 fifo_bar1_base_valid_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_bar1_base_valid_true_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 fifo_runlist_base_r(void)
+{
+ return 0x00002270;
+}
+static inline u32 fifo_runlist_base_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 fifo_runlist_base_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_runlist_r(void)
+{
+ return 0x00002274;
+}
+static inline u32 fifo_runlist_engine_f(u32 v)
+{
+ return (v & 0xf) << 20;
+}
+static inline u32 fifo_eng_runlist_base_r(u32 i)
+{
+ return 0x00002280 + i*8;
+}
+static inline u32 fifo_eng_runlist_base__size_1_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_eng_runlist_r(u32 i)
+{
+ return 0x00002284 + i*8;
+}
+static inline u32 fifo_eng_runlist__size_1_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_eng_runlist_length_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 fifo_eng_runlist_pending_true_f(void)
+{
+ return 0x100000;
+}
+static inline u32 fifo_eng_timeslice_r(u32 i)
+{
+ return 0x00002310 + i*4;
+}
+static inline u32 fifo_eng_timeslice_timeout_128_f(void)
+{
+ return 0x80;
+}
+static inline u32 fifo_eng_timeslice_timescale_3_f(void)
+{
+ return 0x3000;
+}
+static inline u32 fifo_eng_timeslice_enable_true_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 fifo_pb_timeslice_r(u32 i)
+{
+ return 0x00002350 + i*4;
+}
+static inline u32 fifo_pb_timeslice_timeout_16_f(void)
+{
+ return 0x10;
+}
+static inline u32 fifo_pb_timeslice_timescale_0_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_pb_timeslice_enable_true_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 fifo_pbdma_map_r(u32 i)
+{
+ return 0x00002390 + i*4;
+}
+static inline u32 fifo_intr_0_r(void)
+{
+ return 0x00002100;
+}
+static inline u32 fifo_intr_0_bind_error_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 fifo_intr_0_bind_error_reset_f(void)
+{
+ return 0x1;
+}
+static inline u32 fifo_intr_0_pio_error_pending_f(void)
+{
+ return 0x10;
+}
+static inline u32 fifo_intr_0_pio_error_reset_f(void)
+{
+ return 0x10;
+}
+static inline u32 fifo_intr_0_sched_error_pending_f(void)
+{
+ return 0x100;
+}
+static inline u32 fifo_intr_0_sched_error_reset_f(void)
+{
+ return 0x100;
+}
+static inline u32 fifo_intr_0_chsw_error_pending_f(void)
+{
+ return 0x10000;
+}
+static inline u32 fifo_intr_0_chsw_error_reset_f(void)
+{
+ return 0x10000;
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void)
+{
+ return 0x800000;
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void)
+{
+ return 0x800000;
+}
+static inline u32 fifo_intr_0_lb_error_pending_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 fifo_intr_0_lb_error_reset_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
+{
+ return 0x8000000;
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void)
+{
+ return 0x8000000;
+}
+static inline u32 fifo_intr_0_mmu_fault_pending_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 fifo_intr_0_pbdma_intr_pending_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 fifo_intr_0_runlist_event_pending_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 fifo_intr_0_channel_intr_pending_f(void)
+{
+ return 0x80000000;
+}
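+/*
+ * Minimal usage sketch: the _pending_f() constants double as bit masks, so
+ * a raw fifo_intr_0 value (read elsewhere with the driver's register read
+ * accessor) can be tested bit by bit, and the handled bits written back
+ * through the matching _reset_f() constants.
+ */
+static inline bool example_fifo_intr_0_has_mmu_fault(u32 intr_0)
+{
+	return (intr_0 & fifo_intr_0_mmu_fault_pending_f()) != 0;
+}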
+static inline u32 fifo_intr_en_0_r(void)
+{
+ return 0x00002140;
+}
+static inline u32 fifo_intr_en_1_r(void)
+{
+ return 0x00002528;
+}
+static inline u32 fifo_intr_bind_error_r(void)
+{
+ return 0x0000252c;
+}
+static inline u32 fifo_intr_sched_error_r(void)
+{
+ return 0x0000254c;
+}
+static inline u32 fifo_intr_sched_error_code_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void)
+{
+ return 0x0000000a;
+}
+static inline u32 fifo_intr_chsw_error_r(void)
+{
+ return 0x0000256c;
+}
+static inline u32 fifo_intr_mmu_fault_id_r(void)
+{
+ return 0x0000259c;
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_intr_mmu_fault_inst_r(u32 i)
+{
+ return 0x00002800 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r)
+{
+ return (r >> 0) & 0xfffffff;
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 fifo_intr_mmu_fault_lo_r(u32 i)
+{
+ return 0x00002804 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_hi_r(u32 i)
+{
+ return 0x00002808 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
+{
+ return 0x0000280c + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
+{
+ return (r >> 0) & 0xf;
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r)
+{
+ return (r >> 6) & 0x1;
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
+{
+ return (r >> 8) & 0x1f;
+}
+static inline u32 fifo_intr_pbdma_id_r(void)
+{
+ return 0x000025a0;
+}
+static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
+{
+ return (v & 0x1) << (0 + i*1);
+}
+static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_intr_runlist_r(void)
+{
+ return 0x00002a00;
+}
+static inline u32 fifo_fb_timeout_r(void)
+{
+ return 0x00002a04;
+}
+static inline u32 fifo_fb_timeout_period_m(void)
+{
+ return 0x3fffffff << 0;
+}
+static inline u32 fifo_fb_timeout_period_max_f(void)
+{
+ return 0x3fffffff;
+}
+static inline u32 fifo_pb_timeout_r(void)
+{
+ return 0x00002a08;
+}
+static inline u32 fifo_pb_timeout_detection_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 fifo_eng_timeout_r(void)
+{
+ return 0x00002a0c;
+}
+static inline u32 fifo_eng_timeout_period_m(void)
+{
+ return 0x7fffffff << 0;
+}
+static inline u32 fifo_eng_timeout_period_max_f(void)
+{
+ return 0x7fffffff;
+}
+static inline u32 fifo_eng_timeout_detection_m(void)
+{
+	return 0x1U << 31;
+}
+static inline u32 fifo_eng_timeout_detection_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 fifo_eng_timeout_detection_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_error_sched_disable_r(void)
+{
+ return 0x0000262c;
+}
+static inline u32 fifo_sched_disable_r(void)
+{
+ return 0x00002630;
+}
+static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i)
+{
+ return (v & 0x1) << (0 + i*1);
+}
+static inline u32 fifo_sched_disable_runlist_m(u32 i)
+{
+ return 0x1 << (0 + i*1);
+}
+static inline u32 fifo_sched_disable_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_preempt_r(void)
+{
+ return 0x00002634;
+}
+static inline u32 fifo_preempt_pending_true_f(void)
+{
+ return 0x100000;
+}
+static inline u32 fifo_preempt_type_channel_f(void)
+{
+ return 0x0;
+}
+static inline u32 fifo_preempt_chid_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 fifo_trigger_mmu_fault_r(u32 i)
+{
+ return 0x00002a30 + i*4;
+}
+static inline u32 fifo_trigger_mmu_fault_id_f(u32 v)
+{
+ return (v & 0x1f) << 0;
+}
+static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v)
+{
+ return (v & 0x1) << 8;
+}
+static inline u32 fifo_engine_status_r(u32 i)
+{
+ return 0x00002640 + i*8;
+}
+static inline u32 fifo_engine_status__size_1_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 fifo_engine_status_id_v(u32 r)
+{
+ return (r >> 0) & 0xfff;
+}
+static inline u32 fifo_engine_status_id_type_v(u32 r)
+{
+ return (r >> 12) & 0x1;
+}
+static inline u32 fifo_engine_status_id_type_chid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_engine_status_ctx_status_v(u32 r)
+{
+ return (r >> 13) & 0x7;
+}
+static inline u32 fifo_engine_status_ctx_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void)
+{
+ return 0x00000005;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void)
+{
+ return 0x00000006;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void)
+{
+ return 0x00000007;
+}
+static inline u32 fifo_engine_status_next_id_v(u32 r)
+{
+ return (r >> 16) & 0xfff;
+}
+static inline u32 fifo_engine_status_next_id_type_v(u32 r)
+{
+ return (r >> 28) & 0x1;
+}
+static inline u32 fifo_engine_status_next_id_type_chid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_engine_status_faulted_v(u32 r)
+{
+ return (r >> 30) & 0x1;
+}
+static inline u32 fifo_engine_status_faulted_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_engine_status_engine_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 fifo_engine_status_engine_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_engine_status_engine_busy_v(void)
+{
+ return 0x00000001;
+}
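+/*
+ * Minimal usage sketch: the _v() helpers pull individual fields out of a
+ * raw engine status word so they can be compared directly against the
+ * unshifted _..._v() constants; the status value itself is assumed to have
+ * been read from fifo_engine_status_r(i) elsewhere.
+ */
+static inline bool example_fifo_engine_is_faulted_busy(u32 status)
+{
+	return fifo_engine_status_engine_v(status) ==
+			fifo_engine_status_engine_busy_v() &&
+		fifo_engine_status_faulted_v(status) ==
+			fifo_engine_status_faulted_true_v();
+}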
+static inline u32 fifo_engine_status_ctxsw_v(u32 r)
+{
+ return (r >> 15) & 0x1;
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_f(void)
+{
+ return 0x8000;
+}
+static inline u32 fifo_pbdma_status_r(u32 i)
+{
+ return 0x00003080 + i*4;
+}
+static inline u32 fifo_pbdma_status__size_1_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_pbdma_status_id_v(u32 r)
+{
+ return (r >> 0) & 0xfff;
+}
+static inline u32 fifo_pbdma_status_id_type_v(u32 r)
+{
+ return (r >> 12) & 0x1;
+}
+static inline u32 fifo_pbdma_status_id_type_chid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
+{
+ return (r >> 13) & 0x7;
+}
+static inline u32 fifo_pbdma_status_chan_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void)
+{
+ return 0x00000005;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void)
+{
+ return 0x00000006;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void)
+{
+ return 0x00000007;
+}
+static inline u32 fifo_pbdma_status_next_id_v(u32 r)
+{
+ return (r >> 16) & 0xfff;
+}
+static inline u32 fifo_pbdma_status_next_id_type_v(u32 r)
+{
+ return (r >> 28) & 0x1;
+}
+static inline u32 fifo_pbdma_status_next_id_type_chid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 fifo_pbdma_status_chsw_v(u32 r)
+{
+ return (r >> 15) & 0x1;
+}
+static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
+{
+ return 0x00000001;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h
new file mode 100644
index 000000000000..0aeb11f92bf2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_flush_gk20a.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_flush_gk20a_h_
+#define _hw_flush_gk20a_h_
+
+static inline u32 flush_l2_system_invalidate_r(void)
+{
+ return 0x00070004;
+}
+static inline u32 flush_l2_system_invalidate_pending_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_f(void)
+{
+ return 0x1;
+}
+static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 flush_l2_system_invalidate_outstanding_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 flush_l2_flush_dirty_r(void)
+{
+ return 0x00070010;
+}
+static inline u32 flush_l2_flush_dirty_pending_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_f(void)
+{
+ return 0x0;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_f(void)
+{
+ return 0x1;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 flush_fb_flush_r(void)
+{
+ return 0x00070000;
+}
+static inline u32 flush_fb_flush_pending_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 flush_fb_flush_pending_busy_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 flush_fb_flush_pending_busy_f(void)
+{
+ return 0x1;
+}
+static inline u32 flush_fb_flush_outstanding_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 flush_fb_flush_outstanding_true_v(void)
+{
+ return 0x00000001;
+}
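+/*
+ * Minimal usage sketch: flush completion is typically detected by
+ * re-reading flush_fb_flush_r() and checking the pending/outstanding
+ * fields with the _v() helpers until neither reports busy.
+ */
+static inline bool example_flush_fb_flush_done(u32 reg)
+{
+	return flush_fb_flush_pending_v(reg) !=
+			flush_fb_flush_pending_busy_v() &&
+		flush_fb_flush_outstanding_v(reg) !=
+			flush_fb_flush_outstanding_true_v();
+}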
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
new file mode 100644
index 000000000000..e0118946aec6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_gmmu_gk20a.h
@@ -0,0 +1,1141 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_gmmu_gk20a_h_
+#define _hw_gmmu_gk20a_h_
+
+static inline u32 gmmu_pde_aperture_big_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pde_aperture_big_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
+{
+ return 0x1;
+}
+static inline u32 gmmu_pde_size_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pde_size_full_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pde_address_big_sys_f(u32 v)
+{
+ return (v & 0xfffffff) << 4;
+}
+static inline u32 gmmu_pde_address_big_sys_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pde_aperture_small_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pde_aperture_small_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
+{
+ return 0x1;
+}
+static inline u32 gmmu_pde_vol_small_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pde_vol_small_true_f(void)
+{
+ return 0x4;
+}
+static inline u32 gmmu_pde_vol_small_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pde_vol_big_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pde_vol_big_true_f(void)
+{
+ return 0x8;
+}
+static inline u32 gmmu_pde_vol_big_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pde_address_small_sys_f(u32 v)
+{
+ return (v & 0xfffffff) << 4;
+}
+static inline u32 gmmu_pde_address_small_sys_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pde_address_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 gmmu_pde__size_v(void)
+{
+ return 0x00000008;
+}
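+/*
+ * Minimal usage sketch: PDE fields are spread across two 32-bit words; the
+ * _w() helpers give the word index and the _f() helpers the shifted field
+ * values. The page-table address argument is hypothetical and assumed to
+ * be already right-shifted by gmmu_pde_address_shift_v().
+ */
+static inline u32 example_gmmu_pde_small_word1(u32 pt_addr_shifted)
+{
+	/* stored at word index gmmu_pde_address_small_sys_w() of the PDE */
+	return gmmu_pde_aperture_small_video_memory_f() |
+		gmmu_pde_vol_small_true_f() |
+		gmmu_pde_address_small_sys_f(pt_addr_shifted);
+}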
+static inline u32 gmmu_pte__size_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gmmu_pte_valid_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pte_valid_true_f(void)
+{
+ return 0x1;
+}
+static inline u32 gmmu_pte_address_sys_f(u32 v)
+{
+ return (v & 0xfffffff) << 4;
+}
+static inline u32 gmmu_pte_address_sys_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pte_vol_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_vol_true_f(void)
+{
+ return 0x1;
+}
+static inline u32 gmmu_pte_vol_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pte_aperture_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_aperture_video_memory_f(void)
+{
+ return 0x0;
+}
+static inline u32 gmmu_pte_read_only_w(void)
+{
+ return 0;
+}
+static inline u32 gmmu_pte_read_only_true_f(void)
+{
+ return 0x4;
+}
+static inline u32 gmmu_pte_write_disable_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_write_disable_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gmmu_pte_read_disable_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_read_disable_true_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gmmu_pte_comptagline_f(u32 v)
+{
+ return (v & 0x1ffff) << 12;
+}
+static inline u32 gmmu_pte_comptagline_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_address_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 gmmu_pte_kind_f(u32 v)
+{
+ return (v & 0xff) << 4;
+}
+static inline u32 gmmu_pte_kind_w(void)
+{
+ return 1;
+}
+static inline u32 gmmu_pte_kind_invalid_v(void)
+{
+ return 0x000000ff;
+}
+static inline u32 gmmu_pte_kind_pitch_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gmmu_pte_kind_z16_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gmmu_pte_kind_z16_2c_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void)
+{
+ return 0x00000005;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void)
+{
+ return 0x00000006;
+}
+static inline u32 gmmu_pte_kind_z16_2z_v(void)
+{
+ return 0x00000007;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void)
+{
+ return 0x00000009;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void)
+{
+ return 0x0000000a;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void)
+{
+ return 0x0000000b;
+}
+static inline u32 gmmu_pte_kind_z16_4cz_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void)
+{
+ return 0x0000000d;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void)
+{
+ return 0x0000000e;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void)
+{
+ return 0x0000000f;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 gmmu_pte_kind_s8z24_v(void)
+{
+ return 0x00000011;
+}
+static inline u32 gmmu_pte_kind_s8z24_1z_v(void)
+{
+ return 0x00000012;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void)
+{
+ return 0x00000013;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void)
+{
+ return 0x00000014;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void)
+{
+ return 0x00000015;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void)
+{
+ return 0x00000016;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cz_v(void)
+{
+ return 0x00000017;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void)
+{
+ return 0x00000018;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void)
+{
+ return 0x00000019;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void)
+{
+ return 0x0000001a;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void)
+{
+ return 0x0000001b;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cs_v(void)
+{
+ return 0x0000001c;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void)
+{
+ return 0x0000001d;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void)
+{
+ return 0x0000001e;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void)
+{
+ return 0x0000001f;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void)
+{
+ return 0x00000021;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void)
+{
+ return 0x00000022;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void)
+{
+ return 0x00000023;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void)
+{
+ return 0x00000024;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void)
+{
+ return 0x00000025;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void)
+{
+ return 0x00000026;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void)
+{
+ return 0x00000027;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void)
+{
+ return 0x00000028;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void)
+{
+ return 0x00000029;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void)
+{
+ return 0x0000002e;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void)
+{
+ return 0x0000002f;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void)
+{
+ return 0x00000030;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void)
+{
+ return 0x00000031;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void)
+{
+ return 0x00000032;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void)
+{
+ return 0x00000033;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void)
+{
+ return 0x00000034;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void)
+{
+ return 0x00000035;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void)
+{
+ return 0x0000003a;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void)
+{
+ return 0x0000003b;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void)
+{
+ return 0x0000003c;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void)
+{
+ return 0x0000003d;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void)
+{
+ return 0x0000003e;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void)
+{
+ return 0x0000003f;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void)
+{
+ return 0x00000040;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void)
+{
+ return 0x00000041;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void)
+{
+ return 0x00000042;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void)
+{
+ return 0x00000043;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void)
+{
+ return 0x00000044;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void)
+{
+ return 0x00000045;
+}
+static inline u32 gmmu_pte_kind_z24s8_v(void)
+{
+ return 0x00000046;
+}
+static inline u32 gmmu_pte_kind_z24s8_1z_v(void)
+{
+ return 0x00000047;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void)
+{
+ return 0x00000048;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void)
+{
+ return 0x00000049;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void)
+{
+ return 0x0000004a;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void)
+{
+ return 0x0000004b;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cs_v(void)
+{
+ return 0x0000004c;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void)
+{
+ return 0x0000004d;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void)
+{
+ return 0x0000004e;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void)
+{
+ return 0x0000004f;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void)
+{
+ return 0x00000050;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cz_v(void)
+{
+ return 0x00000051;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void)
+{
+ return 0x00000052;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void)
+{
+ return 0x00000053;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void)
+{
+ return 0x00000054;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void)
+{
+ return 0x00000055;
+}
+static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void)
+{
+ return 0x00000056;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void)
+{
+ return 0x00000057;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void)
+{
+ return 0x00000058;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void)
+{
+ return 0x00000059;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void)
+{
+ return 0x0000005a;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void)
+{
+ return 0x0000005b;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_v(void)
+{
+ return 0x0000005c;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void)
+{
+ return 0x0000005d;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void)
+{
+ return 0x0000005e;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void)
+{
+ return 0x00000063;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void)
+{
+ return 0x00000064;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void)
+{
+ return 0x00000065;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void)
+{
+ return 0x00000066;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void)
+{
+ return 0x00000067;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void)
+{
+ return 0x00000068;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void)
+{
+ return 0x00000069;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void)
+{
+ return 0x0000006a;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void)
+{
+ return 0x0000006f;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void)
+{
+ return 0x00000070;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void)
+{
+ return 0x00000071;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void)
+{
+ return 0x00000072;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void)
+{
+ return 0x00000073;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void)
+{
+ return 0x00000074;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void)
+{
+ return 0x00000075;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void)
+{
+ return 0x00000076;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void)
+{
+ return 0x00000077;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void)
+{
+ return 0x00000078;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void)
+{
+ return 0x00000079;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void)
+{
+ return 0x0000007a;
+}
+static inline u32 gmmu_pte_kind_zf32_v(void)
+{
+ return 0x0000007b;
+}
+static inline u32 gmmu_pte_kind_zf32_1z_v(void)
+{
+ return 0x0000007c;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void)
+{
+ return 0x0000007d;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void)
+{
+ return 0x0000007e;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void)
+{
+ return 0x0000007f;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void)
+{
+ return 0x00000080;
+}
+static inline u32 gmmu_pte_kind_zf32_2cs_v(void)
+{
+ return 0x00000081;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void)
+{
+ return 0x00000082;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void)
+{
+ return 0x00000083;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void)
+{
+ return 0x00000084;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void)
+{
+ return 0x00000085;
+}
+static inline u32 gmmu_pte_kind_zf32_2cz_v(void)
+{
+ return 0x00000086;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void)
+{
+ return 0x00000087;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void)
+{
+ return 0x00000088;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void)
+{
+ return 0x00000089;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void)
+{
+ return 0x0000008a;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void)
+{
+ return 0x0000008b;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void)
+{
+ return 0x0000008c;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void)
+{
+ return 0x0000008d;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void)
+{
+ return 0x0000008e;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void)
+{
+ return 0x0000008f;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void)
+{
+ return 0x00000090;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void)
+{
+ return 0x00000091;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void)
+{
+ return 0x00000092;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void)
+{
+ return 0x00000097;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void)
+{
+ return 0x00000098;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void)
+{
+ return 0x00000099;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void)
+{
+ return 0x0000009a;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void)
+{
+ return 0x0000009b;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void)
+{
+ return 0x0000009c;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void)
+{
+ return 0x0000009d;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void)
+{
+ return 0x0000009e;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void)
+{
+ return 0x0000009f;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void)
+{
+ return 0x000000a0;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void)
+{
+ return 0x000000a1;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void)
+{
+ return 0x000000a2;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+ return 0x000000a3;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+ return 0x000000a4;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+ return 0x000000a5;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+ return 0x000000a6;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void)
+{
+ return 0x000000a7;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void)
+{
+ return 0x000000a8;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void)
+{
+ return 0x000000a9;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void)
+{
+ return 0x000000aa;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void)
+{
+ return 0x000000ab;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void)
+{
+ return 0x000000ac;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void)
+{
+ return 0x000000ad;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void)
+{
+ return 0x000000ae;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void)
+{
+ return 0x000000b3;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void)
+{
+ return 0x000000b4;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void)
+{
+ return 0x000000b5;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void)
+{
+ return 0x000000b6;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void)
+{
+ return 0x000000b7;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void)
+{
+ return 0x000000b8;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void)
+{
+ return 0x000000b9;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void)
+{
+ return 0x000000ba;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void)
+{
+ return 0x000000bb;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void)
+{
+ return 0x000000bc;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void)
+{
+ return 0x000000bd;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void)
+{
+ return 0x000000be;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+ return 0x000000bf;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+ return 0x000000c0;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+ return 0x000000c1;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+ return 0x000000c2;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_v(void)
+{
+ return 0x000000c3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void)
+{
+ return 0x000000c4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void)
+{
+ return 0x000000c5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void)
+{
+ return 0x000000c6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void)
+{
+ return 0x000000c7;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void)
+{
+ return 0x000000c8;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void)
+{
+ return 0x000000ce;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void)
+{
+ return 0x000000cf;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void)
+{
+ return 0x000000d0;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void)
+{
+ return 0x000000d1;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void)
+{
+ return 0x000000d2;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void)
+{
+ return 0x000000d3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void)
+{
+ return 0x000000d4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void)
+{
+ return 0x000000d5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void)
+{
+ return 0x000000d6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void)
+{
+ return 0x000000d7;
+}
+static inline u32 gmmu_pte_kind_generic_16bx2_v(void)
+{
+ return 0x000000fe;
+}
+static inline u32 gmmu_pte_kind_c32_2c_v(void)
+{
+ return 0x000000d8;
+}
+static inline u32 gmmu_pte_kind_c32_2cbr_v(void)
+{
+ return 0x000000d9;
+}
+static inline u32 gmmu_pte_kind_c32_2cba_v(void)
+{
+ return 0x000000da;
+}
+static inline u32 gmmu_pte_kind_c32_2cra_v(void)
+{
+ return 0x000000db;
+}
+static inline u32 gmmu_pte_kind_c32_2bra_v(void)
+{
+ return 0x000000dc;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void)
+{
+ return 0x000000dd;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void)
+{
+ return 0x000000de;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2cra_v(void)
+{
+ return 0x000000cc;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void)
+{
+ return 0x000000df;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void)
+{
+ return 0x000000e0;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void)
+{
+ return 0x000000e1;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void)
+{
+ return 0x000000e2;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void)
+{
+ return 0x000000e3;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void)
+{
+ return 0x000000e4;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void)
+{
+ return 0x000000e5;
+}
+static inline u32 gmmu_pte_kind_c64_2c_v(void)
+{
+ return 0x000000e6;
+}
+static inline u32 gmmu_pte_kind_c64_2cbr_v(void)
+{
+ return 0x000000e7;
+}
+static inline u32 gmmu_pte_kind_c64_2cba_v(void)
+{
+ return 0x000000e8;
+}
+static inline u32 gmmu_pte_kind_c64_2cra_v(void)
+{
+ return 0x000000e9;
+}
+static inline u32 gmmu_pte_kind_c64_2bra_v(void)
+{
+ return 0x000000ea;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void)
+{
+ return 0x000000eb;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void)
+{
+ return 0x000000ec;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2cra_v(void)
+{
+ return 0x000000cd;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void)
+{
+ return 0x000000ed;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void)
+{
+ return 0x000000ee;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void)
+{
+ return 0x000000ef;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void)
+{
+ return 0x000000f0;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void)
+{
+ return 0x000000f1;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void)
+{
+ return 0x000000f2;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void)
+{
+ return 0x000000f3;
+}
+static inline u32 gmmu_pte_kind_c128_2c_v(void)
+{
+ return 0x000000f4;
+}
+static inline u32 gmmu_pte_kind_c128_2cr_v(void)
+{
+ return 0x000000f5;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void)
+{
+ return 0x000000f6;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void)
+{
+ return 0x000000f7;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void)
+{
+ return 0x000000f8;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void)
+{
+ return 0x000000f9;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void)
+{
+ return 0x000000fa;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void)
+{
+ return 0x000000fb;
+}
+static inline u32 gmmu_pte_kind_x8c24_v(void)
+{
+ return 0x000000fc;
+}
+static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void)
+{
+ return 0x000000fd;
+}
+static inline u32 gmmu_pte_kind_smsked_message_v(void)
+{
+ return 0x000000ca;
+}
+static inline u32 gmmu_pte_kind_smhost_message_v(void)
+{
+ return 0x000000cb;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
new file mode 100644
index 000000000000..ece7602d43d1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -0,0 +1,3173 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_gr_gk20a_h_
+#define _hw_gr_gk20a_h_
+
+static inline u32 gr_intr_r(void)
+{
+ return 0x00400100;
+}
+static inline u32 gr_intr_notify_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_intr_notify_reset_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_intr_semaphore_pending_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_intr_semaphore_reset_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_intr_semaphore_timeout_not_pending_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_intr_semaphore_timeout_pending_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_intr_semaphore_timeout_reset_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_intr_illegal_method_pending_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_intr_illegal_method_reset_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_intr_illegal_notify_pending_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_intr_illegal_notify_reset_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_intr_illegal_class_pending_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_intr_illegal_class_reset_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_intr_class_error_pending_f(void)
+{
+ return 0x100000;
+}
+static inline u32 gr_intr_class_error_reset_f(void)
+{
+ return 0x100000;
+}
+static inline u32 gr_intr_exception_pending_f(void)
+{
+ return 0x200000;
+}
+static inline u32 gr_intr_exception_reset_f(void)
+{
+ return 0x200000;
+}
+static inline u32 gr_intr_firmware_method_pending_f(void)
+{
+ return 0x100;
+}
+static inline u32 gr_intr_firmware_method_reset_f(void)
+{
+ return 0x100;
+}
+static inline u32 gr_intr_nonstall_r(void)
+{
+ return 0x00400120;
+}
+static inline u32 gr_intr_nonstall_trap_pending_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_intr_en_r(void)
+{
+ return 0x0040013c;
+}
+static inline u32 gr_exception_r(void)
+{
+ return 0x00400108;
+}
+static inline u32 gr_exception_fe_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 gr_exception_gpc_m(void)
+{
+ return 0x1 << 24;
+}
+static inline u32 gr_exception1_r(void)
+{
+ return 0x00400118;
+}
+static inline u32 gr_exception1_gpc_0_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_exception2_r(void)
+{
+ return 0x0040011c;
+}
+static inline u32 gr_exception_en_r(void)
+{
+ return 0x00400138;
+}
+static inline u32 gr_exception_en_fe_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 gr_exception1_en_r(void)
+{
+ return 0x00400130;
+}
+static inline u32 gr_exception2_en_r(void)
+{
+ return 0x00400134;
+}
+static inline u32 gr_gpfifo_ctl_r(void)
+{
+ return 0x00400500;
+}
+static inline u32 gr_gpfifo_ctl_access_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 gr_gpfifo_ctl_access_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpfifo_ctl_access_enabled_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v)
+{
+ return (v & 0x1) << 16;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_trapped_addr_r(void)
+{
+ return 0x00400704;
+}
+static inline u32 gr_trapped_addr_mthd_v(u32 r)
+{
+ return (r >> 2) & 0xfff;
+}
+static inline u32 gr_trapped_addr_subch_v(u32 r)
+{
+ return (r >> 16) & 0x7;
+}
+static inline u32 gr_trapped_data_lo_r(void)
+{
+ return 0x00400708;
+}
+static inline u32 gr_trapped_data_hi_r(void)
+{
+ return 0x0040070c;
+}
+static inline u32 gr_status_r(void)
+{
+ return 0x00400700;
+}
+static inline u32 gr_status_fe_method_lower_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 gr_status_fe_method_lower_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_status_mask_r(void)
+{
+ return 0x00400610;
+}
+static inline u32 gr_engine_status_r(void)
+{
+ return 0x0040060c;
+}
+static inline u32 gr_engine_status_value_busy_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_pipe_bundle_address_r(void)
+{
+ return 0x00400200;
+}
+static inline u32 gr_pipe_bundle_address_value_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pipe_bundle_data_r(void)
+{
+ return 0x00400204;
+}
+static inline u32 gr_pipe_bundle_config_r(void)
+{
+ return 0x00400208;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_fe_hww_esr_r(void)
+{
+ return 0x00404000;
+}
+static inline u32 gr_fe_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_fe_hww_esr_en_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_fe_go_idle_timeout_r(void)
+{
+ return 0x00404154;
+}
+static inline u32 gr_fe_go_idle_timeout_count_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fe_object_table_r(u32 i)
+{
+ return 0x00404200 + i*4;
+}
+static inline u32 gr_fe_object_table_nvclass_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_r(void)
+{
+ return 0x00404488;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_pri_mme_shadow_raw_data_r(void)
+{
+ return 0x0040448c;
+}
+static inline u32 gr_mme_hww_esr_r(void)
+{
+ return 0x00404490;
+}
+static inline u32 gr_mme_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_mme_hww_esr_en_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_memfmt_hww_esr_r(void)
+{
+ return 0x00404600;
+}
+static inline u32 gr_memfmt_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_memfmt_hww_esr_en_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_fecs_cpuctl_r(void)
+{
+ return 0x00409100;
+}
+static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_dmactl_r(void)
+{
+ return 0x0040910c;
+}
+static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 gr_fecs_os_r(void)
+{
+ return 0x00409080;
+}
+static inline u32 gr_fecs_idlestate_r(void)
+{
+ return 0x0040904c;
+}
+static inline u32 gr_fecs_mailbox0_r(void)
+{
+ return 0x00409040;
+}
+static inline u32 gr_fecs_mailbox1_r(void)
+{
+ return 0x00409044;
+}
+static inline u32 gr_fecs_irqstat_r(void)
+{
+ return 0x00409008;
+}
+static inline u32 gr_fecs_irqmode_r(void)
+{
+ return 0x0040900c;
+}
+static inline u32 gr_fecs_irqmask_r(void)
+{
+ return 0x00409018;
+}
+static inline u32 gr_fecs_irqdest_r(void)
+{
+ return 0x0040901c;
+}
+static inline u32 gr_fecs_curctx_r(void)
+{
+ return 0x00409050;
+}
+static inline u32 gr_fecs_nxtctx_r(void)
+{
+ return 0x00409054;
+}
+static inline u32 gr_fecs_engctl_r(void)
+{
+ return 0x004090a4;
+}
+static inline u32 gr_fecs_debug1_r(void)
+{
+ return 0x00409090;
+}
+static inline u32 gr_fecs_debuginfo_r(void)
+{
+ return 0x00409094;
+}
+static inline u32 gr_fecs_icd_cmd_r(void)
+{
+ return 0x00409200;
+}
+static inline u32 gr_fecs_icd_cmd_opc_s(void)
+{
+ return 4;
+}
+static inline u32 gr_fecs_icd_cmd_opc_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_fecs_icd_cmd_opc_m(void)
+{
+ return 0xf << 0;
+}
+static inline u32 gr_fecs_icd_cmd_opc_v(u32 r)
+{
+ return (r >> 0) & 0xf;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void)
+{
+ return 0x8;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void)
+{
+ return 0xe;
+}
+static inline u32 gr_fecs_icd_cmd_idx_f(u32 v)
+{
+ return (v & 0x1f) << 8;
+}
+static inline u32 gr_fecs_icd_rdata_r(void)
+{
+ return 0x0040920c;
+}
+static inline u32 gr_fecs_imemc_r(u32 i)
+{
+ return 0x00409180 + i*16;
+}
+static inline u32 gr_fecs_imemc_offs_f(u32 v)
+{
+ return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_imemc_blk_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_imemc_aincw_f(u32 v)
+{
+ return (v & 0x1) << 24;
+}
+static inline u32 gr_fecs_imemd_r(u32 i)
+{
+ return 0x00409184 + i*16;
+}
+static inline u32 gr_fecs_imemt_r(u32 i)
+{
+ return 0x00409188 + i*16;
+}
+static inline u32 gr_fecs_imemt_tag_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_fecs_dmemc_r(u32 i)
+{
+ return 0x004091c0 + i*8;
+}
+static inline u32 gr_fecs_dmemc_offs_s(void)
+{
+ return 6;
+}
+static inline u32 gr_fecs_dmemc_offs_f(u32 v)
+{
+ return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_dmemc_offs_m(void)
+{
+ return 0x3f << 2;
+}
+static inline u32 gr_fecs_dmemc_offs_v(u32 r)
+{
+ return (r >> 2) & 0x3f;
+}
+static inline u32 gr_fecs_dmemc_blk_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_dmemc_aincw_f(u32 v)
+{
+ return (v & 0x1) << 24;
+}
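+/*
+ * Minimal usage sketch: the _m() masks are meant to be inverted and AND-ed
+ * to clear a field before OR-ing in a new _f() value, here updating only
+ * the DMEM word offset of an existing gr_fecs_dmemc control word.
+ */
+static inline u32 example_gr_fecs_dmemc_set_offs(u32 dmemc, u32 offs)
+{
+	return (dmemc & ~gr_fecs_dmemc_offs_m()) | gr_fecs_dmemc_offs_f(offs);
+}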
+static inline u32 gr_fecs_dmemd_r(u32 i)
+{
+ return 0x004091c4 + i*8;
+}
+static inline u32 gr_fecs_dmatrfbase_r(void)
+{
+ return 0x00409110;
+}
+static inline u32 gr_fecs_dmatrfmoffs_r(void)
+{
+ return 0x00409114;
+}
+static inline u32 gr_fecs_dmatrffboffs_r(void)
+{
+ return 0x0040911c;
+}
+static inline u32 gr_fecs_dmatrfcmd_r(void)
+{
+ return 0x00409118;
+}
+static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 gr_fecs_bootvec_r(void)
+{
+ return 0x00409104;
+}
+static inline u32 gr_fecs_bootvec_vec_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_falcon_hwcfg_r(void)
+{
+ return 0x00409108;
+}
+static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void)
+{
+ return 0x0041a108;
+}
+static inline u32 gr_fecs_falcon_rm_r(void)
+{
+ return 0x00409084;
+}
+static inline u32 gr_fecs_current_ctx_r(void)
+{
+ return 0x00409b00;
+}
+static inline u32 gr_fecs_current_ctx_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_current_ctx_ptr_v(u32 r)
+{
+ return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_current_ctx_target_s(void)
+{
+ return 2;
+}
+static inline u32 gr_fecs_current_ctx_target_f(u32 v)
+{
+ return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_current_ctx_target_m(void)
+{
+ return 0x3 << 28;
+}
+static inline u32 gr_fecs_current_ctx_target_v(u32 r)
+{
+ return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_current_ctx_valid_s(void)
+{
+ return 1;
+}
+static inline u32 gr_fecs_current_ctx_valid_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_current_ctx_valid_m(void)
+{
+	return 0x1U << 31;
+}
+static inline u32 gr_fecs_current_ctx_valid_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_current_ctx_valid_false_f(void)
+{
+ return 0x0;
+}
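+/*
+ * Minimal usage sketch: a FECS current-context word combines the 28-bit
+ * pointer, the target aperture and the valid bit. The context pointer
+ * argument is hypothetical and assumed to be already shifted to its
+ * 4KB-aligned form.
+ */
+static inline u32 example_gr_fecs_current_ctx_value(u32 ctx_ptr_shifted)
+{
+	return gr_fecs_current_ctx_ptr_f(ctx_ptr_shifted) |
+		gr_fecs_current_ctx_target_vid_mem_f() |
+		gr_fecs_current_ctx_valid_f(1);
+}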
+static inline u32 gr_fecs_method_data_r(void)
+{
+ return 0x00409500;
+}
+static inline u32 gr_fecs_method_push_r(void)
+{
+ return 0x00409504;
+}
+static inline u32 gr_fecs_method_push_adr_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void)
+{
+ return 0x3;
+}
+static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void)
+{
+ return 0x00000009;
+}
+static inline u32 gr_fecs_method_push_adr_restore_golden_v(void)
+{
+ return 0x00000015;
+}
+static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void)
+{
+ return 0x00000016;
+}
+static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void)
+{
+ return 0x00000025;
+}
+static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void)
+{
+ return 0x00000030;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void)
+{
+ return 0x00000031;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void)
+{
+ return 0x00000032;
+}
+static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void)
+{
+ return 0x00000038;
+}
+static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void)
+{
+ return 0x00000039;
+}
+static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
+{
+ return 0x21;
+}
+static inline u32 gr_fecs_host_int_enable_r(void)
+{
+ return 0x00409c24;
+}
+static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void)
+{
+ return 0x20000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void)
+{
+ return 0x40000;
+}
+static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void)
+{
+ return 0x80000;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_r(void)
+{
+ return 0x00409614;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void)
+{
+ return 0x100;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void)
+{
+ return 0x200;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void)
+{
+ return 1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v)
+{
+ return (v & 0x1) << 10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void)
+{
+ return 0x1 << 10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r)
+{
+ return (r >> 10) & 0x1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void)
+{
+ return 0x400;
+}
+static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void)
+{
+ return 0x0040960c;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i)
+{
+ return 0x00409800 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i)
+{
+ return 0x00409820 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i)
+{
+ return 0x00409840 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_fs_r(void)
+{
+ return 0x00409604;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_s(void)
+{
+ return 5;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v)
+{
+ return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_m(void)
+{
+ return 0x1f << 0;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_s(void)
+{
+ return 5;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v)
+{
+ return (v & 0x1f) << 16;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_m(void)
+{
+ return 0x1f << 16;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r)
+{
+ return (r >> 16) & 0x1f;
+}
+static inline u32 gr_fecs_cfg_r(void)
+{
+ return 0x00409620;
+}
+static inline u32 gr_fecs_cfg_imem_sz_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 gr_fecs_rc_lanes_r(void)
+{
+ return 0x00409880;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_s(void)
+{
+ return 6;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v)
+{
+ return (v & 0x3f) << 0;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_m(void)
+{
+ return 0x3f << 0;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r)
+{
+ return (r >> 0) & 0x3f;
+}
+static inline u32 gr_fecs_ctxsw_status_1_r(void)
+{
+ return 0x00409400;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void)
+{
+ return 1;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v)
+{
+ return (v & 0x1) << 12;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void)
+{
+ return 0x1 << 12;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r)
+{
+ return (r >> 12) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_adr_r(void)
+{
+ return 0x00409a24;
+}
+static inline u32 gr_fecs_new_ctx_r(void)
+{
+ return 0x00409b04;
+}
+static inline u32 gr_fecs_new_ctx_ptr_s(void)
+{
+ return 28;
+}
+static inline u32 gr_fecs_new_ctx_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_new_ctx_ptr_m(void)
+{
+ return 0xfffffff << 0;
+}
+static inline u32 gr_fecs_new_ctx_ptr_v(u32 r)
+{
+ return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_new_ctx_target_s(void)
+{
+ return 2;
+}
+static inline u32 gr_fecs_new_ctx_target_f(u32 v)
+{
+ return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_new_ctx_target_m(void)
+{
+ return 0x3 << 28;
+}
+static inline u32 gr_fecs_new_ctx_target_v(u32 r)
+{
+ return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_new_ctx_valid_s(void)
+{
+ return 1;
+}
+static inline u32 gr_fecs_new_ctx_valid_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_new_ctx_valid_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 gr_fecs_new_ctx_valid_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_r(void)
+{
+ return 0x00409a0c;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void)
+{
+ return 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void)
+{
+ return 0xfffffff << 0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r)
+{
+ return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_s(void)
+{
+ return 2;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v)
+{
+ return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_m(void)
+{
+ return 0x3 << 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
+{
+ return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_r(void)
+{
+ return 0x00409a10;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void)
+{
+ return 5;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v)
+{
+ return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void)
+{
+ return 0x1f << 0;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 gr_rstr2d_gpc_map0_r(void)
+{
+ return 0x0040780c;
+}
+static inline u32 gr_rstr2d_gpc_map1_r(void)
+{
+ return 0x00407810;
+}
+static inline u32 gr_rstr2d_gpc_map2_r(void)
+{
+ return 0x00407814;
+}
+static inline u32 gr_rstr2d_gpc_map3_r(void)
+{
+ return 0x00407818;
+}
+static inline u32 gr_rstr2d_gpc_map4_r(void)
+{
+ return 0x0040781c;
+}
+static inline u32 gr_rstr2d_gpc_map5_r(void)
+{
+ return 0x00407820;
+}
+static inline u32 gr_rstr2d_map_table_cfg_r(void)
+{
+ return 0x004078bc;
+}
+static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_hww_esr_r(void)
+{
+ return 0x00406018;
+}
+static inline u32 gr_pd_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_pd_hww_esr_en_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
+{
+ return 0x00406028 + i*4;
+}
+static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
+{
+ return (v & 0xf) << 4;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
+{
+ return (v & 0xf) << 8;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
+{
+ return (v & 0xf) << 12;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
+{
+ return (v & 0xf) << 16;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
+{
+ return (v & 0xf) << 20;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
+{
+ return (v & 0xf) << 24;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
+{
+ return (v & 0xf) << 28;
+}
+static inline u32 gr_pd_ab_dist_cfg0_r(void)
+{
+ return 0x004064c0;
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_pd_ab_dist_cfg1_r(void)
+{
+ return 0x004064c4;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
+{
+ return 0xffff;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
+{
+ return (v & 0x7ff) << 16;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
+{
+ return 0x00000080;
+}
+static inline u32 gr_pd_ab_dist_cfg2_r(void)
+{
+ return 0x004064c8;
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
+{
+ return (v & 0xfff) << 16;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
+{
+ return 0x00000062;
+}
+static inline u32 gr_pd_pagepool_r(void)
+{
+ return 0x004064cc;
+}
+static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_pagepool_valid_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_pd_dist_skip_table_r(u32 i)
+{
+ return 0x004064d0 + i*4;
+}
+static inline u32 gr_pd_dist_skip_table__size_1_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+static inline u32 gr_pd_alpha_ratio_table_r(u32 i)
+{
+ return 0x00406800 + i*4;
+}
+static inline u32 gr_pd_alpha_ratio_table__size_1_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 gr_pd_alpha_ratio_table_gpc_4n0_mask_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_alpha_ratio_table_gpc_4n1_mask_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_alpha_ratio_table_gpc_4n2_mask_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+static inline u32 gr_pd_alpha_ratio_table_gpc_4n3_mask_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+static inline u32 gr_pd_beta_ratio_table_r(u32 i)
+{
+ return 0x00406c00 + i*4;
+}
+static inline u32 gr_pd_beta_ratio_table__size_1_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 gr_pd_beta_ratio_table_gpc_4n0_mask_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_beta_ratio_table_gpc_4n1_mask_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_beta_ratio_table_gpc_4n2_mask_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+static inline u32 gr_pd_beta_ratio_table_gpc_4n3_mask_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+static inline u32 gr_ds_debug_r(void)
+{
+ return 0x00405800;
+}
+static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
+{
+ return 0x8000000;
+}
+static inline u32 gr_ds_zbc_color_r_r(void)
+{
+ return 0x00405804;
+}
+static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_g_r(void)
+{
+ return 0x00405808;
+}
+static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_b_r(void)
+{
+ return 0x0040580c;
+}
+static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_a_r(void)
+{
+ return 0x00405810;
+}
+static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_fmt_r(void)
+{
+ return 0x00405814;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
+{
+ return (v & 0x7f) << 0;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 gr_ds_zbc_z_r(void)
+{
+ return 0x00405818;
+}
+static inline u32 gr_ds_zbc_z_val_s(void)
+{
+ return 32;
+}
+static inline u32 gr_ds_zbc_z_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_z_val_m(void)
+{
+ return 0xffffffff << 0;
+}
+static inline u32 gr_ds_zbc_z_val_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 gr_ds_zbc_z_val__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_ds_zbc_z_val__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_zbc_z_fmt_r(void)
+{
+ return 0x0040581c;
+}
+static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_ds_zbc_tbl_index_r(void)
+{
+ return 0x00405820;
+}
+static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_ds_zbc_tbl_ld_r(void)
+{
+ return 0x00405824;
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_ds_tga_constraintlogic_r(void)
+{
+ return 0x00405830;
+}
+static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
+{
+ return (v & 0xfff) << 16;
+}
+static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 gr_ds_hww_esr_r(void)
+{
+ return 0x00405840;
+}
+static inline u32 gr_ds_hww_esr_reset_s(void)
+{
+ return 1;
+}
+static inline u32 gr_ds_hww_esr_reset_f(u32 v)
+{
+ return (v & 0x1) << 30;
+}
+static inline u32 gr_ds_hww_esr_reset_m(void)
+{
+ return 0x1 << 30;
+}
+static inline u32 gr_ds_hww_esr_reset_v(u32 r)
+{
+ return (r >> 30) & 0x1;
+}
+static inline u32 gr_ds_hww_esr_reset_task_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_ds_hww_esr_reset_task_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_ds_hww_esr_en_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_ds_hww_report_mask_r(void)
+{
+ return 0x00405844;
+}
+static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
+{
+ return 0x8;
+}
+static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
+{
+ return 0x80;
+}
+static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
+{
+ return 0x100;
+}
+static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
+{
+ return 0x200;
+}
+static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
+{
+ return 0x400;
+}
+static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
+{
+ return 0x800;
+}
+static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
+{
+ return 0x1000;
+}
+static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
+{
+ return 0x2000;
+}
+static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
+{
+ return 0x4000;
+}
+static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
+{
+ return 0x8000;
+}
+static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
+{
+ return 0x20000;
+}
+static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
+{
+ return 0x40000;
+}
+static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
+{
+ return 0x80000;
+}
+static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
+{
+ return 0x100000;
+}
+static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
+{
+ return 0x200000;
+}
+static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
+{
+ return 0x400000;
+}
+static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
+{
+ return 0x800000;
+}
+static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
+{
+ return 0x00405870 + i*4;
+}
+static inline u32 gr_scc_bundle_cb_base_r(void)
+{
+ return 0x00408004;
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gr_scc_bundle_cb_size_r(void)
+{
+ return 0x00408008;
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
+{
+ return (v & 0x7ff) << 0;
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
+{
+ return 0x00000018;
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_scc_pagepool_base_r(void)
+{
+ return 0x0040800c;
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
+{
+ return 0x00000008;
+}
+static inline u32 gr_scc_pagepool_r(void)
+{
+ return 0x00408010;
+}
+static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
+{
+ return 0x00000080;
+}
+static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
+{
+ return 0x00000100;
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
+{
+ return 8;
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
+{
+ return 0xff << 8;
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
+{
+ return (r >> 8) & 0xff;
+}
+static inline u32 gr_scc_pagepool_valid_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_scc_init_r(void)
+{
+ return 0x0040802c;
+}
+static inline u32 gr_scc_init_ram_trigger_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_scc_hww_esr_r(void)
+{
+ return 0x00408030;
+}
+static inline u32 gr_scc_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_scc_hww_esr_en_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_sked_hww_esr_r(void)
+{
+ return 0x00407020;
+}
+static inline u32 gr_sked_hww_esr_reset_active_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 gr_cwd_fs_r(void)
+{
+ return 0x00405b00;
+}
+static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_gpc0_fs_gpc_r(void)
+{
+ return 0x00502608;
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
+{
+ return (r >> 16) & 0x1f;
+}
+static inline u32 gr_gpc0_cfg_r(void)
+{
+ return 0x00502620;
+}
+static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpccs_rc_lanes_r(void)
+{
+ return 0x00502880;
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
+{
+ return 6;
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
+{
+ return (v & 0x3f) << 0;
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
+{
+ return 0x3f << 0;
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
+{
+ return (r >> 0) & 0x3f;
+}
+static inline u32 gr_gpccs_rc_lane_size_r(u32 i)
+{
+ return 0x00502910 + i*0;
+}
+static inline u32 gr_gpccs_rc_lane_size__size_1_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_s(void)
+{
+ return 24;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_m(void)
+{
+ return 0xffffff << 0;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
+{
+ return (r >> 0) & 0xffffff;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpc0_zcull_fs_r(void)
+{
+ return 0x00500910;
+}
+static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
+{
+ return (v & 0x1ff) << 0;
+}
+static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
+{
+ return (v & 0xf) << 16;
+}
+static inline u32 gr_gpc0_zcull_ram_addr_r(void)
+{
+ return 0x00500914;
+}
+static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
+{
+ return (v & 0xf) << 8;
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
+{
+ return 0x00500918;
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
+{
+ return 0x00800000;
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
+{
+ return 0x00500920;
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
+{
+ return 0x00500a04 + i*32;
+}
+static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
+{
+ return 0x00000040;
+}
+static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 gr_gpc0_gpm_pd_active_tpcs_r(void)
+{
+ return 0x00500c08;
+}
+static inline u32 gr_gpc0_gpm_pd_active_tpcs_num_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
+{
+ return 0x00500c10 + i*4;
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
+{
+ return 0x00500c30 + i*4;
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_gpm_sd_active_tpcs_r(void)
+{
+ return 0x00500c8c;
+}
+static inline u32 gr_gpc0_gpm_sd_active_tpcs_num_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
+{
+ return 0x00504088;
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_r(void)
+{
+ return 0x005044e8;
+}
+static inline u32 gr_gpc0_tpc0_l1c_cfg_smid_value_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
+{
+ return 0x00504698;
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
+{
+ return 0x00503018;
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_r(void)
+{
+ return 0x005030c0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_m(void)
+{
+ return 0xffff << 0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_start_offset_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_size_f(u32 v)
+{
+ return (v & 0xfff) << 16;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_size_m(void)
+{
+ return 0xfff << 16;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_size_v(u32 r)
+{
+ return (r >> 16) & 0xfff;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_size_default_v(void)
+{
+ return 0x00000240;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_size_granularity_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(u32 v)
+{
+ return (v & 0x1) << 28;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_r(void)
+{
+ return 0x005030e4;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_f(u32 v)
+{
+ return (v & 0xfff) << 16;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_m(void)
+{
+ return 0xfff << 16;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_v(u32 r)
+{
+ return (r >> 16) & 0xfff;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_default_v(void)
+{
+ return 0x00000648;
+}
+static inline u32 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gr_gpccs_falcon_addr_r(void)
+{
+ return 0x0041a0ac;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
+{
+ return 6;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
+{
+ return (v & 0x3f) << 0;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
+{
+ return 0x3f << 0;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
+{
+ return (r >> 0) & 0x3f;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_s(void)
+{
+ return 6;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
+{
+ return (v & 0x3f) << 6;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_m(void)
+{
+ return 0x3f << 6;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
+{
+ return (r >> 6) & 0x3f;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpccs_falcon_addr_ext_s(void)
+{
+ return 12;
+}
+static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 gr_gpccs_falcon_addr_ext_m(void)
+{
+ return 0xfff << 0;
+}
+static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
+{
+ return (r >> 0) & 0xfff;
+}
+static inline u32 gr_gpccs_cpuctl_r(void)
+{
+ return 0x0041a100;
+}
+static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 gr_gpccs_dmactl_r(void)
+{
+ return 0x0041a10c;
+}
+static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 gr_gpccs_imemc_r(u32 i)
+{
+ return 0x0041a180 + i*16;
+}
+static inline u32 gr_gpccs_imemc_offs_f(u32 v)
+{
+ return (v & 0x3f) << 2;
+}
+static inline u32 gr_gpccs_imemc_blk_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
+{
+ return (v & 0x1) << 24;
+}
+static inline u32 gr_gpccs_imemd_r(u32 i)
+{
+ return 0x0041a184 + i*16;
+}
+static inline u32 gr_gpccs_imemt_r(u32 i)
+{
+ return 0x0041a188 + i*16;
+}
+static inline u32 gr_gpccs_imemt__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 gr_gpccs_imemt_tag_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpccs_dmemc_r(u32 i)
+{
+ return 0x0041a1c0 + i*8;
+}
+static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
+{
+ return (v & 0x3f) << 2;
+}
+static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
+{
+ return (v & 0x1) << 24;
+}
+static inline u32 gr_gpccs_dmemd_r(u32 i)
+{
+ return 0x0041a1c4 + i*8;
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
+{
+ return 0x0041a800 + i*4;
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_r(void)
+{
+ return 0x00418808;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_s(void)
+{
+ return 32;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_m(void)
+{
+ return 0xffffffff << 0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_base_addr_39_8_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_r(void)
+{
+ return 0x0041880c;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_s(void)
+{
+ return 11;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_f(u32 v)
+{
+ return (v & 0x7ff) << 0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_m(void)
+{
+ return 0x7ff << 0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_v(u32 r)
+{
+ return (r >> 0) & 0x7ff;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_v(void)
+{
+ return 0x00000018;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_div_256b__prod_f(void)
+{
+ return 0x18;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_s(void)
+{
+ return 1;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_false_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpcs_setup_bundle_cb_size_valid_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
+{
+ return 0x00418810;
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_crstr_gpc_map0_r(void)
+{
+ return 0x00418b08;
+}
+static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_gpc_map1_r(void)
+{
+ return 0x00418b0c;
+}
+static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_gpc_map2_r(void)
+{
+ return 0x00418b10;
+}
+static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_gpc_map3_r(void)
+{
+ return 0x00418b14;
+}
+static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_gpc_map4_r(void)
+{
+ return 0x00418b18;
+}
+static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_gpc_map5_r(void)
+{
+ return 0x00418b1c;
+}
+static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
+{
+ return (v & 0x7) << 5;
+}
+static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
+{
+ return (v & 0x7) << 10;
+}
+static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
+{
+ return (v & 0x7) << 15;
+}
+static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
+{
+ return (v & 0x7) << 25;
+}
+static inline u32 gr_crstr_map_table_cfg_r(void)
+{
+ return 0x00418bb8;
+}
+static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
+{
+ return 0x00418980;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
+{
+ return (v & 0x7) << 4;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
+{
+ return (v & 0x7) << 16;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
+{
+ return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
+{
+ return 0x00418984;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
+{
+ return (v & 0x7) << 4;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
+{
+ return (v & 0x7) << 16;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
+{
+ return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
+{
+ return 0x00418988;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
+{
+ return (v & 0x7) << 4;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
+{
+ return (v & 0x7) << 16;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
+{
+ return 3;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
+{
+ return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
+{
+ return 0x7 << 28;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
+{
+ return (r >> 28) & 0x7;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
+{
+ return 0x0041898c;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
+{
+ return (v & 0x7) << 0;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
+{
+ return (v & 0x7) << 4;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
+{
+ return (v & 0x7) << 16;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
+{
+ return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_r(void)
+{
+ return 0x00418c6c;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_r(void)
+{
+ return 0x00419004;
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_gcc_pagepool_r(void)
+{
+ return 0x00419008;
+}
+static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
+{
+ return 0x0041980c;
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void)
+{
+ return 0x00419848;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v)
+{
+ return (v & 0x1) << 28;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 gr_gpcs_tpcs_l1c_pm_r(void)
+{
+ return 0x00419ca8;
+}
+static inline u32 gr_gpcs_tpcs_l1c_pm_enable_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 gr_gpcs_tpcs_l1c_pm_enable_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_gpcs_tpcs_l1c_cfg_r(void)
+{
+ return 0x00419cb8;
+}
+static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void)
+{
+ return 0x00419c00;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void)
+{
+ return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void)
+{
+ return 0x00419e00;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void)
+{
+ return 0x1 << 7;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void)
+{
+ return 0x80;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void)
+{
+ return 0x1 << 15;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void)
+{
+ return 0x8000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
+{
+ return 0x00419e44;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
+{
+ return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
+{
+ return 0x80;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
+{
+ return 0x100;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
+{
+ return 0x200;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
+{
+ return 0x400;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
+{
+ return 0x800;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
+{
+ return 0x1000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
+{
+ return 0x2000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
+{
+ return 0x4000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
+{
+ return 0x8000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
+{
+ return 0x20000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
+{
+ return 0x40000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
+{
+ return 0x80000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
+{
+ return 0x100000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
+{
+ return 0x00419e4c;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
+{
+ return 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
+{
+ return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+{
+ return 0x0050450c;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
+{
+ return 0x00502c94;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
+{
+ return 0x0041ac90;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
+{
+ return (r >> 16) & 0xff;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
+{
+ return 0x00419d08;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
+{
+ return 0x00504610;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
+{
+ return 0x0050460c;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
+{
+ return 0x00504650;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+ return 0x20;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+ return 0x40;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
+{
+ return 0x00504648;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
+{
+ return 0x00504770;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
+{
+ return 0x00419f70;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
+{
+ return 0x1 << 4;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
+{
+ return 0x0050477c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
+{
+ return 0x00419f7c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(void)
+{
+ return 0x1 << 16;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f(void)
+{
+ return 0x10000;
+}
+static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void)
+{
+ return 0x00419ed0;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
+{
+ return 0x0041be08;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
+{
+ return 0x4;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
+{
+ return 0x0041bf00;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
+{
+ return 0x0041bf04;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
+{
+ return 0x0041bf08;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
+{
+ return 0x0041bf0c;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
+{
+ return 0x0041bf10;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
+{
+ return 0x0041bf14;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
+{
+ return 0x0041bfd0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
+{
+ return (v & 0x1f) << 16;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
+{
+ return (v & 0x7) << 21;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 24;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
+{
+ return 0x0041bfd4;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
+{
+ return 0x0041bfe4;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 5;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 10;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 15;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 20;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
+{
+ return (v & 0x1f) << 25;
+}
+static inline u32 gr_gpcs_ppcs_cbm_cfg_r(void)
+{
+ return 0x0041bec0;
+}
+static inline u32 gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 gr_bes_zrop_settings_r(void)
+{
+ return 0x00408850;
+}
+static inline u32 gr_bes_zrop_settings_num_active_fbps_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_bes_crop_settings_r(void)
+{
+ return 0x00408958;
+}
+static inline u32 gr_bes_crop_settings_num_active_fbps_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void)
+{
+ return 0x000000c0;
+}
+static inline u32 gr_zcull_subregion_qty_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void)
+{
+ return 0x00504604;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void)
+{
+ return 0x00504608;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void)
+{
+ return 0x0050465c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void)
+{
+ return 0x00504660;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void)
+{
+ return 0x00504664;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void)
+{
+ return 0x00504668;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void)
+{
+ return 0x0050466c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void)
+{
+ return 0x00504658;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(void)
+{
+ return 0x00504670;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void)
+{
+ return 0x00504694;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void)
+{
+ return 0x00504730;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void)
+{
+ return 0x00504734;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void)
+{
+ return 0x00504738;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void)
+{
+ return 0x0050473c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void)
+{
+ return 0x00504740;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void)
+{
+ return 0x00504744;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void)
+{
+ return 0x00504748;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void)
+{
+ return 0x0050474c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(void)
+{
+ return 0x00504674;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(void)
+{
+ return 0x00504678;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(void)
+{
+ return 0x0050467c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(void)
+{
+ return 0x00504680;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r(void)
+{
+ return 0x00504684;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(void)
+{
+ return 0x00504688;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(void)
+{
+ return 0x0050468c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(void)
+{
+ return 0x00504690;
+}
+static inline u32 gr_fe_pwr_mode_r(void)
+{
+ return 0x00404170;
+}
+static inline u32 gr_fe_pwr_mode_mode_auto_f(void)
+{
+ return 0x0;
+}
+static inline u32 gr_fe_pwr_mode_mode_force_on_f(void)
+{
+ return 0x2;
+}
+static inline u32 gr_fe_pwr_mode_req_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 gr_fe_pwr_mode_req_send_f(void)
+{
+ return 0x10;
+}
+static inline u32 gr_fe_pwr_mode_req_done_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_l1c_dbg_r(void)
+{
+ return 0x005044b0;
+}
+static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void)
+{
+ return 0x8000000;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
new file mode 100644
index 000000000000..65221b59909a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
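+/*
+ * Illustrative sketch only, not part of the generated interface: the
+ * helpers in this header are meant to be composed when building or
+ * decoding register values. Assuming the driver's register accessors
+ * gk20a_readl()/gk20a_writel() (from gk20a.h), a read-modify-write of a
+ * field <y> in a register <x> would look roughly like:
+ *
+ *   u32 reg = gk20a_readl(g, <x>_r());
+ *
+ *   reg = (reg & ~<x>_<y>_m()) | <x>_<y>_f(new_val);
+ *   gk20a_writel(g, <x>_r(), reg);
+ */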
+#ifndef _hw_ltc_gk20a_h_
+#define _hw_ltc_gk20a_h_
+
+static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
+{
+ return 0x001410c8;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void)
+{
+ return 0x00141104;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r)
+{
+ return (r >> 16) & 0x3;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
+{
+ return 0x0017e8c8;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
+{
+ return 0x4;
+}
+static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
+{
+ return 0x0017e8c8;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
+{
+ return 0x0017e8cc;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
+{
+ return (v & 0x1ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
+{
+ return 0x0017e8d0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
+{
+ return (v & 0x1ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
+{
+ return 0x0001ffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
+{
+ return 0x0017e8d4;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void)
+{
+ return 0x0000000b;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r)
+{
+ return (r >> 0) & 0x3ffffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_r(void)
+{
+ return 0x0017e8dc;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r)
+{
+ return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r)
+{
+ return (r >> 24) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(u32 r)
+{
+ return (r >> 28) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
+{
+ return 0x0017e91c;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v)
+{
+ return (v & 0x1f) << 16;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void)
+{
+ return 0x0017ea44;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i)
+{
+ return 0x0017ea48 + i*4;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void)
+{
+ return 0x0017ea58;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void)
+{
+ return 32;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void)
+{
+ return 0xffffffff << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void)
+{
+ return 0x0017e924;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 ltc_ltss_g_elpg_r(void)
+{
+ return 0x0017e828;
+}
+static inline u32 ltc_ltss_g_elpg_flush_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltss_g_elpg_flush_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ltc_ltss_g_elpg_flush_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 ltc_ltc0_ltss_intr_r(void)
+{
+ return 0x00140820;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h
new file mode 100644
index 000000000000..1692bb5430cb
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_mc_gk20a.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_mc_gk20a_h_
+#define _hw_mc_gk20a_h_
+
+static inline u32 mc_boot_0_r(void)
+{
+ return 0x00000000;
+}
+static inline u32 mc_boot_0_architecture_v(u32 r)
+{
+ return (r >> 24) & 0x1f;
+}
+static inline u32 mc_boot_0_implementation_v(u32 r)
+{
+ return (r >> 20) & 0xf;
+}
+static inline u32 mc_boot_0_major_revision_v(u32 r)
+{
+ return (r >> 4) & 0xf;
+}
+static inline u32 mc_boot_0_minor_revision_v(u32 r)
+{
+ return (r >> 0) & 0xf;
+}
+static inline u32 mc_intr_0_r(void)
+{
+ return 0x00000100;
+}
+static inline u32 mc_intr_0_pfifo_pending_f(void)
+{
+ return 0x100;
+}
+static inline u32 mc_intr_0_pgraph_pending_f(void)
+{
+ return 0x1000;
+}
+static inline u32 mc_intr_0_pmu_pending_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 mc_intr_0_ltc_pending_f(void)
+{
+ return 0x2000000;
+}
+static inline u32 mc_intr_0_priv_ring_pending_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 mc_intr_0_pbus_pending_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 mc_intr_1_r(void)
+{
+ return 0x00000104;
+}
+static inline u32 mc_intr_mask_0_r(void)
+{
+ return 0x00000640;
+}
+static inline u32 mc_intr_mask_0_pmu_enabled_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 mc_intr_mask_1_r(void)
+{
+ return 0x00000644;
+}
+static inline u32 mc_intr_mask_1_pmu_enabled_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 mc_intr_en_0_r(void)
+{
+ return 0x00000140;
+}
+static inline u32 mc_intr_en_0_inta_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 mc_intr_en_0_inta_hardware_f(void)
+{
+ return 0x1;
+}
+static inline u32 mc_intr_en_1_r(void)
+{
+ return 0x00000144;
+}
+static inline u32 mc_intr_en_1_inta_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 mc_intr_en_1_inta_hardware_f(void)
+{
+ return 0x1;
+}
+static inline u32 mc_enable_r(void)
+{
+ return 0x00000200;
+}
+static inline u32 mc_enable_xbar_enabled_f(void)
+{
+ return 0x4;
+}
+static inline u32 mc_enable_l2_enabled_f(void)
+{
+ return 0x8;
+}
+static inline u32 mc_enable_pmedia_s(void)
+{
+ return 1;
+}
+static inline u32 mc_enable_pmedia_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 mc_enable_pmedia_m(void)
+{
+ return 0x1 << 4;
+}
+static inline u32 mc_enable_pmedia_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 mc_enable_priv_ring_enabled_f(void)
+{
+ return 0x20;
+}
+static inline u32 mc_enable_ce0_m(void)
+{
+ return 0x1 << 6;
+}
+static inline u32 mc_enable_pfifo_enabled_f(void)
+{
+ return 0x100;
+}
+static inline u32 mc_enable_pgraph_enabled_f(void)
+{
+ return 0x1000;
+}
+static inline u32 mc_enable_pwr_v(u32 r)
+{
+ return (r >> 13) & 0x1;
+}
+static inline u32 mc_enable_pwr_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 mc_enable_pwr_enabled_f(void)
+{
+ return 0x2000;
+}
+static inline u32 mc_enable_pfb_enabled_f(void)
+{
+ return 0x100000;
+}
+static inline u32 mc_enable_ce2_m(void)
+{
+ return 0x1 << 21;
+}
+static inline u32 mc_enable_ce2_enabled_f(void)
+{
+ return 0x200000;
+}
+static inline u32 mc_enable_blg_enabled_f(void)
+{
+ return 0x8000000;
+}
+static inline u32 mc_enable_perfmon_enabled_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 mc_enable_hub_enabled_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 mc_enable_pb_r(void)
+{
+ return 0x00000204;
+}
+static inline u32 mc_enable_pb_0_s(void)
+{
+ return 1;
+}
+static inline u32 mc_enable_pb_0_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 mc_enable_pb_0_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 mc_enable_pb_0_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 mc_enable_pb_0_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
+{
+ return (v & 0x1) << (0 + i*1);
+}
+#endif
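The naming scheme documented in the header comment above is easiest to see in use. The following sketch assumes hypothetical read32()/write32() MMIO helpers and placeholder interrupt handling; it shows the three common patterns: _r() for offsets, shifted _f() constants tested directly against a raw register value, and _v() extractors that pull a field down to bit 0.

#include <linux/types.h>	/* u32 */
#include "hw_mc_gk20a.h"

extern u32 read32(u32 offset);			/* assumed MMIO read helper */
extern void write32(u32 offset, u32 val);	/* assumed MMIO write helper */

static void mc_sketch(void)
{
	u32 boot0 = read32(mc_boot_0_r());
	u32 intr = read32(mc_intr_0_r());
	u32 arch = mc_boot_0_architecture_v(boot0);	/* field at bit 0 */
	u32 impl = mc_boot_0_implementation_v(boot0);

	/* _f() constants are already shifted into place, so they can
	 * be &'d against the raw interrupt status word. */
	if (intr & mc_intr_0_pfifo_pending_f()) {
		/* service FIFO */
	}
	if (intr & mc_intr_0_pgraph_pending_f()) {
		/* service GR */
	}

	/* Full register values are built by |'ing _f() constants. */
	write32(mc_enable_r(),
		mc_enable_pfifo_enabled_f() |
		mc_enable_pgraph_enabled_f() |
		mc_enable_priv_ring_enabled_f());

	(void)arch;
	(void)impl;
}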
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h
new file mode 100644
index 000000000000..df1a6d48541f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pbdma_gk20a.h
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_pbdma_gk20a_h_
+#define _hw_pbdma_gk20a_h_
+
+static inline u32 pbdma_gp_entry1_r(void)
+{
+ return 0x10000004;
+}
+static inline u32 pbdma_gp_entry1_get_hi_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 pbdma_gp_entry1_length_f(u32 v)
+{
+ return (v & 0x1fffff) << 10;
+}
+static inline u32 pbdma_gp_entry1_length_v(u32 r)
+{
+ return (r >> 10) & 0x1fffff;
+}
+static inline u32 pbdma_gp_base_r(u32 i)
+{
+ return 0x00040048 + i*8192;
+}
+static inline u32 pbdma_gp_base__size_1_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pbdma_gp_base_offset_f(u32 v)
+{
+ return (v & 0x1fffffff) << 3;
+}
+static inline u32 pbdma_gp_base_rsvd_s(void)
+{
+ return 3;
+}
+static inline u32 pbdma_gp_base_hi_r(u32 i)
+{
+ return 0x0004004c + i*8192;
+}
+static inline u32 pbdma_gp_base_hi_offset_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 pbdma_gp_base_hi_limit2_f(u32 v)
+{
+ return (v & 0x1f) << 16;
+}
+static inline u32 pbdma_gp_fetch_r(u32 i)
+{
+ return 0x00040050 + i*8192;
+}
+static inline u32 pbdma_gp_get_r(u32 i)
+{
+ return 0x00040014 + i*8192;
+}
+static inline u32 pbdma_gp_put_r(u32 i)
+{
+ return 0x00040000 + i*8192;
+}
+static inline u32 pbdma_pb_fetch_r(u32 i)
+{
+ return 0x00040054 + i*8192;
+}
+static inline u32 pbdma_pb_fetch_hi_r(u32 i)
+{
+ return 0x00040058 + i*8192;
+}
+static inline u32 pbdma_get_r(u32 i)
+{
+ return 0x00040018 + i*8192;
+}
+static inline u32 pbdma_get_hi_r(u32 i)
+{
+ return 0x0004001c + i*8192;
+}
+static inline u32 pbdma_put_r(u32 i)
+{
+ return 0x0004005c + i*8192;
+}
+static inline u32 pbdma_put_hi_r(u32 i)
+{
+ return 0x00040060 + i*8192;
+}
+static inline u32 pbdma_formats_r(u32 i)
+{
+ return 0x0004009c + i*8192;
+}
+static inline u32 pbdma_formats_gp_fermi0_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_formats_pb_fermi1_f(void)
+{
+ return 0x100;
+}
+static inline u32 pbdma_formats_mp_fermi0_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_syncpointa_r(u32 i)
+{
+ return 0x000400a4 + i*8192;
+}
+static inline u32 pbdma_syncpointa_payload_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 pbdma_syncpointb_r(u32 i)
+{
+ return 0x000400a8 + i*8192;
+}
+static inline u32 pbdma_syncpointb_op_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 pbdma_syncpointb_op_wait_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pbdma_syncpointb_wait_switch_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 pbdma_syncpointb_wait_switch_en_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
+{
+ return (r >> 8) & 0xff;
+}
+static inline u32 pbdma_pb_header_r(u32 i)
+{
+ return 0x00040084 + i*8192;
+}
+static inline u32 pbdma_pb_header_priv_user_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_pb_header_method_zero_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_pb_header_subchannel_zero_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_pb_header_level_main_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_pb_header_first_true_f(void)
+{
+ return 0x400000;
+}
+static inline u32 pbdma_pb_header_type_inc_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 pbdma_subdevice_r(u32 i)
+{
+ return 0x00040094 + i*8192;
+}
+static inline u32 pbdma_subdevice_id_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 pbdma_subdevice_status_active_f(void)
+{
+ return 0x10000000;
+}
+static inline u32 pbdma_subdevice_channel_dma_enable_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 pbdma_method0_r(u32 i)
+{
+ return 0x000400c0 + i*8192;
+}
+static inline u32 pbdma_data0_r(u32 i)
+{
+ return 0x000400c4 + i*8192;
+}
+static inline u32 pbdma_target_r(u32 i)
+{
+ return 0x000400ac + i*8192;
+}
+static inline u32 pbdma_target_engine_sw_f(void)
+{
+ return 0x1f;
+}
+static inline u32 pbdma_acquire_r(u32 i)
+{
+ return 0x00040030 + i*8192;
+}
+static inline u32 pbdma_acquire_retry_man_2_f(void)
+{
+ return 0x2;
+}
+static inline u32 pbdma_acquire_retry_exp_2_f(void)
+{
+ return 0x100;
+}
+static inline u32 pbdma_acquire_timeout_exp_max_f(void)
+{
+ return 0x7800;
+}
+static inline u32 pbdma_acquire_timeout_man_max_f(void)
+{
+ return 0x7fff8000;
+}
+static inline u32 pbdma_acquire_timeout_en_disable_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_status_r(u32 i)
+{
+ return 0x00040100 + i*8192;
+}
+static inline u32 pbdma_channel_r(u32 i)
+{
+ return 0x00040120 + i*8192;
+}
+static inline u32 pbdma_signature_r(u32 i)
+{
+ return 0x00040010 + i*8192;
+}
+static inline u32 pbdma_signature_hw_valid_f(void)
+{
+ return 0xface;
+}
+static inline u32 pbdma_signature_sw_zero_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_userd_r(u32 i)
+{
+ return 0x00040008 + i*8192;
+}
+static inline u32 pbdma_userd_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 pbdma_userd_addr_f(u32 v)
+{
+ return (v & 0x7fffff) << 9;
+}
+static inline u32 pbdma_userd_hi_r(u32 i)
+{
+ return 0x0004000c + i*8192;
+}
+static inline u32 pbdma_userd_hi_addr_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 pbdma_hce_ctrl_r(u32 i)
+{
+ return 0x000400e4 + i*8192;
+}
+static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void)
+{
+ return 0x20;
+}
+static inline u32 pbdma_intr_0_r(u32 i)
+{
+ return 0x00040108 + i*8192;
+}
+static inline u32 pbdma_intr_0_memreq_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 pbdma_intr_0_memreq_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 pbdma_intr_0_memack_timeout_pending_f(void)
+{
+ return 0x2;
+}
+static inline u32 pbdma_intr_0_memack_extra_pending_f(void)
+{
+ return 0x4;
+}
+static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void)
+{
+ return 0x8;
+}
+static inline u32 pbdma_intr_0_memdat_extra_pending_f(void)
+{
+ return 0x10;
+}
+static inline u32 pbdma_intr_0_memflush_pending_f(void)
+{
+ return 0x20;
+}
+static inline u32 pbdma_intr_0_memop_pending_f(void)
+{
+ return 0x40;
+}
+static inline u32 pbdma_intr_0_lbconnect_pending_f(void)
+{
+ return 0x80;
+}
+static inline u32 pbdma_intr_0_lbreq_pending_f(void)
+{
+ return 0x100;
+}
+static inline u32 pbdma_intr_0_lback_timeout_pending_f(void)
+{
+ return 0x200;
+}
+static inline u32 pbdma_intr_0_lback_extra_pending_f(void)
+{
+ return 0x400;
+}
+static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void)
+{
+ return 0x800;
+}
+static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void)
+{
+ return 0x1000;
+}
+static inline u32 pbdma_intr_0_gpfifo_pending_f(void)
+{
+ return 0x2000;
+}
+static inline u32 pbdma_intr_0_gpptr_pending_f(void)
+{
+ return 0x4000;
+}
+static inline u32 pbdma_intr_0_gpentry_pending_f(void)
+{
+ return 0x8000;
+}
+static inline u32 pbdma_intr_0_gpcrc_pending_f(void)
+{
+ return 0x10000;
+}
+static inline u32 pbdma_intr_0_pbptr_pending_f(void)
+{
+ return 0x20000;
+}
+static inline u32 pbdma_intr_0_pbentry_pending_f(void)
+{
+ return 0x40000;
+}
+static inline u32 pbdma_intr_0_pbcrc_pending_f(void)
+{
+ return 0x80000;
+}
+static inline u32 pbdma_intr_0_xbarconnect_pending_f(void)
+{
+ return 0x100000;
+}
+static inline u32 pbdma_intr_0_method_pending_f(void)
+{
+ return 0x200000;
+}
+static inline u32 pbdma_intr_0_methodcrc_pending_f(void)
+{
+ return 0x400000;
+}
+static inline u32 pbdma_intr_0_device_pending_f(void)
+{
+ return 0x800000;
+}
+static inline u32 pbdma_intr_0_semaphore_pending_f(void)
+{
+ return 0x2000000;
+}
+static inline u32 pbdma_intr_0_acquire_pending_f(void)
+{
+ return 0x4000000;
+}
+static inline u32 pbdma_intr_0_pri_pending_f(void)
+{
+ return 0x8000000;
+}
+static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 pbdma_intr_0_pbseg_pending_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 pbdma_intr_0_signature_pending_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 pbdma_intr_1_r(u32 i)
+{
+ return 0x00040148 + i*8192;
+}
+static inline u32 pbdma_intr_en_0_r(u32 i)
+{
+ return 0x0004010c + i*8192;
+}
+static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void)
+{
+ return 0x100;
+}
+static inline u32 pbdma_intr_en_1_r(u32 i)
+{
+ return 0x0004014c + i*8192;
+}
+static inline u32 pbdma_intr_stall_r(u32 i)
+{
+ return 0x0004013c + i*8192;
+}
+static inline u32 pbdma_intr_stall_lbreq_enabled_f(void)
+{
+ return 0x100;
+}
+static inline u32 pbdma_udma_nop_r(void)
+{
+ return 0x00000008;
+}
+#endif
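A short sketch of the per-unit indexing used throughout this header: registers that take a u32 i are PBDMA instances spaced 8192 bytes apart. The read32()/write32() helpers are hypothetical stand-ins for the driver's MMIO accessors, and write-1-to-clear acknowledgement is an assumption made for illustration.

#include <linux/types.h>	/* u32 */
#include "hw_pbdma_gk20a.h"

extern u32 read32(u32 offset);			/* assumed MMIO read helper */
extern void write32(u32 offset, u32 val);	/* assumed MMIO write helper */

static void pbdma_isr_sketch(u32 pbdma_id)
{
	/* pbdma_id selects the unit; the generated _r(i) applies the
	 * 8192-byte stride. */
	u32 intr = read32(pbdma_intr_0_r(pbdma_id));

	if (intr & pbdma_intr_0_acquire_pending_f()) {
		/* semaphore/syncpoint acquire timed out */
	}
	if (intr & (pbdma_intr_0_gpentry_pending_f() |
		    pbdma_intr_0_pbentry_pending_f())) {
		/* malformed GPFIFO/pushbuffer entry */
	}

	/* Acknowledge by writing the pending bits back (assumed
	 * write-1-to-clear semantics for this sketch). */
	write32(pbdma_intr_0_r(pbdma_id), intr);
}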
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h
new file mode 100644
index 000000000000..d40076139aa6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringmaster_gk20a.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_pri_ringmaster_gk20a_h_
+#define _hw_pri_ringmaster_gk20a_h_
+
+static inline u32 pri_ringmaster_command_r(void)
+{
+ return 0x0012004c;
+}
+static inline u32 pri_ringmaster_command_cmd_m(void)
+{
+ return 0x3f << 0;
+}
+static inline u32 pri_ringmaster_command_cmd_v(u32 r)
+{
+ return (r >> 0) & 0x3f;
+}
+static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)
+{
+ return 0x1;
+}
+static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void)
+{
+ return 0x2;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void)
+{
+ return 0x3;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringmaster_command_data_r(void)
+{
+ return 0x00120048;
+}
+static inline u32 pri_ringmaster_start_results_r(void)
+{
+ return 0x00120050;
+}
+static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pri_ringmaster_intr_status0_r(void)
+{
+ return 0x00120058;
+}
+static inline u32 pri_ringmaster_intr_status1_r(void)
+{
+ return 0x0012005c;
+}
+static inline u32 pri_ringmaster_global_ctl_r(void)
+{
+ return 0x00120060;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void)
+{
+ return 0x1;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringmaster_enum_fbp_r(void)
+{
+ return 0x00120074;
+}
+static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 pri_ringmaster_enum_gpc_r(void)
+{
+ return 0x00120078;
+}
+static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+#endif
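A sketch of the command/enumeration pattern these accessors support, assuming hypothetical read32()/write32() helpers; the poll loop is illustrative only. It shows the read-modify-write idiom: ~_m() clears a field, _f() installs the new value.

#include <linux/types.h>	/* u32 */
#include "hw_pri_ringmaster_gk20a.h"

extern u32 read32(u32 offset);			/* assumed MMIO read helper */
extern void write32(u32 offset, u32 val);	/* assumed MMIO write helper */

static u32 ring_enumerate_sketch(void)
{
	u32 cmd = read32(pri_ringmaster_command_r());

	/* Clear the cmd field with the mask, then install the
	 * enumerate_stations command already shifted into place. */
	cmd &= ~pri_ringmaster_command_cmd_m();
	cmd |= pri_ringmaster_command_cmd_enumerate_stations_f();
	write32(pri_ringmaster_command_r(), cmd);

	/* Wait for the command field to return to no_cmd, then read
	 * back how many FBPs the ring enumerated. */
	while (pri_ringmaster_command_cmd_v(
			read32(pri_ringmaster_command_r())) !=
	       pri_ringmaster_command_cmd_no_cmd_v())
		;

	return pri_ringmaster_enum_fbp_count_v(
		read32(pri_ringmaster_enum_fbp_r()));
}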
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h
new file mode 100644
index 000000000000..db16a8de991e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_fbp_gk20a.h
@@ -0,0 +1,226 @@
+/*
+ * drivers/video/tegra/host/gk20a/hw_pri_ringstation_fbp_gk20a.h
+ *
+ * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef __hw_pri_ringstation_fbp_gk20a_h__
+#define __hw_pri_ringstation_fbp_gk20a_h__
+/* This file is autogenerated. Do not edit. */
+
+static inline u32 pri_ringstation_fbp_master_config_r(u32 i)
+{
+ return 0x00124300+((i)*4);
+}
+static inline u32 pri_ringstation_fbp_master_config__size_1_v(void)
+{
+ return 64;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_s(void)
+{
+ return 18;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_f(u32 v)
+{
+ return (v & 0x3ffff) << 0;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_m(void)
+{
+ return 0x3ffff << 0;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_v(u32 r)
+{
+ return (r >> 0) & 0x3ffff;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_i_v(void)
+{
+ return 0x00000064;
+}
+static inline u32 pri_ringstation_fbp_master_config_timeout_i_f(void)
+{
+ return 0x64;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_s(void)
+{
+ return 1;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_f(u32 v)
+{
+ return (v & 0x1) << 30;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_m(void)
+{
+ return 0x1 << 30;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_v(u32 r)
+{
+ return (r >> 30) & 0x1;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_error_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_error_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pri_ringstation_fbp_master_config_fs_action_soldier_on_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_s(void)
+{
+ return 1;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_error_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_error_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pri_ringstation_fbp_master_config_reset_action_soldier_on_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_m(void)
+{
+ return 0x7 << 20;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_v(u32 r)
+{
+ return (r >> 20) & 0x7;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_setup_clocks_i_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_m(void)
+{
+ return 0x7 << 24;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_v(u32 r)
+{
+ return (r >> 24) & 0x7;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_wait_clocks_i_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_f(u32 v)
+{
+ return (v & 0x7) << 27;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_m(void)
+{
+ return 0x7 << 27;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_v(u32 r)
+{
+ return (r >> 27) & 0x7;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_fbp_master_config_hold_clocks_i_f(void)
+{
+ return 0x0;
+}
+
+#endif /* __hw_pri_ringstation_fbp_gk20a_h__ */
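This header generates the full _s()/_f()/_m()/_v() quartet for each field; the sketch below (hypothetical read32()/write32() helpers, illustrative range check) shows how they cooperate: _s() gives the field width for validation, _m() clears the old value, _f() shifts the new one into place.

#include <linux/types.h>	/* u32 */
#include "hw_pri_ringstation_fbp_gk20a.h"

extern u32 read32(u32 offset);			/* assumed MMIO read helper */
extern void write32(u32 offset, u32 val);	/* assumed MMIO write helper */

static void fbp_station_timeout_sketch(u32 station, u32 timeout)
{
	u32 reg = read32(pri_ringstation_fbp_master_config_r(station));

	/* Reject values wider than the 18-bit field reported by _s(). */
	if (timeout >= (1u << pri_ringstation_fbp_master_config_timeout_s()))
		return;

	reg &= ~pri_ringstation_fbp_master_config_timeout_m();
	reg |= pri_ringstation_fbp_master_config_timeout_f(timeout);
	write32(pri_ringstation_fbp_master_config_r(station), reg);
}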
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h
new file mode 100644
index 000000000000..e8aad933336d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_gpc_gk20a.h
@@ -0,0 +1,226 @@
+/*
+ * drivers/video/tegra/host/gk20a/hw_pri_ringstation_gpc_gk20a.h
+ *
+ * Copyright (c) 2012-2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef __hw_pri_ringstation_gpc_gk20a_h__
+#define __hw_pri_ringstation_gpc_gk20a_h__
+/* This file is autogenerated. Do not edit. */
+
+static inline u32 pri_ringstation_gpc_master_config_r(u32 i)
+{
+ return 0x00128300+((i)*4);
+}
+static inline u32 pri_ringstation_gpc_master_config__size_1_v(void)
+{
+ return 64;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_s(void)
+{
+ return 18;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_f(u32 v)
+{
+ return (v & 0x3ffff) << 0;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_m(void)
+{
+ return 0x3ffff << 0;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_v(u32 r)
+{
+ return (r >> 0) & 0x3ffff;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_i_v(void)
+{
+ return 0x00000064;
+}
+static inline u32 pri_ringstation_gpc_master_config_timeout_i_f(void)
+{
+ return 0x64;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_s(void)
+{
+ return 1;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_f(u32 v)
+{
+ return (v & 0x1) << 30;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_m(void)
+{
+ return 0x1 << 30;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_v(u32 r)
+{
+ return (r >> 30) & 0x1;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_error_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_error_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pri_ringstation_gpc_master_config_fs_action_soldier_on_f(void)
+{
+ return 0x40000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_s(void)
+{
+ return 1;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_error_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_error_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pri_ringstation_gpc_master_config_reset_action_soldier_on_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_f(u32 v)
+{
+ return (v & 0x7) << 20;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_m(void)
+{
+ return 0x7 << 20;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_v(u32 r)
+{
+ return (r >> 20) & 0x7;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_setup_clocks_i_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_f(u32 v)
+{
+ return (v & 0x7) << 24;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_m(void)
+{
+ return 0x7 << 24;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_v(u32 r)
+{
+ return (r >> 24) & 0x7;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_wait_clocks_i_f(void)
+{
+ return 0x0;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_s(void)
+{
+ return 3;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_f(u32 v)
+{
+ return (v & 0x7) << 27;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_m(void)
+{
+ return 0x7 << 27;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_v(u32 r)
+{
+ return (r >> 27) & 0x7;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pri_ringstation_gpc_master_config_hold_clocks_i_f(void)
+{
+ return 0x0;
+}
+
+#endif /* __hw_pri_ringstation_gpc_gk20a_h__ */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h
new file mode 100644
index 000000000000..c281dd54dea9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pri_ringstation_sys_gk20a.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_pri_ringstation_sys_gk20a_h_
+#define _hw_pri_ringstation_sys_gk20a_h_
+
+static inline u32 pri_ringstation_sys_master_config_r(u32 i)
+{
+ return 0x00122300 + i*4;
+}
+static inline u32 pri_ringstation_sys_decode_config_r(void)
+{
+ return 0x00122204;
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_m(void)
+{
+ return 0x7 << 0;
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void)
+{
+ return 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
new file mode 100644
index 000000000000..93c55c307c75
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_proj_gk20a_h_
+#define _hw_proj_gk20a_h_
+
+static inline u32 proj_gpc_base_v(void)
+{
+ return 0x00500000;
+}
+static inline u32 proj_gpc_shared_base_v(void)
+{
+ return 0x00418000;
+}
+static inline u32 proj_gpc_stride_v(void)
+{
+ return 0x00008000;
+}
+static inline u32 proj_ltc_stride_v(void)
+{
+ return 0x00002000;
+}
+static inline u32 proj_lts_stride_v(void)
+{
+ return 0x00000400;
+}
+static inline u32 proj_ppc_in_gpc_base_v(void)
+{
+ return 0x00003000;
+}
+static inline u32 proj_ppc_in_gpc_stride_v(void)
+{
+ return 0x00000200;
+}
+static inline u32 proj_rop_base_v(void)
+{
+ return 0x00410000;
+}
+static inline u32 proj_rop_shared_base_v(void)
+{
+ return 0x00408800;
+}
+static inline u32 proj_rop_stride_v(void)
+{
+ return 0x00000400;
+}
+static inline u32 proj_tpc_in_gpc_base_v(void)
+{
+ return 0x00004000;
+}
+static inline u32 proj_tpc_in_gpc_stride_v(void)
+{
+ return 0x00000800;
+}
+static inline u32 proj_tpc_in_gpc_shared_base_v(void)
+{
+ return 0x00001800;
+}
+static inline u32 proj_host_num_pbdma_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_fbps_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_gpcs_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_zcull_banks_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 proj_scal_max_gpcs_v(void)
+{
+ return 0x00000020;
+}
+static inline u32 proj_scal_max_tpc_per_gpc_v(void)
+{
+ return 0x00000008;
+}
+#endif
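Unlike the other headers, proj_* values describe chip topology (bases, strides, unit counts) rather than individual registers; per-unit addresses are formed as base + index * stride. A minimal illustration, with the helper name and offset parameter being hypothetical:

#include <linux/types.h>	/* u32 */
#include "hw_proj_gk20a.h"

/* Hypothetical helper: compute the unicast address of a register at
 * 'tpc_offset' within TPC 'tpc' of GPC 'gpc'. */
static u32 gpc_tpc_reg_addr(u32 gpc, u32 tpc, u32 tpc_offset)
{
	return proj_gpc_base_v() +
	       gpc * proj_gpc_stride_v() +
	       proj_tpc_in_gpc_base_v() +
	       tpc * proj_tpc_in_gpc_stride_v() +
	       tpc_offset;
}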
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h
new file mode 100644
index 000000000000..d7d26b806cd2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pwr_gk20a.h
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_pwr_gk20a_h_
+#define _hw_pwr_gk20a_h_
+
+static inline u32 pwr_falcon_irqsset_r(void)
+{
+ return 0x0010a000;
+}
+static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
+{
+ return 0x40;
+}
+static inline u32 pwr_falcon_irqsclr_r(void)
+{
+ return 0x0010a004;
+}
+static inline u32 pwr_falcon_irqstat_r(void)
+{
+ return 0x0010a008;
+}
+static inline u32 pwr_falcon_irqstat_halt_true_f(void)
+{
+ return 0x10;
+}
+static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
+{
+ return 0x20;
+}
+static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
+{
+ return 0x40;
+}
+static inline u32 pwr_falcon_irqmode_r(void)
+{
+ return 0x0010a00c;
+}
+static inline u32 pwr_falcon_irqmset_r(void)
+{
+ return 0x0010a010;
+}
+static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
+{
+ return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
+{
+ return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqmclr_r(void)
+{
+ return 0x0010a014;
+}
+static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
+{
+ return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
+{
+ return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_irqmask_r(void)
+{
+ return 0x0010a018;
+}
+static inline u32 pwr_falcon_irqdest_r(void)
+{
+ return 0x0010a01c;
+}
+static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
+{
+ return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
+{
+ return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
+{
+ return (v & 0x1) << 16;
+}
+static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
+{
+ return (v & 0x1) << 17;
+}
+static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
+{
+ return (v & 0x1) << 18;
+}
+static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
+{
+ return (v & 0x1) << 19;
+}
+static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
+{
+ return (v & 0x1) << 20;
+}
+static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
+{
+ return (v & 0x1) << 21;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
+{
+ return (v & 0x1) << 22;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
+{
+ return (v & 0x1) << 23;
+}
+static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+static inline u32 pwr_falcon_curctx_r(void)
+{
+ return 0x0010a050;
+}
+static inline u32 pwr_falcon_nxtctx_r(void)
+{
+ return 0x0010a054;
+}
+static inline u32 pwr_falcon_mailbox0_r(void)
+{
+ return 0x0010a040;
+}
+static inline u32 pwr_falcon_mailbox1_r(void)
+{
+ return 0x0010a044;
+}
+static inline u32 pwr_falcon_itfen_r(void)
+{
+ return 0x0010a048;
+}
+static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
+{
+ return 0x1;
+}
+static inline u32 pwr_falcon_idlestate_r(void)
+{
+ return 0x0010a04c;
+}
+static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
+{
+ return (r >> 1) & 0x7fff;
+}
+static inline u32 pwr_falcon_os_r(void)
+{
+ return 0x0010a080;
+}
+static inline u32 pwr_falcon_engctl_r(void)
+{
+ return 0x0010a0a4;
+}
+static inline u32 pwr_falcon_cpuctl_r(void)
+{
+ return 0x0010a100;
+}
+static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_bootvec_r(void)
+{
+ return 0x0010a104;
+}
+static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 pwr_falcon_dmactl_r(void)
+{
+ return 0x0010a10c;
+}
+static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 pwr_falcon_hwcfg_r(void)
+{
+ return 0x0010a108;
+}
+static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
+{
+ return (r >> 0) & 0x1ff;
+}
+static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
+{
+ return (r >> 9) & 0x1ff;
+}
+static inline u32 pwr_falcon_dmatrfbase_r(void)
+{
+ return 0x0010a110;
+}
+static inline u32 pwr_falcon_dmatrfmoffs_r(void)
+{
+ return 0x0010a114;
+}
+static inline u32 pwr_falcon_dmatrfcmd_r(void)
+{
+ return 0x0010a118;
+}
+static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
+{
+ return (v & 0x7) << 8;
+}
+static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
+{
+ return (v & 0x7) << 12;
+}
+static inline u32 pwr_falcon_dmatrffboffs_r(void)
+{
+ return 0x0010a11c;
+}
+static inline u32 pwr_falcon_exterraddr_r(void)
+{
+ return 0x0010a168;
+}
+static inline u32 pwr_falcon_exterrstat_r(void)
+{
+ return 0x0010a16c;
+}
+static inline u32 pwr_falcon_exterrstat_valid_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
+{
+ return 0x0010a200;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
+{
+ return 4;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
+{
+ return (v & 0xf) << 0;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
+{
+ return 0xf << 0;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
+{
+ return (r >> 0) & 0xf;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
+{
+ return 0x8;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
+{
+ return 0xe;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
+{
+ return (v & 0x1f) << 8;
+}
+static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
+{
+ return 0x0010a20c;
+}
+static inline u32 pwr_falcon_dmemc_r(u32 i)
+{
+ return 0x0010a1c0 + i*8;
+}
+static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
+{
+ return (v & 0x3f) << 2;
+}
+static inline u32 pwr_falcon_dmemc_offs_m(void)
+{
+ return 0x3f << 2;
+}
+static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_dmemc_blk_m(void)
+{
+ return 0xff << 8;
+}
+static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
+{
+ return (v & 0x1) << 24;
+}
+static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
+{
+ return (v & 0x1) << 25;
+}
+static inline u32 pwr_falcon_dmemd_r(u32 i)
+{
+ return 0x0010a1c4 + i*8;
+}
+static inline u32 pwr_pmu_new_instblk_r(void)
+{
+ return 0x0010a480;
+}
+static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
+{
+ return (v & 0xfffffff) << 0;
+}
+static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
+{
+ return 0x20000000;
+}
+static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
+{
+ return (v & 0x1) << 30;
+}
+static inline u32 pwr_pmu_mutex_id_r(void)
+{
+ return 0x0010a488;
+}
+static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 pwr_pmu_mutex_id_value_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
+{
+ return 0x000000ff;
+}
+static inline u32 pwr_pmu_mutex_id_release_r(void)
+{
+ return 0x0010a48c;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_m(void)
+{
+ return 0xff << 0;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_pmu_mutex_r(u32 i)
+{
+ return 0x0010a580 + i*4;
+}
+static inline u32 pwr_pmu_mutex__size_1_v(void)
+{
+ return 0x00000010;
+}
+static inline u32 pwr_pmu_mutex_value_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 pwr_pmu_mutex_value_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_pmu_queue_head_r(u32 i)
+{
+ return 0x0010a4a0 + i*4;
+}
+static inline u32 pwr_pmu_queue_head__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 pwr_pmu_queue_head_address_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 pwr_pmu_queue_head_address_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 pwr_pmu_queue_tail_r(u32 i)
+{
+ return 0x0010a4b0 + i*4;
+}
+static inline u32 pwr_pmu_queue_tail__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 pwr_pmu_msgq_head_r(void)
+{
+ return 0x0010a4c8;
+}
+static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 pwr_pmu_msgq_tail_r(void)
+{
+ return 0x0010a4cc;
+}
+static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
+{
+ return (v & 0xffffffff) << 0;
+}
+static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
+{
+ return (r >> 0) & 0xffffffff;
+}
+static inline u32 pwr_pmu_idle_mask_r(u32 i)
+{
+ return 0x0010a504 + i*16;
+}
+static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
+{
+ return 0x1;
+}
+static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
+{
+ return 0x200000;
+}
+static inline u32 pwr_pmu_idle_count_r(u32 i)
+{
+ return 0x0010a508 + i*16;
+}
+static inline u32 pwr_pmu_idle_count_value_f(u32 v)
+{
+ return (v & 0x7fffffff) << 0;
+}
+static inline u32 pwr_pmu_idle_count_value_v(u32 r)
+{
+ return (r >> 0) & 0x7fffffff;
+}
+static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
+{
+ return 0x0010a50c + i*16;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
+{
+ return 0x2;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
+{
+ return 0x3;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
+{
+ return 0x0010a9f0 + i*8;
+}
+static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
+{
+ return 0x0010a9f4 + i*8;
+}
+static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
+{
+ return 0x0010aa30 + i*8;
+}
+static inline u32 pwr_pmu_debug_r(u32 i)
+{
+ return 0x0010a5c0 + i*4;
+}
+static inline u32 pwr_pmu_debug__size_1_v(void)
+{
+ return 0x00000004;
+}
+static inline u32 pwr_pmu_mailbox_r(u32 i)
+{
+ return 0x0010a450 + i*4;
+}
+static inline u32 pwr_pmu_mailbox__size_1_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 pwr_pmu_bar0_addr_r(void)
+{
+ return 0x0010a7a0;
+}
+static inline u32 pwr_pmu_bar0_data_r(void)
+{
+ return 0x0010a7a4;
+}
+static inline u32 pwr_pmu_bar0_ctl_r(void)
+{
+ return 0x0010a7ac;
+}
+static inline u32 pwr_pmu_bar0_timeout_r(void)
+{
+ return 0x0010a7a8;
+}
+static inline u32 pwr_pmu_bar0_fecs_error_r(void)
+{
+ return 0x0010a988;
+}
+static inline u32 pwr_pmu_bar0_error_status_r(void)
+{
+ return 0x0010a7b0;
+}
+static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
+{
+ return 0x0010a6c0 + i*4;
+}
+static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
+{
+ return 0x0010a6e8 + i*4;
+}
+static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
+{
+ return 0x0010a710 + i*4;
+}
+static inline u32 pwr_pmu_pg_intren_r(u32 i)
+{
+ return 0x0010a760 + i*4;
+}
+static inline u32 pwr_fbif_transcfg_r(u32 i)
+{
+ return 0x0010a600 + i*4;
+}
+static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
+{
+ return 0x1;
+}
+static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
+{
+ return 0x2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_s(void)
+{
+ return 1;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
+{
+ return 0x0;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
+{
+ return 0x4;
+}
+#endif
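A usage sketch (editorial, not part of the patch): the generated helpers above are meant to be OR'd together to build register values, and indexed registers are addressed through the _r(i)/__size_1_v() pair. The include name below is inferred from the pwr_ prefix and the helper function names are placeholders; only the accessors defined in this header are real.

#include <linux/types.h>
#include "hw_pwr_gk20a.h"	/* this header; name inferred from the pwr_ prefix */

static u32 pmu_fbif_vidmem_transcfg(void)
{
	/* Two independent fields of the same register OR'd together:
	 * physical accesses through this aperture target local FB. */
	return pwr_fbif_transcfg_target_local_fb_f() |
	       pwr_fbif_transcfg_mem_type_physical_f();
}

static u32 pmu_queue_tail_reg(u32 i)
{
	/* _r(i) is the MMIO offset of the i-th TAIL register;
	 * __size_1_v() says how many of them exist. */
	return i < pwr_pmu_queue_tail__size_1_v() ?
		pwr_pmu_queue_tail_r(i) : 0;
}

Emitting inline functions rather than macros keeps these accessors type-checked and free of argument re-evaluation, which is presumably why the generator takes this form.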
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h
new file mode 100644
index 000000000000..7eff3881e864
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_ram_gk20a.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_ram_gk20a_h_
+#define _hw_ram_gk20a_h_
+
+static inline u32 ram_in_ramfc_s(void)
+{
+ return 4096;
+}
+static inline u32 ram_in_ramfc_w(void)
+{
+ return 0;
+}
+static inline u32 ram_in_page_dir_base_target_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 ram_in_page_dir_base_target_w(void)
+{
+ return 128;
+}
+static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
+{
+ return 0x0;
+}
+static inline u32 ram_in_page_dir_base_vol_w(void)
+{
+ return 128;
+}
+static inline u32 ram_in_page_dir_base_vol_true_f(void)
+{
+ return 0x4;
+}
+static inline u32 ram_in_page_dir_base_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_page_dir_base_lo_w(void)
+{
+ return 128;
+}
+static inline u32 ram_in_page_dir_base_hi_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 ram_in_page_dir_base_hi_w(void)
+{
+ return 129;
+}
+static inline u32 ram_in_adr_limit_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_adr_limit_lo_w(void)
+{
+ return 130;
+}
+static inline u32 ram_in_adr_limit_hi_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 ram_in_adr_limit_hi_w(void)
+{
+ return 131;
+}
+static inline u32 ram_in_engine_cs_w(void)
+{
+ return 132;
+}
+static inline u32 ram_in_engine_cs_wfi_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 ram_in_engine_cs_wfi_f(void)
+{
+ return 0x0;
+}
+static inline u32 ram_in_engine_cs_fg_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ram_in_engine_cs_fg_f(void)
+{
+ return 0x8;
+}
+static inline u32 ram_in_gr_cs_w(void)
+{
+ return 132;
+}
+static inline u32 ram_in_gr_cs_wfi_f(void)
+{
+ return 0x0;
+}
+static inline u32 ram_in_gr_wfi_target_w(void)
+{
+ return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_w(void)
+{
+ return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_f(void)
+{
+ return 0x0;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
+{
+ return 0x4;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
+{
+ return 132;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
+{
+ return 133;
+}
+static inline u32 ram_in_base_shift_v(void)
+{
+ return 0x0000000c;
+}
+static inline u32 ram_in_alloc_size_v(void)
+{
+ return 0x00001000;
+}
+static inline u32 ram_fc_size_val_v(void)
+{
+ return 0x00000200;
+}
+static inline u32 ram_fc_gp_put_w(void)
+{
+ return 0;
+}
+static inline u32 ram_fc_userd_w(void)
+{
+ return 2;
+}
+static inline u32 ram_fc_userd_hi_w(void)
+{
+ return 3;
+}
+static inline u32 ram_fc_signature_w(void)
+{
+ return 4;
+}
+static inline u32 ram_fc_gp_get_w(void)
+{
+ return 5;
+}
+static inline u32 ram_fc_pb_get_w(void)
+{
+ return 6;
+}
+static inline u32 ram_fc_pb_get_hi_w(void)
+{
+ return 7;
+}
+static inline u32 ram_fc_pb_top_level_get_w(void)
+{
+ return 8;
+}
+static inline u32 ram_fc_pb_top_level_get_hi_w(void)
+{
+ return 9;
+}
+static inline u32 ram_fc_acquire_w(void)
+{
+ return 12;
+}
+static inline u32 ram_fc_semaphorea_w(void)
+{
+ return 14;
+}
+static inline u32 ram_fc_semaphoreb_w(void)
+{
+ return 15;
+}
+static inline u32 ram_fc_semaphorec_w(void)
+{
+ return 16;
+}
+static inline u32 ram_fc_semaphored_w(void)
+{
+ return 17;
+}
+static inline u32 ram_fc_gp_base_w(void)
+{
+ return 18;
+}
+static inline u32 ram_fc_gp_base_hi_w(void)
+{
+ return 19;
+}
+static inline u32 ram_fc_gp_fetch_w(void)
+{
+ return 20;
+}
+static inline u32 ram_fc_pb_fetch_w(void)
+{
+ return 21;
+}
+static inline u32 ram_fc_pb_fetch_hi_w(void)
+{
+ return 22;
+}
+static inline u32 ram_fc_pb_put_w(void)
+{
+ return 23;
+}
+static inline u32 ram_fc_pb_put_hi_w(void)
+{
+ return 24;
+}
+static inline u32 ram_fc_pb_header_w(void)
+{
+ return 33;
+}
+static inline u32 ram_fc_pb_count_w(void)
+{
+ return 34;
+}
+static inline u32 ram_fc_subdevice_w(void)
+{
+ return 37;
+}
+static inline u32 ram_fc_formats_w(void)
+{
+ return 39;
+}
+static inline u32 ram_fc_syncpointa_w(void)
+{
+ return 41;
+}
+static inline u32 ram_fc_syncpointb_w(void)
+{
+ return 42;
+}
+static inline u32 ram_fc_target_w(void)
+{
+ return 43;
+}
+static inline u32 ram_fc_hce_ctrl_w(void)
+{
+ return 57;
+}
+static inline u32 ram_fc_chid_w(void)
+{
+ return 58;
+}
+static inline u32 ram_fc_chid_id_f(u32 v)
+{
+ return (v & 0xfff) << 0;
+}
+static inline u32 ram_fc_chid_id_w(void)
+{
+ return 0;
+}
+static inline u32 ram_fc_eng_timeslice_w(void)
+{
+ return 62;
+}
+static inline u32 ram_fc_pb_timeslice_w(void)
+{
+ return 63;
+}
+static inline u32 ram_userd_base_shift_v(void)
+{
+ return 0x00000009;
+}
+static inline u32 ram_userd_chan_size_v(void)
+{
+ return 0x00000200;
+}
+static inline u32 ram_userd_put_w(void)
+{
+ return 16;
+}
+static inline u32 ram_userd_get_w(void)
+{
+ return 17;
+}
+static inline u32 ram_userd_ref_w(void)
+{
+ return 18;
+}
+static inline u32 ram_userd_put_hi_w(void)
+{
+ return 19;
+}
+static inline u32 ram_userd_ref_threshold_w(void)
+{
+ return 20;
+}
+static inline u32 ram_userd_top_level_get_w(void)
+{
+ return 22;
+}
+static inline u32 ram_userd_top_level_get_hi_w(void)
+{
+ return 23;
+}
+static inline u32 ram_userd_get_hi_w(void)
+{
+ return 24;
+}
+static inline u32 ram_userd_gp_get_w(void)
+{
+ return 34;
+}
+static inline u32 ram_userd_gp_put_w(void)
+{
+ return 35;
+}
+static inline u32 ram_userd_gp_top_level_get_w(void)
+{
+ return 22;
+}
+static inline u32 ram_userd_gp_top_level_get_hi_w(void)
+{
+ return 23;
+}
+static inline u32 ram_rl_entry_size_v(void)
+{
+ return 0x00000008;
+}
+#endif
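A usage sketch (editorial, not from the patch): unlike the MMIO headers, the _w() helpers here index 32-bit words of a CPU-mapped instance block. example_set_pdb(), inst_mem and pdb_addr are placeholder names; only the accessors defined above are real.

#include <linux/kernel.h>
#include <linux/types.h>
#include "hw_ram_gk20a.h"

static void example_set_pdb(u32 *inst_mem, u64 pdb_addr)
{
	u32 lo = lower_32_bits(pdb_addr) >> ram_in_base_shift_v();

	/* Target, volatile and address-low share word 128, so their
	 * _f() values are OR'd into a single word. */
	inst_mem[ram_in_page_dir_base_lo_w()] =
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(lo);

	/* The high address bits land in the following word (129). */
	inst_mem[ram_in_page_dir_base_hi_w()] =
		ram_in_page_dir_base_hi_f(upper_32_bits(pdb_addr));
}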
diff --git a/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h
new file mode 100644
index 000000000000..b1e6658d2338
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h
@@ -0,0 +1,2150 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/hw_sim_gk20a.h
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef __hw_sim_gk20a_h__
+#define __hw_sim_gk20a_h__
+/* This file is autogenerated. Do not edit. */
+
+static inline u32 sim_send_ring_r(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_target_s(void)
+{
+ return 2;
+}
+static inline u32 sim_send_ring_target_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 sim_send_ring_target_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 sim_send_ring_target_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 sim_send_ring_target_phys_init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_target_phys_init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_send_ring_target_phys__init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_target_phys__init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_send_ring_target_phys__prod_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_target_phys__prod_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_send_ring_target_phys_nvm_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_target_phys_nvm_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_send_ring_target_phys_pci_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_send_ring_target_phys_pci_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_send_ring_target_phys_pci_coherent_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_send_ring_target_phys_pci_coherent_f(void)
+{
+ return 0x3;
+}
+static inline u32 sim_send_ring_status_s(void)
+{
+ return 1;
+}
+static inline u32 sim_send_ring_status_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_send_ring_status_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_send_ring_status_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_send_ring_status_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_status_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_status__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_status__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_status__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_status__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_status_invalid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_status_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_status_valid_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_send_ring_size_s(void)
+{
+ return 2;
+}
+static inline u32 sim_send_ring_size_f(u32 v)
+{
+ return (v & 0x3) << 4;
+}
+static inline u32 sim_send_ring_size_m(void)
+{
+ return 0x3 << 4;
+}
+static inline u32 sim_send_ring_size_v(u32 r)
+{
+ return (r >> 4) & 0x3;
+}
+static inline u32 sim_send_ring_size_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_size_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_size__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_size__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_size__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_size__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_size_4kb_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_size_4kb_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_size_8kb_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_size_8kb_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_send_ring_size_12kb_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_send_ring_size_12kb_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_send_ring_size_16kb_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_send_ring_size_16kb_f(void)
+{
+ return 0x30;
+}
+static inline u32 sim_send_ring_gp_in_ring_s(void)
+{
+ return 1;
+}
+static inline u32 sim_send_ring_gp_in_ring_f(u32 v)
+{
+ return (v & 0x1) << 11;
+}
+static inline u32 sim_send_ring_gp_in_ring_m(void)
+{
+ return 0x1 << 11;
+}
+static inline u32 sim_send_ring_gp_in_ring_v(u32 r)
+{
+ return (r >> 11) & 0x1;
+}
+static inline u32 sim_send_ring_gp_in_ring__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_gp_in_ring__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_gp_in_ring__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_gp_in_ring__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_gp_in_ring_no_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_gp_in_ring_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_gp_in_ring_yes_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_send_ring_gp_in_ring_yes_f(void)
+{
+ return 0x800;
+}
+static inline u32 sim_send_ring_addr_lo_s(void)
+{
+ return 20;
+}
+static inline u32 sim_send_ring_addr_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 sim_send_ring_addr_lo_m(void)
+{
+ return 0xfffff << 12;
+}
+static inline u32 sim_send_ring_addr_lo_v(u32 r)
+{
+ return (r >> 12) & 0xfffff;
+}
+static inline u32 sim_send_ring_addr_lo__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_addr_lo__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_addr_lo__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_addr_lo__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_hi_r(void)
+{
+ return 0x00000004;
+}
+static inline u32 sim_send_ring_hi_addr_s(void)
+{
+ return 20;
+}
+static inline u32 sim_send_ring_hi_addr_f(u32 v)
+{
+ return (v & 0xfffff) << 0;
+}
+static inline u32 sim_send_ring_hi_addr_m(void)
+{
+ return 0xfffff << 0;
+}
+static inline u32 sim_send_ring_hi_addr_v(u32 r)
+{
+ return (r >> 0) & 0xfffff;
+}
+static inline u32 sim_send_ring_hi_addr__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_hi_addr__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_ring_hi_addr__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_send_ring_hi_addr__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_send_put_r(void)
+{
+ return 0x00000008;
+}
+static inline u32 sim_send_put_pointer_s(void)
+{
+ return 29;
+}
+static inline u32 sim_send_put_pointer_f(u32 v)
+{
+ return (v & 0x1fffffff) << 3;
+}
+static inline u32 sim_send_put_pointer_m(void)
+{
+ return 0x1fffffff << 3;
+}
+static inline u32 sim_send_put_pointer_v(u32 r)
+{
+ return (r >> 3) & 0x1fffffff;
+}
+static inline u32 sim_send_get_r(void)
+{
+ return 0x0000000c;
+}
+static inline u32 sim_send_get_pointer_s(void)
+{
+ return 29;
+}
+static inline u32 sim_send_get_pointer_f(u32 v)
+{
+ return (v & 0x1fffffff) << 3;
+}
+static inline u32 sim_send_get_pointer_m(void)
+{
+ return 0x1fffffff << 3;
+}
+static inline u32 sim_send_get_pointer_v(u32 r)
+{
+ return (r >> 3) & 0x1fffffff;
+}
+static inline u32 sim_recv_ring_r(void)
+{
+ return 0x00000010;
+}
+static inline u32 sim_recv_ring_target_s(void)
+{
+ return 2;
+}
+static inline u32 sim_recv_ring_target_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 sim_recv_ring_target_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 sim_recv_ring_target_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 sim_recv_ring_target_phys_init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_target_phys_init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_recv_ring_target_phys__init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_target_phys__init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_recv_ring_target_phys__prod_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_target_phys__prod_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_recv_ring_target_phys_nvm_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_target_phys_nvm_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_recv_ring_target_phys_pci_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_recv_ring_target_phys_pci_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_recv_ring_target_phys_pci_coherent_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_recv_ring_target_phys_pci_coherent_f(void)
+{
+ return 0x3;
+}
+static inline u32 sim_recv_ring_status_s(void)
+{
+ return 1;
+}
+static inline u32 sim_recv_ring_status_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_recv_ring_status_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_recv_ring_status_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_recv_ring_status_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_status_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_status__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_status__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_status__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_status__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_status_invalid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_status_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_status_valid_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_recv_ring_size_s(void)
+{
+ return 2;
+}
+static inline u32 sim_recv_ring_size_f(u32 v)
+{
+ return (v & 0x3) << 4;
+}
+static inline u32 sim_recv_ring_size_m(void)
+{
+ return 0x3 << 4;
+}
+static inline u32 sim_recv_ring_size_v(u32 r)
+{
+ return (r >> 4) & 0x3;
+}
+static inline u32 sim_recv_ring_size_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_size_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_size__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_size__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_size__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_size__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_size_4kb_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_size_4kb_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_size_8kb_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_size_8kb_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_recv_ring_size_12kb_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_recv_ring_size_12kb_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_recv_ring_size_16kb_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_recv_ring_size_16kb_f(void)
+{
+ return 0x30;
+}
+static inline u32 sim_recv_ring_gp_in_ring_s(void)
+{
+ return 1;
+}
+static inline u32 sim_recv_ring_gp_in_ring_f(u32 v)
+{
+ return (v & 0x1) << 11;
+}
+static inline u32 sim_recv_ring_gp_in_ring_m(void)
+{
+ return 0x1 << 11;
+}
+static inline u32 sim_recv_ring_gp_in_ring_v(u32 r)
+{
+ return (r >> 11) & 0x1;
+}
+static inline u32 sim_recv_ring_gp_in_ring__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_gp_in_ring__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_gp_in_ring__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_gp_in_ring__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_gp_in_ring_no_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_gp_in_ring_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_gp_in_ring_yes_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_recv_ring_gp_in_ring_yes_f(void)
+{
+ return 0x800;
+}
+static inline u32 sim_recv_ring_addr_lo_s(void)
+{
+ return 20;
+}
+static inline u32 sim_recv_ring_addr_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 sim_recv_ring_addr_lo_m(void)
+{
+ return 0xfffff << 12;
+}
+static inline u32 sim_recv_ring_addr_lo_v(u32 r)
+{
+ return (r >> 12) & 0xfffff;
+}
+static inline u32 sim_recv_ring_addr_lo__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_addr_lo__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_addr_lo__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_addr_lo__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_hi_r(void)
+{
+ return 0x00000014;
+}
+static inline u32 sim_recv_ring_hi_addr_s(void)
+{
+ return 20;
+}
+static inline u32 sim_recv_ring_hi_addr_f(u32 v)
+{
+ return (v & 0xfffff) << 0;
+}
+static inline u32 sim_recv_ring_hi_addr_m(void)
+{
+ return 0xfffff << 0;
+}
+static inline u32 sim_recv_ring_hi_addr_v(u32 r)
+{
+ return (r >> 0) & 0xfffff;
+}
+static inline u32 sim_recv_ring_hi_addr__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_hi_addr__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_ring_hi_addr__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_recv_ring_hi_addr__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_recv_put_r(void)
+{
+ return 0x00000018;
+}
+static inline u32 sim_recv_put_pointer_s(void)
+{
+ return 11;
+}
+static inline u32 sim_recv_put_pointer_f(u32 v)
+{
+ return (v & 0x7ff) << 3;
+}
+static inline u32 sim_recv_put_pointer_m(void)
+{
+ return 0x7ff << 3;
+}
+static inline u32 sim_recv_put_pointer_v(u32 r)
+{
+ return (r >> 3) & 0x7ff;
+}
+static inline u32 sim_recv_get_r(void)
+{
+ return 0x0000001c;
+}
+static inline u32 sim_recv_get_pointer_s(void)
+{
+ return 11;
+}
+static inline u32 sim_recv_get_pointer_f(u32 v)
+{
+ return (v & 0x7ff) << 3;
+}
+static inline u32 sim_recv_get_pointer_m(void)
+{
+ return 0x7ff << 3;
+}
+static inline u32 sim_recv_get_pointer_v(u32 r)
+{
+ return (r >> 3) & 0x7ff;
+}
+static inline u32 sim_config_r(void)
+{
+ return 0x00000020;
+}
+static inline u32 sim_config_mode_s(void)
+{
+ return 1;
+}
+static inline u32 sim_config_mode_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 sim_config_mode_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 sim_config_mode_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 sim_config_mode_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_config_mode_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_config_mode_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_mode_enabled_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_config_channels_s(void)
+{
+ return 7;
+}
+static inline u32 sim_config_channels_f(u32 v)
+{
+ return (v & 0x7f) << 1;
+}
+static inline u32 sim_config_channels_m(void)
+{
+ return 0x7f << 1;
+}
+static inline u32 sim_config_channels_v(u32 r)
+{
+ return (r >> 1) & 0x7f;
+}
+static inline u32 sim_config_channels_none_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_config_channels_none_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_config_cached_only_s(void)
+{
+ return 1;
+}
+static inline u32 sim_config_cached_only_f(u32 v)
+{
+ return (v & 0x1) << 8;
+}
+static inline u32 sim_config_cached_only_m(void)
+{
+ return 0x1 << 8;
+}
+static inline u32 sim_config_cached_only_v(u32 r)
+{
+ return (r >> 8) & 0x1;
+}
+static inline u32 sim_config_cached_only_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_config_cached_only_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_config_cached_only_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_cached_only_enabled_f(void)
+{
+ return 0x100;
+}
+static inline u32 sim_config_validity_s(void)
+{
+ return 2;
+}
+static inline u32 sim_config_validity_f(u32 v)
+{
+ return (v & 0x3) << 9;
+}
+static inline u32 sim_config_validity_m(void)
+{
+ return 0x3 << 9;
+}
+static inline u32 sim_config_validity_v(u32 r)
+{
+ return (r >> 9) & 0x3;
+}
+static inline u32 sim_config_validity__init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_validity__init_f(void)
+{
+ return 0x200;
+}
+static inline u32 sim_config_validity_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_validity_valid_f(void)
+{
+ return 0x200;
+}
+static inline u32 sim_config_simulation_s(void)
+{
+ return 2;
+}
+static inline u32 sim_config_simulation_f(u32 v)
+{
+ return (v & 0x3) << 12;
+}
+static inline u32 sim_config_simulation_m(void)
+{
+ return 0x3 << 12;
+}
+static inline u32 sim_config_simulation_v(u32 r)
+{
+ return (r >> 12) & 0x3;
+}
+static inline u32 sim_config_simulation_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_config_simulation_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_config_simulation_fmodel_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_simulation_fmodel_f(void)
+{
+ return 0x1000;
+}
+static inline u32 sim_config_simulation_rtlsim_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_config_simulation_rtlsim_f(void)
+{
+ return 0x2000;
+}
+static inline u32 sim_config_secondary_display_s(void)
+{
+ return 1;
+}
+static inline u32 sim_config_secondary_display_f(u32 v)
+{
+ return (v & 0x1) << 14;
+}
+static inline u32 sim_config_secondary_display_m(void)
+{
+ return 0x1 << 14;
+}
+static inline u32 sim_config_secondary_display_v(u32 r)
+{
+ return (r >> 14) & 0x1;
+}
+static inline u32 sim_config_secondary_display_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_config_secondary_display_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_config_secondary_display_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_config_secondary_display_enabled_f(void)
+{
+ return 0x4000;
+}
+static inline u32 sim_config_num_heads_s(void)
+{
+ return 8;
+}
+static inline u32 sim_config_num_heads_f(u32 v)
+{
+ return (v & 0xff) << 17;
+}
+static inline u32 sim_config_num_heads_m(void)
+{
+ return 0xff << 17;
+}
+static inline u32 sim_config_num_heads_v(u32 r)
+{
+ return (r >> 17) & 0xff;
+}
+static inline u32 sim_event_ring_r(void)
+{
+ return 0x00000030;
+}
+static inline u32 sim_event_ring_target_s(void)
+{
+ return 2;
+}
+static inline u32 sim_event_ring_target_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 sim_event_ring_target_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 sim_event_ring_target_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 sim_event_ring_target_phys_init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_target_phys_init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_event_ring_target_phys__init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_target_phys__init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_event_ring_target_phys__prod_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_target_phys__prod_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_event_ring_target_phys_nvm_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_target_phys_nvm_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_event_ring_target_phys_pci_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_event_ring_target_phys_pci_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_event_ring_target_phys_pci_coherent_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_event_ring_target_phys_pci_coherent_f(void)
+{
+ return 0x3;
+}
+static inline u32 sim_event_ring_status_s(void)
+{
+ return 1;
+}
+static inline u32 sim_event_ring_status_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_event_ring_status_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_event_ring_status_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_event_ring_status_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_status_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_status__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_status__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_status__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_status__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_status_invalid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_status_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_status_valid_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_event_ring_size_s(void)
+{
+ return 2;
+}
+static inline u32 sim_event_ring_size_f(u32 v)
+{
+ return (v & 0x3) << 4;
+}
+static inline u32 sim_event_ring_size_m(void)
+{
+ return 0x3 << 4;
+}
+static inline u32 sim_event_ring_size_v(u32 r)
+{
+ return (r >> 4) & 0x3;
+}
+static inline u32 sim_event_ring_size_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_size_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_size__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_size__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_size__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_size__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_size_4kb_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_size_4kb_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_size_8kb_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_size_8kb_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_event_ring_size_12kb_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_event_ring_size_12kb_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_event_ring_size_16kb_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_event_ring_size_16kb_f(void)
+{
+ return 0x30;
+}
+static inline u32 sim_event_ring_gp_in_ring_s(void)
+{
+ return 1;
+}
+static inline u32 sim_event_ring_gp_in_ring_f(u32 v)
+{
+ return (v & 0x1) << 11;
+}
+static inline u32 sim_event_ring_gp_in_ring_m(void)
+{
+ return 0x1 << 11;
+}
+static inline u32 sim_event_ring_gp_in_ring_v(u32 r)
+{
+ return (r >> 11) & 0x1;
+}
+static inline u32 sim_event_ring_gp_in_ring__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_gp_in_ring__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_gp_in_ring__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_gp_in_ring__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_gp_in_ring_no_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_gp_in_ring_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_gp_in_ring_yes_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_event_ring_gp_in_ring_yes_f(void)
+{
+ return 0x800;
+}
+static inline u32 sim_event_ring_addr_lo_s(void)
+{
+ return 20;
+}
+static inline u32 sim_event_ring_addr_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 sim_event_ring_addr_lo_m(void)
+{
+ return 0xfffff << 12;
+}
+static inline u32 sim_event_ring_addr_lo_v(u32 r)
+{
+ return (r >> 12) & 0xfffff;
+}
+static inline u32 sim_event_ring_addr_lo__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_addr_lo__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_addr_lo__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_addr_lo__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_hi_v(void)
+{
+ return 0x00000034;
+}
+static inline u32 sim_event_ring_hi_addr_s(void)
+{
+ return 20;
+}
+static inline u32 sim_event_ring_hi_addr_f(u32 v)
+{
+ return (v & 0xfffff) << 0;
+}
+static inline u32 sim_event_ring_hi_addr_m(void)
+{
+ return 0xfffff << 0;
+}
+static inline u32 sim_event_ring_hi_addr_v(u32 r)
+{
+ return (r >> 0) & 0xfffff;
+}
+static inline u32 sim_event_ring_hi_addr__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_hi_addr__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_ring_hi_addr__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_event_ring_hi_addr__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_event_put_r(void)
+{
+ return 0x00000038;
+}
+static inline u32 sim_event_put_pointer_s(void)
+{
+ return 30;
+}
+static inline u32 sim_event_put_pointer_f(u32 v)
+{
+ return (v & 0x3fffffff) << 2;
+}
+static inline u32 sim_event_put_pointer_m(void)
+{
+ return 0x3fffffff << 2;
+}
+static inline u32 sim_event_put_pointer_v(u32 r)
+{
+ return (r >> 2) & 0x3fffffff;
+}
+static inline u32 sim_event_get_r(void)
+{
+ return 0x0000003c;
+}
+static inline u32 sim_event_get_pointer_s(void)
+{
+ return 30;
+}
+static inline u32 sim_event_get_pointer_f(u32 v)
+{
+ return (v & 0x3fffffff) << 2;
+}
+static inline u32 sim_event_get_pointer_m(void)
+{
+ return 0x3fffffff << 2;
+}
+static inline u32 sim_event_get_pointer_v(u32 r)
+{
+ return (r >> 2) & 0x3fffffff;
+}
+static inline u32 sim_status_r(void)
+{
+ return 0x00000028;
+}
+static inline u32 sim_status_send_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_send_put_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 sim_status_send_put_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 sim_status_send_put_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 sim_status_send_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_send_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_send_put_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_send_put_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_send_put_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_send_put_pending_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_status_send_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_send_get_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 sim_status_send_get_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 sim_status_send_get_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 sim_status_send_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_send_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_send_get_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_send_get_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_send_get_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_send_get_pending_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_status_send_get_clear_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_send_get_clear_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_status_recv_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_recv_put_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 sim_status_recv_put_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 sim_status_recv_put_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 sim_status_recv_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_recv_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_recv_put_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_recv_put_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_recv_put_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_recv_put_pending_f(void)
+{
+ return 0x4;
+}
+static inline u32 sim_status_recv_put_clear_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_recv_put_clear_f(void)
+{
+ return 0x4;
+}
+static inline u32 sim_status_recv_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_recv_get_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_status_recv_get_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_status_recv_get_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_status_recv_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_recv_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_recv_get_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_recv_get_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_recv_get_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_recv_get_pending_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_status_event_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_event_put_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 sim_status_event_put_m(void)
+{
+ return 0x1 << 4;
+}
+static inline u32 sim_status_event_put_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 sim_status_event_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_event_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_event_put_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_event_put_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_event_put_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_event_put_pending_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_status_event_put_clear_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_event_put_clear_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_status_event_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_status_event_get_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 sim_status_event_get_m(void)
+{
+ return 0x1 << 5;
+}
+static inline u32 sim_status_event_get_v(u32 r)
+{
+ return (r >> 5) & 0x1;
+}
+static inline u32 sim_status_event_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_event_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_event_get_idle_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_status_event_get_idle_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_status_event_get_pending_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_status_event_get_pending_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_control_r(void)
+{
+ return 0x0000002c;
+}
+static inline u32 sim_control_send_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_send_put_f(u32 v)
+{
+ return (v & 0x1) << 0;
+}
+static inline u32 sim_control_send_put_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 sim_control_send_put_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 sim_control_send_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_send_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_send_put_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_send_put_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_send_put_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_send_put_enabled_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_control_send_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_send_get_f(u32 v)
+{
+ return (v & 0x1) << 1;
+}
+static inline u32 sim_control_send_get_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 sim_control_send_get_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 sim_control_send_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_send_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_send_get_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_send_get_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_send_get_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_send_get_enabled_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_control_recv_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_recv_put_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+static inline u32 sim_control_recv_put_m(void)
+{
+ return 0x1 << 2;
+}
+static inline u32 sim_control_recv_put_v(u32 r)
+{
+ return (r >> 2) & 0x1;
+}
+static inline u32 sim_control_recv_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_recv_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_recv_put_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_recv_put_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_recv_put_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_recv_put_enabled_f(void)
+{
+ return 0x4;
+}
+static inline u32 sim_control_recv_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_recv_get_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_control_recv_get_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_control_recv_get_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_control_recv_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_recv_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_recv_get_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_recv_get_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_recv_get_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_recv_get_enabled_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_control_event_put_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_event_put_f(u32 v)
+{
+ return (v & 0x1) << 4;
+}
+static inline u32 sim_control_event_put_m(void)
+{
+ return 0x1 << 4;
+}
+static inline u32 sim_control_event_put_v(u32 r)
+{
+ return (r >> 4) & 0x1;
+}
+static inline u32 sim_control_event_put__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_event_put__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_event_put_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_event_put_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_event_put_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_event_put_enabled_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_control_event_get_s(void)
+{
+ return 1;
+}
+static inline u32 sim_control_event_get_f(u32 v)
+{
+ return (v & 0x1) << 5;
+}
+static inline u32 sim_control_event_get_m(void)
+{
+ return 0x1 << 5;
+}
+static inline u32 sim_control_event_get_v(u32 r)
+{
+ return (r >> 5) & 0x1;
+}
+static inline u32 sim_control_event_get__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_event_get__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_event_get_disabled_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_control_event_get_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_control_event_get_enabled_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_control_event_get_enabled_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_dma_r(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_target_s(void)
+{
+ return 2;
+}
+static inline u32 sim_dma_target_f(u32 v)
+{
+ return (v & 0x3) << 0;
+}
+static inline u32 sim_dma_target_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 sim_dma_target_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 sim_dma_target_phys_init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_target_phys_init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_dma_target_phys__init_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_target_phys__init_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_dma_target_phys__prod_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_target_phys__prod_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_dma_target_phys_nvm_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_target_phys_nvm_f(void)
+{
+ return 0x1;
+}
+static inline u32 sim_dma_target_phys_pci_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_dma_target_phys_pci_f(void)
+{
+ return 0x2;
+}
+static inline u32 sim_dma_target_phys_pci_coherent_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_dma_target_phys_pci_coherent_f(void)
+{
+ return 0x3;
+}
+static inline u32 sim_dma_status_s(void)
+{
+ return 1;
+}
+static inline u32 sim_dma_status_f(u32 v)
+{
+ return (v & 0x1) << 3;
+}
+static inline u32 sim_dma_status_m(void)
+{
+ return 0x1 << 3;
+}
+static inline u32 sim_dma_status_v(u32 r)
+{
+ return (r >> 3) & 0x1;
+}
+static inline u32 sim_dma_status_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_status_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_status__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_status__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_status__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_status__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_status_invalid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_status_invalid_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_status_valid_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_status_valid_f(void)
+{
+ return 0x8;
+}
+static inline u32 sim_dma_size_s(void)
+{
+ return 2;
+}
+static inline u32 sim_dma_size_f(u32 v)
+{
+ return (v & 0x3) << 4;
+}
+static inline u32 sim_dma_size_m(void)
+{
+ return 0x3 << 4;
+}
+static inline u32 sim_dma_size_v(u32 r)
+{
+ return (r >> 4) & 0x3;
+}
+static inline u32 sim_dma_size_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_size_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_size__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_size__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_size__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_size__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_size_4kb_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_size_4kb_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_size_8kb_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 sim_dma_size_8kb_f(void)
+{
+ return 0x10;
+}
+static inline u32 sim_dma_size_12kb_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 sim_dma_size_12kb_f(void)
+{
+ return 0x20;
+}
+static inline u32 sim_dma_size_16kb_v(void)
+{
+ return 0x00000003;
+}
+static inline u32 sim_dma_size_16kb_f(void)
+{
+ return 0x30;
+}
+static inline u32 sim_dma_addr_lo_s(void)
+{
+ return 20;
+}
+static inline u32 sim_dma_addr_lo_f(u32 v)
+{
+ return (v & 0xfffff) << 12;
+}
+static inline u32 sim_dma_addr_lo_m(void)
+{
+ return 0xfffff << 12;
+}
+static inline u32 sim_dma_addr_lo_v(u32 r)
+{
+ return (r >> 12) & 0xfffff;
+}
+static inline u32 sim_dma_addr_lo__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_addr_lo__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_addr_lo__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_addr_lo__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_hi_r(void)
+{
+ return 0x00000004;
+}
+static inline u32 sim_dma_hi_addr_s(void)
+{
+ return 20;
+}
+static inline u32 sim_dma_hi_addr_f(u32 v)
+{
+ return (v & 0xfffff) << 0;
+}
+static inline u32 sim_dma_hi_addr_m(void)
+{
+ return 0xfffff << 0;
+}
+static inline u32 sim_dma_hi_addr_v(u32 r)
+{
+ return (r >> 0) & 0xfffff;
+}
+static inline u32 sim_dma_hi_addr__init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_hi_addr__init_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_dma_hi_addr__prod_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_dma_hi_addr__prod_f(void)
+{
+ return 0x0;
+}
+static inline u32 sim_msg_signature_r(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_msg_signature_valid_v(void)
+{
+ return 0x43505256;
+}
+static inline u32 sim_msg_length_r(void)
+{
+ return 0x00000004;
+}
+static inline u32 sim_msg_function_r(void)
+{
+ return 0x00000008;
+}
+static inline u32 sim_msg_function_sim_escape_read_v(void)
+{
+ return 0x00000023;
+}
+static inline u32 sim_msg_function_sim_escape_write_v(void)
+{
+ return 0x00000024;
+}
+static inline u32 sim_msg_result_r(void)
+{
+ return 0x0000000c;
+}
+static inline u32 sim_msg_result_success_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 sim_msg_result_rpc_pending_v(void)
+{
+ return 0xffffffff;
+}
+static inline u32 sim_msg_sequence_r(void)
+{
+ return 0x00000010;
+}
+static inline u32 sim_msg_spare_r(void)
+{
+ return 0x00000014;
+}
+static inline u32 sim_msg_spare__init_v(void)
+{
+ return 0x00000000;
+}
+
+#endif /* __hw_sim_gk20a_h__ */
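A usage sketch (editorial, not from the patch) for the simulator ring descriptors above: _f() values for distinct fields are OR'd into one register image, and _v() extractions are compared against the matching _v() constants. The function names and the 4 KB/coherent-PCI choice are illustrative only.

#include <linux/kernel.h>
#include <linux/types.h>
#include "hw_sim_gk20a.h"

static u32 example_send_ring_cfg(u64 ring_base)
{
	/* One 4 KB ring in coherent PCI memory, marked valid; bits
	 * 31:12 of the base address fill the addr_lo field. */
	return sim_send_ring_target_phys_pci_coherent_f() |
	       sim_send_ring_size_4kb_f() |
	       sim_send_ring_status_valid_f() |
	       sim_send_ring_addr_lo_f(lower_32_bits(ring_base) >> 12);
}

static bool example_send_ring_valid(u32 reg)
{
	/* _v() extracts the field so it compares against the _v() constant. */
	return sim_send_ring_status_v(reg) == sim_send_ring_status_valid_v();
}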
diff --git a/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h
new file mode 100644
index 000000000000..5d6397b4d10b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_therm_gk20a.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_therm_gk20a_h_
+#define _hw_therm_gk20a_h_
+
+static inline u32 therm_use_a_r(void)
+{
+ return 0x00020798;
+}
+static inline u32 therm_evt_ext_therm_0_r(void)
+{
+ return 0x00020700;
+}
+static inline u32 therm_evt_ext_therm_1_r(void)
+{
+ return 0x00020704;
+}
+static inline u32 therm_evt_ext_therm_2_r(void)
+{
+ return 0x00020708;
+}
+static inline u32 therm_evt_ba_w0_t1h_r(void)
+{
+ return 0x00020750;
+}
+static inline u32 therm_weight_1_r(void)
+{
+ return 0x00020024;
+}
+static inline u32 therm_peakpower_config1_r(u32 i)
+{
+ return 0x00020154 + i*4;
+}
+static inline u32 therm_peakpower_config1_window_period_2m_v(void)
+{
+ return 0x0000000f;
+}
+static inline u32 therm_peakpower_config1_window_period_2m_f(void)
+{
+ return 0xf;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_s(void)
+{
+ return 6;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v)
+{
+ return (v & 0x3f) << 8;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_m(void)
+{
+ return 0x3f << 8;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r)
+{
+ return (r >> 8) & 0x3f;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_20_f(void)
+{
+ return 0x1400;
+}
+static inline u32 therm_peakpower_config1_window_en_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 therm_peakpower_config2_r(u32 i)
+{
+ return 0x00020170 + i*4;
+}
+static inline u32 therm_peakpower_config4_r(u32 i)
+{
+ return 0x000201c0 + i*4;
+}
+static inline u32 therm_peakpower_config6_r(u32 i)
+{
+ return 0x00020270 + i*4;
+}
+static inline u32 therm_peakpower_config8_r(u32 i)
+{
+ return 0x000202e8 + i*4;
+}
+static inline u32 therm_peakpower_config9_r(u32 i)
+{
+ return 0x000202f4 + i*4;
+}
+static inline u32 therm_config1_r(void)
+{
+ return 0x00020050;
+}
+static inline u32 therm_gate_ctrl_r(u32 i)
+{
+ return 0x00020200 + i*4;
+}
+static inline u32 therm_gate_ctrl_eng_clk_m(void)
+{
+ return 0x3 << 0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_run_f(void)
+{
+ return 0x0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_auto_f(void)
+{
+ return 0x1;
+}
+static inline u32 therm_gate_ctrl_eng_clk_stop_f(void)
+{
+ return 0x2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_m(void)
+{
+ return 0x3 << 2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_run_f(void)
+{
+ return 0x0;
+}
+static inline u32 therm_gate_ctrl_blk_clk_auto_f(void)
+{
+ return 0x4;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_m(void)
+{
+ return 0x3 << 4;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void)
+{
+ return 0x10;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_off_v(void)
+{
+ return 0x00000002;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_off_f(void)
+{
+ return 0x20;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v)
+{
+ return (v & 0x1f) << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void)
+{
+ return 0x1f << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v)
+{
+ return (v & 0x7) << 13;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
+{
+ return 0x7 << 13;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
+{
+ return (v & 0xf) << 20;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_m(void)
+{
+ return 0xf << 20;
+}
+static inline u32 therm_fecs_idle_filter_r(void)
+{
+ return 0x00020288;
+}
+static inline u32 therm_fecs_idle_filter_value_m(void)
+{
+ return 0xffffffff << 0;
+}
+static inline u32 therm_hubmmu_idle_filter_r(void)
+{
+ return 0x0002028c;
+}
+static inline u32 therm_hubmmu_idle_filter_value_m(void)
+{
+ return 0xffffffff << 0;
+}
+#endif
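
A usage sketch for the accessor convention documented above (not part of the original commit): the helper below is hypothetical and assumes the driver's gk20a_readl()/gk20a_writel() register wrappers from gk20a.h; the idle-filter values are illustrative, not tuned settings.

static void example_enable_elcg(struct gk20a *g, u32 i)
{
        u32 gate = gk20a_readl(g, therm_gate_ctrl_r(i));

        /* replace the eng_clk field: clear via the _m() mask, OR in the _f() value */
        gate &= ~therm_gate_ctrl_eng_clk_m();
        gate |= therm_gate_ctrl_eng_clk_auto_f();

        /* program the idle filter fields (values are illustrative only) */
        gate &= ~therm_gate_ctrl_eng_idle_filt_exp_m();
        gate |= therm_gate_ctrl_eng_idle_filt_exp_f(2);
        gate &= ~therm_gate_ctrl_eng_idle_filt_mant_m();
        gate |= therm_gate_ctrl_eng_idle_filt_mant_f(4);

        gk20a_writel(g, therm_gate_ctrl_r(i), gate);
}

The _m()/_f() pairing is what makes such read-modify-write updates mechanical: clear the field with its mask, then OR in the shifted value.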
diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
new file mode 100644
index 000000000000..22bc50acfaf4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_timer_gk20a_h_
+#define _hw_timer_gk20a_h_
+
+static inline u32 timer_pri_timeout_r(void)
+{
+ return 0x00009080;
+}
+static inline u32 timer_pri_timeout_period_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+static inline u32 timer_pri_timeout_period_m(void)
+{
+ return 0xffffff << 0;
+}
+static inline u32 timer_pri_timeout_period_v(u32 r)
+{
+ return (r >> 0) & 0xffffff;
+}
+static inline u32 timer_pri_timeout_en_f(u32 v)
+{
+ return (v & 0x1) << 31;
+}
+static inline u32 timer_pri_timeout_en_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 timer_pri_timeout_en_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 timer_pri_timeout_en_en_enabled_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 timer_pri_timeout_en_en_disabled_f(void)
+{
+ return 0x0;
+}
+static inline u32 timer_pri_timeout_save_0_r(void)
+{
+ return 0x00009084;
+}
+static inline u32 timer_pri_timeout_save_1_r(void)
+{
+ return 0x00009088;
+}
+static inline u32 timer_pri_timeout_fecs_errcode_r(void)
+{
+ return 0x0000908c;
+}
+#endif
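
The same pattern applies to the PRI timeout register defined above. A minimal, hypothetical sketch (again assuming the gk20a_readl()/gk20a_writel() wrappers; the period value is arbitrary):

static void example_set_pri_timeout(struct gk20a *g)
{
        u32 val = gk20a_readl(g, timer_pri_timeout_r());

        val &= ~timer_pri_timeout_period_m();
        val |= timer_pri_timeout_period_f(0x100000);    /* 24-bit period field */
        val |= timer_pri_timeout_en_en_enabled_f();     /* enable bit (bit 31) */

        gk20a_writel(g, timer_pri_timeout_r(), val);
}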
diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
new file mode 100644
index 000000000000..c2922814a7ab
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_top_gk20a_h_
+#define _hw_top_gk20a_h_
+
+static inline u32 top_num_gpcs_r(void)
+{
+ return 0x00022430;
+}
+static inline u32 top_num_gpcs_value_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 top_tpc_per_gpc_r(void)
+{
+ return 0x00022434;
+}
+static inline u32 top_tpc_per_gpc_value_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 top_num_fbps_r(void)
+{
+ return 0x00022438;
+}
+static inline u32 top_num_fbps_value_v(u32 r)
+{
+ return (r >> 0) & 0x1f;
+}
+static inline u32 top_fs_status_r(void)
+{
+ return 0x00022500;
+}
+static inline u32 top_device_info_r(u32 i)
+{
+ return 0x00022700 + i*4;
+}
+static inline u32 top_device_info__size_1_v(void)
+{
+ return 0x00000040;
+}
+static inline u32 top_device_info_chain_v(u32 r)
+{
+ return (r >> 31) & 0x1;
+}
+static inline u32 top_device_info_chain_enable_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 top_device_info_engine_enum_v(u32 r)
+{
+ return (r >> 26) & 0xf;
+}
+static inline u32 top_device_info_runlist_enum_v(u32 r)
+{
+ return (r >> 21) & 0xf;
+}
+static inline u32 top_device_info_type_enum_v(u32 r)
+{
+ return (r >> 2) & 0x1fffffff;
+}
+static inline u32 top_device_info_type_enum_graphics_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 top_device_info_type_enum_graphics_f(void)
+{
+ return 0x0;
+}
+static inline u32 top_device_info_type_enum_copy0_v(void)
+{
+ return 0x00000001;
+}
+static inline u32 top_device_info_type_enum_copy0_f(void)
+{
+ return 0x4;
+}
+static inline u32 top_device_info_entry_v(u32 r)
+{
+ return (r >> 0) & 0x3;
+}
+static inline u32 top_device_info_entry_not_valid_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 top_device_info_entry_enum_v(void)
+{
+ return 0x00000002;
+}
+#endif
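
As an illustration of how the device_info accessors above are meant to compose (a sketch only, not the driver's actual parser; it ignores chained entries for brevity):

static u32 example_count_graphics_engines(struct gk20a *g)
{
        u32 i, count = 0;

        for (i = 0; i < top_device_info__size_1_v(); i++) {
                u32 entry = gk20a_readl(g, top_device_info_r(i));

                /* only "enum" entries describe engines */
                if (top_device_info_entry_v(entry) !=
                    top_device_info_entry_enum_v())
                        continue;

                if (top_device_info_type_enum_v(entry) ==
                    top_device_info_type_enum_graphics_v())
                        count++;
        }
        return count;
}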
diff --git a/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h
new file mode 100644
index 000000000000..826e9bd11fc7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_trim_gk20a.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_o(void) : Returns the offset for element <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+#ifndef _hw_trim_gk20a_h_
+#define _hw_trim_gk20a_h_
+
+static inline u32 trim_sys_gpcpll_cfg_r(void)
+{
+ return 0x00137000;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
+{
+ return (r >> 0) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
+{
+ return 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
+{
+ return 0x1 << 1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
+{
+ return (r >> 1) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
+{
+ return 0x1 << 4;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
+{
+ return 0x10;
+}
+static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
+{
+ return (r >> 17) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
+{
+ return 0x20000;
+}
+static inline u32 trim_sys_gpcpll_coeff_r(void)
+{
+ return 0x00137004;
+}
+static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
+{
+ return (r >> 0) & 0xff;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
+{
+ return (v & 0xff) << 8;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
+{
+ return 0xff << 8;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
+{
+ return (r >> 8) & 0xff;
+}
+static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
+{
+ return (v & 0x3f) << 16;
+}
+static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
+{
+ return (r >> 16) & 0x3f;
+}
+static inline u32 trim_sys_sel_vco_r(void)
+{
+ return 0x00137100;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
+{
+ return 0x1 << 0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
+{
+ return 0x00000000;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
+{
+ return 0x1;
+}
+static inline u32 trim_sys_gpc2clk_out_r(void)
+{
+ return 0x00137250;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
+{
+ return 6;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
+{
+ return (v & 0x3f) << 0;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
+{
+ return 0x3f << 0;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
+{
+ return (r >> 0) & 0x3f;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
+{
+ return 0x3c;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_s(void)
+{
+ return 6;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_f(u32 v)
+{
+ return (v & 0x3f) << 8;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
+{
+ return 0x3f << 8;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_v(u32 r)
+{
+ return (r >> 8) & 0x3f;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
+{
+ return 0x00134124 + i*512;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
+{
+ return (v & 0x3fff) << 0;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
+{
+ return 0x10000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
+{
+ return 0x100000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
+{
+ return 0x1000000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
+{
+ return 0x00134128 + i*512;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
+{
+ return (r >> 0) & 0xfffff;
+}
+static inline u32 trim_sys_gpcpll_cfg2_r(void)
+{
+ return 0x0013700c;
+}
+static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void)
+{
+ return 0xff << 24;
+}
+static inline u32 trim_sys_gpcpll_cfg3_r(void)
+{
+ return 0x00137018;
+}
+static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
+{
+ return 0xff << 16;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
+{
+ return 0x0013701c;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
+{
+ return 0x1 << 22;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
+{
+ return 0x400000;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void)
+{
+ return 0x1 << 31;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
+{
+ return 0x80000000;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
+{
+ return 0x0;
+}
+static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
+{
+ return 0x001328a0;
+}
+static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
+{
+ return (r >> 24) & 0x1;
+}
+#endif
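
To show how the GPCPLL coefficient accessors are typically consumed, here is a hypothetical rate read-back. It assumes the usual PLL relation (VCO = ref * N / M, output = VCO / PL) and a caller-supplied reference rate; the driver's clock code handles dividers and bypass more carefully:

static unsigned long example_gpcpll_rate(struct gk20a *g,
                                         unsigned long ref_rate_hz)
{
        u32 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        u32 m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
        u32 n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
        u32 pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);

        if (m == 0 || pl == 0)
                return 0;       /* PLL not programmed */

        /* integer math; good enough for a sketch */
        return ref_rate_hz / m * n / pl;
}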
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.c b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c
new file mode 100644
index 000000000000..b0a740563691
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.c
@@ -0,0 +1,424 @@
+/*
+ * drivers/video/tegra/host/gk20a/kind_gk20a.c
+ *
+ * GK20A memory kind management
+ *
+ * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <linux/bitops.h>
+
+#include "hw_gmmu_gk20a.h"
+#include "kind_gk20a.h"
+
+/* TBD: generate these from kind_macros.h */
+
+/* TBD: unclear whether the work-creation kinds apply to gk20a; doubtful */
+static inline bool gk20a_kind_work_creation_sked(u8 k)
+{
+ return false;
+}
+static inline bool gk20a_kind_work_creation_host(u8 k)
+{
+ return false;
+}
+
+static inline bool gk20a_kind_work_creation(u8 k)
+{
+ return gk20a_kind_work_creation_sked(k) ||
+ gk20a_kind_work_creation_host(k);
+}
+
+/* note: taken from the !2cs_compression case */
+static inline bool gk20a_kind_supported(u8 k)
+{
+ return gk20a_kind_work_creation(k) ||
+ (k == gmmu_pte_kind_invalid_v()) ||
+ (k == gmmu_pte_kind_pitch_v()) ||
+ (k >= gmmu_pte_kind_z16_v() &&
+ k <= gmmu_pte_kind_z16_ms8_2c_v()) ||
+ (k >= gmmu_pte_kind_z16_2z_v() &&
+ k <= gmmu_pte_kind_z16_ms8_2z_v()) ||
+ (k == gmmu_pte_kind_s8z24_v()) ||
+ (k >= gmmu_pte_kind_s8z24_2cz_v() &&
+ k <= gmmu_pte_kind_s8z24_ms8_2cz_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()) ||
+ (k == gmmu_pte_kind_z24s8_v()) ||
+ (k >= gmmu_pte_kind_z24s8_2cz_v() &&
+ k <= gmmu_pte_kind_z24s8_ms8_2cz_v()) ||
+ (k == gmmu_pte_kind_zf32_v()) ||
+ (k >= gmmu_pte_kind_zf32_2cz_v() &&
+ k <= gmmu_pte_kind_zf32_ms8_2cz_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v() &&
+ k <= gmmu_pte_kind_zf32_x24s8_v()) ||
+ (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
+ k <= gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()) ||
+ (k == gmmu_pte_kind_generic_16bx2_v()) ||
+ (k == gmmu_pte_kind_c32_2c_v()) ||
+ (k == gmmu_pte_kind_c32_2cra_v()) ||
+ (k == gmmu_pte_kind_c32_ms2_2c_v()) ||
+ (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c32_ms4_2cbr_v()) ||
+ (k >= gmmu_pte_kind_c32_ms4_2cra_v() &&
+ k <= gmmu_pte_kind_c64_2c_v()) ||
+ (k == gmmu_pte_kind_c64_2cra_v()) ||
+ (k == gmmu_pte_kind_c64_ms2_2c_v()) ||
+ (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c64_ms4_2cbr_v()) ||
+ (k >= gmmu_pte_kind_c64_ms4_2cra_v() &&
+ k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v()) ||
+ (k == gmmu_pte_kind_pitch_no_swizzle_v());
+}
+
+static inline bool gk20a_kind_z(u8 k)
+{
+ return (k >= gmmu_pte_kind_z16_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v())
+ /* ||
+ (k >= gmmu_pte_kind_zv32_x24s8_2cszv_v() &&
+ k <= gmmu_pte_kind_xf32_x24s8_ms16_2cs_v())*/;
+}
+
+static inline bool gk20a_kind_c(u8 k)
+{
+ return gk20a_kind_work_creation(k) ||
+ (k == gmmu_pte_kind_pitch_v()) ||
+ (k == gmmu_pte_kind_generic_16bx2_v()) ||
+ (k >= gmmu_pte_kind_c32_2c_v() &&
+ k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
+ k <= gmmu_pte_kind_pitch_no_swizzle_v());
+}
+
+static inline bool gk20a_kind_compressible(u8 k)
+{
+ return (k >= gmmu_pte_kind_z16_2c_v() &&
+ k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
+ (k >= gmmu_pte_kind_s8z24_1z_v() &&
+ k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
+ (k >= gmmu_pte_kind_z24s8_1z_v() &&
+ k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_1z_v() &&
+ k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
+ k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
+ (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
+ k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
+}
+
+static inline bool gk20a_kind_zbc(u8 k)
+{
+ return (k >= gmmu_pte_kind_z16_2c_v() &&
+ k <= gmmu_pte_kind_z16_ms16_2c_v()) ||
+ (k >= gmmu_pte_kind_z16_4cz_v() &&
+ k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
+ (k >= gmmu_pte_kind_s8z24_2cz_v() &&
+ k <= gmmu_pte_kind_s8z24_ms16_4cszv_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_2cs_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()) ||
+ (k >= gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v() &&
+ k <= gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v()) ||
+ (k >= gmmu_pte_kind_z24s8_2cs_v() &&
+ k <= gmmu_pte_kind_z24s8_ms16_4cszv_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_2cs_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_2czv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()) ||
+ (k >= gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v() &&
+ k <= gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_2cs_v() &&
+ k <= gmmu_pte_kind_zf32_ms16_2cz_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v() &&
+ k <= gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v()) ||
+ (k >= gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v() &&
+ k <= gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()) ||
+ (k >= gmmu_pte_kind_zf32_x24s8_1cs_v() &&
+ k <= gmmu_pte_kind_zf32_x24s8_ms16_1cs_v()) ||
+ (k >= gmmu_pte_kind_zf32_x24s8_2cszv_v() &&
+ k <= gmmu_pte_kind_c32_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms2_2c_v() &&
+ k <= gmmu_pte_kind_c32_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c32_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c32_ms4_2cra_v()) ||
+ (k >= gmmu_pte_kind_c32_ms8_ms16_2c_v() &&
+ k <= gmmu_pte_kind_c64_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms2_2c_v() &&
+ k <= gmmu_pte_kind_c64_ms2_2cbr_v()) ||
+ (k == gmmu_pte_kind_c64_ms2_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms4_2c_v() &&
+ k <= gmmu_pte_kind_c64_ms4_2cra_v()) ||
+ (k >= gmmu_pte_kind_c64_ms8_ms16_2c_v() &&
+ k <= gmmu_pte_kind_c128_ms8_ms16_2cr_v());
+}
+
+u8 gk20a_uc_kind_map[256];
+void gk20a_init_uncompressed_kind_map(void)
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ gk20a_uc_kind_map[i] = gmmu_pte_kind_invalid_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_2z_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2z_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2z_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2z_v()] =
+ gmmu_pte_kind_z16_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms2_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_s8z24_ms8_2cs_v()] =
+ gmmu_pte_kind_s8z24_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc4_2zv_v()] =
+ gmmu_pte_kind_v8z24_ms4_vc4_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc8_2zv_v()] =
+ gmmu_pte_kind_v8z24_ms8_vc8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms4_vc12_2zv_v()] =
+ gmmu_pte_kind_v8z24_ms4_vc12_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_v8z24_ms8_vc24_2zv_v()] =
+ gmmu_pte_kind_v8z24_ms8_vc24_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms2_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms4_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24s8_ms8_2cz_v()] =
+ gmmu_pte_kind_z24s8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms2_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms4_2cz_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_ms8_2cz_v()] =
+ gmmu_pte_kind_zf32_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v()] =
+ gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v()] =
+ gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v()] =
+ gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v()] =
+ gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v()] =
+ gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v()] =
+ gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v()] =
+ gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v()] =
+ gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cszv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms2_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_zf32_x24s8_ms8_2cs_v()] =
+ gmmu_pte_kind_zf32_x24s8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_2cba_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_2bra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cbr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cba_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_2bra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c32_ms8_ms16_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_2cbr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_2cba_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_2bra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms2_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cbr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cba_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_2bra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c64_ms8_ms16_2cra_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_2cr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms2_2cr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms4_2cr_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2c_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_c128_ms8_ms16_2cr_v()] =
+ gmmu_pte_kind_generic_16bx2_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc4_2zv_v()] =
+ gmmu_pte_kind_z24v8_ms4_vc4_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms4_vc12_2zv_v()] =
+ gmmu_pte_kind_z24v8_ms4_vc12_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc8_2zv_v()] =
+ gmmu_pte_kind_z24v8_ms8_vc8_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2cs_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2czv_v()] =
+ gk20a_uc_kind_map[gmmu_pte_kind_z24v8_ms8_vc24_2zv_v()] =
+ gmmu_pte_kind_z24v8_ms8_vc24_v();
+
+ gk20a_uc_kind_map[gmmu_pte_kind_x8c24_v()] =
+ gmmu_pte_kind_x8c24_v();
+}
+
+u16 gk20a_kind_attr[256];
+void gk20a_init_kind_attr(void)
+{
+ u16 k;
+ for (k = 0; k < 256; k++) {
+ gk20a_kind_attr[k] = 0;
+ if (gk20a_kind_supported((u8)k))
+ gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED;
+ if (gk20a_kind_compressible((u8)k))
+ gk20a_kind_attr[k] |= GK20A_KIND_ATTR_COMPRESSIBLE;
+ if (gk20a_kind_z((u8)k))
+ gk20a_kind_attr[k] |= GK20A_KIND_ATTR_Z;
+ if (gk20a_kind_c((u8)k))
+ gk20a_kind_attr[k] |= GK20A_KIND_ATTR_C;
+ if (gk20a_kind_zbc((u8)k))
+ gk20a_kind_attr[k] |= GK20A_KIND_ATTR_ZBC;
+ }
+}
diff --git a/drivers/gpu/nvgpu/gk20a/kind_gk20a.h b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h
new file mode 100644
index 000000000000..93f011d4a84b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/kind_gk20a.h
@@ -0,0 +1,67 @@
+/*
+ * drivers/video/tegra/host/gk20a/kind_gk20a.h
+ *
+ * GK20A memory kind management
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __KIND_GK20A_H__
+#define __KIND_GK20A_H__
+
+
+void gk20a_init_uncompressed_kind_map(void);
+void gk20a_init_kind_attr(void);
+
+extern u16 gk20a_kind_attr[];
+#define NV_KIND_DEFAULT -1
+
+#define GK20A_KIND_ATTR_SUPPORTED BIT(0)
+#define GK20A_KIND_ATTR_COMPRESSIBLE BIT(1)
+#define GK20A_KIND_ATTR_Z BIT(2)
+#define GK20A_KIND_ATTR_C BIT(3)
+#define GK20A_KIND_ATTR_ZBC BIT(4)
+
+static inline bool gk20a_kind_is_supported(u8 k)
+{
+ return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_SUPPORTED);
+}
+static inline bool gk20a_kind_is_compressible(u8 k)
+{
+ return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_COMPRESSIBLE);
+}
+
+static inline bool gk20a_kind_is_z(u8 k)
+{
+ return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_Z);
+}
+
+static inline bool gk20a_kind_is_c(u8 k)
+{
+ return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_C);
+}
+static inline bool gk20a_kind_is_zbc(u8 k)
+{
+ return !!(gk20a_kind_attr[k] & GK20A_KIND_ATTR_ZBC);
+}
+
+/* maps kind to its uncompressed version */
+extern u8 gk20a_uc_kind_map[];
+static inline u8 gk20a_get_uncompressed_kind(u8 k)
+{
+ return gk20a_uc_kind_map[k];
+}
+
+#endif /* __KIND_GK20A_H__ */
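
A short usage sketch for the kind tables (hypothetical; assumes kind_gk20a.h and the usual kernel headers are included). The init calls build the lookup tables once, after which per-kind queries are plain array lookups:

void example_kind_setup_and_query(u8 kind)
{
        gk20a_init_uncompressed_kind_map();
        gk20a_init_kind_attr();

        if (gk20a_kind_is_supported(kind) && gk20a_kind_is_compressible(kind))
                pr_info("kind 0x%02x compressible, uncompressed pair 0x%02x\n",
                        kind, gk20a_get_uncompressed_kind(kind));
}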
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
new file mode 100644
index 000000000000..cbb27cc77a1e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -0,0 +1,243 @@
+/*
+ * drivers/video/tegra/host/gk20a/ltc_common.c
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+
+#include "gk20a.h"
+#include "gr_gk20a.h"
+
+static int gk20a_determine_L2_size_bytes(struct gk20a *g)
+{
+ const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch,
+ g->gpu_characteristics.impl);
+ u32 lts_per_ltc;
+ u32 ways;
+ u32 sets;
+ u32 bytes_per_line;
+ u32 active_ltcs;
+ u32 cache_size;
+
+ u32 tmp;
+ u32 active_sets_value;
+
+ tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
+ ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));
+
+ active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
+ if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
+ sets = 64;
+ } else if (active_sets_value ==
+ ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
+ sets = 32;
+ } else if (active_sets_value ==
+ ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
+ sets = 16;
+ } else {
+ dev_err(dev_from_gk20a(g),
+ "Unknown constant %u for active sets",
+ (unsigned)active_sets_value);
+ sets = 0;
+ }
+
+ active_ltcs = g->gr.num_fbps;
+
+ /* chip-specific values */
+ switch (gpuid) {
+ case GK20A_GPUID_GK20A:
+ lts_per_ltc = 1;
+ bytes_per_line = 128;
+ break;
+
+ default:
+ dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n",
+ (unsigned)gpuid);
+ lts_per_ltc = 0;
+ bytes_per_line = 0;
+ }
+
+ cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
+
+ return cache_size;
+}
+
+/*
+ * Set the maximum number of ways that can have the "EVICT_LAST" class.
+ */
+static void gk20a_ltc_set_max_ways_evict_last(struct gk20a *g, u32 max_ways)
+{
+ u32 mgmt_reg;
+
+ mgmt_reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) &
+ ~ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0);
+ mgmt_reg |= ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways);
+
+ gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(), mgmt_reg);
+}
+
+/*
+ * Sets the ZBC color for the passed index.
+ */
+static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g,
+ struct zbc_entry *color_val,
+ u32 index)
+{
+ u32 i;
+ u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
+ ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
+
+ for (i = 0;
+ i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
+ color_val->color_l2[i]);
+}
+
+/*
+ * Sets the ZBC depth for the passed index.
+ */
+static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g,
+ struct zbc_entry *depth_val,
+ u32 index)
+{
+ u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
+ ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
+ depth_val->depth);
+}
+
+/*
+ * Clear the L2 ZBC color entry for the passed index.
+ */
+static void gk20a_ltc_clear_zbc_color_entry(struct gk20a *g, u32 index)
+{
+ u32 i;
+ u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
+ ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
+
+ for (i = 0;
+ i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
+ gk20a_writel(g,
+ ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), 0);
+}
+
+/*
+ * Clear the L2 ZBC depth entry for the passed index.
+ */
+static void gk20a_ltc_clear_zbc_depth_entry(struct gk20a *g, u32 index)
+{
+ u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
+ ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
+
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
+}
+
+static int gk20a_ltc_init_zbc(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 i, j;
+
+ /* reset zbc clear */
+ for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE -
+ GK20A_STARTOF_ZBC_TABLE; i++) {
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
+ (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
+ ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
+ ltc_ltcs_ltss_dstg_zbc_index_address_f(
+ i + GK20A_STARTOF_ZBC_TABLE));
+ for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++)
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
+ gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
+ }
+
+ gr_gk20a_clear_zbc_table(g, gr);
+ gr_gk20a_load_zbc_default_table(g, gr);
+
+ return 0;
+}
+
+static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
+{
+ u32 compbit_base_post_divide;
+ u64 compbit_base_post_multiply64;
+ u64 compbit_store_base_iova =
+ NV_MC_SMMU_VADDR_TRANSLATE(gr->compbit_store.base_iova);
+ u64 compbit_base_post_divide64 = (compbit_store_base_iova >>
+ ltc_ltcs_ltss_cbc_base_alignment_shift_v());
+
+ do_div(compbit_base_post_divide64, gr->num_fbps);
+ compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
+
+ compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
+ gr->num_fbps) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
+
+ if (compbit_base_post_multiply64 < compbit_store_base_iova)
+ compbit_base_post_divide++;
+
+ gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
+ compbit_base_post_divide);
+
+ gk20a_dbg(gpu_dbg_info | gpu_dbg_map | gpu_dbg_pte,
+ "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
+ (u32)(compbit_store_base_iova >> 32),
+ (u32)(compbit_store_base_iova & 0xffffffff),
+ compbit_base_post_divide);
+}
+
+/* Flushes the compression bit cache as well as "data".
+ * Note: the name here is a bit of a misnomer. ELPG uses this
+ * internally... but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+ u32 data;
+ s32 retry = 100;
+
+ gk20a_dbg_fn("");
+
+ /* Make sure all previous writes are committed to the L2. There's no
+ * guarantee that writes are to DRAM. This will be a sysmembar internal
+ * to the L2. */
+ gk20a_writel(g, ltc_ltss_g_elpg_r(),
+ ltc_ltss_g_elpg_flush_pending_f());
+ do {
+ data = gk20a_readl(g, ltc_ltss_g_elpg_r());
+
+ if (ltc_ltss_g_elpg_flush_v(data) ==
+ ltc_ltss_g_elpg_flush_pending_v()) {
+ gk20a_dbg_info("g_elpg_flush 0x%x", data);
+ retry--;
+ usleep_range(20, 40);
+ } else
+ break;
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "g_elpg_flush too many retries");
+
+}
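
For concreteness, gk20a_determine_L2_size_bytes() above is a straight product. With, say, one active LTC, one LTS per LTC, 16 active ways, all 64 sets and 128-byte cache lines (illustrative numbers, not a claim about any particular SKU), it yields 1 * 1 * 16 * 64 * 128 = 131072 bytes, i.e. a 128 KiB L2.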
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
new file mode 100644
index 000000000000..08aedecd5db0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -0,0 +1,203 @@
+/*
+ * drivers/video/tegra/host/gk20a/ltc_gk20a.c
+ *
+ * GK20A Graphics
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+
+#include "hw_ltc_gk20a.h"
+#include "hw_proj_gk20a.h"
+
+#include "ltc_common.c"
+
+static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
+{
+ struct device *d = dev_from_gk20a(g);
+ DEFINE_DMA_ATTRS(attrs);
+ dma_addr_t iova;
+
+ /* max memory size (MB) to cover */
+ u32 max_size = gr->max_comptag_mem;
+ /* one tag line covers 128KB */
+ u32 max_comptag_lines = max_size << 3;
+
+ u32 hw_max_comptag_lines =
+ ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
+
+ u32 cbc_param =
+ gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
+ u32 comptags_per_cacheline =
+ ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
+ u32 slices_per_fbp =
+ ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param);
+ u32 cacheline_size =
+ 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
+
+ u32 compbit_backing_size;
+
+ gk20a_dbg_fn("");
+
+ if (max_comptag_lines == 0) {
+ gr->compbit_store.size = 0;
+ return 0;
+ }
+
+ if (max_comptag_lines > hw_max_comptag_lines)
+ max_comptag_lines = hw_max_comptag_lines;
+
+ /* no hybrid fb */
+ compbit_backing_size =
+ DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
+ cacheline_size * slices_per_fbp * gr->num_fbps;
+
+ /* aligned to 2KB * num_fbps */
+ compbit_backing_size +=
+ gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
+
+ /* must be a multiple of 64KB */
+ compbit_backing_size = roundup(compbit_backing_size, 64*1024);
+
+ max_comptag_lines =
+ (compbit_backing_size * comptags_per_cacheline) /
+ cacheline_size * slices_per_fbp * gr->num_fbps;
+
+ if (max_comptag_lines > hw_max_comptag_lines)
+ max_comptag_lines = hw_max_comptag_lines;
+
+ gk20a_dbg_info("compbit backing store size : %d",
+ compbit_backing_size);
+ gk20a_dbg_info("max comptag lines : %d",
+ max_comptag_lines);
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ gr->compbit_store.size = compbit_backing_size;
+ gr->compbit_store.pages = dma_alloc_attrs(d, gr->compbit_store.size,
+ &iova, GFP_KERNEL, &attrs);
+ if (!gr->compbit_store.pages) {
+ gk20a_err(dev_from_gk20a(g), "failed to allocate "
+ "backing store for compbit: size %d",
+ compbit_backing_size);
+ return -ENOMEM;
+ }
+ gr->compbit_store.base_iova = iova;
+
+ gk20a_allocator_init(&gr->comp_tags, "comptag",
+ 1, /* start */
+ max_comptag_lines - 1, /* length */
+ 1); /* align */
+
+ return 0;
+}
+
+static int gk20a_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max)
+{
+ struct gr_gk20a *gr = &g->gr;
+ u32 fbp, slice, ctrl1, val;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+ u32 delay = GR_IDLE_CHECK_DEFAULT;
+ u32 slices_per_fbp =
+ ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
+ gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
+
+ gk20a_dbg_fn("");
+
+ if (gr->compbit_store.size == 0)
+ return 0;
+
+ gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
+ ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
+ gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
+ ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
+ gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
+ gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) |
+ ltc_ltcs_ltss_cbc_ctrl1_clear_active_f());
+
+ for (fbp = 0; fbp < gr->num_fbps; fbp++) {
+ for (slice = 0; slice < slices_per_fbp; slice++) {
+
+ delay = GR_IDLE_CHECK_DEFAULT;
+
+ ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
+ fbp * proj_ltc_stride_v() +
+ slice * proj_lts_stride_v();
+
+ do {
+ val = gk20a_readl(g, ctrl1);
+ if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(val) !=
+ ltc_ltcs_ltss_cbc_ctrl1_clear_active_v())
+ break;
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1,
+ GR_IDLE_CHECK_MAX);
+
+ } while (time_before(jiffies, end_jiffies) ||
+ !tegra_platform_is_silicon());
+
+ if (!time_before(jiffies, end_jiffies)) {
+ gk20a_err(dev_from_gk20a(g),
+ "comp tag clear timeout\n");
+ return -EBUSY;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+#ifdef CONFIG_DEBUG_FS
+static void gk20a_ltc_sync_debugfs(struct gk20a *g)
+{
+ u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
+
+ spin_lock(&g->debugfs_lock);
+ if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
+ u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
+ if (g->mm.ltc_enabled_debug)
+ /* bypass disabled (normal caching ops)*/
+ reg &= ~reg_f;
+ else
+ /* bypass enabled (no caching) */
+ reg |= reg_f;
+
+ gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
+ g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
+ }
+ spin_unlock(&g->debugfs_lock);
+}
+#endif
+
+void gk20a_init_ltc(struct gpu_ops *gops)
+{
+ gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
+ gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last;
+ gops->ltc.init_comptags = gk20a_ltc_init_comptags;
+ gops->ltc.clear_comptags = gk20a_ltc_clear_comptags;
+ gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry;
+ gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry;
+ gops->ltc.clear_zbc_color_entry = gk20a_ltc_clear_zbc_color_entry;
+ gops->ltc.clear_zbc_depth_entry = gk20a_ltc_clear_zbc_depth_entry;
+ gops->ltc.init_zbc = gk20a_ltc_init_zbc;
+ gops->ltc.init_cbc = gk20a_ltc_init_cbc;
+#ifdef CONFIG_DEBUG_FS
+ gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
+#endif
+ gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+}
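
To make the sizing in gk20a_ltc_init_comptags() concrete: max_comptag_mem is in MB and one comptag line covers 128 KB, so max_size << 3 gives 8 lines per MB (512 MB of coverage needs 4096 lines). With illustrative CBC parameters, say 32 comptags per cacheline, 1024-byte cachelines, 2 slices per FBP and 1 FBP, the backing store becomes DIV_ROUND_UP(4096, 32) * 1024 * 2 * 1 = 262144 bytes, plus 2 KB of per-FBP alignment slack, rounded up to the next 64 KB multiple, i.e. 320 KB. The real parameters come from ltc_ltcs_ltss_cbc_param_r(), so these numbers are examples only.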
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h
new file mode 100644
index 000000000000..208811b256cc
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h
@@ -0,0 +1,21 @@
+/*
+ * GK20A L2
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVHOST_GK20A_LTC
+#define _NVHOST_GK20A_LTC
+struct gk20a;
+
+void gk20a_init_ltc(struct gpu_ops *gops);
+#endif
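
A hypothetical caller sketch showing how the HAL table populated by gk20a_init_ltc() is consumed. The ltc member names mirror the assignments above; the surrounding gpu_ops/gk20a definitions are assumed to come from gk20a.h, and error handling is kept minimal:

static int example_setup_ltc(struct gk20a *g)
{
        int err;

        gk20a_init_ltc(&g->ops);

        err = g->ops.ltc.init_comptags(g, &g->gr);
        if (err)
                return err;

        g->ops.ltc.init_cbc(g, &g->gr);
        return 0;
}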
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
new file mode 100644
index 000000000000..b22df5e87de6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -0,0 +1,2984 @@
+/*
+ * drivers/video/tegra/host/gk20a/mm_gk20a.c
+ *
+ * GK20A memory management
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/nvhost.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/nvmap.h>
+#include <linux/tegra-soc.h>
+#include <linux/vmalloc.h>
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+
+#include "gk20a.h"
+#include "mm_gk20a.h"
+#include "hw_gmmu_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_bus_gk20a.h"
+#include "hw_ram_gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_flush_gk20a.h"
+#include "hw_ltc_gk20a.h"
+
+#include "kind_gk20a.h"
+
+#ifdef CONFIG_ARM64
+#define outer_flush_range(a, b)
+#define __cpuc_flush_dcache_area __flush_dcache_area
+#endif
+
+/*
+ * GPU mapping life cycle
+ * ======================
+ *
+ * Kernel mappings
+ * ---------------
+ *
+ * Kernel mappings are created through vm.map(..., false):
+ *
+ * - Mappings to the same allocations are reused and refcounted.
+ * - This path does not support deferred unmapping (i.e. kernel must wait for
+ * all hw operations on the buffer to complete before unmapping).
+ * - References to dmabuf are owned and managed by the (kernel) clients of
+ * the gk20a_vm layer.
+ *
+ *
+ * User space mappings
+ * -------------------
+ *
+ * User space mappings are created through as.map_buffer -> vm.map(..., true):
+ *
+ * - Mappings to the same allocations are reused and refcounted.
+ * - This path supports deferred unmapping (i.e. we delay the actual unmapping
+ * until all hw operations have completed).
+ * - References to dmabuf are owned and managed by the vm_gk20a
+ * layer itself. vm.map acquires these refs, and sets
+ * mapped_buffer->own_mem_ref to record that we must release the refs when we
+ * actually unmap.
+ *
+ */
+
+static inline int vm_aspace_id(struct vm_gk20a *vm)
+{
+ /* -1 is bar1 or pmu, etc. */
+ return vm->as_share ? vm->as_share->id : -1;
+}
+static inline u32 hi32(u64 f)
+{
+ return (u32)(f >> 32);
+}
+static inline u32 lo32(u64 f)
+{
+ return (u32)(f & 0xffffffff);
+}
+
+#define FLUSH_CPU_DCACHE(va, pa, size) \
+ do { \
+ __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
+ outer_flush_range(pa, pa + (size_t)(size)); \
+ } while (0)
+
+static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+static struct mapped_buffer_node *find_mapped_buffer_locked(
+ struct rb_root *root, u64 addr);
+static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
+ struct rb_root *root, struct dma_buf *dmabuf,
+ u32 kind);
+static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
+ enum gmmu_pgsz_gk20a pgsz_idx,
+ struct sg_table *sgt,
+ u64 first_vaddr, u64 last_vaddr,
+ u8 kind_v, u32 ctag_offset, bool cacheable,
+ int rw_flag);
+static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
+static void gk20a_vm_remove_support(struct vm_gk20a *vm);
+
+
+/* note: keep the page sizes sorted lowest to highest here */
+static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
+static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
+static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
+ 0x1ffffLL };
+static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
+
+struct gk20a_comptags {
+ u32 offset;
+ u32 lines;
+};
+
+struct gk20a_dmabuf_priv {
+ struct mutex lock;
+
+ struct gk20a_allocator *comptag_allocator;
+ struct gk20a_comptags comptags;
+
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ int pin_count;
+};
+
+static void gk20a_mm_delete_priv(void *_priv)
+{
+ struct gk20a_dmabuf_priv *priv = _priv;
+ if (!priv)
+ return;
+
+ if (priv->comptags.lines) {
+ BUG_ON(!priv->comptag_allocator);
+ priv->comptag_allocator->free(priv->comptag_allocator,
+ priv->comptags.offset,
+ priv->comptags.lines);
+ }
+
+ kfree(priv);
+}
+
+struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
+{
+ struct gk20a_dmabuf_priv *priv;
+
+ priv = dma_buf_get_drvdata(dmabuf, dev);
+ if (WARN_ON(!priv))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&priv->lock);
+
+ if (priv->pin_count == 0) {
+ priv->attach = dma_buf_attach(dmabuf, dev);
+ if (IS_ERR(priv->attach)) {
+ mutex_unlock(&priv->lock);
+ return (struct sg_table *)priv->attach;
+ }
+
+ priv->sgt = dma_buf_map_attachment(priv->attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(priv->sgt)) {
+ dma_buf_detach(dmabuf, priv->attach);
+ mutex_unlock(&priv->lock);
+ return priv->sgt;
+ }
+ }
+
+ priv->pin_count++;
+ mutex_unlock(&priv->lock);
+ return priv->sgt;
+}
+
+void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
+ struct sg_table *sgt)
+{
+ struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+ dma_addr_t dma_addr;
+
+ if (IS_ERR(priv) || !priv)
+ return;
+
+ mutex_lock(&priv->lock);
+ WARN_ON(priv->sgt != sgt);
+ priv->pin_count--;
+ WARN_ON(priv->pin_count < 0);
+ dma_addr = sg_dma_address(priv->sgt->sgl);
+ if (priv->pin_count == 0) {
+ dma_buf_unmap_attachment(priv->attach, priv->sgt,
+ DMA_BIDIRECTIONAL);
+ dma_buf_detach(dmabuf, priv->attach);
+ }
+ mutex_unlock(&priv->lock);
+}
+
+
+static void gk20a_get_comptags(struct device *dev,
+ struct dma_buf *dmabuf,
+ struct gk20a_comptags *comptags)
+{
+ struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+
+ if (!comptags)
+ return;
+
+ if (!priv) {
+ comptags->lines = 0;
+ comptags->offset = 0;
+ return;
+ }
+
+ *comptags = priv->comptags;
+}
+
+static int gk20a_alloc_comptags(struct device *dev,
+ struct dma_buf *dmabuf,
+ struct gk20a_allocator *allocator,
+ int lines)
+{
+ struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+ u32 offset = 0;
+ int err;
+
+ if (!priv)
+ return -ENOSYS;
+
+ if (!lines)
+ return -EINVAL;
+
+ /* store the allocator so we can use it when we free the ctags */
+ priv->comptag_allocator = allocator;
+ err = allocator->alloc(allocator, &offset, lines);
+ if (!err) {
+ priv->comptags.lines = lines;
+ priv->comptags.offset = offset;
+ }
+ return err;
+}
+
+static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
+{
+ gk20a_dbg_fn("");
+ if (g->ops.fb.reset)
+ g->ops.fb.reset(g);
+
+ if (g->ops.fb.init_fs_state)
+ g->ops.fb.init_fs_state(g);
+
+ return 0;
+}
+
+void gk20a_remove_mm_support(struct mm_gk20a *mm)
+{
+ struct gk20a *g = mm->g;
+ struct device *d = dev_from_gk20a(g);
+ struct vm_gk20a *vm = &mm->bar1.vm;
+ struct inst_desc *inst_block = &mm->bar1.inst_block;
+
+ gk20a_dbg_fn("");
+
+ if (inst_block->cpuva)
+ dma_free_coherent(d, inst_block->size,
+ inst_block->cpuva, inst_block->iova);
+ inst_block->cpuva = NULL;
+ inst_block->iova = 0;
+
+ gk20a_vm_remove_support(vm);
+}
+
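+/*
+ * One-time software init for the MM unit: set up locks, PDE stride and
+ * per-page-size page table sizing, the channel VM size, and the BAR1 VM.
+ */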
+int gk20a_init_mm_setup_sw(struct gk20a *g)
+{
+ struct mm_gk20a *mm = &g->mm;
+ int i;
+
+ gk20a_dbg_fn("");
+
+ if (mm->sw_ready) {
+ gk20a_dbg_fn("skip init");
+ return 0;
+ }
+
+ mm->g = g;
+ mutex_init(&mm->tlb_lock);
+ mutex_init(&mm->l2_op_lock);
+ mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
+ mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
+ mm->pde_stride = mm->big_page_size << 10;
+ mm->pde_stride_shift = ilog2(mm->pde_stride);
+ BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
+
+ for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
+
+ u32 num_ptes, pte_space, num_pages;
+
+ /* assuming "full" page tables */
+ num_ptes = mm->pde_stride / gmmu_page_sizes[i];
+
+ pte_space = num_ptes * gmmu_pte__size_v();
+ /* allocate whole pages */
+ pte_space = roundup(pte_space, PAGE_SIZE);
+
+ num_pages = pte_space / PAGE_SIZE;
+ /* make sure "order" is viable */
+ BUG_ON(!is_power_of_2(num_pages));
+
+ mm->page_table_sizing[i].num_ptes = num_ptes;
+ mm->page_table_sizing[i].order = ilog2(num_pages);
+ }
+
+ /*TBD: make channel vm size configurable */
+ mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
+
+ gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
+
+ gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
+ gmmu_page_sizes[gmmu_page_size_small] >> 10,
+ (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
+ gmmu_pte__size_v()) >> 10);
+
+ gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
+ gmmu_page_sizes[gmmu_page_size_big] >> 10,
+ (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
+ gmmu_pte__size_v()) >> 10);
+
+
+ gk20a_init_bar1_vm(mm);
+
+ mm->remove_support = gk20a_remove_mm_support;
+ mm->sw_ready = true;
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+/* make sure gk20a_init_mm_support is called before */
+static int gk20a_init_mm_setup_hw(struct gk20a *g)
+{
+ struct mm_gk20a *mm = &g->mm;
+ struct inst_desc *inst_block = &mm->bar1.inst_block;
+ phys_addr_t inst_pa = inst_block->cpu_pa;
+
+ gk20a_dbg_fn("");
+
+ /* set large page size in fb
+ * note this is very early on, can we defer it ? */
+ {
+ u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
+
+ if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
+ fb_mmu_ctrl = (fb_mmu_ctrl &
+ ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
+ fb_mmu_ctrl_vm_pg_size_128kb_f();
+ else
+ BUG_ON(1); /* no support/testing for larger ones yet */
+
+ gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
+ }
+
+ inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
+ gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
+
+ /* this is very early in init... can we defer this? */
+ {
+ gk20a_writel(g, bus_bar1_block_r(),
+ bus_bar1_block_target_vid_mem_f() |
+ bus_bar1_block_mode_virtual_f() |
+ bus_bar1_block_ptr_f(inst_pa));
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+int gk20a_init_mm_support(struct gk20a *g)
+{
+ int err;
+
+ err = gk20a_init_mm_reset_enable_hw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_mm_setup_sw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_mm_setup_hw(g);
+ if (err)
+ return err;
+
+ return err;
+}
+
+#ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
+static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+ void **handle,
+ struct sg_table **sgt,
+ size_t *size)
+{
+ u32 num_pages = 1 << order;
+ u32 len = num_pages * PAGE_SIZE;
+ int err;
+ struct page *pages;
+
+ gk20a_dbg_fn("");
+
+ pages = alloc_pages(GFP_KERNEL, order);
+ if (!pages) {
+ gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
+ goto err_out;
+ }
+ *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
+ if (!(*sgt)) {
+ gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
+ goto err_alloced;
+ }
+ err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
+ if (err) {
+ gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
+ goto err_sg_table;
+ }
+ sg_set_page((*sgt)->sgl, pages, len, 0);
+ *handle = page_address(pages);
+ memset(*handle, 0, len);
+ *size = len;
+ FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
+
+ return 0;
+
+err_sg_table:
+ kfree(*sgt);
+err_alloced:
+ __free_pages(pages, order);
+err_out:
+ return -ENOMEM;
+}
+
+static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+ struct sg_table *sgt, u32 order,
+ size_t size)
+{
+ gk20a_dbg_fn("");
+ BUG_ON(sgt == NULL);
+ free_pages((unsigned long)handle, order);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+static int map_gmmu_pages(void *handle, struct sg_table *sgt,
+ void **va, size_t size)
+{
+ FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
+ *va = handle;
+ return 0;
+}
+
+static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
+{
+ FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
+}
+#else
+static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+ void **handle,
+ struct sg_table **sgt,
+ size_t *size)
+{
+ struct device *d = dev_from_vm(vm);
+ u32 num_pages = 1 << order;
+ u32 len = num_pages * PAGE_SIZE;
+ dma_addr_t iova;
+ DEFINE_DMA_ATTRS(attrs);
+ struct page **pages;
+ int err = 0;
+
+ gk20a_dbg_fn("");
+
+ *size = len;
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
+ if (!pages) {
+ gk20a_err(d, "memory allocation failed\n");
+ goto err_out;
+ }
+
+ err = gk20a_get_sgtable_from_pages(d, sgt, pages,
+ iova, len);
+ if (err) {
+ gk20a_err(d, "sgt allocation failed\n");
+ goto err_free;
+ }
+
+ *handle = (void *)pages;
+
+ return 0;
+
+err_free:
+ dma_free_attrs(d, len, pages, iova, &attrs);
+ pages = NULL;
+ iova = 0;
+err_out:
+ return -ENOMEM;
+}
+
+static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+ struct sg_table *sgt, u32 order,
+ size_t size)
+{
+ struct device *d = dev_from_vm(vm);
+ u64 iova;
+ DEFINE_DMA_ATTRS(attrs);
+ struct page **pages = (struct page **)handle;
+
+ gk20a_dbg_fn("");
+ BUG_ON(sgt == NULL);
+
+ iova = sg_dma_address(sgt->sgl);
+
+ gk20a_free_sgtable(&sgt);
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+ dma_free_attrs(d, size, pages, iova, &attrs);
+ pages = NULL;
+ iova = 0;
+}
+
+static int map_gmmu_pages(void *handle, struct sg_table *sgt,
+ void **kva, size_t size)
+{
+ int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct page **pages = (struct page **)handle;
+ gk20a_dbg_fn("");
+
+ *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
+ if (!(*kva))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
+{
+ gk20a_dbg_fn("");
+ vunmap(va);
+}
+#endif
+
+/* allocate a phys contig region big enough for a full
+ * sized gmmu page table for the given gmmu_page_size.
+ * the whole range is zeroed so it's "invalid"/will fault
+ */
+
+static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
+ enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
+ struct page_table_gk20a *pte)
+{
+ int err;
+ u32 pte_order;
+ void *handle = NULL;
+ struct sg_table *sgt;
+ size_t size;
+
+ gk20a_dbg_fn("");
+
+ /* allocate enough pages for the table */
+ pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
+
+ err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
+ if (err)
+ return err;
+
+ gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
+ pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
+
+ pte->ref = handle;
+ pte->sgt = sgt;
+ pte->size = size;
+
+ return 0;
+}
+
+/* given address range (inclusive) determine the pdes crossed */
+static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
+ u64 addr_lo, u64 addr_hi,
+ u32 *pde_lo, u32 *pde_hi)
+{
+ *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
+ *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
+ gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
+ addr_lo, addr_hi, vm->mm->pde_stride_shift);
+ gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
+ *pde_lo, *pde_hi);
+}
+
+static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
+{
+ return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
+}
+
+static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
+ u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
+{
+ u32 ret;
+ /* mask off pde part */
+ addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
+ /* shift over to get pte index. note assumption that pte index
+ * doesn't leak over into the high 32b */
+ ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
+
+ gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
+ return ret;
+}
+
+static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
+ u32 *pte_offset)
+{
+ /* ptes are 8B regardless of pagesize */
+ /* pte space pages are 4KB. so 512 ptes per 4KB page*/
+ *pte_page = i >> 9;
+
+ /* this offset is a pte offset, not a byte offset */
+ *pte_offset = i & ((1<<9)-1);
+
+ gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
+ i, *pte_page, *pte_offset);
+}
+
+
+/*
+ * given a pde index/page table number make sure it has
+ * backing store and if not go ahead allocate it and
+ * record it in the appropriate pde
+ */
+static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
+ u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+{
+ int err;
+ struct page_table_gk20a *pte =
+ vm->pdes.ptes[gmmu_pgsz_idx] + i;
+
+ gk20a_dbg_fn("");
+
+ /* if it's already in place it's valid */
+ if (pte->ref)
+ return 0;
+
+ gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
+ gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
+
+ err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
+ if (err)
+ return err;
+
+ /* rewrite pde */
+ update_gmmu_pde_locked(vm, i);
+
+ return 0;
+}
+
+static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
+ u64 addr)
+{
+ struct vm_reserved_va_node *va_node;
+ list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
+ if (addr >= va_node->vaddr_start &&
+ addr < (u64)va_node->vaddr_start + (u64)va_node->size)
+ return va_node;
+
+ return NULL;
+}
+
+int gk20a_vm_get_buffers(struct vm_gk20a *vm,
+ struct mapped_buffer_node ***mapped_buffers,
+ int *num_buffers)
+{
+ struct mapped_buffer_node *mapped_buffer;
+ struct mapped_buffer_node **buffer_list;
+ struct rb_node *node;
+ int i = 0;
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ buffer_list = kzalloc(sizeof(*buffer_list) *
+ vm->num_user_mapped_buffers, GFP_KERNEL);
+ if (!buffer_list) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ return -ENOMEM;
+ }
+
+ node = rb_first(&vm->mapped_buffers);
+ while (node) {
+ mapped_buffer =
+ container_of(node, struct mapped_buffer_node, node);
+ if (mapped_buffer->user_mapped) {
+ buffer_list[i] = mapped_buffer;
+ kref_get(&mapped_buffer->ref);
+ i++;
+ }
+ node = rb_next(&mapped_buffer->node);
+ }
+
+ BUG_ON(i != vm->num_user_mapped_buffers);
+
+ *num_buffers = vm->num_user_mapped_buffers;
+ *mapped_buffers = buffer_list;
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ return 0;
+}
+
+static void gk20a_vm_unmap_locked_kref(struct kref *ref)
+{
+ struct mapped_buffer_node *mapped_buffer =
+ container_of(ref, struct mapped_buffer_node, ref);
+ gk20a_vm_unmap_locked(mapped_buffer);
+}
+
+void gk20a_vm_put_buffers(struct vm_gk20a *vm,
+ struct mapped_buffer_node **mapped_buffers,
+ int num_buffers)
+{
+ int i;
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ for (i = 0; i < num_buffers; ++i)
+ kref_put(&mapped_buffers[i]->ref,
+ gk20a_vm_unmap_locked_kref);
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ kfree(mapped_buffers);
+}
+
+static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
+{
+ struct device *d = dev_from_vm(vm);
+ int retries;
+ struct mapped_buffer_node *mapped_buffer;
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+ if (!mapped_buffer) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
+ return;
+ }
+
+ if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ retries = 1000;
+ while (retries) {
+ if (atomic_read(&mapped_buffer->ref.refcount) == 1)
+ break;
+ retries--;
+ udelay(50);
+ }
+ if (!retries)
+ gk20a_err(d, "sync-unmap failed on 0x%llx",
+ offset);
+ mutex_lock(&vm->update_gmmu_lock);
+ }
+
+ mapped_buffer->user_mapped--;
+ if (mapped_buffer->user_mapped == 0)
+ vm->num_user_mapped_buffers--;
+ kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+
+ mutex_unlock(&vm->update_gmmu_lock);
+}
+
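+/*
+ * Carve a virtual address range of 'size' bytes (rounded up to the page
+ * size of 'gmmu_pgsz_idx') out of the VM's page-granular allocator.
+ * Returns the byte offset of the range, or 0 on failure.
+ */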
+static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+ u64 size,
+ enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+{
+ struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+ int err;
+ u64 offset;
+ u32 start_page_nr = 0, num_pages;
+ u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
+
+ if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
+ dev_warn(dev_from_vm(vm),
+ "invalid page size requested in gk20a vm alloc");
+ return 0;
+ }
+
+ if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
+ dev_warn(dev_from_vm(vm),
+ "unsupportd page size requested");
+ return -EINVAL;
+
+ }
+
+ /* be certain we round up to gmmu_page_size if needed */
+ /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
+ size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
+
+ gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
+ gmmu_page_sizes[gmmu_pgsz_idx]>>10);
+
+ /* The vma allocator represents page accounting. */
+ num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
+
+ err = vma->alloc(vma, &start_page_nr, num_pages);
+
+ if (err) {
+ gk20a_err(dev_from_vm(vm),
+ "%s oom: sz=0x%llx", vma->name, size);
+ return 0;
+ }
+
+ offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
+ gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
+
+ return offset;
+}
+
+static int gk20a_vm_free_va(struct vm_gk20a *vm,
+ u64 offset, u64 size,
+ enum gmmu_pgsz_gk20a pgsz_idx)
+{
+ struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
+ u32 page_size = gmmu_page_sizes[pgsz_idx];
+ u32 page_shift = gmmu_page_shifts[pgsz_idx];
+ u32 start_page_nr, num_pages;
+ int err;
+
+ gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
+ vma->name, offset, size);
+
+ start_page_nr = (u32)(offset >> page_shift);
+ num_pages = (u32)((size + page_size - 1) >> page_shift);
+
+ err = vma->free(vma, start_page_nr, num_pages);
+ if (err) {
+ gk20a_err(dev_from_vm(vm),
+ "not found: offset=0x%llx, sz=0x%llx",
+ offset, size);
+ }
+
+ return err;
+}
+
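+/*
+ * Insert a mapped buffer into the VM's rb-tree, keyed by GPU virtual
+ * address. Duplicate addresses are rejected.
+ */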
+static int insert_mapped_buffer(struct rb_root *root,
+ struct mapped_buffer_node *mapped_buffer)
+{
+ struct rb_node **new_node = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new_node) {
+ struct mapped_buffer_node *cmp_with =
+ container_of(*new_node, struct mapped_buffer_node,
+ node);
+
+ parent = *new_node;
+
+ if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
+ new_node = &((*new_node)->rb_left);
+ else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
+ new_node = &((*new_node)->rb_right);
+ else
+ return -EINVAL; /* no fair dup'ing */
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&mapped_buffer->node, parent, new_node);
+ rb_insert_color(&mapped_buffer->node, root);
+
+ return 0;
+}
+
+static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
+ struct rb_root *root, struct dma_buf *dmabuf,
+ u32 kind)
+{
+ struct rb_node *node = rb_first(root);
+ while (node) {
+ struct mapped_buffer_node *mapped_buffer =
+ container_of(node, struct mapped_buffer_node, node);
+ if (mapped_buffer->dmabuf == dmabuf &&
+ kind == mapped_buffer->kind)
+ return mapped_buffer;
+ node = rb_next(&mapped_buffer->node);
+ }
+ return NULL;
+}
+
+static struct mapped_buffer_node *find_mapped_buffer_locked(
+ struct rb_root *root, u64 addr)
+{
+
+ struct rb_node *node = root->rb_node;
+ while (node) {
+ struct mapped_buffer_node *mapped_buffer =
+ container_of(node, struct mapped_buffer_node, node);
+ if (mapped_buffer->addr > addr) /* u64 cmp */
+ node = node->rb_left;
+ else if (mapped_buffer->addr != addr) /* u64 cmp */
+ node = node->rb_right;
+ else
+ return mapped_buffer;
+ }
+ return NULL;
+}
+
+static struct mapped_buffer_node *find_mapped_buffer_range_locked(
+ struct rb_root *root, u64 addr)
+{
+ struct rb_node *node = root->rb_node;
+ while (node) {
+ struct mapped_buffer_node *m =
+ container_of(node, struct mapped_buffer_node, node);
+ if (m->addr <= addr && m->addr + m->size > addr)
+ return m;
+ else if (m->addr > addr) /* u64 cmp */
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+#define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
+
+struct buffer_attrs {
+ struct sg_table *sgt;
+ u64 size;
+ u64 align;
+ u32 ctag_offset;
+ u32 ctag_lines;
+ int pgsz_idx;
+ u8 kind_v;
+ u8 uc_kind_v;
+};
+
+static void gmmu_select_page_size(struct buffer_attrs *bfr)
+{
+ int i;
+ /* choose the biggest first (top->bottom) */
+ for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
+ if (!(gmmu_page_offset_masks[i] & bfr->align)) {
+ /* would like to add this too but nvmap returns the
+ * original requested size not the allocated size.
+ * (!(gmmu_page_offset_masks[i] & bfr->size)) */
+ bfr->pgsz_idx = i;
+ break;
+ }
+}
+
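+/*
+ * Validate the requested kind, find an uncompressed fallback kind, and
+ * work out how many comptag lines the buffer needs. Compression is only
+ * kept when the 128KB page size is in use; otherwise we silently fall
+ * back to the uncompressed kind.
+ */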
+static int setup_buffer_kind_and_compression(struct device *d,
+ u32 flags,
+ struct buffer_attrs *bfr,
+ enum gmmu_pgsz_gk20a pgsz_idx)
+{
+ bool kind_compressible;
+
+ if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
+ bfr->kind_v = gmmu_pte_kind_pitch_v();
+
+ if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
+ gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
+ return -EINVAL;
+ }
+
+ bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
+ /* find a suitable uncompressed kind if it becomes necessary later */
+ kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
+ if (kind_compressible) {
+ bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
+ if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
+ /* shouldn't happen, but it is worth cross-checking */
+ gk20a_err(d, "comptag kind 0x%x can't be"
+ " downgraded to uncompressed kind",
+ bfr->kind_v);
+ return -EINVAL;
+ }
+ }
+ /* comptags only supported for suitable kinds, 128KB pagesize */
+ if (unlikely(kind_compressible &&
+ (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
+ /*
+ gk20a_warn(d, "comptags specified"
+ " but pagesize being used doesn't support it");*/
+ /* it is safe to fall back to uncompressed as
+ functionality is not harmed */
+ bfr->kind_v = bfr->uc_kind_v;
+ kind_compressible = false;
+ }
+ if (kind_compressible)
+ bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
+ COMP_TAG_LINE_SIZE_SHIFT;
+ else
+ bfr->ctag_lines = 0;
+
+ return 0;
+}
+
+static int validate_fixed_buffer(struct vm_gk20a *vm,
+ struct buffer_attrs *bfr,
+ u64 map_offset)
+{
+ struct device *dev = dev_from_vm(vm);
+ struct vm_reserved_va_node *va_node;
+ struct mapped_buffer_node *buffer;
+
+ if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
+ gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
+ map_offset);
+ return -EINVAL;
+ }
+
+ /* find the space reservation */
+ va_node = addr_to_reservation(vm, map_offset);
+ if (!va_node) {
+ gk20a_warn(dev, "fixed offset mapping without space allocation");
+ return -EINVAL;
+ }
+
+ /* check that this mapping does not collide with existing
+ * mappings by checking the overlap between the current
+ * buffer and all other mapped buffers */
+
+ list_for_each_entry(buffer,
+ &va_node->va_buffers_list, va_buffers_list) {
+ s64 begin = max(buffer->addr, map_offset);
+ s64 end = min(buffer->addr +
+ buffer->size, map_offset + bfr->size);
+ if (end - begin > 0) {
+ gk20a_warn(dev, "overlapping buffer map requested");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
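+/*
+ * Core GMMU map helper; called with update_gmmu_lock held. Allocates a VA
+ * when map_offset is 0, makes sure backing page tables exist for every PDE
+ * covered by the range, then writes the PTEs. Returns the GPU virtual
+ * address, or 0 on failure.
+ */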
+static u64 __locked_gmmu_map(struct vm_gk20a *vm,
+ u64 map_offset,
+ struct sg_table *sgt,
+ u64 size,
+ int pgsz_idx,
+ u8 kind_v,
+ u32 ctag_offset,
+ u32 flags,
+ int rw_flag)
+{
+ int err = 0, i = 0;
+ u32 pde_lo, pde_hi;
+ struct device *d = dev_from_vm(vm);
+
+ /* Allocate (or validate when map_offset != 0) the virtual address. */
+ if (!map_offset) {
+ map_offset = gk20a_vm_alloc_va(vm, size,
+ pgsz_idx);
+ if (!map_offset) {
+ gk20a_err(d, "failed to allocate va space");
+ err = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ pde_range_from_vaddr_range(vm,
+ map_offset,
+ map_offset + size - 1,
+ &pde_lo, &pde_hi);
+
+ /* mark the addr range valid (but with 0 phys addr, which will fault) */
+ for (i = pde_lo; i <= pde_hi; i++) {
+ err = validate_gmmu_page_table_gk20a_locked(vm, i,
+ pgsz_idx);
+ if (err) {
+ gk20a_err(d, "failed to validate page table %d: %d",
+ i, err);
+ goto fail;
+ }
+ }
+
+ err = update_gmmu_ptes_locked(vm, pgsz_idx,
+ sgt,
+ map_offset, map_offset + size - 1,
+ kind_v,
+ ctag_offset,
+ flags &
+ NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+ rw_flag);
+ if (err) {
+ gk20a_err(d, "failed to update ptes on map");
+ goto fail;
+ }
+
+ return map_offset;
+ fail:
+ gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
+ return 0;
+}
+
+static void __locked_gmmu_unmap(struct vm_gk20a *vm,
+ u64 vaddr,
+ u64 size,
+ int pgsz_idx,
+ bool va_allocated,
+ int rw_flag)
+{
+ int err = 0;
+ struct gk20a *g = gk20a_from_vm(vm);
+
+ if (va_allocated) {
+ err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+ if (err) {
+ dev_err(dev_from_vm(vm),
+ "failed to free va");
+ return;
+ }
+ }
+
+ /* unmap here needs to know the page size we assigned at mapping */
+ err = update_gmmu_ptes_locked(vm,
+ pgsz_idx,
+ 0, /* n/a for unmap */
+ vaddr,
+ vaddr + size - 1,
+ 0, 0, false /* n/a for unmap */,
+ rw_flag);
+ if (err)
+ dev_err(dev_from_vm(vm),
+ "failed to update gmmu ptes on unmap");
+
+ /* detect which if any pdes/ptes can now be released */
+
+ /* flush l2 so any dirty lines are written out *now*.
+ * also as we could potentially be switching this buffer
+ * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
+ * some point in the future we need to invalidate l2. e.g. switching
+ * from a render buffer unmap (here) to later using the same memory
+ * for gmmu ptes. note the positioning of this relative to any smmu
+ * unmapping (below). */
+
+ gk20a_mm_l2_flush(g, true);
+}
+
+static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
+ struct dma_buf *dmabuf,
+ u64 offset_align,
+ u32 flags,
+ int kind,
+ struct sg_table **sgt,
+ bool user_mapped,
+ int rw_flag)
+{
+ struct mapped_buffer_node *mapped_buffer = NULL;
+
+ mapped_buffer =
+ find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
+ dmabuf, kind);
+ if (!mapped_buffer)
+ return 0;
+
+ if (mapped_buffer->flags != flags)
+ return 0;
+
+ if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
+ mapped_buffer->addr != offset_align)
+ return 0;
+
+ BUG_ON(mapped_buffer->vm != vm);
+
+ /* mark the buffer as used */
+ if (user_mapped) {
+ if (mapped_buffer->user_mapped == 0)
+ vm->num_user_mapped_buffers++;
+ mapped_buffer->user_mapped++;
+
+ /* If the mapping comes from user space, we own
+ * the handle ref. Since we reuse an
+ * existing mapping here, we need to give back those
+ * refs once in order not to leak.
+ */
+ if (mapped_buffer->own_mem_ref)
+ dma_buf_put(mapped_buffer->dmabuf);
+ else
+ mapped_buffer->own_mem_ref = true;
+ }
+ kref_get(&mapped_buffer->ref);
+
+ gk20a_dbg(gpu_dbg_map,
+ "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
+ "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
+ "own_mem_ref=%d user_mapped=%d",
+ vm_aspace_id(vm), mapped_buffer->pgsz_idx,
+ mapped_buffer->flags,
+ mapped_buffer->ctag_lines,
+ mapped_buffer->ctag_offset,
+ hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
+ hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+ lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
+ hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+ lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+ mapped_buffer->own_mem_ref, user_mapped);
+
+ if (sgt)
+ *sgt = mapped_buffer->sgt;
+ return mapped_buffer->addr;
+}
+
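+/*
+ * Map a dma-buf into the VM. An existing compatible mapping is reused when
+ * possible; otherwise the buffer is pinned, page size/kind/comptags are
+ * chosen, the GMMU PTEs are written and the mapping is tracked for later
+ * unmap.
+ */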
+u64 gk20a_vm_map(struct vm_gk20a *vm,
+ struct dma_buf *dmabuf,
+ u64 offset_align,
+ u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
+ int kind,
+ struct sg_table **sgt,
+ bool user_mapped,
+ int rw_flag)
+{
+ struct gk20a *g = gk20a_from_vm(vm);
+ struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
+ struct device *d = dev_from_vm(vm);
+ struct mapped_buffer_node *mapped_buffer = NULL;
+ bool inserted = false, va_allocated = false;
+ u32 gmmu_page_size = 0;
+ u64 map_offset = 0;
+ int err = 0;
+ struct buffer_attrs bfr = {0};
+ struct gk20a_comptags comptags;
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ /* check if this buffer is already mapped */
+ map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
+ flags, kind, sgt,
+ user_mapped, rw_flag);
+ if (map_offset) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ return map_offset;
+ }
+
+ /* pin buffer to get phys/iovmm addr */
+ bfr.sgt = gk20a_mm_pin(d, dmabuf);
+ if (IS_ERR(bfr.sgt)) {
+ /* Falling back to physical is actually possible
+ * here in many cases if we use 4K phys pages in the
+ * gmmu. However we have some regions which require
+ * contig regions to work properly (either phys-contig
+ * or contig through smmu io_vaspace). Until we can
+ * track the difference between those two cases we have
+ * to fail the mapping when we run out of SMMU space.
+ */
+ gk20a_warn(d, "oom allocating tracking buffer");
+ goto clean_up;
+ }
+
+ if (sgt)
+ *sgt = bfr.sgt;
+
+ bfr.kind_v = kind;
+ bfr.size = dmabuf->size;
+ bfr.align = 1 << __ffs((u64)sg_dma_address(bfr.sgt->sgl));
+ bfr.pgsz_idx = -1;
+
+ /* If FIXED_OFFSET is set, the page size is determined by whether the
+ * offset falls in the upper VA range. Otherwise, select the page size
+ * according to the buffer's memory alignment. */
+ if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+ bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
+ gmmu_page_size_big : gmmu_page_size_small;
+ } else {
+ gmmu_select_page_size(&bfr);
+ }
+
+ /* validate/adjust bfr attributes */
+ if (unlikely(bfr.pgsz_idx == -1)) {
+ gk20a_err(d, "unsupported page size detected");
+ goto clean_up;
+ }
+
+ if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
+ bfr.pgsz_idx > gmmu_page_size_big)) {
+ BUG_ON(1);
+ err = -EINVAL;
+ goto clean_up;
+ }
+ gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
+
+ /* Check if we should use a fixed offset for mapping this buffer */
+ if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+ err = validate_fixed_buffer(vm, &bfr, offset_align);
+ if (err)
+ goto clean_up;
+
+ map_offset = offset_align;
+ va_allocated = false;
+ } else
+ va_allocated = true;
+
+ if (sgt)
+ *sgt = bfr.sgt;
+
+ err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
+ if (unlikely(err)) {
+ gk20a_err(d, "failure setting up kind and compression");
+ goto clean_up;
+ }
+
+ /* bar1 and pmu vm don't need ctag */
+ if (!vm->enable_ctag)
+ bfr.ctag_lines = 0;
+
+ gk20a_get_comptags(d, dmabuf, &comptags);
+
+ if (bfr.ctag_lines && !comptags.lines) {
+ /* allocate compression resources if needed */
+ err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
+ bfr.ctag_lines);
+ if (err) {
+ /* ok to fall back here if we ran out */
+ /* TBD: we can partially alloc ctags as well... */
+ bfr.ctag_lines = bfr.ctag_offset = 0;
+ bfr.kind_v = bfr.uc_kind_v;
+ } else {
+ gk20a_get_comptags(d, dmabuf, &comptags);
+
+ /* init/clear the ctag buffer */
+ g->ops.ltc.clear_comptags(g,
+ comptags.offset,
+ comptags.offset + comptags.lines - 1);
+ }
+ }
+
+ /* store the comptag info */
+ bfr.ctag_offset = comptags.offset;
+
+ /* update gmmu ptes */
+ map_offset = __locked_gmmu_map(vm, map_offset,
+ bfr.sgt,
+ bfr.size,
+ bfr.pgsz_idx,
+ bfr.kind_v,
+ bfr.ctag_offset,
+ flags, rw_flag);
+ if (!map_offset)
+ goto clean_up;
+
+ gk20a_dbg(gpu_dbg_map,
+ "as=%d pgsz=%d "
+ "kind=0x%x kind_uc=0x%x flags=0x%x "
+ "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
+ vm_aspace_id(vm), gmmu_page_size,
+ bfr.kind_v, bfr.uc_kind_v, flags,
+ bfr.ctag_lines, bfr.ctag_offset,
+ hi32(map_offset), lo32(map_offset),
+ hi32((u64)sg_dma_address(bfr.sgt->sgl)),
+ lo32((u64)sg_dma_address(bfr.sgt->sgl)),
+ hi32((u64)sg_phys(bfr.sgt->sgl)),
+ lo32((u64)sg_phys(bfr.sgt->sgl)));
+
+#if defined(NVHOST_DEBUG)
+ {
+ int i;
+ struct scatterlist *sg = NULL;
+ gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
+ for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i) {
+ u64 da = sg_dma_address(sg);
+ u64 pa = sg_phys(sg);
+ u64 len = sg->length;
+ gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
+ i, hi32(pa), lo32(pa), hi32(da), lo32(da),
+ hi32(len), lo32(len));
+ }
+ }
+#endif
+
+ /* keep track of the buffer for unmapping */
+ /* TBD: check for multiple mapping of same buffer */
+ mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
+ if (!mapped_buffer) {
+ gk20a_warn(d, "oom allocating tracking buffer");
+ goto clean_up;
+ }
+ mapped_buffer->dmabuf = dmabuf;
+ mapped_buffer->sgt = bfr.sgt;
+ mapped_buffer->addr = map_offset;
+ mapped_buffer->size = bfr.size;
+ mapped_buffer->pgsz_idx = bfr.pgsz_idx;
+ mapped_buffer->ctag_offset = bfr.ctag_offset;
+ mapped_buffer->ctag_lines = bfr.ctag_lines;
+ mapped_buffer->vm = vm;
+ mapped_buffer->flags = flags;
+ mapped_buffer->kind = kind;
+ mapped_buffer->va_allocated = va_allocated;
+ mapped_buffer->user_mapped = user_mapped ? 1 : 0;
+ mapped_buffer->own_mem_ref = user_mapped;
+ INIT_LIST_HEAD(&mapped_buffer->unmap_list);
+ INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
+ kref_init(&mapped_buffer->ref);
+
+ err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
+ if (err) {
+ gk20a_err(d, "failed to insert into mapped buffer tree");
+ goto clean_up;
+ }
+ inserted = true;
+ if (user_mapped)
+ vm->num_user_mapped_buffers++;
+
+ gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
+
+ if (!va_allocated) {
+ struct vm_reserved_va_node *va_node;
+
+ /* find the space reservation */
+ va_node = addr_to_reservation(vm, map_offset);
+ list_add_tail(&mapped_buffer->va_buffers_list,
+ &va_node->va_buffers_list);
+ mapped_buffer->va_node = va_node;
+ }
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ /* Invalidate kernel mappings immediately */
+ if (vm_aspace_id(vm) == -1)
+ gk20a_mm_tlb_invalidate(vm);
+
+ return map_offset;
+
+clean_up:
+ if (inserted) {
+ rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+ if (user_mapped)
+ vm->num_user_mapped_buffers--;
+ }
+ kfree(mapped_buffer);
+ if (va_allocated)
+ gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+ if (!IS_ERR(bfr.sgt))
+ gk20a_mm_unpin(d, dmabuf, bfr.sgt);
+
+ mutex_unlock(&vm->update_gmmu_lock);
+ gk20a_dbg_info("err=%d\n", err);
+ return 0;
+}
+
+u64 gk20a_gmmu_map(struct vm_gk20a *vm,
+ struct sg_table **sgt,
+ u64 size,
+ u32 flags,
+ int rw_flag)
+{
+ u64 vaddr;
+
+ mutex_lock(&vm->update_gmmu_lock);
+ vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
+ *sgt, /* sg table */
+ size,
+ 0, /* page size index = 0 i.e. SZ_4K */
+ 0, /* kind */
+ 0, /* ctag_offset */
+ flags, rw_flag);
+ mutex_unlock(&vm->update_gmmu_lock);
+ if (!vaddr) {
+ gk20a_err(dev_from_vm(vm), "failed to allocate va space");
+ return 0;
+ }
+
+ /* Invalidate kernel mappings immediately */
+ gk20a_mm_tlb_invalidate(vm);
+
+ return vaddr;
+}
+
+void gk20a_gmmu_unmap(struct vm_gk20a *vm,
+ u64 vaddr,
+ u64 size,
+ int rw_flag)
+{
+ mutex_lock(&vm->update_gmmu_lock);
+ __locked_gmmu_unmap(vm,
+ vaddr,
+ size,
+ 0, /* page size 4K */
+ true, /*va_allocated */
+ rw_flag);
+ mutex_unlock(&vm->update_gmmu_lock);
+}
+
+phys_addr_t gk20a_get_phys_from_iova(struct device *d,
+ u64 dma_addr)
+{
+ phys_addr_t phys;
+ u64 iova;
+
+ struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
+ if (!mapping)
+ return dma_addr;
+
+ iova = dma_addr & PAGE_MASK;
+ phys = iommu_iova_to_phys(mapping->domain, iova);
+ return phys;
+}
+
+/* get sg_table from already allocated buffer */
+int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
+ void *cpuva, u64 iova,
+ size_t size)
+{
+ int err = 0;
+ *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!(*sgt)) {
+ dev_err(d, "failed to allocate memory\n");
+ err = -ENOMEM;
+ goto fail;
+ }
+ err = dma_get_sgtable(d, *sgt,
+ cpuva, iova,
+ size);
+ if (err) {
+ dev_err(d, "failed to create sg table\n");
+ goto fail;
+ }
+ sg_dma_address((*sgt)->sgl) = iova;
+
+ return 0;
+ fail:
+ if (*sgt) {
+ kfree(*sgt);
+ *sgt = NULL;
+ }
+ return err;
+}
+
+int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
+ struct page **pages, u64 iova,
+ size_t size)
+{
+ int err = 0;
+ *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!(*sgt)) {
+ dev_err(d, "failed to allocate memory\n");
+ err = -ENOMEM;
+ goto fail;
+ }
+ err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
+ if (err) {
+ dev_err(d, "failed to allocate sg_table\n");
+ goto fail;
+ }
+ sg_set_page((*sgt)->sgl, *pages, size, 0);
+ sg_dma_address((*sgt)->sgl) = iova;
+
+ return 0;
+ fail:
+ if (*sgt) {
+ kfree(*sgt);
+ *sgt = NULL;
+ }
+ return err;
+}
+
+void gk20a_free_sgtable(struct sg_table **sgt)
+{
+ sg_free_table(*sgt);
+ kfree(*sgt);
+ *sgt = NULL;
+}
+
+u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
+{
+ u64 result = sg_phys(sgl);
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+ if (sg_dma_address(sgl) == DMA_ERROR_CODE)
+ result = 0;
+ else if (sg_dma_address(sgl)) {
+ result = sg_dma_address(sgl) |
+ 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
+ }
+#endif
+ return result;
+}
+
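+/*
+ * Walk the PTEs covering [first_vaddr, last_vaddr] and either fill them
+ * from 'sgt' (map) or clear them (unmap, sgt == NULL). Page tables whose
+ * reference count drops to zero are freed and their PDE rewritten.
+ */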
+static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
+ enum gmmu_pgsz_gk20a pgsz_idx,
+ struct sg_table *sgt,
+ u64 first_vaddr, u64 last_vaddr,
+ u8 kind_v, u32 ctag_offset,
+ bool cacheable,
+ int rw_flag)
+{
+ int err;
+ u32 pde_lo, pde_hi, pde_i;
+ struct scatterlist *cur_chunk;
+ unsigned int cur_offset;
+ u32 pte_w[2] = {0, 0}; /* invalid pte */
+ u32 ctag = ctag_offset;
+ u32 ctag_incr;
+ u32 page_size = gmmu_page_sizes[pgsz_idx];
+ u64 addr = 0;
+
+ pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
+ &pde_lo, &pde_hi);
+
+ gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
+ pgsz_idx, pde_lo, pde_hi);
+
+ /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
+ * below (per-pte). Note: this doesn't work unless page size (when
+ * comptags are active) is 128KB. We have checks elsewhere for that. */
+ ctag_incr = !!ctag_offset;
+
+ if (sgt)
+ cur_chunk = sgt->sgl;
+ else
+ cur_chunk = NULL;
+
+ cur_offset = 0;
+
+ for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
+ u32 pte_lo, pte_hi;
+ u32 pte_cur;
+ void *pte_kv_cur;
+
+ struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
+
+ if (pde_i == pde_lo)
+ pte_lo = pte_index_from_vaddr(vm, first_vaddr,
+ pgsz_idx);
+ else
+ pte_lo = 0;
+
+ if ((pde_i != pde_hi) && (pde_hi != pde_lo))
+ pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
+ else
+ pte_hi = pte_index_from_vaddr(vm, last_vaddr,
+ pgsz_idx);
+
+ /* get cpu access to the ptes */
+ err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
+ pte->size);
+ if (err) {
+ gk20a_err(dev_from_vm(vm),
+ "couldn't map ptes for update as=%d pte_ref_cnt=%d",
+ vm_aspace_id(vm), pte->ref_cnt);
+ goto clean_up;
+ }
+
+ gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
+ for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
+
+ if (likely(sgt)) {
+ u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+ if (new_addr) {
+ addr = new_addr;
+ addr += cur_offset;
+ }
+
+ pte_w[0] = gmmu_pte_valid_true_f() |
+ gmmu_pte_address_sys_f(addr
+ >> gmmu_pte_address_shift_v());
+ pte_w[1] = gmmu_pte_aperture_video_memory_f() |
+ gmmu_pte_kind_f(kind_v) |
+ gmmu_pte_comptagline_f(ctag);
+
+ if (rw_flag == gk20a_mem_flag_read_only) {
+ pte_w[0] |= gmmu_pte_read_only_true_f();
+ pte_w[1] |=
+ gmmu_pte_write_disable_true_f();
+ } else if (rw_flag ==
+ gk20a_mem_flag_write_only) {
+ pte_w[1] |=
+ gmmu_pte_read_disable_true_f();
+ }
+
+ if (!cacheable)
+ pte_w[1] |= gmmu_pte_vol_true_f();
+
+ pte->ref_cnt++;
+
+ gk20a_dbg(gpu_dbg_pte,
+ "pte_cur=%d addr=0x%x,%08x kind=%d"
+ " ctag=%d vol=%d refs=%d"
+ " [0x%08x,0x%08x]",
+ pte_cur, hi32(addr), lo32(addr),
+ kind_v, ctag, !cacheable,
+ pte->ref_cnt, pte_w[1], pte_w[0]);
+
+ ctag += ctag_incr;
+ cur_offset += page_size;
+ addr += page_size;
+ while (cur_chunk &&
+ cur_offset >= cur_chunk->length) {
+ cur_offset -= cur_chunk->length;
+ cur_chunk = sg_next(cur_chunk);
+ }
+
+ } else {
+ pte->ref_cnt--;
+ gk20a_dbg(gpu_dbg_pte,
+ "pte_cur=%d ref=%d [0x0,0x0]",
+ pte_cur, pte->ref_cnt);
+ }
+
+ gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
+ gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
+ }
+
+ unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
+
+ if (pte->ref_cnt == 0) {
+ /* It can make sense to keep around one page table for
+ * each flavor (empty)... in case a new map is coming
+ * right back to alloc (and fill it in) again.
+ * But: deferring unmapping should help with pathologic
+ * unmap/map/unmap/map cases where we'd trigger pte
+ * free/alloc/free/alloc.
+ */
+ free_gmmu_pages(vm, pte->ref, pte->sgt,
+ vm->mm->page_table_sizing[pgsz_idx].order,
+ pte->size);
+ pte->ref = NULL;
+
+ /* rewrite pde */
+ update_gmmu_pde_locked(vm, pde_i);
+ }
+
+ }
+
+ smp_mb();
+ vm->tlb_dirty = true;
+ gk20a_dbg_fn("set tlb dirty");
+
+ return 0;
+
+clean_up:
+ /*TBD: potentially rewrite above to pre-map everything it needs to
+ * as that's the only way it can fail */
+ return err;
+
+}
+
+
+/* for gk20a the "video memory" apertures here are misnomers. */
+static inline u32 big_valid_pde0_bits(u64 pte_addr)
+{
+ u32 pde0_bits =
+ gmmu_pde_aperture_big_video_memory_f() |
+ gmmu_pde_address_big_sys_f(
+ (u32)(pte_addr >> gmmu_pde_address_shift_v()));
+ return pde0_bits;
+}
+static inline u32 small_valid_pde1_bits(u64 pte_addr)
+{
+ u32 pde1_bits =
+ gmmu_pde_aperture_small_video_memory_f() |
+ gmmu_pde_vol_small_true_f() | /* tbd: why? */
+ gmmu_pde_address_small_sys_f(
+ (u32)(pte_addr >> gmmu_pde_address_shift_v()));
+ return pde1_bits;
+}
+
+/* Given the current state of the ptes associated with a pde,
+ determine value and write it out. There's no checking
+ here to determine whether or not a change was actually
+ made. So, superfluous updates will cause unnecessary
+ pde invalidations.
+*/
+static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+{
+ bool small_valid, big_valid;
+ u64 pte_addr[2] = {0, 0};
+ struct page_table_gk20a *small_pte =
+ vm->pdes.ptes[gmmu_page_size_small] + i;
+ struct page_table_gk20a *big_pte =
+ vm->pdes.ptes[gmmu_page_size_big] + i;
+ u32 pde_v[2] = {0, 0};
+ u32 *pde;
+
+ small_valid = small_pte && small_pte->ref;
+ big_valid = big_pte && big_pte->ref;
+
+ if (small_valid)
+ pte_addr[gmmu_page_size_small] =
+ gk20a_mm_iova_addr(small_pte->sgt->sgl);
+ if (big_valid)
+ pte_addr[gmmu_page_size_big] =
+ gk20a_mm_iova_addr(big_pte->sgt->sgl);
+
+ pde_v[0] = gmmu_pde_size_full_f();
+ pde_v[0] |= big_valid ?
+ big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
+ :
+ (gmmu_pde_aperture_big_invalid_f());
+
+ pde_v[1] |= (small_valid ?
+ small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
+ :
+ (gmmu_pde_aperture_small_invalid_f() |
+ gmmu_pde_vol_small_false_f())
+ )
+ |
+ (big_valid ? (gmmu_pde_vol_big_true_f()) :
+ gmmu_pde_vol_big_false_f());
+
+ pde = pde_from_index(vm, i);
+
+ gk20a_mem_wr32(pde, 0, pde_v[0]);
+ gk20a_mem_wr32(pde, 1, pde_v[1]);
+
+ smp_mb();
+
+ FLUSH_CPU_DCACHE(pde,
+ sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
+ sizeof(u32)*2);
+
+ gk20a_mm_l2_invalidate(vm->mm->g);
+
+ gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
+
+ vm->tlb_dirty = true;
+}
+
+
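+/*
+ * Back a (sparse) range with repeated mappings of a single shared zero
+ * page, allocating that page on first use.
+ */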
+static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
+ u32 num_pages, u32 pgsz_idx)
+{
+ struct mm_gk20a *mm = vm->mm;
+ struct gk20a *g = mm->g;
+ u32 pgsz = gmmu_page_sizes[pgsz_idx];
+ u32 i;
+ dma_addr_t iova;
+
+ /* allocate the zero page if the va does not already have one */
+ if (!vm->zero_page_cpuva) {
+ int err = 0;
+ vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
+ mm->big_page_size,
+ &iova,
+ GFP_KERNEL);
+ if (!vm->zero_page_cpuva) {
+ dev_err(&g->dev->dev, "failed to allocate zero page\n");
+ return -ENOMEM;
+ }
+
+ vm->zero_page_iova = iova;
+ err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
+ vm->zero_page_cpuva, vm->zero_page_iova,
+ mm->big_page_size);
+ if (err) {
+ dma_free_coherent(&g->dev->dev, mm->big_page_size,
+ vm->zero_page_cpuva,
+ vm->zero_page_iova);
+ vm->zero_page_iova = 0;
+ vm->zero_page_cpuva = NULL;
+
+ dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
+ return -ENOMEM;
+ }
+ }
+
+ for (i = 0; i < num_pages; i++) {
+ u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
+ vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
+ NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
+ gk20a_mem_flag_none);
+
+ if (!page_vaddr) {
+ gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
+ goto err_unmap;
+ }
+ vaddr += pgsz;
+ }
+
+ gk20a_mm_l2_flush(mm->g, true);
+
+ return 0;
+
+err_unmap:
+
+ WARN_ON(1);
+ /* something went wrong. unmap pages */
+ while (i--) {
+ vaddr -= pgsz;
+ __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
+ gk20a_mem_flag_none);
+ }
+
+ return -EINVAL;
+}
+
+/* NOTE! mapped_buffers lock must be held */
+static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+{
+ struct vm_gk20a *vm = mapped_buffer->vm;
+
+ if (mapped_buffer->va_node &&
+ mapped_buffer->va_node->sparse) {
+ u64 vaddr = mapped_buffer->addr;
+ u32 pgsz_idx = mapped_buffer->pgsz_idx;
+ u32 num_pages = mapped_buffer->size >>
+ gmmu_page_shifts[pgsz_idx];
+
+ /* there is little we can do if this fails... */
+ gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+
+ } else
+ __locked_gmmu_unmap(vm,
+ mapped_buffer->addr,
+ mapped_buffer->size,
+ mapped_buffer->pgsz_idx,
+ mapped_buffer->va_allocated,
+ gk20a_mem_flag_none);
+
+ gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
+ vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
+ hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
+ mapped_buffer->own_mem_ref);
+
+ gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
+ mapped_buffer->sgt);
+
+ /* remove from mapped buffer tree and remove list, free */
+ rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+ if (!list_empty(&mapped_buffer->va_buffers_list))
+ list_del(&mapped_buffer->va_buffers_list);
+
+ /* keep track of mapped buffers */
+ if (mapped_buffer->user_mapped)
+ vm->num_user_mapped_buffers--;
+
+ if (mapped_buffer->own_mem_ref)
+ dma_buf_put(mapped_buffer->dmabuf);
+
+ kfree(mapped_buffer);
+}
+
+void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
+{
+ struct device *d = dev_from_vm(vm);
+ struct mapped_buffer_node *mapped_buffer;
+
+ mutex_lock(&vm->update_gmmu_lock);
+ mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+ if (!mapped_buffer) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
+ return;
+ }
+ kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+ mutex_unlock(&vm->update_gmmu_lock);
+}
+
+static void gk20a_vm_remove_support(struct vm_gk20a *vm)
+{
+ struct gk20a *g = vm->mm->g;
+ struct mapped_buffer_node *mapped_buffer;
+ struct vm_reserved_va_node *va_node, *va_node_tmp;
+ struct rb_node *node;
+
+ gk20a_dbg_fn("");
+ mutex_lock(&vm->update_gmmu_lock);
+
+ /* TBD: add a flag here for the unmap code to recognize teardown
+ * and short-circuit any otherwise expensive operations. */
+
+ node = rb_first(&vm->mapped_buffers);
+ while (node) {
+ mapped_buffer =
+ container_of(node, struct mapped_buffer_node, node);
+ gk20a_vm_unmap_locked(mapped_buffer);
+ node = rb_first(&vm->mapped_buffers);
+ }
+
+ /* destroy remaining reserved memory areas */
+ list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
+ reserved_va_list) {
+ list_del(&va_node->reserved_va_list);
+ kfree(va_node);
+ }
+
+ /* TBD: unmapping all buffers above may not actually free
+ * all vm ptes. jettison them here for certain... */
+
+ unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
+ free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
+
+ kfree(vm->pdes.ptes[gmmu_page_size_small]);
+ kfree(vm->pdes.ptes[gmmu_page_size_big]);
+ gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+ gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ /* release zero page if used */
+ if (vm->zero_page_cpuva)
+ dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
+ vm->zero_page_cpuva, vm->zero_page_iova);
+
+ /* vm is not used anymore. release it. */
+ kfree(vm);
+}
+
+static void gk20a_vm_remove_support_kref(struct kref *ref)
+{
+ struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
+ gk20a_vm_remove_support(vm);
+}
+
+void gk20a_vm_get(struct vm_gk20a *vm)
+{
+ kref_get(&vm->ref);
+}
+
+void gk20a_vm_put(struct vm_gk20a *vm)
+{
+ kref_put(&vm->ref, gk20a_vm_remove_support_kref);
+}
+
+/* address space interfaces for the gk20a module */
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
+{
+ struct gk20a_as *as = as_share->as;
+ struct gk20a *g = gk20a_from_as(as);
+ struct mm_gk20a *mm = &g->mm;
+ struct vm_gk20a *vm;
+ u64 vma_size;
+ u32 num_pages, low_hole_pages;
+ char name[32];
+ int err;
+
+ gk20a_dbg_fn("");
+
+ vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+ if (!vm)
+ return -ENOMEM;
+
+ as_share->vm = vm;
+
+ vm->mm = mm;
+ vm->as_share = as_share;
+
+ vm->big_pages = true;
+
+ vm->va_start = mm->pde_stride; /* create a one pde hole */
+ vm->va_limit = mm->channel.size; /* note this means channel.size is
+ really just the max */
+ {
+ u32 pde_lo, pde_hi;
+ pde_range_from_vaddr_range(vm,
+ 0, vm->va_limit-1,
+ &pde_lo, &pde_hi);
+ vm->pdes.num_pdes = pde_hi + 1;
+ }
+
+ vm->pdes.ptes[gmmu_page_size_small] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ vm->pdes.ptes[gmmu_page_size_big] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ if (!(vm->pdes.ptes[gmmu_page_size_small] &&
+ vm->pdes.ptes[gmmu_page_size_big]))
+ return -ENOMEM;
+
+ gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
+ vm->va_limit, vm->pdes.num_pdes);
+
+ /* allocate the page table directory */
+ err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+ &vm->pdes.sgt, &vm->pdes.size);
+ if (err)
+ return -ENOMEM;
+
+ err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
+ vm->pdes.size);
+ if (err) {
+ free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+ vm->pdes.size);
+ return -ENOMEM;
+ }
+ gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
+ vm->pdes.kv,
+ gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+ /* we could release vm->pdes.kv but it's only one page... */
+
+
+ /* low-half: alloc small pages */
+ /* high-half: alloc big pages */
+ vma_size = mm->channel.size >> 1;
+
+ snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+ gmmu_page_sizes[gmmu_page_size_small]>>10);
+ num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
+
+ /* num_pages above is without regard to the low-side hole. */
+ low_hole_pages = (vm->va_start >>
+ gmmu_page_shifts[gmmu_page_size_small]);
+
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
+ low_hole_pages, /* start */
+ num_pages - low_hole_pages, /* length */
+ 1); /* align */
+
+ snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+ gmmu_page_sizes[gmmu_page_size_big]>>10);
+
+ num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
+ num_pages, /* start */
+ num_pages, /* length */
+ 1); /* align */
+
+ vm->mapped_buffers = RB_ROOT;
+
+ mutex_init(&vm->update_gmmu_lock);
+ kref_init(&vm->ref);
+ INIT_LIST_HEAD(&vm->reserved_va_list);
+
+ vm->enable_ctag = true;
+
+ return 0;
+}
+
+
+int gk20a_vm_release_share(struct gk20a_as_share *as_share)
+{
+ struct vm_gk20a *vm = as_share->vm;
+
+ gk20a_dbg_fn("");
+
+ vm->as_share = NULL;
+
+ /* put as reference to vm */
+ gk20a_vm_put(vm);
+
+ as_share->vm = NULL;
+
+ return 0;
+}
+
+
+int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
+ struct nvhost_as_alloc_space_args *args)
+
+{
+ int err = -ENOMEM;
+ int pgsz_idx;
+ u32 start_page_nr;
+ struct gk20a_allocator *vma;
+ struct vm_gk20a *vm = as_share->vm;
+ struct vm_reserved_va_node *va_node;
+ u64 vaddr_start = 0;
+
+ gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
+ args->flags, args->page_size, args->pages,
+ args->o_a.offset);
+
+ /* determine pagesz idx */
+ for (pgsz_idx = gmmu_page_size_small;
+ pgsz_idx < gmmu_nr_page_sizes;
+ pgsz_idx++) {
+ if (gmmu_page_sizes[pgsz_idx] == args->page_size)
+ break;
+ }
+
+ if (pgsz_idx >= gmmu_nr_page_sizes) {
+ err = -EINVAL;
+ goto clean_up;
+ }
+
+ va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
+ if (!va_node) {
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
+ pgsz_idx != gmmu_page_size_big) {
+ err = -ENOSYS;
+ kfree(va_node);
+ goto clean_up;
+ }
+
+ start_page_nr = 0;
+ if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
+ start_page_nr = (u32)(args->o_a.offset >>
+ gmmu_page_shifts[pgsz_idx]);
+
+ vma = &vm->vma[pgsz_idx];
+ err = vma->alloc(vma, &start_page_nr, args->pages);
+ if (err) {
+ kfree(va_node);
+ goto clean_up;
+ }
+
+ vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
+
+ va_node->vaddr_start = vaddr_start;
+ va_node->size = (u64)args->page_size * (u64)args->pages;
+ va_node->pgsz_idx = pgsz_idx;
+ INIT_LIST_HEAD(&va_node->va_buffers_list);
+ INIT_LIST_HEAD(&va_node->reserved_va_list);
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ /* mark that we need to use sparse mappings here */
+ if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
+ err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
+ pgsz_idx);
+ if (err) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ vma->free(vma, start_page_nr, args->pages);
+ kfree(va_node);
+ goto clean_up;
+ }
+
+ va_node->sparse = true;
+ }
+
+ list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ args->o_a.offset = vaddr_start;
+
+clean_up:
+ return err;
+}
+
+int gk20a_vm_free_space(struct gk20a_as_share *as_share,
+ struct nvhost_as_free_space_args *args)
+{
+ int err = -ENOMEM;
+ int pgsz_idx;
+ u32 start_page_nr;
+ struct gk20a_allocator *vma;
+ struct vm_gk20a *vm = as_share->vm;
+ struct vm_reserved_va_node *va_node;
+
+ gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
+ args->pages, args->offset);
+
+ /* determine pagesz idx */
+ for (pgsz_idx = gmmu_page_size_small;
+ pgsz_idx < gmmu_nr_page_sizes;
+ pgsz_idx++) {
+ if (gmmu_page_sizes[pgsz_idx] == args->page_size)
+ break;
+ }
+
+ if (pgsz_idx >= gmmu_nr_page_sizes) {
+ err = -EINVAL;
+ goto clean_up;
+ }
+
+ start_page_nr = (u32)(args->offset >>
+ gmmu_page_shifts[pgsz_idx]);
+
+ vma = &vm->vma[pgsz_idx];
+ err = vma->free(vma, start_page_nr, args->pages);
+
+ if (err)
+ goto clean_up;
+
+ mutex_lock(&vm->update_gmmu_lock);
+ va_node = addr_to_reservation(vm, args->offset);
+ if (va_node) {
+ struct mapped_buffer_node *buffer;
+
+ /* there is no need to unallocate the buffers in va. Just
+ * convert them into normal buffers */
+
+ list_for_each_entry(buffer,
+ &va_node->va_buffers_list, va_buffers_list)
+ list_del_init(&buffer->va_buffers_list);
+
+ list_del(&va_node->reserved_va_list);
+
+ /* if this was a sparse mapping, free the va */
+ if (va_node->sparse)
+ __locked_gmmu_unmap(vm,
+ va_node->vaddr_start,
+ va_node->size,
+ va_node->pgsz_idx,
+ false,
+ gk20a_mem_flag_none);
+ kfree(va_node);
+ }
+ mutex_unlock(&vm->update_gmmu_lock);
+
+clean_up:
+ return err;
+}
+
+int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
+ struct channel_gk20a *ch)
+{
+ int err = 0;
+ struct vm_gk20a *vm = as_share->vm;
+
+ gk20a_dbg_fn("");
+
+ ch->vm = vm;
+ err = channel_gk20a_commit_va(ch);
+ if (err)
+ ch->vm = NULL;
+
+ return err;
+}
+
+int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
+{
+ struct gk20a_dmabuf_priv *priv;
+ static DEFINE_MUTEX(priv_lock);
+
+ priv = dma_buf_get_drvdata(dmabuf, dev);
+ if (likely(priv))
+ return 0;
+
+ mutex_lock(&priv_lock);
+ priv = dma_buf_get_drvdata(dmabuf, dev);
+ if (priv)
+ goto priv_exist_or_err;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ priv = ERR_PTR(-ENOMEM);
+ goto priv_exist_or_err;
+ }
+ mutex_init(&priv->lock);
+ dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
+priv_exist_or_err:
+ mutex_unlock(&priv_lock);
+ if (IS_ERR(priv))
+ return -ENOMEM;
+
+ return 0;
+}
+
+
+static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
+{
+ int kind = 0;
+#ifdef CONFIG_TEGRA_NVMAP
+ int err;
+ u64 nvmap_param;
+
+ err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
+ &nvmap_param);
+ kind = err ? kind : nvmap_param;
+#endif
+ return kind;
+}
+
+int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
+ int dmabuf_fd,
+ u64 *offset_align,
+ u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
+ int kind)
+{
+ int err = 0;
+ struct vm_gk20a *vm = as_share->vm;
+ struct dma_buf *dmabuf;
+ u64 ret_va;
+
+ gk20a_dbg_fn("");
+
+ /* get ref to the mem handle (released on unmap_locked) */
+ dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
+ if (err) {
+ dma_buf_put(dmabuf);
+ return err;
+ }
+
+ if (kind == -1)
+ kind = gk20a_dmabuf_get_kind(dmabuf);
+
+ ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
+ flags, kind, NULL, true,
+ gk20a_mem_flag_none);
+ *offset_align = ret_va;
+ if (!ret_va) {
+ dma_buf_put(dmabuf);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
+{
+ struct vm_gk20a *vm = as_share->vm;
+
+ gk20a_dbg_fn("");
+
+ gk20a_vm_unmap_user(vm, offset);
+ return 0;
+}
+
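+/*
+ * Set up the BAR1 virtual address space: build its page directory and VA
+ * allocators and fill in the BAR1 instance block with the page directory
+ * base and address limit.
+ */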
+int gk20a_init_bar1_vm(struct mm_gk20a *mm)
+{
+ int err;
+ phys_addr_t inst_pa;
+ void *inst_ptr;
+ struct vm_gk20a *vm = &mm->bar1.vm;
+ struct gk20a *g = gk20a_from_mm(mm);
+ struct device *d = dev_from_gk20a(g);
+ struct inst_desc *inst_block = &mm->bar1.inst_block;
+ u64 pde_addr;
+ u32 pde_addr_lo;
+ u32 pde_addr_hi;
+ dma_addr_t iova;
+
+ vm->mm = mm;
+
+ mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
+
+ gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
+
+ vm->va_start = mm->pde_stride * 1;
+ vm->va_limit = mm->bar1.aperture_size;
+
+ {
+ u32 pde_lo, pde_hi;
+ pde_range_from_vaddr_range(vm,
+ 0, vm->va_limit-1,
+ &pde_lo, &pde_hi);
+ vm->pdes.num_pdes = pde_hi + 1;
+ }
+
+ /* bar1 is likely only to ever use/need small page sizes. */
+ /* But just in case, for now... arrange for both.*/
+ vm->pdes.ptes[gmmu_page_size_small] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ vm->pdes.ptes[gmmu_page_size_big] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ if (!(vm->pdes.ptes[gmmu_page_size_small] &&
+ vm->pdes.ptes[gmmu_page_size_big]))
+ return -ENOMEM;
+
+ gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
+ vm->va_limit, vm->pdes.num_pdes);
+
+
+ /* allocate the page table directory */
+ err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+ &vm->pdes.sgt, &vm->pdes.size);
+ if (err)
+ goto clean_up;
+
+ err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
+ vm->pdes.size);
+ if (err) {
+ free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+ vm->pdes.size);
+ goto clean_up;
+ }
+ gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
+ vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+ /* we could release vm->pdes.kv but it's only one page... */
+
+ pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+ pde_addr_lo = u64_lo32(pde_addr >> 12);
+ pde_addr_hi = u64_hi32(pde_addr);
+
+ gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
+ (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
+ pde_addr_lo, pde_addr_hi);
+
+ /* allocate instance mem for bar1 */
+ inst_block->size = ram_in_alloc_size_v();
+ inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
+ &iova, GFP_KERNEL);
+ if (!inst_block->cpuva) {
+ gk20a_err(d, "%s: memory allocation failed\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ inst_block->iova = iova;
+ inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
+ if (!inst_block->cpu_pa) {
+ gk20a_err(d, "%s: failed to get phys address\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ inst_pa = inst_block->cpu_pa;
+ inst_ptr = inst_block->cpuva;
+
+	gk20a_dbg_info("bar1 inst block physical addr = 0x%llx, kv = 0x%p",
+ (u64)inst_pa, inst_ptr);
+
+	memset(inst_ptr, 0, inst_block->size);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+ ram_in_page_dir_base_target_vid_mem_f() |
+ ram_in_page_dir_base_vol_true_f() |
+ ram_in_page_dir_base_lo_f(pde_addr_lo));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+ ram_in_page_dir_base_hi_f(pde_addr_hi));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+ u64_lo32(vm->va_limit) | 0xFFF);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+ ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
+
+ gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
+ 1,/*start*/
+ (vm->va_limit >> 12) - 1 /* length*/,
+ 1); /* align */
+ /* initialize just in case we try to use it anyway */
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
+ 0x0badc0de, /* start */
+ 1, /* length */
+ 1); /* align */
+
+ vm->mapped_buffers = RB_ROOT;
+
+ mutex_init(&vm->update_gmmu_lock);
+ kref_init(&vm->ref);
+ INIT_LIST_HEAD(&vm->reserved_va_list);
+
+ return 0;
+
+clean_up:
+ /* free, etc */
+ if (inst_block->cpuva)
+ dma_free_coherent(d, inst_block->size,
+ inst_block->cpuva, inst_block->iova);
+ inst_block->cpuva = NULL;
+ inst_block->iova = 0;
+ return err;
+}
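+
+/*
+ * Editorial note (illustrative, not part of the original patch): the page
+ * directory base programmed into the instance block above is split exactly
+ * as the two computed halves suggest. For a hypothetical 4K-aligned
+ * pde_addr of 0x1_2345_6000:
+ *
+ *	pde_addr_lo = u64_lo32(pde_addr >> 12) = 0x00123456
+ *	pde_addr_hi = u64_hi32(pde_addr)       = 0x00000001
+ *
+ * which is what ram_in_page_dir_base_lo_f()/_hi_f() are fed above and in
+ * gk20a_init_pmu_vm() below.
+ */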
+
+/* pmu vm, share channel_vm interfaces */
+int gk20a_init_pmu_vm(struct mm_gk20a *mm)
+{
+ int err;
+ phys_addr_t inst_pa;
+ void *inst_ptr;
+ struct vm_gk20a *vm = &mm->pmu.vm;
+ struct gk20a *g = gk20a_from_mm(mm);
+ struct device *d = dev_from_gk20a(g);
+ struct inst_desc *inst_block = &mm->pmu.inst_block;
+ u64 pde_addr;
+ u32 pde_addr_lo;
+ u32 pde_addr_hi;
+ dma_addr_t iova;
+
+ vm->mm = mm;
+
+ mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
+
+ gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
+
+ vm->va_start = GK20A_PMU_VA_START;
+ vm->va_limit = vm->va_start + mm->pmu.aperture_size;
+
+ {
+ u32 pde_lo, pde_hi;
+ pde_range_from_vaddr_range(vm,
+ 0, vm->va_limit-1,
+ &pde_lo, &pde_hi);
+ vm->pdes.num_pdes = pde_hi + 1;
+ }
+
+ /* The pmu is likely only to ever use/need small page sizes. */
+ /* But just in case, for now... arrange for both.*/
+ vm->pdes.ptes[gmmu_page_size_small] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ vm->pdes.ptes[gmmu_page_size_big] =
+ kzalloc(sizeof(struct page_table_gk20a) *
+ vm->pdes.num_pdes, GFP_KERNEL);
+
+ if (!(vm->pdes.ptes[gmmu_page_size_small] &&
+ vm->pdes.ptes[gmmu_page_size_big]))
+ return -ENOMEM;
+
+ gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
+ vm->va_limit, vm->pdes.num_pdes);
+
+ /* allocate the page table directory */
+ err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+ &vm->pdes.sgt, &vm->pdes.size);
+ if (err)
+ goto clean_up;
+
+ err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
+ vm->pdes.size);
+ if (err) {
+ free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+ vm->pdes.size);
+ goto clean_up;
+ }
+ gk20a_dbg_info("pmu pdes phys @ 0x%llx",
+ (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+ /* we could release vm->pdes.kv but it's only one page... */
+
+ pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+ pde_addr_lo = u64_lo32(pde_addr >> 12);
+ pde_addr_hi = u64_hi32(pde_addr);
+
+ gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
+ (u64)pde_addr, pde_addr_lo, pde_addr_hi);
+
+ /* allocate instance mem for pmu */
+ inst_block->size = GK20A_PMU_INST_SIZE;
+ inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
+ &iova, GFP_KERNEL);
+ if (!inst_block->cpuva) {
+ gk20a_err(d, "%s: memory allocation failed\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ inst_block->iova = iova;
+ inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
+ if (!inst_block->cpu_pa) {
+ gk20a_err(d, "%s: failed to get phys address\n", __func__);
+ err = -ENOMEM;
+ goto clean_up;
+ }
+
+ inst_pa = inst_block->cpu_pa;
+ inst_ptr = inst_block->cpuva;
+
+ gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
+
+ memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+ ram_in_page_dir_base_target_vid_mem_f() |
+ ram_in_page_dir_base_vol_true_f() |
+ ram_in_page_dir_base_lo_f(pde_addr_lo));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+ ram_in_page_dir_base_hi_f(pde_addr_hi));
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+ u64_lo32(vm->va_limit) | 0xFFF);
+
+ gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+ ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
+
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
+ (vm->va_start >> 12), /* start */
+ (vm->va_limit - vm->va_start) >> 12, /*length*/
+ 1); /* align */
+ /* initialize just in case we try to use it anyway */
+ gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
+ 0x0badc0de, /* start */
+ 1, /* length */
+ 1); /* align */
+
+
+ vm->mapped_buffers = RB_ROOT;
+
+ mutex_init(&vm->update_gmmu_lock);
+ kref_init(&vm->ref);
+ INIT_LIST_HEAD(&vm->reserved_va_list);
+
+ return 0;
+
+clean_up:
+ /* free, etc */
+ if (inst_block->cpuva)
+ dma_free_coherent(d, inst_block->size,
+ inst_block->cpuva, inst_block->iova);
+ inst_block->cpuva = NULL;
+ inst_block->iova = 0;
+ return err;
+}
+
+void gk20a_mm_fb_flush(struct gk20a *g)
+{
+ struct mm_gk20a *mm = &g->mm;
+ u32 data;
+ s32 retry = 100;
+
+ gk20a_dbg_fn("");
+
+ mutex_lock(&mm->l2_op_lock);
+
+ g->ops.ltc.elpg_flush(g);
+
+ /* Make sure all previous writes are committed to the L2. There's no
+ guarantee that writes are to DRAM. This will be a sysmembar internal
+ to the L2. */
+ gk20a_writel(g, flush_fb_flush_r(),
+ flush_fb_flush_pending_busy_f());
+
+ do {
+ data = gk20a_readl(g, flush_fb_flush_r());
+
+ if (flush_fb_flush_outstanding_v(data) ==
+ flush_fb_flush_outstanding_true_v() ||
+ flush_fb_flush_pending_v(data) ==
+ flush_fb_flush_pending_busy_v()) {
+ gk20a_dbg_info("fb_flush 0x%x", data);
+ retry--;
+ usleep_range(20, 40);
+ } else
+ break;
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "fb_flush too many retries");
+
+ mutex_unlock(&mm->l2_op_lock);
+}
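+
+/*
+ * Editorial note (not part of the original patch): the poll loop above and
+ * the L2 helpers below share the same shape: re-read the flush register
+ * while its pending/outstanding fields report busy, sleeping 20-40 us per
+ * iteration. With retry = 100 that bounds gk20a_mm_fb_flush() to roughly
+ * 2-4 ms on silicon (the L2 loops use retry = 200, roughly 4-8 ms); on
+ * non-silicon platforms the !tegra_platform_is_silicon() term keeps
+ * polling until the model reports idle.
+ */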
+
+static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
+{
+ u32 data;
+ s32 retry = 200;
+
+ /* Invalidate any clean lines from the L2 so subsequent reads go to
+ DRAM. Dirty lines are not affected by this operation. */
+ gk20a_writel(g, flush_l2_system_invalidate_r(),
+ flush_l2_system_invalidate_pending_busy_f());
+
+ do {
+ data = gk20a_readl(g, flush_l2_system_invalidate_r());
+
+ if (flush_l2_system_invalidate_outstanding_v(data) ==
+ flush_l2_system_invalidate_outstanding_true_v() ||
+ flush_l2_system_invalidate_pending_v(data) ==
+ flush_l2_system_invalidate_pending_busy_v()) {
+ gk20a_dbg_info("l2_system_invalidate 0x%x",
+ data);
+ retry--;
+ usleep_range(20, 40);
+ } else
+ break;
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "l2_system_invalidate too many retries");
+}
+
+void gk20a_mm_l2_invalidate(struct gk20a *g)
+{
+ struct mm_gk20a *mm = &g->mm;
+ mutex_lock(&mm->l2_op_lock);
+ gk20a_mm_l2_invalidate_locked(g);
+ mutex_unlock(&mm->l2_op_lock);
+}
+
+void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+ struct mm_gk20a *mm = &g->mm;
+ u32 data;
+ s32 retry = 200;
+
+ gk20a_dbg_fn("");
+
+ mutex_lock(&mm->l2_op_lock);
+
+ /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
+ as clean, so subsequent reads might hit in the L2. */
+ gk20a_writel(g, flush_l2_flush_dirty_r(),
+ flush_l2_flush_dirty_pending_busy_f());
+
+ do {
+ data = gk20a_readl(g, flush_l2_flush_dirty_r());
+
+ if (flush_l2_flush_dirty_outstanding_v(data) ==
+ flush_l2_flush_dirty_outstanding_true_v() ||
+ flush_l2_flush_dirty_pending_v(data) ==
+ flush_l2_flush_dirty_pending_busy_v()) {
+ gk20a_dbg_info("l2_flush_dirty 0x%x", data);
+ retry--;
+ usleep_range(20, 40);
+ } else
+ break;
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "l2_flush_dirty too many retries");
+
+ if (invalidate)
+ gk20a_mm_l2_invalidate_locked(g);
+
+ mutex_unlock(&mm->l2_op_lock);
+}
+
+
+int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+ struct dma_buf **dmabuf,
+ u64 *offset)
+{
+ struct mapped_buffer_node *mapped_buffer;
+
+ gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
+
+ mutex_lock(&vm->update_gmmu_lock);
+
+ mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
+ gpu_va);
+ if (!mapped_buffer) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ return -EINVAL;
+ }
+
+ *dmabuf = mapped_buffer->dmabuf;
+ *offset = gpu_va - mapped_buffer->addr;
+
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ return 0;
+}
+
+void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
+{
+ struct mm_gk20a *mm = vm->mm;
+ struct gk20a *g = gk20a_from_vm(vm);
+ u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
+ u32 data;
+ s32 retry = 200;
+
+ gk20a_dbg_fn("");
+
+	/* pagetables are considered sw state and are preserved across
+	   prepare_poweroff. When gk20a deinit releases those pagetables,
+	   common code in the vm unmap path calls tlb invalidate, which
+	   touches hw. Use the power_on flag to skip tlb invalidation when
+	   gpu power is turned off. */
+
+ if (!g->power_on)
+ return;
+
+ /* No need to invalidate if tlb is clean */
+ mutex_lock(&vm->update_gmmu_lock);
+ if (!vm->tlb_dirty) {
+ mutex_unlock(&vm->update_gmmu_lock);
+ return;
+ }
+ vm->tlb_dirty = false;
+ mutex_unlock(&vm->update_gmmu_lock);
+
+ mutex_lock(&mm->tlb_lock);
+ do {
+ data = gk20a_readl(g, fb_mmu_ctrl_r());
+ if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
+ break;
+ usleep_range(20, 40);
+ retry--;
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "wait mmu fifo space too many retries");
+
+ gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
+ fb_mmu_invalidate_pdb_addr_f(addr_lo) |
+ fb_mmu_invalidate_pdb_aperture_vid_mem_f());
+
+ /* this is a sledgehammer, it would seem */
+ gk20a_writel(g, fb_mmu_invalidate_r(),
+ fb_mmu_invalidate_all_pdb_true_f() |
+ fb_mmu_invalidate_all_va_true_f() |
+ fb_mmu_invalidate_trigger_true_f());
+
+ do {
+ data = gk20a_readl(g, fb_mmu_ctrl_r());
+ if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
+ fb_mmu_ctrl_pri_fifo_empty_false_f())
+ break;
+ retry--;
+ usleep_range(20, 40);
+ } while (retry >= 0 || !tegra_platform_is_silicon());
+
+ if (retry < 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "mmu invalidate too many retries");
+
+ mutex_unlock(&mm->tlb_lock);
+}
+
+int gk20a_mm_suspend(struct gk20a *g)
+{
+ gk20a_dbg_fn("");
+
+ gk20a_mm_fb_flush(g);
+ gk20a_mm_l2_flush(g, true);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+void gk20a_mm_ltc_isr(struct gk20a *g)
+{
+ u32 intr;
+
+ intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
+ gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
+ gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
+}
+
+bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
+{
+ u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+ return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
+ fb_mmu_debug_ctrl_debug_enabled_v();
+}
+
+static int gk20a_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
+ const unsigned int msec)
+{
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msec);
+ while (1) {
+ u32 val;
+
+ val = gk20a_readl(g, fb_mmu_vpr_info_r());
+ if (fb_mmu_vpr_info_fetch_v(val) ==
+ fb_mmu_vpr_info_fetch_false_v())
+ break;
+
+ if (tegra_platform_is_silicon() &&
+ WARN_ON(time_after(jiffies, timeout)))
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g)
+{
+ int ret = 0;
+
+ gk20a_busy_noresume(g->dev);
+ if (!pm_runtime_active(&g->dev->dev))
+ goto fail;
+
+ if (gk20a_mm_mmu_vpr_info_fetch_wait(g, 5)) {
+ ret = -ETIME;
+ goto fail;
+ }
+
+ gk20a_writel(g, fb_mmu_vpr_info_r(),
+ fb_mmu_vpr_info_fetch_true_v());
+
+ ret = gk20a_mm_mmu_vpr_info_fetch_wait(g, 5);
+
+ fail:
+ gk20a_idle(g->dev);
+ return ret;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
new file mode 100644
index 000000000000..23d15c232763
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -0,0 +1,464 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+ *
+ * GK20A memory management
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __MM_GK20A_H__
+#define __MM_GK20A_H__
+
+#include <linux/scatterlist.h>
+#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <asm/dma-iommu.h>
+#include "gk20a_allocator.h"
+
+/* This "address bit" in the gmmu ptes (and other gk20a accesses)
+ * signals the address as presented should be translated by the SMMU.
+ * Without this bit present gk20a accesses are *not* translated.
+ */
+/* Hack, get this from manuals somehow... */
+#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34
+#define NV_MC_SMMU_VADDR_TRANSLATE(x) ((x) | \
+ (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT))
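+
+/*
+ * Editorial example (not part of the original patch): with bit 34 as the
+ * translation bit,
+ *
+ *	NV_MC_SMMU_VADDR_TRANSLATE(0x1000ULL) == 0x400001000ULL
+ *
+ * i.e. the IOVA handed to the GPU is tagged so the access is routed
+ * through the SMMU instead of going out untranslated.
+ */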
+
+/* For now keep the size relatively small compared to the full
+ * 40b va: 32GB, consisting of two 16GB spaces. */
+#define NV_GMMU_VA_RANGE 35ULL
+#define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
+
+struct mem_desc {
+ struct dma_buf *ref;
+ struct sg_table *sgt;
+ u32 size;
+};
+
+struct mem_desc_sub {
+ u32 offset;
+ u32 size;
+};
+
+struct gpfifo_desc {
+ size_t size;
+ u32 entry_num;
+
+ u32 get;
+ u32 put;
+
+ bool wrap;
+
+ u64 iova;
+ struct gpfifo *cpu_va;
+ u64 gpu_va;
+};
+
+struct mmu_desc {
+ void *cpuva;
+ u64 iova;
+ size_t size;
+};
+
+struct inst_desc {
+ u64 iova;
+ void *cpuva;
+ phys_addr_t cpu_pa;
+ size_t size;
+};
+
+struct surface_mem_desc {
+ u64 iova;
+ void *cpuva;
+ struct sg_table *sgt;
+ size_t size;
+};
+
+struct userd_desc {
+ struct sg_table *sgt;
+ u64 iova;
+ void *cpuva;
+ size_t size;
+ u64 gpu_va;
+};
+
+struct runlist_mem_desc {
+ u64 iova;
+ void *cpuva;
+ size_t size;
+};
+
+struct patch_desc {
+ struct page **pages;
+ u64 iova;
+ size_t size;
+ void *cpu_va;
+ u64 gpu_va;
+ u32 data_count;
+};
+
+struct pmu_mem_desc {
+ void *cpuva;
+ u64 iova;
+ u64 pmu_va;
+ size_t size;
+};
+
+struct priv_cmd_queue_mem_desc {
+ u64 base_iova;
+ u32 *base_cpuva;
+ size_t size;
+};
+
+struct zcull_ctx_desc {
+ struct mem_desc mem;
+ u64 gpu_va;
+ u32 ctx_attr;
+ u32 ctx_sw_mode;
+};
+
+struct pm_ctx_desc {
+ struct mem_desc mem;
+ u64 gpu_va;
+ u32 ctx_attr;
+ u32 ctx_sw_mode;
+};
+
+struct gr_ctx_buffer_desc;
+struct platform_device;
+struct gr_ctx_buffer_desc {
+ void (*destroy)(struct platform_device *, struct gr_ctx_buffer_desc *);
+ struct sg_table *sgt;
+ struct page **pages;
+ size_t size;
+ u64 iova;
+ struct dma_attrs attrs;
+ void *priv;
+};
+
+struct gr_ctx_desc {
+ struct page **pages;
+ u64 iova;
+ size_t size;
+ u64 gpu_va;
+};
+
+struct compbit_store_desc {
+	struct page **pages;
+ size_t size;
+ u64 base_iova;
+};
+
+struct page_table_gk20a {
+ /* backing for */
+ /* Either a *page or a *mem_handle */
+ void *ref;
+ /* track mapping cnt on this page table */
+ u32 ref_cnt;
+ struct sg_table *sgt;
+ size_t size;
+};
+
+#ifndef _NVHOST_MEM_MGR_H
+enum gk20a_mem_rw_flag {
+ gk20a_mem_flag_none = 0,
+ gk20a_mem_flag_read_only = 1,
+ gk20a_mem_flag_write_only = 2,
+};
+#endif
+
+enum gmmu_pgsz_gk20a {
+ gmmu_page_size_small = 0,
+ gmmu_page_size_big = 1,
+ gmmu_nr_page_sizes = 2
+};
+
+
+struct page_directory_gk20a {
+ /* backing for */
+ u32 num_pdes;
+ void *kv;
+ /* Either a *page or a *mem_handle */
+ void *ref;
+ struct sg_table *sgt;
+ size_t size;
+ struct page_table_gk20a *ptes[gmmu_nr_page_sizes];
+};
+
+struct priv_cmd_queue {
+ struct priv_cmd_queue_mem_desc mem;
+ u64 base_gpuva; /* gpu_va base */
+ u16 size; /* num of entries in words */
+ u16 put; /* put for priv cmd queue */
+ u16 get; /* get for priv cmd queue */
+ struct list_head free; /* list of pre-allocated free entries */
+ struct list_head head; /* list of used entries */
+};
+
+struct priv_cmd_entry {
+ u32 *ptr;
+ u64 gva;
+ u16 get; /* start of entry in queue */
+ u16 size; /* in words */
+ u32 gp_get; /* gp_get when submitting last priv cmd */
+ u32 gp_put; /* gp_put when submitting last priv cmd */
+ u32 gp_wrap; /* wrap when submitting last priv cmd */
+ bool pre_alloc; /* prealloc entry, free to free list */
+ struct list_head list; /* node for lists */
+};
+
+struct mapped_buffer_node {
+ struct vm_gk20a *vm;
+ struct rb_node node;
+ struct list_head unmap_list;
+ struct list_head va_buffers_list;
+ struct vm_reserved_va_node *va_node;
+ u64 addr;
+ u64 size;
+ struct dma_buf *dmabuf;
+ struct sg_table *sgt;
+ struct kref ref;
+ u32 user_mapped;
+ bool own_mem_ref;
+ u32 pgsz_idx;
+ u32 ctag_offset;
+ u32 ctag_lines;
+ u32 flags;
+ u32 kind;
+ bool va_allocated;
+};
+
+struct vm_reserved_va_node {
+ struct list_head reserved_va_list;
+ struct list_head va_buffers_list;
+ u32 pgsz_idx;
+ u64 vaddr_start;
+ u64 size;
+ bool sparse;
+};
+
+struct vm_gk20a {
+ struct mm_gk20a *mm;
+ struct gk20a_as_share *as_share; /* as_share this represents */
+
+ u64 va_start;
+ u64 va_limit;
+
+ int num_user_mapped_buffers;
+
+ bool big_pages; /* enable large page support */
+ bool enable_ctag;
+ bool tlb_dirty;
+ bool mapped;
+
+ struct kref ref;
+
+ struct mutex update_gmmu_lock;
+
+ struct page_directory_gk20a pdes;
+
+ struct gk20a_allocator vma[gmmu_nr_page_sizes];
+ struct rb_root mapped_buffers;
+
+ struct list_head reserved_va_list;
+
+ dma_addr_t zero_page_iova;
+ void *zero_page_cpuva;
+ struct sg_table *zero_page_sgt;
+};
+
+struct gk20a;
+struct channel_gk20a;
+
+int gk20a_init_mm_support(struct gk20a *g);
+int gk20a_init_mm_setup_sw(struct gk20a *g);
+int gk20a_init_bar1_vm(struct mm_gk20a *mm);
+int gk20a_init_pmu_vm(struct mm_gk20a *mm);
+
+void gk20a_mm_fb_flush(struct gk20a *g);
+void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
+void gk20a_mm_l2_invalidate(struct gk20a *g);
+
+struct mm_gk20a {
+ struct gk20a *g;
+
+ u32 compression_page_size;
+ u32 big_page_size;
+ u32 pde_stride;
+ u32 pde_stride_shift;
+
+ struct {
+ u32 order;
+ u32 num_ptes;
+ } page_table_sizing[gmmu_nr_page_sizes];
+
+
+ struct {
+ u64 size;
+ } channel;
+
+ struct {
+ u32 aperture_size;
+ struct vm_gk20a vm;
+ struct inst_desc inst_block;
+ } bar1;
+
+ struct {
+ u32 aperture_size;
+ struct vm_gk20a vm;
+ struct inst_desc inst_block;
+ } pmu;
+
+ struct mutex tlb_lock;
+ struct mutex l2_op_lock;
+
+ void (*remove_support)(struct mm_gk20a *mm);
+ bool sw_ready;
+#ifdef CONFIG_DEBUG_FS
+ u32 ltc_enabled;
+ u32 ltc_enabled_debug;
+#endif
+};
+
+int gk20a_mm_init(struct mm_gk20a *mm);
+
+#define gk20a_from_mm(mm) ((mm)->g)
+#define gk20a_from_vm(vm) ((vm)->mm->g)
+
+#define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
+
+#define DEFAULT_ALLOC_ALIGNMENT (4*1024)
+
+static inline int bar1_aperture_size_mb_gk20a(void)
+{
+ return 128; /*TBD read this from fuses?*/
+}
+/* max address bits */
+static inline int max_physaddr_bits_gk20a(void)
+{
+ return 40;/*"old" sys physaddr, meaningful? */
+}
+static inline int max_vid_physaddr_bits_gk20a(void)
+{
+ /* "vid phys" is asid/smmu phys?,
+ * i.e. is this the real sys physaddr? */
+ return 37;
+}
+static inline int max_vaddr_bits_gk20a(void)
+{
+ return 40; /* chopped for area? */
+}
+
+#if 0 /*related to addr bits above, concern below TBD on which is accurate */
+#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
+ bus_bar1_block_ptr_s())
+#else
+#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
+#endif
+
+void gk20a_mm_dump_vm(struct vm_gk20a *vm,
+ u64 va_begin, u64 va_end, char *label);
+
+int gk20a_mm_suspend(struct gk20a *g);
+
+phys_addr_t gk20a_get_phys_from_iova(struct device *d,
+ u64 dma_addr);
+
+int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
+ void *cpuva, u64 iova,
+ size_t size);
+
+int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
+ struct page **pages, u64 iova,
+ size_t size);
+
+void gk20a_free_sgtable(struct sg_table **sgt);
+
+u64 gk20a_mm_iova_addr(struct scatterlist *sgl);
+
+void gk20a_mm_ltc_isr(struct gk20a *g);
+
+bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
+
+int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
+
+u64 gk20a_gmmu_map(struct vm_gk20a *vm,
+ struct sg_table **sgt,
+ u64 size,
+ u32 flags,
+ int rw_flag);
+
+void gk20a_gmmu_unmap(struct vm_gk20a *vm,
+ u64 vaddr,
+ u64 size,
+ int rw_flag);
+
+struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
+void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
+ struct sg_table *sgt);
+
+u64 gk20a_vm_map(struct vm_gk20a *vm,
+ struct dma_buf *dmabuf,
+ u64 offset_align,
+ u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
+ int kind,
+ struct sg_table **sgt,
+ bool user_mapped,
+ int rw_flag);
+
+/* unmap handle from kernel */
+void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
+
+/* get reference to all currently mapped buffers */
+int gk20a_vm_get_buffers(struct vm_gk20a *vm,
+ struct mapped_buffer_node ***mapped_buffers,
+ int *num_buffers);
+
+/* put references on the given buffers */
+void gk20a_vm_put_buffers(struct vm_gk20a *vm,
+ struct mapped_buffer_node **mapped_buffers,
+ int num_buffers);
+
+/* invalidate tlbs for the vm area */
+void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
+
+/* find buffer corresponding to va */
+int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
+ struct dma_buf **dmabuf,
+ u64 *offset);
+
+void gk20a_vm_get(struct vm_gk20a *vm);
+void gk20a_vm_put(struct vm_gk20a *vm);
+
+/* vm-as interface */
+struct nvhost_as_alloc_space_args;
+struct nvhost_as_free_space_args;
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share);
+int gk20a_vm_release_share(struct gk20a_as_share *as_share);
+int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
+ struct nvhost_as_alloc_space_args *args);
+int gk20a_vm_free_space(struct gk20a_as_share *as_share,
+ struct nvhost_as_free_space_args *args);
+int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
+ struct channel_gk20a *ch);
+int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
+ int dmabuf_fd,
+ u64 *offset_align,
+ u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
+ int kind);
+int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
+
+int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+#endif /* __MM_GK20A_H__ */
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
new file mode 100644
index 000000000000..09f348cb9f53
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -0,0 +1,160 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+ *
+ * GK20A Platform (SoC) Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GK20A_PLATFORM_H_
+#define _GK20A_PLATFORM_H_
+
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+
+struct gk20a;
+struct channel_gk20a;
+struct gr_ctx_buffer_desc;
+struct gk20a_scale_profile;
+
+struct gk20a_platform {
+#ifdef CONFIG_TEGRA_GK20A
+ u32 syncpt_base;
+#endif
+ /* Populated by the gk20a driver before probing the platform. */
+ struct gk20a *g;
+
+ /* Should be populated at probe. */
+ bool can_railgate;
+
+ /* Should be populated at probe. */
+ bool has_syncpoints;
+
+ /* Should be populated by probe. */
+ struct dentry *debugfs;
+
+ /* Clock configuration is stored here. Platform probe is responsible
+ * for filling this data. */
+ struct clk *clk[3];
+ int num_clks;
+
+ /* Delay before rail gated */
+ int railgate_delay;
+
+ /* Delay before clock gated */
+ int clockgate_delay;
+
+ /* Initialize the platform interface of the gk20a driver.
+ *
+ * The platform implementation of this function must
+ * - set the power and clocks of the gk20a device to a known
+ * state, and
+ * - populate the gk20a_platform structure (a pointer to the
+ * structure can be obtained by calling gk20a_get_platform).
+ *
+ * After this function is finished, the driver will initialise
+ * pm runtime and genpd based on the platform configuration.
+ */
+ int (*probe)(struct platform_device *dev);
+
+ /* Second stage initialisation - called once all power management
+ * initialisations are done.
+ */
+ int (*late_probe)(struct platform_device *dev);
+
+	/* Called before submitting work to the gpu. The platform may use this
+	 * hook to ensure that any other hw modules that the gpu depends on are
+	 * powered. The platform implementation must reference count calls to
+	 * this hook. */
+ int (*channel_busy)(struct platform_device *dev);
+
+	/* Called after the work on the gpu is completed. The platform may use
+	 * this hook to release power refs to any other hw modules that the gpu
+	 * depends on. The platform implementation must reference count calls
+	 * to this hook. */
+ void (*channel_idle)(struct platform_device *dev);
+
+ /* This function is called to allocate secure memory (memory that the
+ * CPU cannot see). The function should fill the context buffer
+ * descriptor (especially fields destroy, sgt, size).
+ */
+ int (*secure_alloc)(struct platform_device *dev,
+ struct gr_ctx_buffer_desc *desc,
+ size_t size);
+
+ /* Device is going to be suspended */
+ int (*suspend)(struct device *);
+
+ /* Called to turn off the device */
+ int (*railgate)(struct platform_device *dev);
+
+ /* Called to turn on the device */
+ int (*unrailgate)(struct platform_device *dev);
+
+ /* Postscale callback is called after frequency change */
+ void (*postscale)(struct platform_device *pdev,
+ unsigned long freq);
+
+ /* Pre callback is called before frequency change */
+ void (*prescale)(struct platform_device *pdev);
+
+ /* Devfreq governor name. If scaling is enabled, we request
+ * this governor to be used in scaling */
+ const char *devfreq_governor;
+
+ /* Quality of service id. If this is set, the scaling routines
+ * will register a callback to id. Each time we receive a new value,
+ * the postscale callback gets called. */
+ int qos_id;
+
+ /* Called as part of debug dump. If the gpu gets hung, this function
+ * is responsible for delivering all necessary debug data of other
+ * hw units which may interact with the gpu without direct supervision
+ * of the CPU.
+ */
+ void (*dump_platform_dependencies)(struct platform_device *dev);
+};
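+
+/*
+ * Editorial sketch (not part of the original patch): a minimal board file
+ * can get away with just a probe hook and leave the optional callbacks
+ * NULL, exactly as platform_gk20a_generic.c in this series does. The
+ * my_board_* names are hypothetical:
+ *
+ *	struct gk20a_platform my_board_gk20a_platform = {
+ *		.probe = my_board_gk20a_probe,
+ *	};
+ */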
+
+static inline struct gk20a_platform *gk20a_get_platform(
+ struct platform_device *dev)
+{
+ return (struct gk20a_platform *)platform_get_drvdata(dev);
+}
+
+extern struct gk20a_platform gk20a_generic_platform;
+#ifdef CONFIG_TEGRA_GK20A
+extern struct gk20a_platform gk20a_tegra_platform;
+#endif
+
+static inline int gk20a_platform_channel_busy(struct platform_device *dev)
+{
+ struct gk20a_platform *p = gk20a_get_platform(dev);
+ int ret = 0;
+ if (p->channel_busy)
+ ret = p->channel_busy(dev);
+
+ return ret;
+}
+
+static inline void gk20a_platform_channel_idle(struct platform_device *dev)
+{
+ struct gk20a_platform *p = gk20a_get_platform(dev);
+ if (p->channel_idle)
+ p->channel_idle(dev);
+}
+
+static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev)
+{
+ struct gk20a_platform *p = gk20a_get_platform(dev);
+ return p->has_syncpoints;
+}
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c
new file mode 100644
index 000000000000..7b750df61751
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c
@@ -0,0 +1,35 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c
+ *
+ * GK20A Generic Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "platform_gk20a.h"
+
+static int gk20a_generic_probe(struct platform_device *dev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+ /* TODO: Initialize clocks and power */
+ (void)platform;
+
+ return 0;
+}
+
+struct gk20a_platform gk20a_generic_platform = {
+ .probe = gk20a_generic_probe,
+};
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
new file mode 100644
index 000000000000..35658f31c9d8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -0,0 +1,561 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+ *
+ * GK20A Tegra Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/tegra-powergate.h>
+#include <linux/platform_data/tegra_edp.h>
+#include <linux/nvhost_ioctl.h>
+#include <linux/dma-buf.h>
+#include <linux/nvmap.h>
+#include <mach/irqs.h>
+#include <mach/pm_domains.h>
+
+#include "../../../arch/arm/mach-tegra/iomap.h"
+
+#include "gk20a.h"
+#include "hal_gk20a.h"
+#include "platform_gk20a.h"
+#include "gk20a_scale.h"
+
+#define TEGRA_GK20A_INTR INT_GPU
+#define TEGRA_GK20A_INTR_NONSTALL INT_GPU_NONSTALL
+
+#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
+#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
+
+extern struct device tegra_vpr_dev;
+struct gk20a_platform t132_gk20a_tegra_platform;
+
+struct gk20a_emc_params {
+ long emc_slope;
+ long emc_offset;
+ long emc_dip_slope;
+ long emc_dip_offset;
+ long emc_xmid;
+ bool linear;
+};
+
+/*
+ * 20.12 fixed point arithmetic
+ */
+
+static const int FXFRAC = 12;
+static const int FX_HALF = (1 << 12) / 2;
+
+#define INT_TO_FX(x) ((x) << FXFRAC)
+#define FX_TO_INT(x) ((x) >> FXFRAC)
+
+#define MHZ_TO_HZ(x) ((x) * 1000000)
+#define HZ_TO_MHZ(x) ((x) / 1000000)
+
+int FXMUL(int x, int y)
+{
+ return ((long long) x * (long long) y) >> FXFRAC;
+}
+
+int FXDIV(int x, int y)
+{
+ /* long long div operation not supported, must shift manually. This
+ * would have been
+ *
+ * return (((long long) x) << FXFRAC) / (long long) y;
+ */
+ int pos, t;
+ if (x == 0)
+ return 0;
+
+ /* find largest allowable right shift to numerator, limit to FXFRAC */
+ t = x < 0 ? -x : x;
+ pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
+ if (pos > FXFRAC)
+ pos = FXFRAC;
+
+ y >>= FXFRAC - pos;
+ if (y == 0)
+ return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
+
+ return (x << pos) / y;
+}
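+
+/*
+ * Editorial example (not part of the original patch) of the 20.12 helpers
+ * above, using small hypothetical values:
+ *
+ *	INT_TO_FX(3)                      == 3 << 12 == 12288
+ *	FXMUL(INT_TO_FX(3), INT_TO_FX(2)) == (12288 * 8192) >> 12
+ *	                                  == 24576 == INT_TO_FX(6)
+ *	FXDIV(INT_TO_FX(6), INT_TO_FX(2)) == 12288 == INT_TO_FX(3)
+ *	FX_TO_INT(INT_TO_FX(3))           == 3
+ */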
+
+static int gk20a_tegra_channel_busy(struct platform_device *dev)
+{
+ int ret = 0;
+
+	/* Explicitly turn on the host1x clocks
+	 * - This is needed as the host1x driver sets ignore_children = true
+	 *   to cater to the use case of display clock ON but host1x clock OFF
+	 *   in the OS-Idle-Display-ON case
+	 * - This was easily done in ACM as it only checked the ref count
+	 *   of host1x (or any device for that matter) to be zero before
+	 *   turning off its clock
+	 * - However, runtime PM checks to see if *ANY* child of the device is
+	 *   in the ACTIVE state and, if yes, it doesn't suspend the parent. As
+	 *   a result, the display and host1x clocks remain ON during the
+	 *   OS-Idle-Display-ON case
+	 * - The code below fixes this use case
+	 */
+	if (dev->dev.parent)
+		ret = nvhost_module_busy_ext(
+				to_platform_device(dev->dev.parent));
+
+ return ret;
+}
+
+static void gk20a_tegra_channel_idle(struct platform_device *dev)
+{
+ /* Explicitly turn off the host1x clocks */
+	if (dev->dev.parent)
+		nvhost_module_idle_ext(to_platform_device(dev->dev.parent));
+}
+
+static void gk20a_tegra_secure_destroy(struct platform_device *pdev,
+ struct gr_ctx_buffer_desc *desc)
+{
+ gk20a_free_sgtable(&desc->sgt);
+ dma_free_attrs(&tegra_vpr_dev, desc->size,
+ (void *)(uintptr_t)&desc->iova,
+ desc->iova, &desc->attrs);
+}
+
+static int gk20a_tegra_secure_alloc(struct platform_device *pdev,
+ struct gr_ctx_buffer_desc *desc,
+ size_t size)
+{
+ struct device *dev = &pdev->dev;
+ DEFINE_DMA_ATTRS(attrs);
+ dma_addr_t iova;
+ struct sg_table *sgt;
+ struct page *page;
+ int err = 0;
+
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+
+ (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
+ GFP_KERNEL, &attrs);
+ if (dma_mapping_error(&tegra_vpr_dev, iova))
+ return -ENOMEM;
+
+ desc->iova = iova;
+ desc->size = size;
+ desc->attrs = attrs;
+ desc->destroy = gk20a_tegra_secure_destroy;
+
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt) {
+		gk20a_err(dev, "failed to allocate memory\n");
+		err = -ENOMEM;
+		goto fail;
+	}
+ err = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (err) {
+ gk20a_err(dev, "failed to allocate sg_table\n");
+ goto fail_sgt;
+ }
+ page = phys_to_page(iova);
+ sg_set_page(sgt->sgl, page, size, 0);
+ sg_dma_address(sgt->sgl) = iova;
+
+ desc->sgt = sgt;
+
+ return err;
+
+fail_sgt:
+ kfree(sgt);
+fail:
+ dma_free_attrs(&tegra_vpr_dev, desc->size,
+ (void *)(uintptr_t)&desc->iova,
+ desc->iova, &desc->attrs);
+ return err;
+}
+
+/*
+ * gk20a_tegra_get_emc_rate()
+ *
+ * This function returns the minimum emc clock based on gpu frequency
+ */
+
+long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
+{
+ long hz;
+
+ freq = INT_TO_FX(HZ_TO_MHZ(freq));
+ hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
+
+ hz -= FXMUL(emc_params->emc_dip_slope,
+ FXMUL(freq - emc_params->emc_xmid,
+ freq - emc_params->emc_xmid)) +
+ emc_params->emc_dip_offset;
+
+ hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
+ hz = (hz < 0) ? 0 : hz;
+
+ return hz;
+}
+
+/*
+ * gk20a_tegra_postscale(profile, freq)
+ *
+ * This function sets emc frequency based on current gpu frequency
+ */
+
+static void gk20a_tegra_postscale(struct platform_device *pdev,
+ unsigned long freq)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ struct gk20a_scale_profile *profile = platform->g->scale_profile;
+ struct gk20a_emc_params *emc_params = profile->private_data;
+ struct gk20a *g = get_gk20a(pdev);
+
+ long after = gk20a_clk_get_rate(g);
+ long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
+
+ clk_set_rate(platform->clk[2], emc_target);
+}
+
+/*
+ * gk20a_tegra_prescale(profile, freq)
+ *
+ * This function informs EDP about changed constraints.
+ */
+
+static void gk20a_tegra_prescale(struct platform_device *pdev)
+{
+ struct gk20a *g = get_gk20a(pdev);
+ u32 avg = 0;
+
+ gk20a_pmu_load_norm(g, &avg);
+ tegra_edp_notify_gpu_load(avg);
+}
+
+/*
+ * gk20a_tegra_calibrate_emc()
+ *
+ * Compute emc scaling parameters
+ *
+ * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
+ *
+ * Remc - 3d.emc rate
+ * R3d - 3d.cbus rate
+ * Rm - 3d.cbus 'middle' rate = (max + min)/2
+ * S - emc_slope
+ * O - emc_offset
+ * Sd - emc_dip_slope
+ * Od - emc_dip_offset
+ *
+ * this superposes a quadratic dip centered around the middle 3d
+ * frequency over a linear correlation of 3d.emc to 3d clock
+ * rates.
+ *
+ * S, O are chosen so that the maximum 3d rate produces the
+ * maximum 3d.emc rate exactly, and the minimum 3d rate produces
+ * at least the minimum 3d.emc rate.
+ *
+ * Sd and Od are chosen to produce the largest dip that will
+ * keep 3d.emc frequencies monotonically decreasing with 3d
+ * frequencies. To achieve this, the first derivative of Remc
+ * with respect to R3d should be zero for the minimal 3d rate:
+ *
+ * R'emc = S - 2 * Sd * (R3d - Rm)
+ * R'emc(R3d-min) = 0
+ * S = 2 * Sd * (R3d-min - Rm)
+ * = 2 * Sd * (R3d-min - R3d-max) / 2
+ *
+ * +------------------------------+
+ * | Sd = S / (R3d-min - R3d-max) |
+ * +------------------------------+
+ *
+ * dip = Sd * (R3d - Rm)^2 + Od
+ *
+ * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
+ *
+ * Sd * (R3d-min - Rm)^2 + Od = 0
+ * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
+ * = -Sd * ((R3d-min - R3d-max)^2) / 4
+ *
+ * +------------------------------+
+ * | Od = (emc-max - emc-min) / 4 |
+ * +------------------------------+
+ *
+ */
+
+void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
+ struct clk *clk_3d, struct clk *clk_3d_emc)
+{
+ long correction;
+ unsigned long max_emc;
+ unsigned long min_emc;
+ unsigned long min_rate_3d;
+ unsigned long max_rate_3d;
+
+ max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
+ max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
+
+ min_emc = clk_round_rate(clk_3d_emc, 0);
+ min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
+
+ max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
+ max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
+
+ min_rate_3d = clk_round_rate(clk_3d, 0);
+ min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
+
+ emc_params->emc_slope =
+ FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
+ emc_params->emc_offset = max_emc -
+ FXMUL(emc_params->emc_slope, max_rate_3d);
+ /* Guarantee max 3d rate maps to max emc rate */
+ emc_params->emc_offset += max_emc -
+ (FXMUL(emc_params->emc_slope, max_rate_3d) +
+ emc_params->emc_offset);
+
+ emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
+ emc_params->emc_dip_slope =
+ -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
+ emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
+ correction =
+ emc_params->emc_dip_offset +
+ FXMUL(emc_params->emc_dip_slope,
+ FXMUL(max_rate_3d - emc_params->emc_xmid,
+ max_rate_3d - emc_params->emc_xmid));
+ emc_params->emc_dip_offset -= correction;
+}
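+
+/*
+ * Editorial example (hypothetical numbers, not from the original patch),
+ * ignoring the 20.12 fixed-point scaling for readability: with
+ * R3d-min = 100, R3d-max = 500, emc-min = 200 and emc-max = 800 (MHz),
+ * the calibration above yields
+ *
+ *	S  = (800 - 200) / (500 - 100) = 1.5
+ *	O  = 800 - 1.5 * 500           = 50
+ *	Sd = -1.5 / (500 - 100)        = -0.00375
+ *	Od = (800 - 200) / 4           = 150
+ *
+ * so gk20a_tegra_get_emc_rate() gives Remc(500) = 800 and Remc(100) = 200,
+ * with zero slope at R3d-min as required by the derivation above.
+ */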
+
+/*
+ * gk20a_tegra_railgate()
+ *
+ * Gate (disable) gk20a power rail
+ */
+
+static int gk20a_tegra_railgate(struct platform_device *pdev)
+{
+ if (tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
+ tegra_powergate_partition(TEGRA_POWERGATE_GPU);
+ return 0;
+}
+
+/*
+ * gk20a_tegra_unrailgate()
+ *
+ * Ungate (enable) gk20a power rail
+ */
+
+static int gk20a_tegra_unrailgate(struct platform_device *pdev)
+{
+ tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
+ return 0;
+}
+
+struct {
+ char *name;
+ unsigned long default_rate;
+} tegra_gk20a_clocks[] = {
+ {"PLLG_ref", UINT_MAX},
+ {"pwr", 204000000},
+ {"emc", UINT_MAX} };
+
+/*
+ * gk20a_tegra_get_clocks()
+ *
+ * This function finds clocks in tegra platform and populates
+ * the clock information to gk20a platform data.
+ */
+
+static int gk20a_tegra_get_clocks(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = platform_get_drvdata(pdev);
+ char devname[16];
+ int i;
+ int ret = 0;
+
+ snprintf(devname, sizeof(devname),
+		(pdev->id <= 0) ? "tegra_%s" : "tegra_%s.%d",
+ pdev->name, pdev->id);
+
+ platform->num_clks = 0;
+ for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
+ long rate = tegra_gk20a_clocks[i].default_rate;
+ struct clk *c;
+
+ c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
+ if (IS_ERR(c)) {
+ ret = PTR_ERR(c);
+ goto err_get_clock;
+ }
+ rate = clk_round_rate(c, rate);
+ clk_set_rate(c, rate);
+ platform->clk[i] = c;
+ }
+ platform->num_clks = i;
+
+ return 0;
+
+err_get_clock:
+
+ while (i--)
+ clk_put(platform->clk[i]);
+ return ret;
+}
+
+static void gk20a_tegra_scale_init(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(pdev);
+ struct gk20a_scale_profile *profile = platform->g->scale_profile;
+ struct gk20a_emc_params *emc_params;
+
+ if (!profile)
+ return;
+
+ emc_params = kzalloc(sizeof(*emc_params), GFP_KERNEL);
+ if (!emc_params)
+ return;
+
+ gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
+ platform->clk[2]);
+
+ profile->private_data = emc_params;
+}
+
+static void gk20a_tegra_debug_dump(struct platform_device *pdev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(pdev);
+ struct gk20a *g = platform->g;
+ nvhost_debug_dump_device(g->dev);
+}
+
+static int gk20a_tegra_probe(struct platform_device *dev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+ if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA13) {
+ t132_gk20a_tegra_platform.g = platform->g;
+ *platform = t132_gk20a_tegra_platform;
+ }
+
+ gk20a_tegra_get_clocks(dev);
+
+ return 0;
+}
+
+static int gk20a_tegra_late_probe(struct platform_device *dev)
+{
+ struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+ /* Make gk20a power domain a subdomain of mc */
+ tegra_pd_add_sd(&platform->g->pd);
+
+ /* Initialise tegra specific scaling quirks */
+ gk20a_tegra_scale_init(dev);
+
+ return 0;
+}
+
+static int gk20a_tegra_suspend(struct device *dev)
+{
+ tegra_edp_notify_gpu_load(0);
+ return 0;
+}
+
+static struct resource gk20a_tegra_resources[] = {
+ {
+ .start = TEGRA_GK20A_BAR0_BASE,
+ .end = TEGRA_GK20A_BAR0_BASE + TEGRA_GK20A_BAR0_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = TEGRA_GK20A_BAR1_BASE,
+ .end = TEGRA_GK20A_BAR1_BASE + TEGRA_GK20A_BAR1_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ { /* Used on ASIM only */
+ .start = TEGRA_GK20A_SIM_BASE,
+ .end = TEGRA_GK20A_SIM_BASE + TEGRA_GK20A_SIM_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = TEGRA_GK20A_INTR,
+ .end = TEGRA_GK20A_INTR,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = TEGRA_GK20A_INTR_NONSTALL,
+ .end = TEGRA_GK20A_INTR_NONSTALL,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+struct gk20a_platform t132_gk20a_tegra_platform = {
+ .has_syncpoints = true,
+
+ /* power management configuration */
+ .railgate_delay = 500,
+ .clockgate_delay = 50,
+
+ .probe = gk20a_tegra_probe,
+ .late_probe = gk20a_tegra_late_probe,
+
+ /* power management callbacks */
+ .suspend = gk20a_tegra_suspend,
+ .railgate = gk20a_tegra_railgate,
+ .unrailgate = gk20a_tegra_unrailgate,
+
+ /* frequency scaling configuration */
+ .prescale = gk20a_tegra_prescale,
+ .postscale = gk20a_tegra_postscale,
+ .devfreq_governor = "nvhost_podgov",
+ .qos_id = PM_QOS_GPU_FREQ_MIN,
+
+ .channel_busy = gk20a_tegra_channel_busy,
+ .channel_idle = gk20a_tegra_channel_idle,
+ .secure_alloc = gk20a_tegra_secure_alloc,
+ .dump_platform_dependencies = gk20a_tegra_debug_dump,
+};
+
+struct gk20a_platform gk20a_tegra_platform = {
+ .has_syncpoints = true,
+
+ /* power management configuration */
+ .railgate_delay = 500,
+ .clockgate_delay = 50,
+ .can_railgate = true,
+
+ .probe = gk20a_tegra_probe,
+ .late_probe = gk20a_tegra_late_probe,
+
+ /* power management callbacks */
+ .suspend = gk20a_tegra_suspend,
+ .railgate = gk20a_tegra_railgate,
+ .unrailgate = gk20a_tegra_unrailgate,
+
+ /* frequency scaling configuration */
+ .prescale = gk20a_tegra_prescale,
+ .postscale = gk20a_tegra_postscale,
+ .devfreq_governor = "nvhost_podgov",
+ .qos_id = PM_QOS_GPU_FREQ_MIN,
+
+ .channel_busy = gk20a_tegra_channel_busy,
+ .channel_idle = gk20a_tegra_channel_idle,
+ .secure_alloc = gk20a_tegra_secure_alloc,
+ .dump_platform_dependencies = gk20a_tegra_debug_dump,
+};
+
+struct platform_device tegra_gk20a_device = {
+ .name = "gk20a",
+ .resource = gk20a_tegra_resources,
+ .num_resources = ARRAY_SIZE(gk20a_tegra_resources),
+ .dev = {
+ .platform_data = &gk20a_tegra_platform,
+ },
+};
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
new file mode 100644
index 000000000000..a00499a98ab8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -0,0 +1,3796 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+ *
+ * GK20A PMU (aka. gPMU outside gk20a context)
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/delay.h> /* for mdelay */
+#include <linux/firmware.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+
+#include "gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_pwr_gk20a.h"
+#include "hw_top_gk20a.h"
+
+#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin"
+
+#define gk20a_dbg_pmu(fmt, arg...) \
+ gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
+
+static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
+static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
+ u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
+static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work);
+static void pmu_save_zbc(struct gk20a *g, u32 entries);
+static void ap_callback_init_and_enable_ctrl(
+ struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 seq_desc, u32 status);
+static int gk20a_pmu_ap_send_command(struct gk20a *g,
+ union pmu_ap_cmd *p_ap_cmd, bool b_block);
+
+static u32 pmu_cmdline_size_v0(struct pmu_gk20a *pmu)
+{
+ return sizeof(struct pmu_cmdline_args_v0);
+}
+
+static u32 pmu_cmdline_size_v1(struct pmu_gk20a *pmu)
+{
+ return sizeof(struct pmu_cmdline_args_v1);
+}
+
+static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
+{
+ pmu->args_v1.cpu_freq_hz = freq;
+}
+
+static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
+{
+ pmu->args_v0.cpu_freq_hz = freq;
+}
+
+static void *get_pmu_cmdline_args_ptr_v1(struct pmu_gk20a *pmu)
+{
+ return (void *)(&pmu->args_v1);
+}
+
+static void *get_pmu_cmdline_args_ptr_v0(struct pmu_gk20a *pmu)
+{
+ return (void *)(&pmu->args_v0);
+}
+
+static u32 get_pmu_allocation_size_v1(struct pmu_gk20a *pmu)
+{
+ return sizeof(struct pmu_allocation_v1);
+}
+
+static u32 get_pmu_allocation_size_v0(struct pmu_gk20a *pmu)
+{
+ return sizeof(struct pmu_allocation_v0);
+}
+
+static void set_pmu_allocation_ptr_v1(struct pmu_gk20a *pmu,
+ void **pmu_alloc_ptr, void *assign_ptr)
+{
+ struct pmu_allocation_v1 **pmu_a_ptr =
+ (struct pmu_allocation_v1 **)pmu_alloc_ptr;
+ *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
+}
+
+static void set_pmu_allocation_ptr_v0(struct pmu_gk20a *pmu,
+ void **pmu_alloc_ptr, void *assign_ptr)
+{
+ struct pmu_allocation_v0 **pmu_a_ptr =
+ (struct pmu_allocation_v0 **)pmu_alloc_ptr;
+ *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
+}
+
+static void pmu_allocation_set_dmem_size_v1(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u16 size)
+{
+ struct pmu_allocation_v1 *pmu_a_ptr =
+ (struct pmu_allocation_v1 *)pmu_alloc_ptr;
+ pmu_a_ptr->alloc.dmem.size = size;
+}
+
+static void pmu_allocation_set_dmem_size_v0(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u16 size)
+{
+ struct pmu_allocation_v0 *pmu_a_ptr =
+ (struct pmu_allocation_v0 *)pmu_alloc_ptr;
+ pmu_a_ptr->alloc.dmem.size = size;
+}
+
+static u16 pmu_allocation_get_dmem_size_v1(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v1 *pmu_a_ptr =
+ (struct pmu_allocation_v1 *)pmu_alloc_ptr;
+ return pmu_a_ptr->alloc.dmem.size;
+}
+
+static u16 pmu_allocation_get_dmem_size_v0(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v0 *pmu_a_ptr =
+ (struct pmu_allocation_v0 *)pmu_alloc_ptr;
+ return pmu_a_ptr->alloc.dmem.size;
+}
+
+static u32 pmu_allocation_get_dmem_offset_v1(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v1 *pmu_a_ptr =
+ (struct pmu_allocation_v1 *)pmu_alloc_ptr;
+ return pmu_a_ptr->alloc.dmem.offset;
+}
+
+static u32 pmu_allocation_get_dmem_offset_v0(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v0 *pmu_a_ptr =
+ (struct pmu_allocation_v0 *)pmu_alloc_ptr;
+ return pmu_a_ptr->alloc.dmem.offset;
+}
+
+static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v1 *pmu_a_ptr =
+ (struct pmu_allocation_v1 *)pmu_alloc_ptr;
+ return &pmu_a_ptr->alloc.dmem.offset;
+}
+
+static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr)
+{
+ struct pmu_allocation_v0 *pmu_a_ptr =
+ (struct pmu_allocation_v0 *)pmu_alloc_ptr;
+ return &pmu_a_ptr->alloc.dmem.offset;
+}
+
+static void pmu_allocation_set_dmem_offset_v1(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u32 offset)
+{
+ struct pmu_allocation_v1 *pmu_a_ptr =
+ (struct pmu_allocation_v1 *)pmu_alloc_ptr;
+ pmu_a_ptr->alloc.dmem.offset = offset;
+}
+
+static void pmu_allocation_set_dmem_offset_v0(struct pmu_gk20a *pmu,
+ void *pmu_alloc_ptr, u32 offset)
+{
+ struct pmu_allocation_v0 *pmu_a_ptr =
+ (struct pmu_allocation_v0 *)pmu_alloc_ptr;
+ pmu_a_ptr->alloc.dmem.offset = offset;
+}
+
+static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
+{
+ return (void *)(&(init->pmu_init_v1));
+}
+
+static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
+{
+ struct pmu_init_msg_pmu_v1 *init =
+ (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
+ return init->sw_managed_area_offset;
+}
+
+static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
+{
+ struct pmu_init_msg_pmu_v1 *init =
+ (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
+ return init->sw_managed_area_size;
+}
+
+static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
+{
+ return (void *)(&(init->pmu_init_v0));
+}
+
+static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
+{
+ struct pmu_init_msg_pmu_v0 *init =
+ (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
+ return init->sw_managed_area_offset;
+}
+
+static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
+{
+ struct pmu_init_msg_pmu_v0 *init =
+ (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
+ return init->sw_managed_area_size;
+}
+
+static u32 get_pmu_perfmon_cmd_start_size_v1(void)
+{
+ return sizeof(struct pmu_perfmon_cmd_start_v1);
+}
+
+static u32 get_pmu_perfmon_cmd_start_size_v0(void)
+{
+ return sizeof(struct pmu_perfmon_cmd_start_v0);
+}
+
+static int get_perfmon_cmd_start_offsetofvar_v1(
+ enum pmu_perfmon_cmd_start_fields field)
+{
+ switch (field) {
+ case COUNTER_ALLOC:
+ return offsetof(struct pmu_perfmon_cmd_start_v1,
+ counter_alloc);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int get_perfmon_cmd_start_offsetofvar_v0(
+ enum pmu_perfmon_cmd_start_fields field)
+{
+ switch (field) {
+ case COUNTER_ALLOC:
+ return offsetof(struct pmu_perfmon_cmd_start_v0,
+ counter_alloc);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static u32 get_pmu_perfmon_cmd_init_size_v1(void)
+{
+ return sizeof(struct pmu_perfmon_cmd_init_v1);
+}
+
+static u32 get_pmu_perfmon_cmd_init_size_v0(void)
+{
+ return sizeof(struct pmu_perfmon_cmd_init_v0);
+}
+
+static int get_perfmon_cmd_init_offsetofvar_v1(
+ enum pmu_perfmon_cmd_start_fields field)
+{
+ switch (field) {
+ case COUNTER_ALLOC:
+ return offsetof(struct pmu_perfmon_cmd_init_v1,
+ counter_alloc);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int get_perfmon_cmd_init_offsetofvar_v0(
+ enum pmu_perfmon_cmd_start_fields field)
+{
+ switch (field) {
+ case COUNTER_ALLOC:
+ return offsetof(struct pmu_perfmon_cmd_init_v0,
+ counter_alloc);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
+ start->cmd_type = value;
+}
+
+static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
+ start->cmd_type = value;
+}
+
+static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
+ start->group_id = value;
+}
+
+static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
+ start->group_id = value;
+}
+
+static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
+ start->state_id = value;
+}
+
+static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
+ start->state_id = value;
+}
+
+static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
+ start->flags = value;
+}
+
+static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
+{
+ struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
+ start->flags = value;
+}
+
+static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
+{
+ struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
+ return start->flags;
+}
+
+static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
+{
+ struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
+ return start->flags;
+}
+
+static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
+ u16 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->sample_buffer = value;
+}
+
+static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
+ u16 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->sample_buffer = value;
+}
+
+static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->to_decrease_count = value;
+}
+
+static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->to_decrease_count = value;
+}
+
+static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->base_counter_id = value;
+}
+
+static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->base_counter_id = value;
+}
+
+static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
+ u32 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->sample_period_us = value;
+}
+
+static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
+ u32 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->sample_period_us = value;
+}
+
+static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->num_counters = value;
+}
+
+static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->num_counters = value;
+}
+
+static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
+ init->samples_in_moving_avg = value;
+}
+
+static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
+ u8 value)
+{
+ struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
+ init->samples_in_moving_avg = value;
+}
+
+static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
+ u32 id, void *pmu_init_msg)
+{
+ struct pmu_init_msg_pmu_v0 *init =
+ (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
+ queue->index = init->queue_info[id].index;
+ queue->offset = init->queue_info[id].offset;
+ queue->size = init->queue_info[id].size;
+}
+
+static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
+ u32 id, void *pmu_init_msg)
+{
+ struct pmu_init_msg_pmu_v1 *init =
+ (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
+ queue->index = init->queue_info[id].index;
+ queue->offset = init->queue_info[id].offset;
+ queue->size = init->queue_info[id].size;
+}
+
+static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
+{
+ return (void *)(&seq->in_v1);
+}
+
+static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
+{
+ return (void *)(&seq->in_v0);
+}
+
+static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
+{
+ return (void *)(&seq->out_v1);
+}
+
+static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
+{
+ return (void *)(&seq->out_v0);
+}
+
+static int gk20a_init_pmu(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ switch (pmu->desc->app_version) {
+ case APP_VERSION_1:
+ g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
+ g->ops.pmu_ver.get_pmu_cmdline_args_size =
+ pmu_cmdline_size_v1;
+ g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
+ set_pmu_cmdline_args_cpufreq_v1;
+ g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
+ get_pmu_cmdline_args_ptr_v1;
+ g->ops.pmu_ver.get_pmu_allocation_struct_size =
+ get_pmu_allocation_size_v1;
+ g->ops.pmu_ver.set_pmu_allocation_ptr =
+ set_pmu_allocation_ptr_v1;
+ g->ops.pmu_ver.pmu_allocation_set_dmem_size =
+ pmu_allocation_set_dmem_size_v1;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_size =
+ pmu_allocation_get_dmem_size_v1;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
+ pmu_allocation_get_dmem_offset_v1;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
+ pmu_allocation_get_dmem_offset_addr_v1;
+ g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
+ pmu_allocation_set_dmem_offset_v1;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
+ get_pmu_init_msg_pmu_queue_params_v1;
+ g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
+ get_pmu_msg_pmu_init_msg_ptr_v1;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
+ get_pmu_init_msg_pmu_sw_mg_off_v1;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
+ get_pmu_init_msg_pmu_sw_mg_size_v1;
+ g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
+ get_pmu_perfmon_cmd_start_size_v1;
+ g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
+ get_perfmon_cmd_start_offsetofvar_v1;
+ g->ops.pmu_ver.perfmon_start_set_cmd_type =
+ perfmon_start_set_cmd_type_v1;
+ g->ops.pmu_ver.perfmon_start_set_group_id =
+ perfmon_start_set_group_id_v1;
+ g->ops.pmu_ver.perfmon_start_set_state_id =
+ perfmon_start_set_state_id_v1;
+ g->ops.pmu_ver.perfmon_start_set_flags =
+ perfmon_start_set_flags_v1;
+ g->ops.pmu_ver.perfmon_start_get_flags =
+ perfmon_start_get_flags_v1;
+ g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
+ get_pmu_perfmon_cmd_init_size_v1;
+ g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
+ get_perfmon_cmd_init_offsetofvar_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
+ perfmon_cmd_init_set_sample_buffer_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
+ perfmon_cmd_init_set_dec_cnt_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
+ perfmon_cmd_init_set_base_cnt_id_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
+ perfmon_cmd_init_set_samp_period_us_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
+ perfmon_cmd_init_set_num_cnt_v1;
+ g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
+ perfmon_cmd_init_set_mov_avg_v1;
+ g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
+ get_pmu_sequence_in_alloc_ptr_v1;
+ g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
+ get_pmu_sequence_out_alloc_ptr_v1;
+ break;
+ case APP_VERSION_0:
+ g->ops.pmu_ver.cmd_id_zbc_table_update = 14;
+ g->ops.pmu_ver.get_pmu_cmdline_args_size =
+ pmu_cmdline_size_v0;
+ g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
+ set_pmu_cmdline_args_cpufreq_v0;
+ g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
+ get_pmu_cmdline_args_ptr_v0;
+ g->ops.pmu_ver.get_pmu_allocation_struct_size =
+ get_pmu_allocation_size_v0;
+ g->ops.pmu_ver.set_pmu_allocation_ptr =
+ set_pmu_allocation_ptr_v0;
+ g->ops.pmu_ver.pmu_allocation_set_dmem_size =
+ pmu_allocation_set_dmem_size_v0;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_size =
+ pmu_allocation_get_dmem_size_v0;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
+ pmu_allocation_get_dmem_offset_v0;
+ g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
+ pmu_allocation_get_dmem_offset_addr_v0;
+ g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
+ pmu_allocation_set_dmem_offset_v0;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
+ get_pmu_init_msg_pmu_queue_params_v0;
+ g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
+ get_pmu_msg_pmu_init_msg_ptr_v0;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
+ get_pmu_init_msg_pmu_sw_mg_off_v0;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
+ get_pmu_init_msg_pmu_sw_mg_size_v0;
+ g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
+ get_pmu_perfmon_cmd_start_size_v0;
+ g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
+ get_perfmon_cmd_start_offsetofvar_v0;
+ g->ops.pmu_ver.perfmon_start_set_cmd_type =
+ perfmon_start_set_cmd_type_v0;
+ g->ops.pmu_ver.perfmon_start_set_group_id =
+ perfmon_start_set_group_id_v0;
+ g->ops.pmu_ver.perfmon_start_set_state_id =
+ perfmon_start_set_state_id_v0;
+ g->ops.pmu_ver.perfmon_start_set_flags =
+ perfmon_start_set_flags_v0;
+ g->ops.pmu_ver.perfmon_start_get_flags =
+ perfmon_start_get_flags_v0;
+ g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
+ get_pmu_perfmon_cmd_init_size_v0;
+ g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
+ get_perfmon_cmd_init_offsetofvar_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
+ perfmon_cmd_init_set_sample_buffer_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
+ perfmon_cmd_init_set_dec_cnt_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
+ perfmon_cmd_init_set_base_cnt_id_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
+ perfmon_cmd_init_set_samp_period_us_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
+ perfmon_cmd_init_set_num_cnt_v0;
+ g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
+ perfmon_cmd_init_set_mov_avg_v0;
+ g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
+ get_pmu_sequence_in_alloc_ptr_v0;
+ g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
+ get_pmu_sequence_out_alloc_ptr_v0;
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(pmu->g),
+ "PMU code version not supported\n");
+		return -EINVAL;
+ }
+ return 0;
+}
+
+static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
+ u32 src, u8 *dst, u32 size, u8 port)
+{
+ struct gk20a *g = pmu->g;
+ u32 i, words, bytes;
+ u32 data, addr_mask;
+	u32 *dst_u32 = (u32 *)dst;
+
+ if (size == 0) {
+ gk20a_err(dev_from_gk20a(g),
+ "size is zero");
+ return;
+ }
+
+ if (src & 0x3) {
+ gk20a_err(dev_from_gk20a(g),
+ "src (0x%08x) not 4-byte aligned", src);
+ return;
+ }
+
+ mutex_lock(&pmu->pmu_copy_lock);
+
+ words = size >> 2;
+ bytes = size & 0x3;
+
+ addr_mask = pwr_falcon_dmemc_offs_m() |
+ pwr_falcon_dmemc_blk_m();
+
+ src &= addr_mask;
+
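+	/*
+	 * DMEM is accessed indirectly: program the start offset into the
+	 * DMEMC control register with the auto-increment-on-read bit set,
+	 * then stream 32-bit words out through the DMEMD data port.
+	 */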
+ gk20a_writel(g, pwr_falcon_dmemc_r(port),
+ src | pwr_falcon_dmemc_aincr_f(1));
+
+ for (i = 0; i < words; i++)
+ dst_u32[i] = gk20a_readl(g, pwr_falcon_dmemd_r(port));
+
+ if (bytes > 0) {
+ data = gk20a_readl(g, pwr_falcon_dmemd_r(port));
+ for (i = 0; i < bytes; i++) {
+ dst[(words << 2) + i] = ((u8 *)&data)[i];
+ gk20a_dbg_pmu("read: dst_u8[%d]=0x%08x",
+ i, dst[(words << 2) + i]);
+ }
+ }
+ mutex_unlock(&pmu->pmu_copy_lock);
+ return;
+}
+
+static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
+ u32 dst, u8 *src, u32 size, u8 port)
+{
+ struct gk20a *g = pmu->g;
+ u32 i, words, bytes;
+ u32 data, addr_mask;
+	u32 *src_u32 = (u32 *)src;
+
+ if (size == 0) {
+ gk20a_err(dev_from_gk20a(g),
+ "size is zero");
+ return;
+ }
+
+ if (dst & 0x3) {
+ gk20a_err(dev_from_gk20a(g),
+ "dst (0x%08x) not 4-byte aligned", dst);
+ return;
+ }
+
+ mutex_lock(&pmu->pmu_copy_lock);
+
+ words = size >> 2;
+ bytes = size & 0x3;
+
+ addr_mask = pwr_falcon_dmemc_offs_m() |
+ pwr_falcon_dmemc_blk_m();
+
+ dst &= addr_mask;
+
+ gk20a_writel(g, pwr_falcon_dmemc_r(port),
+ dst | pwr_falcon_dmemc_aincw_f(1));
+
+ for (i = 0; i < words; i++)
+ gk20a_writel(g, pwr_falcon_dmemd_r(port), src_u32[i]);
+
+ if (bytes > 0) {
+ data = 0;
+ for (i = 0; i < bytes; i++)
+ ((u8 *)&data)[i] = src[(words << 2) + i];
+ gk20a_writel(g, pwr_falcon_dmemd_r(port), data);
+ }
+
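+	/*
+	 * DMEMC auto-increments on every DMEMD write, so reading it back
+	 * gives the final offset; comparing against dst + size catches a
+	 * short or failed copy.
+	 */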
+ data = gk20a_readl(g, pwr_falcon_dmemc_r(port)) & addr_mask;
+ size = ALIGN(size, 4);
+ if (data != dst + size) {
+ gk20a_err(dev_from_gk20a(g),
+ "copy failed. bytes written %d, expected %d",
+ data - dst, size);
+ }
+ mutex_unlock(&pmu->pmu_copy_lock);
+ return;
+}
+
+static int pmu_idle(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(2000);
+ u32 idle_stat;
+
+ /* wait for pmu idle */
+ do {
+ idle_stat = gk20a_readl(g, pwr_falcon_idlestate_r());
+
+ if (pwr_falcon_idlestate_falcon_busy_v(idle_stat) == 0 &&
+ pwr_falcon_idlestate_ext_busy_v(idle_stat) == 0) {
+ break;
+ }
+
+ if (time_after_eq(jiffies, end_jiffies)) {
+ gk20a_err(dev_from_gk20a(g),
+ "timeout waiting pmu idle : 0x%08x",
+ idle_stat);
+ return -EBUSY;
+ }
+ usleep_range(100, 200);
+ } while (1);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+static void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable)
+{
+ struct gk20a *g = pmu->g;
+
+ gk20a_dbg_fn("");
+
+ gk20a_writel(g, mc_intr_mask_0_r(),
+ gk20a_readl(g, mc_intr_mask_0_r()) &
+ ~mc_intr_mask_0_pmu_enabled_f());
+ gk20a_writel(g, mc_intr_mask_1_r(),
+ gk20a_readl(g, mc_intr_mask_1_r()) &
+ ~mc_intr_mask_1_pmu_enabled_f());
+
+ gk20a_writel(g, pwr_falcon_irqmclr_r(),
+ pwr_falcon_irqmclr_gptmr_f(1) |
+ pwr_falcon_irqmclr_wdtmr_f(1) |
+ pwr_falcon_irqmclr_mthd_f(1) |
+ pwr_falcon_irqmclr_ctxsw_f(1) |
+ pwr_falcon_irqmclr_halt_f(1) |
+ pwr_falcon_irqmclr_exterr_f(1) |
+ pwr_falcon_irqmclr_swgen0_f(1) |
+ pwr_falcon_irqmclr_swgen1_f(1) |
+ pwr_falcon_irqmclr_ext_f(0xff));
+
+ if (enable) {
+ /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
+ gk20a_writel(g, pwr_falcon_irqdest_r(),
+ pwr_falcon_irqdest_host_gptmr_f(0) |
+ pwr_falcon_irqdest_host_wdtmr_f(1) |
+ pwr_falcon_irqdest_host_mthd_f(0) |
+ pwr_falcon_irqdest_host_ctxsw_f(0) |
+ pwr_falcon_irqdest_host_halt_f(1) |
+ pwr_falcon_irqdest_host_exterr_f(0) |
+ pwr_falcon_irqdest_host_swgen0_f(1) |
+ pwr_falcon_irqdest_host_swgen1_f(0) |
+ pwr_falcon_irqdest_host_ext_f(0xff) |
+ pwr_falcon_irqdest_target_gptmr_f(1) |
+ pwr_falcon_irqdest_target_wdtmr_f(0) |
+ pwr_falcon_irqdest_target_mthd_f(0) |
+ pwr_falcon_irqdest_target_ctxsw_f(0) |
+ pwr_falcon_irqdest_target_halt_f(0) |
+ pwr_falcon_irqdest_target_exterr_f(0) |
+ pwr_falcon_irqdest_target_swgen0_f(0) |
+ pwr_falcon_irqdest_target_swgen1_f(0) |
+ pwr_falcon_irqdest_target_ext_f(0xff));
+
+ /* 0=disable, 1=enable */
+ gk20a_writel(g, pwr_falcon_irqmset_r(),
+ pwr_falcon_irqmset_gptmr_f(1) |
+ pwr_falcon_irqmset_wdtmr_f(1) |
+ pwr_falcon_irqmset_mthd_f(0) |
+ pwr_falcon_irqmset_ctxsw_f(0) |
+ pwr_falcon_irqmset_halt_f(1) |
+ pwr_falcon_irqmset_exterr_f(1) |
+ pwr_falcon_irqmset_swgen0_f(1) |
+ pwr_falcon_irqmset_swgen1_f(1));
+
+ gk20a_writel(g, mc_intr_mask_0_r(),
+ gk20a_readl(g, mc_intr_mask_0_r()) |
+ mc_intr_mask_0_pmu_enabled_f());
+ }
+
+ gk20a_dbg_fn("done");
+}
+
+static int pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
+{
+ struct gk20a *g = pmu->g;
+
+ gk20a_dbg_fn("");
+
+ if (enable) {
+ int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
+ gk20a_enable(g, mc_enable_pwr_enabled_f());
+
+ do {
+ u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
+ (pwr_falcon_dmactl_dmem_scrubbing_m() |
+ pwr_falcon_dmactl_imem_scrubbing_m());
+
+ if (!w) {
+ gk20a_dbg_fn("done");
+ return 0;
+ }
+ udelay(GR_IDLE_CHECK_DEFAULT);
+ } while (--retries || !tegra_platform_is_silicon());
+
+ gk20a_disable(g, mc_enable_pwr_enabled_f());
+ gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
+
+ return -ETIMEDOUT;
+ } else {
+ gk20a_disable(g, mc_enable_pwr_enabled_f());
+ return 0;
+ }
+}
+
+static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
+{
+ struct gk20a *g = pmu->g;
+ u32 pmc_enable;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ if (!enable) {
+ pmc_enable = gk20a_readl(g, mc_enable_r());
+ if (mc_enable_pwr_v(pmc_enable) !=
+ mc_enable_pwr_disabled_v()) {
+
+ pmu_enable_irq(pmu, false);
+ pmu_enable_hw(pmu, false);
+ }
+ } else {
+ err = pmu_enable_hw(pmu, true);
+ if (err)
+ return err;
+
+ /* TBD: post reset */
+
+ err = pmu_idle(pmu);
+ if (err)
+ return err;
+
+ pmu_enable_irq(pmu, true);
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+static int pmu_reset(struct pmu_gk20a *pmu)
+{
+ int err;
+
+ err = pmu_idle(pmu);
+ if (err)
+ return err;
+
+ /* TBD: release pmu hw mutex */
+
+ err = pmu_enable(pmu, false);
+ if (err)
+ return err;
+
+ /* TBD: cancel all sequences */
+ /* TBD: init all sequences and state tables */
+ /* TBD: restore pre-init message handler */
+
+ err = pmu_enable(pmu, true);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int pmu_bootstrap(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct gk20a_platform *platform = platform_get_drvdata(g->dev);
+ struct mm_gk20a *mm = &g->mm;
+ struct pmu_ucode_desc *desc = pmu->desc;
+ u64 addr_code, addr_data, addr_load;
+ u32 i, blocks, addr_args;
+
+ gk20a_dbg_fn("");
+
+ gk20a_writel(g, pwr_falcon_itfen_r(),
+ gk20a_readl(g, pwr_falcon_itfen_r()) |
+ pwr_falcon_itfen_ctxen_enable_f());
+ gk20a_writel(g, pwr_pmu_new_instblk_r(),
+ pwr_pmu_new_instblk_ptr_f(
+ mm->pmu.inst_block.cpu_pa >> 12) |
+ pwr_pmu_new_instblk_valid_f(1) |
+ pwr_pmu_new_instblk_target_sys_coh_f());
+
+ /* TBD: load all other surfaces */
+
+ g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
+ clk_get_rate(platform->clk[1]));
+
+ addr_args = (pwr_falcon_hwcfg_dmem_size_v(
+ gk20a_readl(g, pwr_falcon_hwcfg_r()))
+ << GK20A_PMU_DMEM_BLKSIZE2) -
+ g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
+
+ pmu_copy_to_dmem(pmu, addr_args,
+ (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
+ g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
+
+ gk20a_writel(g, pwr_falcon_dmemc_r(0),
+ pwr_falcon_dmemc_offs_f(0) |
+ pwr_falcon_dmemc_blk_f(0) |
+ pwr_falcon_dmemc_aincw_f(1));
+
+ addr_code = u64_lo32((pmu->ucode.pmu_va +
+ desc->app_start_offset +
+			desc->app_resident_code_offset) >> 8);
+ addr_data = u64_lo32((pmu->ucode.pmu_va +
+ desc->app_start_offset +
+ desc->app_resident_data_offset) >> 8);
+ addr_load = u64_lo32((pmu->ucode.pmu_va +
+ desc->bootloader_start_offset) >> 8);
+
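+	/*
+	 * Push the bootstrap argument block the bootloader consumes through
+	 * the auto-incrementing DMEM port: DMA index, code and data
+	 * addresses and sizes, the IMEM entry point and finally the DMEM
+	 * offset of the command line arguments written above.
+	 */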
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_size);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1);
+ gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args);
+
+ gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
+ addr_load - (desc->bootloader_imem_offset >> 8));
+
+ blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
+
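+	/*
+	 * Copy the bootloader into IMEM with the falcon DMA engine, one
+	 * 256-byte block per transfer (both offsets advance by 1 << 8 each
+	 * iteration).
+	 */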
+ for (i = 0; i < blocks; i++) {
+ gk20a_writel(g, pwr_falcon_dmatrfmoffs_r(),
+ desc->bootloader_imem_offset + (i << 8));
+ gk20a_writel(g, pwr_falcon_dmatrffboffs_r(),
+ desc->bootloader_imem_offset + (i << 8));
+ gk20a_writel(g, pwr_falcon_dmatrfcmd_r(),
+ pwr_falcon_dmatrfcmd_imem_f(1) |
+ pwr_falcon_dmatrfcmd_write_f(0) |
+ pwr_falcon_dmatrfcmd_size_f(6) |
+ pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
+ }
+
+ gk20a_writel(g, pwr_falcon_bootvec_r(),
+ pwr_falcon_bootvec_vec_f(desc->bootloader_entry_point));
+
+ gk20a_writel(g, pwr_falcon_cpuctl_r(),
+ pwr_falcon_cpuctl_startcpu_f(1));
+
+ gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
+
+ return 0;
+}
+
+static void pmu_seq_init(struct pmu_gk20a *pmu)
+{
+ u32 i;
+
+ memset(pmu->seq, 0,
+ sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
+ memset(pmu->pmu_seq_tbl, 0,
+ sizeof(pmu->pmu_seq_tbl));
+
+ for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
+ pmu->seq[i].id = i;
+}
+
+static int pmu_seq_acquire(struct pmu_gk20a *pmu,
+ struct pmu_sequence **pseq)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_sequence *seq;
+ u32 index;
+
+ mutex_lock(&pmu->pmu_seq_lock);
+ index = find_first_zero_bit(pmu->pmu_seq_tbl,
+ sizeof(pmu->pmu_seq_tbl));
+ if (index >= sizeof(pmu->pmu_seq_tbl)) {
+ gk20a_err(dev_from_gk20a(g),
+ "no free sequence available");
+ mutex_unlock(&pmu->pmu_seq_lock);
+ return -EAGAIN;
+ }
+ set_bit(index, pmu->pmu_seq_tbl);
+ mutex_unlock(&pmu->pmu_seq_lock);
+
+ seq = &pmu->seq[index];
+ seq->state = PMU_SEQ_STATE_PENDING;
+
+ *pseq = seq;
+ return 0;
+}
+
+static void pmu_seq_release(struct pmu_gk20a *pmu,
+ struct pmu_sequence *seq)
+{
+ struct gk20a *g = pmu->g;
+ seq->state = PMU_SEQ_STATE_FREE;
+ seq->desc = PMU_INVALID_SEQ_DESC;
+ seq->callback = NULL;
+ seq->cb_params = NULL;
+ seq->msg = NULL;
+ seq->out_payload = NULL;
+ g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
+ g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
+ g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
+ g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
+
+ clear_bit(seq->id, pmu->pmu_seq_tbl);
+}
+
+static int pmu_queue_init(struct pmu_gk20a *pmu,
+ u32 id, union pmu_init_msg_pmu *init)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_queue *queue = &pmu->queue[id];
+ queue->id = id;
+ g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
+
+ queue->mutex_id = id;
+ mutex_init(&queue->mutex);
+
+ gk20a_dbg_pmu("queue %d: index %d, offset 0x%08x, size 0x%08x",
+ id, queue->index, queue->offset, queue->size);
+
+ return 0;
+}
+
+static int pmu_queue_head(struct pmu_gk20a *pmu, struct pmu_queue *queue,
+ u32 *head, bool set)
+{
+ struct gk20a *g = pmu->g;
+
+ BUG_ON(!head);
+
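+	/* Command queues have per-queue head registers indexed by
+	 * queue->index; the message queue uses the single msgq head. */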
+ if (PMU_IS_COMMAND_QUEUE(queue->id)) {
+
+ if (queue->index >= pwr_pmu_queue_head__size_1_v())
+ return -EINVAL;
+
+ if (!set)
+ *head = pwr_pmu_queue_head_address_v(
+ gk20a_readl(g,
+ pwr_pmu_queue_head_r(queue->index)));
+ else
+ gk20a_writel(g,
+ pwr_pmu_queue_head_r(queue->index),
+ pwr_pmu_queue_head_address_f(*head));
+ } else {
+ if (!set)
+ *head = pwr_pmu_msgq_head_val_v(
+ gk20a_readl(g, pwr_pmu_msgq_head_r()));
+ else
+ gk20a_writel(g,
+ pwr_pmu_msgq_head_r(),
+ pwr_pmu_msgq_head_val_f(*head));
+ }
+
+ return 0;
+}
+
+static int pmu_queue_tail(struct pmu_gk20a *pmu, struct pmu_queue *queue,
+ u32 *tail, bool set)
+{
+ struct gk20a *g = pmu->g;
+
+ BUG_ON(!tail);
+
+ if (PMU_IS_COMMAND_QUEUE(queue->id)) {
+
+ if (queue->index >= pwr_pmu_queue_tail__size_1_v())
+ return -EINVAL;
+
+ if (!set)
+ *tail = pwr_pmu_queue_tail_address_v(
+ gk20a_readl(g,
+ pwr_pmu_queue_tail_r(queue->index)));
+ else
+ gk20a_writel(g,
+ pwr_pmu_queue_tail_r(queue->index),
+ pwr_pmu_queue_tail_address_f(*tail));
+ } else {
+ if (!set)
+ *tail = pwr_pmu_msgq_tail_val_v(
+ gk20a_readl(g, pwr_pmu_msgq_tail_r()));
+ else
+ gk20a_writel(g,
+ pwr_pmu_msgq_tail_r(),
+ pwr_pmu_msgq_tail_val_f(*tail));
+ }
+
+ return 0;
+}
+
+static inline void pmu_queue_read(struct pmu_gk20a *pmu,
+ u32 offset, u8 *dst, u32 size)
+{
+ pmu_copy_from_dmem(pmu, offset, dst, size, 0);
+}
+
+static inline void pmu_queue_write(struct pmu_gk20a *pmu,
+ u32 offset, u8 *src, u32 size)
+{
+ pmu_copy_to_dmem(pmu, offset, src, size, 0);
+}
+
+int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_mutex *mutex;
+ u32 data, owner, max_retry;
+
+ if (!pmu->initialized)
+ return 0;
+
+ BUG_ON(!token);
+ BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
+ BUG_ON(id > pmu->mutex_cnt);
+
+ mutex = &pmu->mutex[id];
+
+ owner = pwr_pmu_mutex_value_v(
+ gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
+
+ if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
+ BUG_ON(mutex->ref_cnt == 0);
+ gk20a_dbg_pmu("already acquired by owner : 0x%08x", *token);
+ mutex->ref_cnt++;
+ return 0;
+ }
+
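+	/*
+	 * HW mutex acquire protocol: fetch a token from the mutex id
+	 * register, write it into the mutex register and read it back.
+	 * Ownership is gained only if the read-back matches the token;
+	 * otherwise the token is returned via the release register and the
+	 * sequence is retried.
+	 */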
+ max_retry = 40;
+ do {
+ data = pwr_pmu_mutex_id_value_v(
+ gk20a_readl(g, pwr_pmu_mutex_id_r()));
+ if (data == pwr_pmu_mutex_id_value_init_v() ||
+ data == pwr_pmu_mutex_id_value_not_avail_v()) {
+ gk20a_warn(dev_from_gk20a(g),
+ "fail to generate mutex token: val 0x%08x",
+ owner);
+ usleep_range(20, 40);
+ continue;
+ }
+
+ owner = data;
+ gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
+ pwr_pmu_mutex_value_f(owner));
+
+ data = pwr_pmu_mutex_value_v(
+ gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
+
+ if (owner == data) {
+			mutex->ref_cnt = 1;
+			*token = owner;
+			gk20a_dbg_pmu("mutex acquired: id=%d, token=0x%x",
+				mutex->index, *token);
+			return 0;
+ } else {
+ gk20a_dbg_info("fail to acquire mutex idx=0x%08x",
+ mutex->index);
+
+ data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
+ data = set_field(data,
+ pwr_pmu_mutex_id_release_value_m(),
+ pwr_pmu_mutex_id_release_value_f(owner));
+ gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
+
+ usleep_range(20, 40);
+ continue;
+ }
+ } while (max_retry-- > 0);
+
+ return -EBUSY;
+}
+
+int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_mutex *mutex;
+ u32 owner, data;
+
+ if (!pmu->initialized)
+ return 0;
+
+ BUG_ON(!token);
+ BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
+ BUG_ON(id > pmu->mutex_cnt);
+
+ mutex = &pmu->mutex[id];
+
+ owner = pwr_pmu_mutex_value_v(
+ gk20a_readl(g, pwr_pmu_mutex_r(mutex->index)));
+
+ if (*token != owner) {
+ gk20a_err(dev_from_gk20a(g),
+			"requester 0x%08x does NOT match owner 0x%08x",
+ *token, owner);
+ return -EINVAL;
+ }
+
+ if (--mutex->ref_cnt == 0) {
+ gk20a_writel(g, pwr_pmu_mutex_r(mutex->index),
+ pwr_pmu_mutex_value_initial_lock_f());
+
+ data = gk20a_readl(g, pwr_pmu_mutex_id_release_r());
+ data = set_field(data, pwr_pmu_mutex_id_release_value_m(),
+ pwr_pmu_mutex_id_release_value_f(owner));
+ gk20a_writel(g, pwr_pmu_mutex_id_release_r(), data);
+
+ gk20a_dbg_pmu("mutex released: id=%d, token=0x%x",
+ mutex->index, *token);
+ }
+
+ return 0;
+}
+
+static int pmu_queue_lock(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue)
+{
+ int err;
+
+ if (PMU_IS_MESSAGE_QUEUE(queue->id))
+ return 0;
+
+ if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
+ mutex_lock(&queue->mutex);
+ queue->locked = true;
+ return 0;
+ }
+
+ err = pmu_mutex_acquire(pmu, queue->mutex_id,
+ &queue->mutex_lock);
+ if (err == 0)
+ queue->locked = true;
+
+ return err;
+}
+
+static int pmu_queue_unlock(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue)
+{
+ int err;
+
+ if (PMU_IS_MESSAGE_QUEUE(queue->id))
+ return 0;
+
+ if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
+ mutex_unlock(&queue->mutex);
+ queue->locked = false;
+ return 0;
+ }
+
+ if (queue->locked) {
+ err = pmu_mutex_release(pmu, queue->mutex_id,
+ &queue->mutex_lock);
+ if (err == 0)
+ queue->locked = false;
+ }
+
+ return 0;
+}
+
+/* called by pmu_read_message, no lock */
+static bool pmu_queue_is_empty(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue)
+{
+ u32 head, tail;
+
+ pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+ if (queue->opened && queue->oflag == OFLAG_READ)
+ tail = queue->position;
+ else
+ pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
+ return head == tail;
+}
+
+static bool pmu_queue_has_room(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue, u32 size, bool *need_rewind)
+{
+ u32 head, tail, free;
+ bool rewind = false;
+
+ BUG_ON(!queue->locked);
+
+ size = ALIGN(size, QUEUE_ALIGNMENT);
+
+ pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+ pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
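+	/*
+	 * Circular queue accounting: with head at or past tail, the free
+	 * region runs from head to the end of the queue, reserving one
+	 * command header for the rewind marker; if that is not enough the
+	 * write is rewound to the queue start, where the free region is the
+	 * gap between head and tail.
+	 */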
+ if (head >= tail) {
+ free = queue->offset + queue->size - head;
+ free -= PMU_CMD_HDR_SIZE;
+
+ if (size > free) {
+ rewind = true;
+ head = queue->offset;
+ }
+ }
+
+ if (head < tail)
+ free = tail - head - 1;
+
+ if (need_rewind)
+ *need_rewind = rewind;
+
+ return size <= free;
+}
+
+static int pmu_queue_push(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue, void *data, u32 size)
+{
+ gk20a_dbg_fn("");
+
+	if (!queue->opened || queue->oflag != OFLAG_WRITE) {
+ gk20a_err(dev_from_gk20a(pmu->g),
+ "queue not opened for write");
+ return -EINVAL;
+ }
+
+ pmu_queue_write(pmu, queue->position, data, size);
+ queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+ return 0;
+}
+
+static int pmu_queue_pop(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue, void *data, u32 size,
+ u32 *bytes_read)
+{
+ u32 head, tail, used;
+
+ *bytes_read = 0;
+
+	if (!queue->opened || queue->oflag != OFLAG_READ) {
+ gk20a_err(dev_from_gk20a(pmu->g),
+ "queue not opened for read");
+ return -EINVAL;
+ }
+
+ pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+ tail = queue->position;
+
+ if (head == tail)
+ return 0;
+
+ if (head > tail)
+ used = head - tail;
+ else
+ used = queue->offset + queue->size - tail;
+
+ if (size > used) {
+ gk20a_warn(dev_from_gk20a(pmu->g),
+			"requested read size larger than available data");
+ size = used;
+ }
+
+ pmu_queue_read(pmu, tail, data, size);
+ queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+ *bytes_read = size;
+ return 0;
+}
+
+static void pmu_queue_rewind(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue)
+{
+ struct pmu_cmd cmd;
+
+ gk20a_dbg_fn("");
+
+ if (!queue->opened) {
+ gk20a_err(dev_from_gk20a(pmu->g),
+ "queue not opened");
+ return;
+ }
+
+ if (queue->oflag == OFLAG_WRITE) {
+ cmd.hdr.unit_id = PMU_UNIT_REWIND;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE;
+ pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
+		gk20a_dbg_pmu("queue %d rewound", queue->id);
+ }
+
+ queue->position = queue->offset;
+ return;
+}
+
+/* open for read and lock the queue */
+static int pmu_queue_open_read(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue)
+{
+ int err;
+
+ err = pmu_queue_lock(pmu, queue);
+ if (err)
+ return err;
+
+ if (queue->opened)
+ BUG();
+
+ pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
+ queue->oflag = OFLAG_READ;
+ queue->opened = true;
+
+ return 0;
+}
+
+/* open for write and lock the queue;
+ * make sure there's enough free space for the write */
+static int pmu_queue_open_write(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue, u32 size)
+{
+ bool rewind = false;
+ int err;
+
+ err = pmu_queue_lock(pmu, queue);
+ if (err)
+ return err;
+
+ if (queue->opened)
+ BUG();
+
+	if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
+		gk20a_err(dev_from_gk20a(pmu->g), "queue full");
+		pmu_queue_unlock(pmu, queue);
+		return -EAGAIN;
+	}
+
+ pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
+ queue->oflag = OFLAG_WRITE;
+ queue->opened = true;
+
+ if (rewind)
+ pmu_queue_rewind(pmu, queue);
+
+ return 0;
+}
+
+/* close and unlock the queue */
+static int pmu_queue_close(struct pmu_gk20a *pmu,
+ struct pmu_queue *queue, bool commit)
+{
+ if (!queue->opened)
+ return 0;
+
+ if (commit) {
+ if (queue->oflag == OFLAG_READ) {
+ pmu_queue_tail(pmu, queue,
+ &queue->position, QUEUE_SET);
+ }
+ else {
+		} else {
+ }
+ }
+
+ queue->opened = false;
+
+ pmu_queue_unlock(pmu, queue);
+
+ return 0;
+}
+
+static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu,
+ struct gk20a_pmu_save_state *save)
+{
+ save->seq = pmu->seq;
+ save->next_seq_desc = pmu->next_seq_desc;
+ save->mutex = pmu->mutex;
+ save->mutex_cnt = pmu->mutex_cnt;
+ save->desc = pmu->desc;
+ save->ucode = pmu->ucode;
+ save->elpg_enable = pmu->elpg_enable;
+ save->pg_wq = pmu->pg_wq;
+ save->seq_buf = pmu->seq_buf;
+ save->pg_buf = pmu->pg_buf;
+ save->sw_ready = pmu->sw_ready;
+ save->pg_init = pmu->pg_init;
+}
+
+static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
+ struct gk20a_pmu_save_state *save)
+{
+ pmu->seq = save->seq;
+ pmu->next_seq_desc = save->next_seq_desc;
+ pmu->mutex = save->mutex;
+ pmu->mutex_cnt = save->mutex_cnt;
+ pmu->desc = save->desc;
+ pmu->ucode = save->ucode;
+ pmu->elpg_enable = save->elpg_enable;
+ pmu->pg_wq = save->pg_wq;
+ pmu->seq_buf = save->seq_buf;
+ pmu->pg_buf = save->pg_buf;
+ pmu->sw_ready = save->sw_ready;
+ pmu->pg_init = save->pg_init;
+}
+
+void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
+{
+ struct gk20a_pmu_save_state save;
+
+ gk20a_dbg_fn("");
+
+ gk20a_allocator_destroy(&pmu->dmem);
+
+ /* Save the stuff you don't want to lose */
+ gk20a_save_pmu_sw_state(pmu, &save);
+
+	/* this function is also called by pmu_destroy outside the gk20a
+	 * deinit path that releases the gk20a struct, so zero the whole
+	 * structure here. */
+ memset(pmu, 0, sizeof(struct pmu_gk20a));
+
+ /* Restore stuff you want to keep */
+ gk20a_restore_pmu_sw_state(pmu, &save);
+}
+
+int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+
+ gk20a_dbg_fn("");
+
+ pmu_enable_hw(pmu, true);
+
+ return 0;
+}
+
+static void pmu_elpg_enable_allow(struct work_struct *work);
+
+int gk20a_init_pmu_setup_sw(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct mm_gk20a *mm = &g->mm;
+ struct vm_gk20a *vm = &mm->pmu.vm;
+ struct device *d = dev_from_gk20a(g);
+ int i, err = 0;
+ u8 *ptr;
+ void *ucode_ptr;
+ struct sg_table *sgt_pmu_ucode;
+ struct sg_table *sgt_seq_buf;
+ DEFINE_DMA_ATTRS(attrs);
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ if (pmu->sw_ready) {
+ for (i = 0; i < pmu->mutex_cnt; i++) {
+ pmu->mutex[i].id = i;
+ pmu->mutex[i].index = i;
+ }
+ pmu_seq_init(pmu);
+
+ gk20a_dbg_fn("skip init");
+ goto skip_init;
+ }
+
+ /* no infoRom script from vbios? */
+
+ /* TBD: sysmon subtask */
+
+ pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
+ pmu->mutex = kzalloc(pmu->mutex_cnt *
+ sizeof(struct pmu_mutex), GFP_KERNEL);
+ if (!pmu->mutex) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < pmu->mutex_cnt; i++) {
+ pmu->mutex[i].id = i;
+ pmu->mutex[i].index = i;
+ }
+
+ pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
+ sizeof(struct pmu_sequence), GFP_KERNEL);
+ if (!pmu->seq) {
+ err = -ENOMEM;
+ goto err_free_mutex;
+ }
+
+ pmu_seq_init(pmu);
+
+ if (!g->pmu_fw) {
+ g->pmu_fw = gk20a_request_firmware(g, GK20A_PMU_UCODE_IMAGE);
+ if (!g->pmu_fw) {
+ gk20a_err(d, "failed to load pmu ucode!!");
+ err = -ENOENT;
+ goto err_free_seq;
+ }
+ }
+
+ gk20a_dbg_fn("firmware loaded");
+
+ pmu->desc = (struct pmu_ucode_desc *)g->pmu_fw->data;
+ pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
+ pmu->desc->descriptor_size);
+
+
+ INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow);
+ INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue);
+
+ gk20a_init_pmu_vm(mm);
+
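+	/*
+	 * Allocate the ucode image and ZBC sequence buffers in sysmem and
+	 * map them into the PMU's virtual address space; the ucode mapping
+	 * is read-only for the PMU, the sequence buffer is read/write.
+	 */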
+ dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
+ pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
+ &iova,
+ GFP_KERNEL,
+ &attrs);
+ if (!pmu->ucode.cpuva) {
+ gk20a_err(d, "failed to allocate memory\n");
+ err = -ENOMEM;
+ goto err_release_fw;
+ }
+
+ pmu->ucode.iova = iova;
+ pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
+ &iova,
+ GFP_KERNEL);
+ if (!pmu->seq_buf.cpuva) {
+ gk20a_err(d, "failed to allocate memory\n");
+ err = -ENOMEM;
+ goto err_free_pmu_ucode;
+ }
+
+ pmu->seq_buf.iova = iova;
+ init_waitqueue_head(&pmu->pg_wq);
+
+ err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
+ pmu->ucode.cpuva,
+ pmu->ucode.iova,
+ GK20A_PMU_UCODE_SIZE_MAX);
+ if (err) {
+ gk20a_err(d, "failed to allocate sg table\n");
+ goto err_free_seq_buf;
+ }
+
+ pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
+ GK20A_PMU_UCODE_SIZE_MAX,
+ 0, /* flags */
+ gk20a_mem_flag_read_only);
+	if (!pmu->ucode.pmu_va) {
+		gk20a_err(d, "failed to map pmu ucode memory!!");
+		err = -ENOMEM;
+		goto err_free_ucode_sgt;
+	}
+
+ err = gk20a_get_sgtable(d, &sgt_seq_buf,
+ pmu->seq_buf.cpuva,
+ pmu->seq_buf.iova,
+ GK20A_PMU_SEQ_BUF_SIZE);
+ if (err) {
+ gk20a_err(d, "failed to allocate sg table\n");
+ goto err_unmap_ucode;
+ }
+
+ pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
+ GK20A_PMU_SEQ_BUF_SIZE,
+ 0, /* flags */
+ gk20a_mem_flag_none);
+	if (!pmu->seq_buf.pmu_va) {
+		gk20a_err(d, "failed to map pmu seq buffer memory!!");
+		err = -ENOMEM;
+		goto err_free_seq_buf_sgt;
+	}
+
+ ptr = (u8 *)pmu->seq_buf.cpuva;
+	if (!ptr) {
+		gk20a_err(d, "failed to map cpu ptr for zbc buffer");
+		err = -ENOMEM;
+		goto err_unmap_seq_buf;
+	}
+
+ /* TBD: remove this if ZBC save/restore is handled by PMU
+	 * send an empty ZBC sequence for now */
+ ptr[0] = 0x16; /* opcode EXIT */
+ ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
+ ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
+
+ pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
+
+ ucode_ptr = pmu->ucode.cpuva;
+
+ for (i = 0; i < (pmu->desc->app_start_offset +
+ pmu->desc->app_size) >> 2; i++)
+ gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
+
+ gk20a_free_sgtable(&sgt_pmu_ucode);
+ gk20a_free_sgtable(&sgt_seq_buf);
+
+skip_init:
+ mutex_init(&pmu->elpg_mutex);
+ mutex_init(&pmu->isr_mutex);
+ mutex_init(&pmu->pmu_copy_lock);
+ mutex_init(&pmu->pmu_seq_lock);
+
+ pmu->perfmon_counter.index = 3; /* GR & CE2 */
+ pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
+
+ pmu->remove_support = gk20a_remove_pmu_support;
+ err = gk20a_init_pmu(pmu);
+ if (err) {
+ gk20a_err(d, "failed to set function pointers\n");
+ return err;
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+
+ err_unmap_seq_buf:
+ gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
+ GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
+ err_free_seq_buf_sgt:
+ gk20a_free_sgtable(&sgt_seq_buf);
+ err_unmap_ucode:
+ gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
+ GK20A_PMU_UCODE_SIZE_MAX, gk20a_mem_flag_none);
+ err_free_ucode_sgt:
+ gk20a_free_sgtable(&sgt_pmu_ucode);
+ err_free_seq_buf:
+ dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
+ pmu->seq_buf.cpuva, pmu->seq_buf.iova);
+ pmu->seq_buf.cpuva = NULL;
+ pmu->seq_buf.iova = 0;
+ err_free_pmu_ucode:
+ dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
+ pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
+ pmu->ucode.cpuva = NULL;
+ pmu->ucode.iova = 0;
+ err_release_fw:
+ release_firmware(g->pmu_fw);
+ err_free_seq:
+ kfree(pmu->seq);
+ err_free_mutex:
+ kfree(pmu->mutex);
+ err:
+ gk20a_dbg_fn("fail");
+ return err;
+}
+
+static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 handle, u32 status);
+
+static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 handle, u32 status)
+{
+ struct pmu_gk20a *pmu = param;
+ struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = &msg->msg.pg.eng_buf_stat;
+
+ gk20a_dbg_fn("");
+
+ if (status != 0) {
+ gk20a_err(dev_from_gk20a(g), "PGENG cmd aborted");
+ /* TBD: disable ELPG */
+ return;
+ }
+
+ if (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_FAILED) {
+ gk20a_err(dev_from_gk20a(g), "failed to load PGENG buffer");
+ }
+
+ pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
+ wake_up(&pmu->pg_wq);
+}
+
+int gk20a_init_pmu_setup_hw1(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ pmu_reset(pmu);
+
+ /* setup apertures - virtual */
+ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
+ pwr_fbif_transcfg_mem_type_virtual_f());
+ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
+ pwr_fbif_transcfg_mem_type_virtual_f());
+ /* setup apertures - physical */
+ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
+ pwr_fbif_transcfg_mem_type_physical_f() |
+ pwr_fbif_transcfg_target_local_fb_f());
+ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
+ pwr_fbif_transcfg_mem_type_physical_f() |
+ pwr_fbif_transcfg_target_coherent_sysmem_f());
+ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
+ pwr_fbif_transcfg_mem_type_physical_f() |
+ pwr_fbif_transcfg_target_noncoherent_sysmem_f());
+
+ /* TBD: load pmu ucode */
+ err = pmu_bootstrap(pmu);
+ if (err)
+ return err;
+
+ return 0;
+
+}
+
+static int gk20a_aelpg_init(struct gk20a *g);
+static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
+
+
+static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work)
+{
+ struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
+ struct gk20a *g = pmu->g;
+ gk20a_init_pmu_setup_hw2(g);
+}
+
+int gk20a_init_pmu_setup_hw2(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct mm_gk20a *mm = &g->mm;
+ struct vm_gk20a *vm = &mm->pmu.vm;
+ struct device *d = dev_from_gk20a(g);
+ struct pmu_cmd cmd;
+ u32 desc;
+ long remain;
+ int err;
+ bool status;
+ u32 size;
+ struct sg_table *sgt_pg_buf;
+ dma_addr_t iova;
+
+ gk20a_dbg_fn("");
+
+ if (!support_gk20a_pmu())
+ return 0;
+
+ size = 0;
+ err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to query fecs pg buffer size");
+ return err;
+ }
+
+ if (!pmu->sw_ready) {
+ pmu->pg_buf.cpuva = dma_alloc_coherent(d, size,
+ &iova,
+ GFP_KERNEL);
+ if (!pmu->pg_buf.cpuva) {
+ gk20a_err(d, "failed to allocate memory\n");
+ err = -ENOMEM;
+ goto err;
+ }
+
+ pmu->pg_buf.iova = iova;
+ pmu->pg_buf.size = size;
+
+ err = gk20a_get_sgtable(d, &sgt_pg_buf,
+ pmu->pg_buf.cpuva,
+ pmu->pg_buf.iova,
+ size);
+ if (err) {
+ gk20a_err(d, "failed to create sg table\n");
+ goto err_free_pg_buf;
+ }
+
+ pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
+ &sgt_pg_buf,
+ size,
+ 0, /* flags */
+ gk20a_mem_flag_none);
+ if (!pmu->pg_buf.pmu_va) {
+ gk20a_err(d, "failed to map fecs pg buffer");
+ err = -ENOMEM;
+ goto err_free_sgtable;
+ }
+
+ gk20a_free_sgtable(&sgt_pg_buf);
+ }
+
+ /*
+ * This is the actual point at which sw setup is complete, so set the
+ * sw_ready flag here.
+ */
+ pmu->sw_ready = true;
+
+ /* TBD: acquire pmu hw mutex */
+
+ /* TBD: post reset again? */
+
+ /* PMU_INIT message handler will send PG_INIT */
+ remain = wait_event_timeout(
+ pmu->pg_wq,
+ (status = (pmu->elpg_ready &&
+ pmu->stat_dmem_offset != 0 &&
+ pmu->elpg_stat == PMU_ELPG_STAT_OFF)),
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
+ if (status == 0) {
+ gk20a_err(dev_from_gk20a(g),
+ "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain);
+ pmu_dump_falcon_stats(pmu);
+ return -EBUSY;
+ }
+
+ err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to bind pmu inst to gr");
+ return err;
+ }
+
+ err = gr_gk20a_fecs_set_reglist_virual_addr(g, pmu->pg_buf.pmu_va);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to set pg buffer pmu va");
+ return err;
+ }
+
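+	/*
+	 * Ask the PMU to DMA in the FECS power-gating buffer: the PMU VA is
+	 * split into a 256-byte-aligned base (dma_base) and its low byte
+	 * (dma_offset).
+	 */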
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
+ cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
+ cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
+ cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
+ cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8);
+ cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF);
+ cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
+
+ pmu->buf_loaded = false;
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+ pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
+
+ remain = wait_event_timeout(
+ pmu->pg_wq,
+ pmu->buf_loaded,
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
+ if (!pmu->buf_loaded) {
+ gk20a_err(dev_from_gk20a(g),
+ "PGENG FECS buffer load failed, remaining timeout : 0x%lx",
+ remain);
+ return -EBUSY;
+ }
+
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_eng_buf_load);
+ cmd.cmd.pg.eng_buf_load.cmd_type = PMU_PG_CMD_ID_ENG_BUF_LOAD;
+ cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
+ cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
+ cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
+ cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
+ cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
+
+ pmu->buf_loaded = false;
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+ pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
+
+ remain = wait_event_timeout(
+ pmu->pg_wq,
+ pmu->buf_loaded,
+ msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)));
+ if (!pmu->buf_loaded) {
+ gk20a_err(dev_from_gk20a(g),
+ "PGENG ZBC buffer load failed, remaining timeout 0x%lx",
+ remain);
+ return -EBUSY;
+ }
+
+ /*
+ * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to
+ * 7. This prevents PMU stalling on Host register accesses. Once the
+ * cause for this hang is discovered and fixed, this WAR should be
+ * removed.
+ */
+ gk20a_writel(g, 0x10a164, 0x109ff);
+
+ pmu->initialized = true;
+ pmu->zbc_ready = true;
+
+ /* Save zbc table after PMU is initialized. */
+ pmu_save_zbc(g, 0xf);
+
+ /*
+ * We can't guarantee that gr code to enable ELPG will be
+ * invoked, so we explicitly call disable-enable here
+ * to enable elpg.
+ */
+ gk20a_pmu_disable_elpg(g);
+
+ if (g->elpg_enabled)
+ gk20a_pmu_enable_elpg(g);
+
+ udelay(50);
+
+ /* Enable AELPG */
+ if (g->aelpg_enabled) {
+ gk20a_aelpg_init(g);
+ gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
+ }
+
+ return 0;
+
+ err_free_sgtable:
+ gk20a_free_sgtable(&sgt_pg_buf);
+ err_free_pg_buf:
+ dma_free_coherent(d, size,
+ pmu->pg_buf.cpuva, pmu->pg_buf.iova);
+ pmu->pg_buf.cpuva = NULL;
+ pmu->pg_buf.iova = 0;
+ err:
+ return err;
+}
+
+int gk20a_init_pmu_support(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+	int err;
+
+ gk20a_dbg_fn("");
+
+ if (pmu->initialized)
+ return 0;
+
+ pmu->g = g;
+
+ err = gk20a_init_pmu_reset_enable_hw(g);
+ if (err)
+ return err;
+
+ if (support_gk20a_pmu()) {
+ err = gk20a_init_pmu_setup_sw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_pmu_setup_hw1(g);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 handle, u32 status)
+{
+ struct pmu_gk20a *pmu = param;
+ struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
+
+ gk20a_dbg_fn("");
+
+ if (status != 0) {
+ gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
+ /* TBD: disable ELPG */
+ return;
+ }
+
+ switch (elpg_msg->msg) {
+ case PMU_PG_ELPG_MSG_INIT_ACK:
+ gk20a_dbg_pmu("INIT_PG is acknowledged from PMU");
+ pmu->elpg_ready = true;
+ wake_up(&pmu->pg_wq);
+ break;
+ case PMU_PG_ELPG_MSG_ALLOW_ACK:
+ gk20a_dbg_pmu("ALLOW is acknowledged from PMU");
+ pmu->elpg_stat = PMU_ELPG_STAT_ON;
+ wake_up(&pmu->pg_wq);
+ break;
+ case PMU_PG_ELPG_MSG_DISALLOW_ACK:
+ gk20a_dbg_pmu("DISALLOW is acknowledged from PMU");
+ pmu->elpg_stat = PMU_ELPG_STAT_OFF;
+ wake_up(&pmu->pg_wq);
+ break;
+ default:
+ gk20a_err(dev_from_gk20a(g),
+ "unsupported ELPG message : 0x%04x", elpg_msg->msg);
+ }
+
+ return;
+}
+
+static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 handle, u32 status)
+{
+ struct pmu_gk20a *pmu = param;
+
+ gk20a_dbg_fn("");
+
+ if (status != 0) {
+ gk20a_err(dev_from_gk20a(g), "ELPG cmd aborted");
+ /* TBD: disable ELPG */
+ return;
+ }
+
+ switch (msg->msg.pg.stat.sub_msg_id) {
+ case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
+ gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU");
+ pmu->stat_dmem_offset = msg->msg.pg.stat.data;
+ wake_up(&pmu->pg_wq);
+ break;
+ default:
+ break;
+ }
+}
+
+static int pmu_init_powergating(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_cmd cmd;
+ u32 seq;
+
+ gk20a_dbg_fn("");
+
+ if (tegra_cpu_is_asim()) {
+ /* TBD: calculate threshold for silicon */
+ gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
+ PMU_PG_IDLE_THRESHOLD_SIM);
+ gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
+ PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM);
+ } else {
+ /* TBD: calculate threshold for silicon */
+ gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A),
+ PMU_PG_IDLE_THRESHOLD);
+ gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A),
+ PMU_PG_POST_POWERUP_IDLE_THRESHOLD);
+ }
+
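+	/*
+	 * ELPG bring-up sequence: send PG_INIT, ask the PMU to allocate
+	 * DMEM for the power-gating statistics, then issue an explicit
+	 * DISALLOW, since the ucode requires a disallow before the first
+	 * allow.
+	 */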
+ /* init ELPG */
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
+ cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
+ cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
+
+ /* alloc dmem for powergating state log */
+ pmu->stat_dmem_offset = 0;
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
+ cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
+ cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
+ cmd.cmd.pg.stat.data = 0;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+ pmu_handle_pg_stat_msg, pmu, &seq, ~0);
+
+ /* disallow ELPG initially
+ PMU ucode requires a disallow cmd before allow cmd */
+ pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
+ cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
+ cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
+
+ /* start with elpg disabled until first enable call */
+ pmu->elpg_refcnt = 1;
+
+ return 0;
+}
+
+static int pmu_init_perfmon(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_v *pv = &g->ops.pmu_ver;
+ struct pmu_cmd cmd;
+ struct pmu_payload payload;
+ u32 seq;
+ u32 data;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ pmu->perfmon_ready = 0;
+
+ /* use counter #3 for GR && CE2 busy cycles */
+ gk20a_writel(g, pwr_pmu_idle_mask_r(3),
+ pwr_pmu_idle_mask_gr_enabled_f() |
+ pwr_pmu_idle_mask_ce_2_enabled_f());
+
+ /* disable idle filtering for counters 3 and 6 */
+ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(3));
+ data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+ pwr_pmu_idle_ctrl_filter_m(),
+ pwr_pmu_idle_ctrl_value_busy_f() |
+ pwr_pmu_idle_ctrl_filter_disabled_f());
+ gk20a_writel(g, pwr_pmu_idle_ctrl_r(3), data);
+
+ /* use counter #6 for total cycles */
+ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(6));
+ data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+ pwr_pmu_idle_ctrl_filter_m(),
+ pwr_pmu_idle_ctrl_value_always_f() |
+ pwr_pmu_idle_ctrl_filter_disabled_f());
+ gk20a_writel(g, pwr_pmu_idle_ctrl_r(6), data);
+
+ /*
+ * We don't want to disturb counters #3 and #6, which are used by
+ * perfmon, so we add wiring also to counters #1 and #2 for
+ * exposing raw counter readings.
+ */
+ gk20a_writel(g, pwr_pmu_idle_mask_r(1),
+ pwr_pmu_idle_mask_gr_enabled_f() |
+ pwr_pmu_idle_mask_ce_2_enabled_f());
+
+ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(1));
+ data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+ pwr_pmu_idle_ctrl_filter_m(),
+ pwr_pmu_idle_ctrl_value_busy_f() |
+ pwr_pmu_idle_ctrl_filter_disabled_f());
+ gk20a_writel(g, pwr_pmu_idle_ctrl_r(1), data);
+
+ data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(2));
+ data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
+ pwr_pmu_idle_ctrl_filter_m(),
+ pwr_pmu_idle_ctrl_value_always_f() |
+ pwr_pmu_idle_ctrl_filter_disabled_f());
+ gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
+
+ pmu->sample_buffer = 0;
+ err = pmu->dmem.alloc(&pmu->dmem, &pmu->sample_buffer, 2 * sizeof(u16));
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "failed to allocate perfmon sample buffer");
+ return -ENOMEM;
+ }
+
+ /* init PERFMON */
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PERFMON;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
+ cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
+ /* buffer to save counter values for pmu perfmon */
+ pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
+ (u16)pmu->sample_buffer);
+ /* number of sample periods below lower threshold
+ before pmu triggers perfmon decrease event
+ TBD: = 15 */
+ pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
+ /* index of base counter, aka. always ticking counter */
+ pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
+ /* microseconds interval between pmu polls perf counters */
+ pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
+ /* number of perfmon counters
+ counter #3 (GR and CE2) for gk20a */
+ pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
+ /* moving average window for sample periods
+ TBD: = 3000000 / sample_period_us = 17 */
+ pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
+
+ memset(&payload, 0, sizeof(struct pmu_payload));
+ payload.in.buf = &pmu->perfmon_counter;
+ payload.in.size = sizeof(struct pmu_perfmon_counter);
+ payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
+ NULL, NULL, &seq, ~0);
+
+ return 0;
+}
+
+static int pmu_process_init_msg(struct pmu_gk20a *pmu,
+ struct pmu_msg *msg)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_v *pv = &g->ops.pmu_ver;
+ union pmu_init_msg_pmu *init;
+ struct pmu_sha1_gid_data gid_data;
+ u32 i, tail = 0;
+
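+	/*
+	 * The PMU posts its init message at the message queue tail. Pull
+	 * the header and body straight out of DMEM, advance the tail
+	 * pointer, then use the payload to set up the queues and the DMEM
+	 * allocator.
+	 */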
+ tail = pwr_pmu_msgq_tail_val_v(
+ gk20a_readl(g, pwr_pmu_msgq_tail_r()));
+
+ pmu_copy_from_dmem(pmu, tail,
+ (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
+
+ if (msg->hdr.unit_id != PMU_UNIT_INIT) {
+ gk20a_err(dev_from_gk20a(g),
+ "expecting init msg");
+ return -EINVAL;
+ }
+
+ pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
+ (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
+
+ if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
+ gk20a_err(dev_from_gk20a(g),
+ "expecting init msg");
+ return -EINVAL;
+ }
+
+ tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
+ gk20a_writel(g, pwr_pmu_msgq_tail_r(),
+ pwr_pmu_msgq_tail_val_f(tail));
+
+ init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
+ if (!pmu->gid_info.valid) {
+
+ pmu_copy_from_dmem(pmu,
+ pv->get_pmu_init_msg_pmu_sw_mg_off(init),
+ (u8 *)&gid_data,
+ sizeof(struct pmu_sha1_gid_data), 0);
+
+ pmu->gid_info.valid =
+ (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
+
+ if (pmu->gid_info.valid) {
+
+ BUG_ON(sizeof(pmu->gid_info.gid) !=
+ sizeof(gid_data.gid));
+
+ memcpy(pmu->gid_info.gid, gid_data.gid,
+ sizeof(pmu->gid_info.gid));
+ }
+ }
+
+ for (i = 0; i < PMU_QUEUE_COUNT; i++)
+ pmu_queue_init(pmu, i, init);
+
+ gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
+ pv->get_pmu_init_msg_pmu_sw_mg_off(init),
+ pv->get_pmu_init_msg_pmu_sw_mg_size(init),
+ PMU_DMEM_ALLOC_ALIGNMENT);
+
+ pmu->pmu_ready = true;
+
+ return 0;
+}
+
+static bool pmu_read_message(struct pmu_gk20a *pmu, struct pmu_queue *queue,
+ struct pmu_msg *msg, int *status)
+{
+ struct gk20a *g = pmu->g;
+ u32 read_size, bytes_read;
+ int err;
+
+ *status = 0;
+
+ if (pmu_queue_is_empty(pmu, queue))
+ return false;
+
+ err = pmu_queue_open_read(pmu, queue);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to open queue %d for read", queue->id);
+ *status = err;
+ return false;
+ }
+
+ err = pmu_queue_pop(pmu, queue, &msg->hdr,
+ PMU_MSG_HDR_SIZE, &bytes_read);
+ if (err || bytes_read != PMU_MSG_HDR_SIZE) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to read msg from queue %d", queue->id);
+ *status = err | -EINVAL;
+ goto clean_up;
+ }
+
+ if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
+ pmu_queue_rewind(pmu, queue);
+ /* read again after rewind */
+ err = pmu_queue_pop(pmu, queue, &msg->hdr,
+ PMU_MSG_HDR_SIZE, &bytes_read);
+ if (err || bytes_read != PMU_MSG_HDR_SIZE) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to read msg from queue %d", queue->id);
+ *status = err | -EINVAL;
+ goto clean_up;
+ }
+ }
+
+ if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
+ gk20a_err(dev_from_gk20a(g),
+ "read invalid unit_id %d from queue %d",
+ msg->hdr.unit_id, queue->id);
+ *status = -EINVAL;
+ goto clean_up;
+ }
+
+ if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
+ read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
+ err = pmu_queue_pop(pmu, queue, &msg->msg,
+ read_size, &bytes_read);
+ if (err || bytes_read != read_size) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to read msg from queue %d", queue->id);
+ *status = err;
+ goto clean_up;
+ }
+ }
+
+ err = pmu_queue_close(pmu, queue, true);
+ if (err) {
+ gk20a_err(dev_from_gk20a(g),
+ "fail to close queue %d", queue->id);
+ *status = err;
+ return false;
+ }
+
+ return true;
+
+clean_up:
+ err = pmu_queue_close(pmu, queue, false);
+ if (err)
+ gk20a_err(dev_from_gk20a(g),
+ "fail to close queue %d", queue->id);
+ return false;
+}
+
+static int pmu_response_handle(struct pmu_gk20a *pmu,
+ struct pmu_msg *msg)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_sequence *seq;
+ struct pmu_v *pv = &g->ops.pmu_ver;
+ int ret = 0;
+
+ gk20a_dbg_fn("");
+
+ seq = &pmu->seq[msg->hdr.seq_id];
+ if (seq->state != PMU_SEQ_STATE_USED &&
+ seq->state != PMU_SEQ_STATE_CANCELLED) {
+ gk20a_err(dev_from_gk20a(g),
+ "msg for an unknown sequence %d", seq->id);
+ return -EINVAL;
+ }
+
+ if (msg->hdr.unit_id == PMU_UNIT_RC &&
+ msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
+ gk20a_err(dev_from_gk20a(g),
+ "unhandled cmd: seq %d", seq->id);
+	} else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
+ if (seq->msg) {
+ if (seq->msg->hdr.size >= msg->hdr.size) {
+ memcpy(seq->msg, msg, msg->hdr.size);
+ if (pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
+ pmu_copy_from_dmem(pmu,
+ pv->pmu_allocation_get_dmem_offset(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)),
+ seq->out_payload,
+ pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)), 0);
+ }
+ } else {
+ gk20a_err(dev_from_gk20a(g),
+ "sequence %d msg buffer too small",
+ seq->id);
+ }
+ }
+ } else
+ seq->callback = NULL;
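+	/* Free the command's DMEM input/output allocations back to the
+	 * PMU DMEM allocator. */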
+ if (pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq)) != 0)
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq)),
+ pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq)));
+ if (pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)) != 0)
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)),
+ pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)));
+
+ if (seq->callback)
+ seq->callback(g, msg, seq->cb_params, seq->desc, ret);
+
+ pmu_seq_release(pmu, seq);
+
+ /* TBD: notify client waiting for available dmem */
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+}
+
+static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
+ u32 *var, u32 val);
+
+static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 handle, u32 status)
+{
+ struct pmu_gk20a *pmu = param;
+ pmu->zbc_save_done = 1;
+}
+
+static void pmu_save_zbc(struct gk20a *g, u32 entries)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_cmd cmd;
+ u32 seq;
+
+ if (!pmu->pmu_ready || !entries || !pmu->zbc_ready)
+ return;
+
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd);
+ cmd.cmd.zbc.cmd_type = g->ops.pmu_ver.cmd_id_zbc_table_update;
+ cmd.cmd.zbc.entry_mask = ZBC_MASK(entries);
+
+ pmu->zbc_save_done = 0;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ pmu_handle_zbc_msg, pmu, &seq, ~0);
+ pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+ &pmu->zbc_save_done, 1);
+ if (!pmu->zbc_save_done)
+ gk20a_err(dev_from_gk20a(g), "ZBC save timeout");
+}
+
+void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
+{
+ if (g->pmu.zbc_ready)
+ pmu_save_zbc(g, entries);
+}
+
+static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_v *pv = &g->ops.pmu_ver;
+ struct pmu_cmd cmd;
+ struct pmu_payload payload;
+ u32 current_rate = 0;
+ u32 seq;
+
+ /* PERFMON Start */
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PERFMON;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
+ pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
+ PMU_PERFMON_CMD_ID_START);
+ pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
+ PMU_DOMAIN_GROUP_PSTATE);
+ pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
+ pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
+
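+	/* Only request events that can be acted on: at or above the max
+	 * GPC2CLK rate enable decrease events only, at or below the min
+	 * rate enable increase events only. */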
+ current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
+ if (current_rate >= gpc_pll_params.max_freq)
+ pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
+ PMU_PERFMON_FLAG_ENABLE_DECREASE);
+ else if (current_rate <= gpc_pll_params.min_freq)
+ pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
+ PMU_PERFMON_FLAG_ENABLE_INCREASE);
+ else
+ pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
+ PMU_PERFMON_FLAG_ENABLE_INCREASE |
+ PMU_PERFMON_FLAG_ENABLE_DECREASE);
+
+ pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
+ pv->perfmon_start_get_flags(&cmd.cmd.perfmon) |
+ PMU_PERFMON_FLAG_CLEAR_PREV);
+
+ memset(&payload, 0, sizeof(struct pmu_payload));
+
+ /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
+ pmu->perfmon_counter.upper_threshold = 3000; /* 30% */
+ /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
+ pmu->perfmon_counter.lower_threshold = 1000; /* 10% */
+ pmu->perfmon_counter.valid = true;
+
+ payload.in.buf = &pmu->perfmon_counter;
+ payload.in.size = sizeof(pmu->perfmon_counter);
+ payload.in.offset =
+ pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
+ NULL, NULL, &seq, ~0);
+
+ return 0;
+}
+
+static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_cmd cmd;
+ u32 seq;
+
+ /* PERFMON Stop */
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PERFMON;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
+ cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+ NULL, NULL, &seq, ~0);
+ return 0;
+}
+
+static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
+ struct pmu_perfmon_msg *msg)
+{
+ struct gk20a *g = pmu->g;
+ u32 rate;
+
+ gk20a_dbg_fn("");
+
+ switch (msg->msg_type) {
+ case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
+ gk20a_dbg_pmu("perfmon increase event: "
+			"state_id %d, group_id %d, pct %d",
+ msg->gen.state_id, msg->gen.group_id, msg->gen.data);
+ /* increase gk20a clock freq by 20% */
+ rate = gk20a_clk_get_rate(g);
+ gk20a_clk_set_rate(g, rate * 6 / 5);
+ break;
+ case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
+ gk20a_dbg_pmu("perfmon decrease event: "
+			"state_id %d, group_id %d, pct %d",
+ msg->gen.state_id, msg->gen.group_id, msg->gen.data);
+		/* reduce gk20a clock freq to 70% of the current rate */
+ rate = gk20a_clk_get_rate(g);
+ gk20a_clk_set_rate(g, (rate / 10) * 7);
+ break;
+ case PMU_PERFMON_MSG_ID_INIT_EVENT:
+ pmu->perfmon_ready = 1;
+ gk20a_dbg_pmu("perfmon init event");
+ break;
+ default:
+ break;
+ }
+
+ /* restart sampling */
+ if (IS_ENABLED(CONFIG_GK20A_PERFMON))
+ return pmu_perfmon_start_sampling(pmu);
+ return 0;
+}
+
+
+static int pmu_handle_event(struct pmu_gk20a *pmu, struct pmu_msg *msg)
+{
+	int err = 0;
+
+ gk20a_dbg_fn("");
+
+ switch (msg->hdr.unit_id) {
+ case PMU_UNIT_PERFMON:
+ err = pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
+ break;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+static int pmu_process_message(struct pmu_gk20a *pmu)
+{
+ struct pmu_msg msg;
+ int status;
+
+ if (unlikely(!pmu->pmu_ready)) {
+ pmu_process_init_msg(pmu, &msg);
+ pmu_init_powergating(pmu);
+ pmu_init_perfmon(pmu);
+ return 0;
+ }
+
+ while (pmu_read_message(pmu,
+ &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
+
+ gk20a_dbg_pmu("read msg hdr: "
+ "unit_id = 0x%08x, size = 0x%08x, "
+ "ctrl_flags = 0x%08x, seq_id = 0x%08x",
+ msg.hdr.unit_id, msg.hdr.size,
+ msg.hdr.ctrl_flags, msg.hdr.seq_id);
+
+ msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
+
+ if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) {
+ pmu_handle_event(pmu, &msg);
+ } else {
+ pmu_response_handle(pmu, &msg);
+ }
+ }
+
+ return 0;
+}
+
+static int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
+ u32 *var, u32 val)
+{
+ struct gk20a *g = pmu->g;
+ unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+ unsigned long delay = GR_IDLE_CHECK_DEFAULT;
+
+ do {
+ if (*var == val)
+ return 0;
+
+ if (gk20a_readl(g, pwr_falcon_irqstat_r()))
+ gk20a_pmu_isr(g);
+
+ usleep_range(delay, delay * 2);
+ delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+ } while (time_before(jiffies, end_jiffies) ||
+ !tegra_platform_is_silicon());
+
+ return -ETIMEDOUT;
+}
+
+static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_pg_stats stats;
+
+ pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
+ (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
+
+ gk20a_dbg_pmu("pg_entry_start_timestamp : 0x%016llx",
+ stats.pg_entry_start_timestamp);
+ gk20a_dbg_pmu("pg_exit_start_timestamp : 0x%016llx",
+ stats.pg_exit_start_timestamp);
+ gk20a_dbg_pmu("pg_ingating_start_timestamp : 0x%016llx",
+ stats.pg_ingating_start_timestamp);
+ gk20a_dbg_pmu("pg_ungating_start_timestamp : 0x%016llx",
+ stats.pg_ungating_start_timestamp);
+ gk20a_dbg_pmu("pg_avg_entry_time_us : 0x%08x",
+ stats.pg_avg_entry_time_us);
+ gk20a_dbg_pmu("pg_avg_exit_time_us : 0x%08x",
+ stats.pg_avg_exit_time_us);
+ gk20a_dbg_pmu("pg_ingating_cnt : 0x%08x",
+ stats.pg_ingating_cnt);
+ gk20a_dbg_pmu("pg_ingating_time_us : 0x%08x",
+ stats.pg_ingating_time_us);
+ gk20a_dbg_pmu("pg_ungating_count : 0x%08x",
+ stats.pg_ungating_count);
+	gk20a_dbg_pmu("pg_ungating_time_us : 0x%08x",
+ stats.pg_ungating_time_us);
+ gk20a_dbg_pmu("pg_gating_cnt : 0x%08x",
+ stats.pg_gating_cnt);
+ gk20a_dbg_pmu("pg_gating_deny_cnt : 0x%08x",
+ stats.pg_gating_deny_cnt);
+
+ /*
+ Turn on PG_DEBUG in ucode and locate symbol "ElpgLog" offset
+ in .nm file, e.g. 0x1000066c. use 0x66c.
+ u32 i, val[20];
+ pmu_copy_from_dmem(pmu, 0x66c,
+ (u8 *)val, sizeof(val), 0);
+ gk20a_dbg_pmu("elpg log begin");
+ for (i = 0; i < 20; i++)
+ gk20a_dbg_pmu("0x%08x", val[i]);
+ gk20a_dbg_pmu("elpg log end");
+ */
+
+ gk20a_dbg_pmu("pwr_pmu_idle_mask_supp_r(3): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_mask_supp_r(3)));
+ gk20a_dbg_pmu("pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
+ gk20a_dbg_pmu("pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
+ gk20a_dbg_pmu("pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
+ gk20a_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
+ gk20a_dbg_pmu("pwr_pmu_pg_intren_r(0): 0x%08x",
+ gk20a_readl(g, pwr_pmu_pg_intren_r(0)));
+
+ gk20a_dbg_pmu("pwr_pmu_idle_count_r(3): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_count_r(3)));
+ gk20a_dbg_pmu("pwr_pmu_idle_count_r(4): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_count_r(4)));
+ gk20a_dbg_pmu("pwr_pmu_idle_count_r(7): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_count_r(7)));
+
+ /*
+ TBD: script can't generate those registers correctly
+ gk20a_dbg_pmu("pwr_pmu_idle_status_r(): 0x%08x",
+ gk20a_readl(g, pwr_pmu_idle_status_r()));
+ gk20a_dbg_pmu("pwr_pmu_pg_ctrl_r(): 0x%08x",
+ gk20a_readl(g, pwr_pmu_pg_ctrl_r()));
+ */
+}
+
+static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
+{
+ struct gk20a *g = pmu->g;
+ int i;
+
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_os_r : %d",
+ gk20a_readl(g, pwr_falcon_os_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_cpuctl_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_cpuctl_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_idlestate_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_idlestate_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox0_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_mailbox0_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_mailbox1_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_mailbox1_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqstat_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_irqstat_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmode_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_irqmode_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqmask_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_irqmask_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_irqdest_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_irqdest_r()));
+
+ for (i = 0; i < pwr_pmu_mailbox__size_1_v(); i++)
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_mailbox_r(%d) : 0x%x",
+ i, gk20a_readl(g, pwr_pmu_mailbox_r(i)));
+
+ for (i = 0; i < pwr_pmu_debug__size_1_v(); i++)
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_debug_r(%d) : 0x%x",
+ i, gk20a_readl(g, pwr_pmu_debug_r(i)));
+
+ for (i = 0; i < 6/*NV_PPWR_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rstat_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(i));
+ gk20a_err(dev_from_gk20a(g), "pmu_rstat (%d) : 0x%x",
+ i, gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+ }
+
+ i = gk20a_readl(g, pwr_pmu_bar0_error_status_r());
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_error_status_r : 0x%x", i);
+ if (i != 0) {
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_addr_r : 0x%x",
+ gk20a_readl(g, pwr_pmu_bar0_addr_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_data_r : 0x%x",
+ gk20a_readl(g, pwr_pmu_bar0_data_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_timeout_r : 0x%x",
+ gk20a_readl(g, pwr_pmu_bar0_timeout_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_ctl_r : 0x%x",
+ gk20a_readl(g, pwr_pmu_bar0_ctl_r()));
+ }
+
+ i = gk20a_readl(g, pwr_pmu_bar0_fecs_error_r());
+ gk20a_err(dev_from_gk20a(g), "pwr_pmu_bar0_fecs_error_r : 0x%x", i);
+
+ i = gk20a_readl(g, pwr_falcon_exterrstat_r());
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterrstat_r : 0x%x", i);
+ if (pwr_falcon_exterrstat_valid_v(i) ==
+ pwr_falcon_exterrstat_valid_true_v()) {
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_exterraddr_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_exterraddr_r()));
+ gk20a_err(dev_from_gk20a(g), "top_fs_status_r : 0x%x",
+ gk20a_readl(g, top_fs_status_r()));
+ gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
+ gk20a_readl(g, mc_enable_r()));
+ }
+
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_engctl_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_engctl_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_curctx_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_curctx_r()));
+ gk20a_err(dev_from_gk20a(g), "pwr_falcon_nxtctx_r : 0x%x",
+ gk20a_readl(g, pwr_falcon_nxtctx_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_IMB : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_DMB : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CSW : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_CTX : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_EXCI : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ for (i = 0; i < 4; i++) {
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_PC));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_PC : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+
+ gk20a_writel(g, pwr_pmu_falcon_icd_cmd_r(),
+ pwr_pmu_falcon_icd_cmd_opc_rreg_f() |
+ pwr_pmu_falcon_icd_cmd_idx_f(PMU_FALCON_REG_SP));
+ gk20a_err(dev_from_gk20a(g), "PMU_FALCON_REG_SP : 0x%x",
+ gk20a_readl(g, pwr_pmu_falcon_icd_rdata_r()));
+ }
+
+ /* PMU may crash due to FECS crash. Dump FECS status */
+ gk20a_fecs_dump_falcon_stats(g);
+}
+
+void gk20a_pmu_isr(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_queue *queue;
+ u32 intr, mask;
+ bool recheck = false;
+
+ gk20a_dbg_fn("");
+
+ mutex_lock(&pmu->isr_mutex);
+
+ mask = gk20a_readl(g, pwr_falcon_irqmask_r()) &
+ gk20a_readl(g, pwr_falcon_irqdest_r());
+
+ intr = gk20a_readl(g, pwr_falcon_irqstat_r()) & mask;
+
+ gk20a_dbg_pmu("received falcon interrupt: 0x%08x", intr);
+
+ if (!intr) {
+ mutex_unlock(&pmu->isr_mutex);
+ return;
+ }
+
+ if (intr & pwr_falcon_irqstat_halt_true_f()) {
+ gk20a_err(dev_from_gk20a(g),
+ "pmu halt intr not implemented");
+ pmu_dump_falcon_stats(pmu);
+ }
+ if (intr & pwr_falcon_irqstat_exterr_true_f()) {
+ gk20a_err(dev_from_gk20a(g),
+ "pmu exterr intr not implemented. Clearing interrupt.");
+ pmu_dump_falcon_stats(pmu);
+
+ gk20a_writel(g, pwr_falcon_exterrstat_r(),
+ gk20a_readl(g, pwr_falcon_exterrstat_r()) &
+ ~pwr_falcon_exterrstat_valid_m());
+ }
+ if (intr & pwr_falcon_irqstat_swgen0_true_f()) {
+ pmu_process_message(pmu);
+ recheck = true;
+ }
+
+ gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
+
+ if (recheck) {
+ queue = &pmu->queue[PMU_MESSAGE_QUEUE];
+ if (!pmu_queue_is_empty(pmu, queue))
+ gk20a_writel(g, pwr_falcon_irqsset_r(),
+ pwr_falcon_irqsset_swgen0_set_f());
+ }
+
+ mutex_unlock(&pmu->isr_mutex);
+}
+
+static bool pmu_validate_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
+ struct pmu_msg *msg, struct pmu_payload *payload,
+ u32 queue_id)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_queue *queue;
+ u32 in_size, out_size;
+
+ if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
+ goto invalid_cmd;
+
+ queue = &pmu->queue[queue_id];
+ if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
+ goto invalid_cmd;
+
+ if (cmd->hdr.size > (queue->size >> 1))
+ goto invalid_cmd;
+
+ if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
+ goto invalid_cmd;
+
+ if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
+ goto invalid_cmd;
+
+ if (payload == NULL)
+ return true;
+
+ if (payload->in.buf == NULL && payload->out.buf == NULL)
+ goto invalid_cmd;
+
+ if ((payload->in.buf != NULL && payload->in.size == 0) ||
+ (payload->out.buf != NULL && payload->out.size == 0))
+ goto invalid_cmd;
+
+ in_size = PMU_CMD_HDR_SIZE;
+ if (payload->in.buf) {
+ in_size += payload->in.offset;
+ in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
+ }
+
+ out_size = PMU_CMD_HDR_SIZE;
+ if (payload->out.buf) {
+ out_size += payload->out.offset;
+ out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
+ }
+
+ if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
+ goto invalid_cmd;
+
+
+ if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
+ (payload->out.offset != 0 && payload->out.buf == NULL))
+ goto invalid_cmd;
+
+ return true;
+
+invalid_cmd:
+	gk20a_err(dev_from_gk20a(g), "invalid pmu cmd :\n"
+		"queue_id=%d,\n"
+		"cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
+		"payload in=%p, in_size=%d, in_offset=%d,\n"
+		"payload out=%p, out_size=%d, out_offset=%d",
+		queue_id, cmd->hdr.size, cmd->hdr.unit_id,
+		msg, msg ? msg->hdr.size : ~0,
+		payload ? &payload->in : NULL,
+		payload ? payload->in.size : 0,
+		payload ? payload->in.offset : 0,
+		payload ? &payload->out : NULL,
+		payload ? payload->out.size : 0,
+		payload ? payload->out.offset : 0);
+
+ return false;
+}
+
+static int pmu_write_cmd(struct pmu_gk20a *pmu, struct pmu_cmd *cmd,
+ u32 queue_id, unsigned long timeout)
+{
+ struct gk20a *g = pmu->g;
+ struct pmu_queue *queue;
+ unsigned long end_jiffies = jiffies +
+ msecs_to_jiffies(timeout);
+ int err;
+
+ gk20a_dbg_fn("");
+
+ queue = &pmu->queue[queue_id];
+
+ do {
+ err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
+ if (err == -EAGAIN && time_before(jiffies, end_jiffies))
+ usleep_range(1000, 2000);
+ else
+ break;
+ } while (1);
+
+ if (err)
+ goto clean_up;
+
+ pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
+
+ err = pmu_queue_close(pmu, queue, true);
+
+clean_up:
+ if (err)
+ gk20a_err(dev_from_gk20a(g),
+ "fail to write cmd to queue %d", queue_id);
+ else
+ gk20a_dbg_fn("done");
+
+ return err;
+}
+
+int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
+ struct pmu_msg *msg, struct pmu_payload *payload,
+		u32 queue_id, pmu_callback callback, void *cb_param,
+ u32 *seq_desc, unsigned long timeout)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_v *pv = &g->ops.pmu_ver;
+ struct pmu_sequence *seq;
+ void *in = NULL, *out = NULL;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ BUG_ON(!cmd);
+ BUG_ON(!seq_desc);
+ BUG_ON(!pmu->pmu_ready);
+
+ if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
+ return -EINVAL;
+
+ err = pmu_seq_acquire(pmu, &seq);
+ if (err)
+ return err;
+
+ cmd->hdr.seq_id = seq->id;
+
+ cmd->hdr.ctrl_flags = 0;
+ cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
+ cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
+
+ seq->callback = callback;
+ seq->cb_params = cb_param;
+ seq->msg = msg;
+ seq->out_payload = NULL;
+ seq->desc = pmu->next_seq_desc++;
+
+ if (payload)
+ seq->out_payload = payload->out.buf;
+
+ *seq_desc = seq->desc;
+
+ if (payload && payload->in.offset != 0) {
+ pv->set_pmu_allocation_ptr(pmu, &in,
+ ((u8 *)&cmd->cmd + payload->in.offset));
+
+ if (payload->in.buf != payload->out.buf)
+ pv->pmu_allocation_set_dmem_size(pmu, in,
+ (u16)payload->in.size);
+ else
+ pv->pmu_allocation_set_dmem_size(pmu, in,
+ (u16)max(payload->in.size, payload->out.size));
+
+ err = pmu->dmem.alloc(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
+ pv->pmu_allocation_get_dmem_size(pmu, in));
+ if (err)
+ goto clean_up;
+
+ pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
+ in)),
+ payload->in.buf, payload->in.size, 0);
+ pv->pmu_allocation_set_dmem_size(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq),
+ pv->pmu_allocation_get_dmem_size(pmu, in));
+ pv->pmu_allocation_set_dmem_offset(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq),
+ pv->pmu_allocation_get_dmem_offset(pmu, in));
+ }
+
+ if (payload && payload->out.offset != 0) {
+ pv->set_pmu_allocation_ptr(pmu, &out,
+ ((u8 *)&cmd->cmd + payload->out.offset));
+ pv->pmu_allocation_set_dmem_size(pmu, out,
+ (u16)payload->out.size);
+
+ if (payload->out.buf != payload->in.buf) {
+ err = pmu->dmem.alloc(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
+ pv->pmu_allocation_get_dmem_size(pmu, out));
+ if (err)
+ goto clean_up;
+ } else {
+ BUG_ON(in == NULL);
+ pv->pmu_allocation_set_dmem_offset(pmu, out,
+ pv->pmu_allocation_get_dmem_offset(pmu, in));
+ }
+
+ pv->pmu_allocation_set_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq),
+ pv->pmu_allocation_get_dmem_size(pmu, out));
+ pv->pmu_allocation_set_dmem_offset(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq),
+ pv->pmu_allocation_get_dmem_offset(pmu, out));
+ }
+
+ seq->state = PMU_SEQ_STATE_USED;
+ err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
+ if (err)
+ seq->state = PMU_SEQ_STATE_PENDING;
+
+ gk20a_dbg_fn("done");
+
+ return 0;
+
+clean_up:
+ gk20a_dbg_fn("fail");
+ if (in)
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu, in),
+ pv->pmu_allocation_get_dmem_size(pmu, in));
+ if (out)
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu, out),
+ pv->pmu_allocation_get_dmem_size(pmu, out));
+
+ pmu_seq_release(pmu, seq);
+ return err;
+}
+
+static int gk20a_pmu_enable_elpg_locked(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_cmd cmd;
+ u32 seq, status;
+
+ gk20a_dbg_fn("");
+
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
+ cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
+ cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
+
+ /* no need to wait ack for ELPG enable but set pending to sync
+ with follow up ELPG disable */
+ pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
+
+ status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
+
+ BUG_ON(status != 0);
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+int gk20a_pmu_enable_elpg(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct gr_gk20a *gr = &g->gr;
+
+ int ret = 0;
+
+ gk20a_dbg_fn("");
+
+ if (!pmu->elpg_ready || !pmu->initialized)
+ goto exit;
+
+ mutex_lock(&pmu->elpg_mutex);
+
+ pmu->elpg_refcnt++;
+ if (pmu->elpg_refcnt <= 0)
+ goto exit_unlock;
+
+ /* something is not right if we end up in following code path */
+ if (unlikely(pmu->elpg_refcnt > 1)) {
+ gk20a_warn(dev_from_gk20a(g),
+ "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
+ __func__, pmu->elpg_refcnt);
+ WARN_ON(1);
+ }
+
+ /* do NOT enable elpg until golden ctx is created,
+ which is related with the ctx that ELPG save and restore. */
+ if (unlikely(!gr->ctx_vars.golden_image_initialized))
+ goto exit_unlock;
+
+ /* return if ELPG is already on or on_pending or off_on_pending */
+ if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
+ goto exit_unlock;
+
+ /* if ELPG is not allowed right now, mark that it should be enabled
+ * immediately after it is allowed */
+ if (!pmu->elpg_enable_allow) {
+ pmu->elpg_stat = PMU_ELPG_STAT_OFF_ON_PENDING;
+ goto exit_unlock;
+ }
+
+ ret = gk20a_pmu_enable_elpg_locked(g);
+
+exit_unlock:
+ mutex_unlock(&pmu->elpg_mutex);
+exit:
+ gk20a_dbg_fn("done");
+ return ret;
+}
+
+static void pmu_elpg_enable_allow(struct work_struct *work)
+{
+ struct pmu_gk20a *pmu = container_of(to_delayed_work(work),
+ struct pmu_gk20a, elpg_enable);
+
+ gk20a_dbg_fn("");
+
+ mutex_lock(&pmu->elpg_mutex);
+
+	/* It is ok to enable powergating now */
+ pmu->elpg_enable_allow = true;
+
+ /* do we have pending requests? */
+ if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
+ pmu->elpg_stat = PMU_ELPG_STAT_OFF;
+ gk20a_pmu_enable_elpg_locked(pmu->g);
+ }
+
+ mutex_unlock(&pmu->elpg_mutex);
+
+ gk20a_dbg_fn("done");
+}
+
+static int gk20a_pmu_disable_elpg_defer_enable(struct gk20a *g, bool enable)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_cmd cmd;
+ u32 seq;
+ int ret = 0;
+
+ gk20a_dbg_fn("");
+
+ if (!pmu->elpg_ready || !pmu->initialized)
+ return 0;
+
+ /* remove the work from queue */
+ cancel_delayed_work_sync(&pmu->elpg_enable);
+
+ mutex_lock(&pmu->elpg_mutex);
+
+ pmu->elpg_refcnt--;
+ if (pmu->elpg_refcnt > 0) {
+ gk20a_warn(dev_from_gk20a(g),
+ "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
+ __func__, pmu->elpg_refcnt);
+ WARN_ON(1);
+ ret = 0;
+ goto exit_unlock;
+ }
+
+ /* cancel off_on_pending and return */
+ if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
+ pmu->elpg_stat = PMU_ELPG_STAT_OFF;
+ ret = 0;
+ goto exit_reschedule;
+	} else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
+		/* wait if on_pending */
+
+ pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+ &pmu->elpg_stat, PMU_ELPG_STAT_ON);
+
+ if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
+ gk20a_err(dev_from_gk20a(g),
+ "ELPG_ALLOW_ACK failed, elpg_stat=%d",
+ pmu->elpg_stat);
+ pmu_dump_elpg_stats(pmu);
+ pmu_dump_falcon_stats(pmu);
+ ret = -EBUSY;
+ goto exit_unlock;
+ }
+	} else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
+		/* return if ELPG is already off */
+ ret = 0;
+ goto exit_reschedule;
+ }
+
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
+ cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
+ cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A;
+ cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
+
+ pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
+
+ gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
+
+ pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+ &pmu->elpg_stat, PMU_ELPG_STAT_OFF);
+ if (pmu->elpg_stat != PMU_ELPG_STAT_OFF) {
+ gk20a_err(dev_from_gk20a(g),
+ "ELPG_DISALLOW_ACK failed");
+ pmu_dump_elpg_stats(pmu);
+ pmu_dump_falcon_stats(pmu);
+ ret = -EBUSY;
+ goto exit_unlock;
+ }
+
+exit_reschedule:
+ if (enable) {
+ pmu->elpg_enable_allow = false;
+ schedule_delayed_work(&pmu->elpg_enable,
+ msecs_to_jiffies(PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC));
+ } else
+ pmu->elpg_enable_allow = true;
+
+
+exit_unlock:
+ mutex_unlock(&pmu->elpg_mutex);
+ gk20a_dbg_fn("done");
+ return ret;
+}
+
+int gk20a_pmu_disable_elpg(struct gk20a *g)
+{
+ return gk20a_pmu_disable_elpg_defer_enable(g, true);
+}
+
+int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ int err;
+
+ gk20a_dbg_fn("");
+
+ if (enable)
+ err = pmu_perfmon_start_sampling(pmu);
+ else
+ err = pmu_perfmon_stop_sampling(pmu);
+
+ return err;
+}
+
+int gk20a_pmu_destroy(struct gk20a *g)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ u32 elpg_ingating_time, elpg_ungating_time, gating_cnt;
+
+ gk20a_dbg_fn("");
+
+ if (!support_gk20a_pmu())
+ return 0;
+
+ /* make sure the pending operations are finished before we continue */
+ cancel_delayed_work_sync(&pmu->elpg_enable);
+ cancel_work_sync(&pmu->pg_init);
+
+ gk20a_pmu_get_elpg_residency_gating(g, &elpg_ingating_time,
+ &elpg_ungating_time, &gating_cnt);
+
+ gk20a_pmu_disable_elpg_defer_enable(g, false);
+ pmu->initialized = false;
+
+ /* update the s/w ELPG residency counters */
+ g->pg_ingating_time_us += (u64)elpg_ingating_time;
+ g->pg_ungating_time_us += (u64)elpg_ungating_time;
+ g->pg_gating_cnt += gating_cnt;
+
+ pmu_enable(pmu, false);
+
+ if (pmu->remove_support) {
+ pmu->remove_support(pmu);
+ pmu->remove_support = NULL;
+ }
+
+ gk20a_dbg_fn("done");
+ return 0;
+}
+
+int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ u16 _load = 0;
+
+ if (!pmu->perfmon_ready) {
+ *load = 0;
+ return 0;
+ }
+
+ pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
+ *load = _load / 10;
+
+ return 0;
+}
+
+void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
+ u32 *total_cycles)
+{
+ if (!g->power_on) {
+ *busy_cycles = 0;
+ *total_cycles = 0;
+ return;
+ }
+
+ gk20a_busy(g->dev);
+ *busy_cycles = pwr_pmu_idle_count_value_v(
+ gk20a_readl(g, pwr_pmu_idle_count_r(1)));
+ rmb();
+ *total_cycles = pwr_pmu_idle_count_value_v(
+ gk20a_readl(g, pwr_pmu_idle_count_r(2)));
+ gk20a_idle(g->dev);
+}
+
+void gk20a_pmu_reset_load_counters(struct gk20a *g)
+{
+ u32 reg_val = pwr_pmu_idle_count_reset_f(1);
+
+ if (!g->power_on)
+ return;
+
+ gk20a_busy(g->dev);
+ gk20a_writel(g, pwr_pmu_idle_count_r(2), reg_val);
+ wmb();
+ gk20a_writel(g, pwr_pmu_idle_count_r(1), reg_val);
+ gk20a_idle(g->dev);
+}
+
+static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
+ u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ struct pmu_pg_stats stats;
+
+ if (!pmu->initialized) {
+ *ingating_time = 0;
+ *ungating_time = 0;
+ *gating_cnt = 0;
+ return 0;
+ }
+
+ pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
+ (u8 *)&stats, sizeof(struct pmu_pg_stats), 0);
+
+ *ingating_time = stats.pg_ingating_time_us;
+ *ungating_time = stats.pg_ungating_time_us;
+ *gating_cnt = stats.pg_gating_cnt;
+
+ return 0;
+}
+
+/* Send an Adaptive Power (AP) related command to PMU */
+static int gk20a_pmu_ap_send_command(struct gk20a *g,
+ union pmu_ap_cmd *p_ap_cmd, bool b_block)
+{
+ struct pmu_gk20a *pmu = &g->pmu;
+ /* FIXME: where is the PG structure defined?? */
+ u32 status = 0;
+ struct pmu_cmd cmd;
+ u32 seq;
+ pmu_callback p_callback = NULL;
+
+ memset(&cmd, 0, sizeof(struct pmu_cmd));
+
+ /* Copy common members */
+ cmd.hdr.unit_id = PMU_UNIT_PG;
+ cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
+
+ cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
+ cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
+
+ /* Copy other members of command */
+ switch (p_ap_cmd->cmn.cmd_id) {
+ case PMU_AP_CMD_ID_INIT:
+ cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
+ p_ap_cmd->init.pg_sampling_period_us;
+ p_callback = ap_callback_init_and_enable_ctrl;
+ break;
+
+ case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
+ cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
+ p_ap_cmd->init_and_enable_ctrl.ctrl_id;
+ memcpy(
+ (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
+ (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
+ sizeof(struct pmu_ap_ctrl_init_params));
+
+ p_callback = ap_callback_init_and_enable_ctrl;
+ break;
+
+ case PMU_AP_CMD_ID_ENABLE_CTRL:
+ cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
+ p_ap_cmd->enable_ctrl.ctrl_id;
+ break;
+
+ case PMU_AP_CMD_ID_DISABLE_CTRL:
+ cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
+ p_ap_cmd->disable_ctrl.ctrl_id;
+ break;
+
+ case PMU_AP_CMD_ID_KICK_CTRL:
+ cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
+ p_ap_cmd->kick_ctrl.ctrl_id;
+ cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
+ p_ap_cmd->kick_ctrl.skip_count;
+ break;
+
+ default:
+ gk20a_dbg_pmu("%s: Invalid Adaptive Power command %d\n",
+ __func__, p_ap_cmd->cmn.cmd_id);
+ return 0x2f;
+ }
+
+ status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+ p_callback, pmu, &seq, ~0);
+
+	if (status) {
+ gk20a_dbg_pmu(
+ "%s: Unable to submit Adaptive Power Command %d\n",
+ __func__, p_ap_cmd->cmn.cmd_id);
+ goto err_return;
+ }
+
+ /* TODO: Implement blocking calls (b_block) */
+
+err_return:
+ return status;
+}
+
+static void ap_callback_init_and_enable_ctrl(
+ struct gk20a *g, struct pmu_msg *msg,
+ void *param, u32 seq_desc, u32 status)
+{
+ /* Define p_ap (i.e pointer to pmu_ap structure) */
+ WARN_ON(!msg);
+
+ if (!status) {
+ switch (msg->msg.pg.ap_msg.cmn.msg_id) {
+ case PMU_AP_MSG_ID_INIT_ACK:
+ break;
+
+ default:
+ gk20a_dbg_pmu(
+ "%s: Invalid Adaptive Power Message: %x\n",
+ __func__, msg->msg.pg.ap_msg.cmn.msg_id);
+ break;
+ }
+ }
+}
+
+static int gk20a_aelpg_init(struct gk20a *g)
+{
+ int status = 0;
+
+ /* Remove reliance on app_ctrl field. */
+ union pmu_ap_cmd ap_cmd;
+
+ /* TODO: Check for elpg being ready? */
+ ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
+ ap_cmd.init.pg_sampling_period_us =
+ APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
+
+ status = gk20a_pmu_ap_send_command(g, &ap_cmd, false);
+ return status;
+}
+
+static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
+{
+ int status = 0;
+ union pmu_ap_cmd ap_cmd;
+
+ /* TODO: Probably check if ELPG is ready? */
+
+ ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
+ ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
+ ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
+ APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
+ ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
+ APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
+ ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
+ APCTRL_POWER_BREAKEVEN_DEFAULT_US;
+ ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
+ APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
+
+ switch (ctrl_id) {
+ case PMU_AP_CTRL_ID_GRAPHICS:
+ break;
+ default:
+ break;
+ }
+
+ status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
+ return status;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int elpg_residency_show(struct seq_file *s, void *data)
+{
+ struct gk20a *g = s->private;
+ u32 ingating_time = 0;
+ u32 ungating_time = 0;
+ u32 gating_cnt;
+ u64 total_ingating, total_ungating, residency, divisor, dividend;
+
+ /* Don't unnecessarily power on the device */
+ if (g->power_on) {
+ gk20a_busy(g->dev);
+ gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
+ &ungating_time, &gating_cnt);
+ gk20a_idle(g->dev);
+ }
+ total_ingating = g->pg_ingating_time_us + (u64)ingating_time;
+ total_ungating = g->pg_ungating_time_us + (u64)ungating_time;
+ divisor = total_ingating + total_ungating;
+
+ /* We compute the residency on a scale of 1000 */
+ dividend = total_ingating * 1000;
+
+ if (divisor)
+ residency = div64_u64(dividend, divisor);
+ else
+ residency = 0;
+
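+	/*
+	 * Worked example (editorial illustration, not part of the original
+	 * change): 900 us in ELPG and 100 us out of ELPG give
+	 * residency = (900 * 1000) / (900 + 100) = 900, i.e. 90.0%.
+	 */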
+ seq_printf(s, "Time in ELPG: %llu us\n"
+ "Time out of ELPG: %llu us\n"
+ "ELPG residency ratio: %llu\n",
+ total_ingating, total_ungating, residency);
+ return 0;
+
+}
+
+static int elpg_residency_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, elpg_residency_show, inode->i_private);
+}
+
+static const struct file_operations elpg_residency_fops = {
+ .open = elpg_residency_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int elpg_transitions_show(struct seq_file *s, void *data)
+{
+ struct gk20a *g = s->private;
+ u32 ingating_time, ungating_time, total_gating_cnt;
+ u32 gating_cnt = 0;
+
+ if (g->power_on) {
+ gk20a_busy(g->dev);
+ gk20a_pmu_get_elpg_residency_gating(g, &ingating_time,
+ &ungating_time, &gating_cnt);
+ gk20a_idle(g->dev);
+ }
+ total_gating_cnt = g->pg_gating_cnt + gating_cnt;
+
+ seq_printf(s, "%u\n", total_gating_cnt);
+ return 0;
+
+}
+
+static int elpg_transitions_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, elpg_transitions_show, inode->i_private);
+}
+
+static const struct file_operations elpg_transitions_fops = {
+ .open = elpg_transitions_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int gk20a_pmu_debugfs_init(struct platform_device *dev)
+{
+ struct dentry *d;
+ struct gk20a_platform *platform = platform_get_drvdata(dev);
+ struct gk20a *g = get_gk20a(dev);
+
+ d = debugfs_create_file(
+ "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
+ &elpg_residency_fops);
+ if (!d)
+ goto err_out;
+
+ d = debugfs_create_file(
+ "elpg_transitions", S_IRUGO, platform->debugfs, g,
+ &elpg_transitions_fops);
+ if (!d)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ pr_err("%s: Failed to make debugfs node\n", __func__);
+ debugfs_remove_recursive(platform->debugfs);
+ return -ENOMEM;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
new file mode 100644
index 000000000000..c1b8ff1f61b8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -0,0 +1,1097 @@
+/*
+ * drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+ *
+ * GK20A PMU (aka. gPMU outside gk20a context)
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __PMU_GK20A_H__
+#define __PMU_GK20A_H__
+
+/* defined by pmu hw spec */
+#define GK20A_PMU_VA_START ((128 * 1024) << 10)
+#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024)
+#define GK20A_PMU_INST_SIZE (4 * 1024)
+#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024)
+#define GK20A_PMU_SEQ_BUF_SIZE 4096
+
+#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe)
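+/*
+ * Illustrative expansion (editorial note, not part of the original change):
+ * ZBC_MASK(4) = ~(~0 << 5) & 0xfffe = 0x001e, i.e. bits 1..4 are set while
+ * bit 0 is always cleared by the 0xfffe term.
+ */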
+
+/* PMU Command/Message Interfaces for Adaptive Power */
+/* Macro to get Histogram index */
+#define PMU_AP_HISTOGRAM(idx) (idx)
+#define PMU_AP_HISTOGRAM_CONT (4)
+
+/* Total number of histogram bins */
+#define PMU_AP_CFG_HISTOGRAM_BIN_N (16)
+
+/* Mapping between Idle counters and histograms */
+#define PMU_AP_IDLE_MASK_HIST_IDX_0 (2)
+#define PMU_AP_IDLE_MASK_HIST_IDX_1 (3)
+#define PMU_AP_IDLE_MASK_HIST_IDX_2 (5)
+#define PMU_AP_IDLE_MASK_HIST_IDX_3 (6)
+
+
+/* Mapping between AP_CTRLs and Histograms */
+#define PMU_AP_HISTOGRAM_IDX_GRAPHICS (PMU_AP_HISTOGRAM(1))
+
+/* Mapping between AP_CTRLs and Idle counters */
+#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1)
+
+#define APP_VERSION_1 17997577
+#define APP_VERSION_0 16856675
+
+
+enum pmu_perfmon_cmd_start_fields {
+ COUNTER_ALLOC
+};
+
+/* Adaptive Power Controls (AP_CTRL) */
+enum {
+ PMU_AP_CTRL_ID_GRAPHICS = 0x0,
+ /* PMU_AP_CTRL_ID_MS ,*/
+ PMU_AP_CTRL_ID_MAX ,
+};
+
+/* AP_CTRL Statistics */
+struct pmu_ap_ctrl_stat {
+ /*
+ * Represents whether AP is active or not
+	 * TODO: This is NvBool in RM; is that 1 byte or 4 bytes?
+ */
+ u8 b_active;
+
+ /* Idle filter represented by histogram bin index */
+ u8 idle_filter_x;
+ u8 rsvd[2];
+
+ /* Total predicted power saving cycles. */
+ s32 power_saving_h_cycles;
+
+ /* Counts how many times AP gave us -ve power benefits. */
+ u32 bad_decision_count;
+
+ /*
+ * Number of times ap structure needs to skip AP iterations
+ * KICK_CTRL from kernel updates this parameter.
+ */
+ u32 skip_count;
+ u8 bin[PMU_AP_CFG_HISTOGRAM_BIN_N];
+};
+
+/* Parameters initialized by INITn APCTRL command */
+struct pmu_ap_ctrl_init_params {
+ /* Minimum idle filter value in Us */
+ u32 min_idle_filter_us;
+
+ /*
+ * Minimum Targeted Saving in Us. AP will update idle thresholds only
+ * if power saving achieved by updating idle thresholds is greater than
+ * Minimum targeted saving.
+ */
+ u32 min_target_saving_us;
+
+ /* Minimum targeted residency of power feature in Us */
+ u32 power_break_even_us;
+
+ /*
+ * Maximum number of allowed power feature cycles per sample.
+ *
+ * We are allowing at max "pgPerSampleMax" cycles in one iteration of AP
+ * AKA pgPerSampleMax in original algorithm.
+ */
+ u32 cycles_per_sample_max;
+};
+
+/* AP Commands/Message structures */
+
+/*
+ * Structure for Generic AP Commands
+ */
+struct pmu_ap_cmd_common {
+ u8 cmd_type;
+ u16 cmd_id;
+};
+
+/*
+ * Structure for INIT AP command
+ */
+struct pmu_ap_cmd_init {
+ u8 cmd_type;
+ u16 cmd_id;
+ u8 rsvd;
+ u32 pg_sampling_period_us;
+};
+
+/*
+ * Structure for Enable/Disable ApCtrl Commands
+ */
+struct pmu_ap_cmd_enable_ctrl {
+ u8 cmd_type;
+ u16 cmd_id;
+
+ u8 ctrl_id;
+};
+
+struct pmu_ap_cmd_disable_ctrl {
+ u8 cmd_type;
+ u16 cmd_id;
+
+ u8 ctrl_id;
+};
+
+/*
+ * Structure for INIT command
+ */
+struct pmu_ap_cmd_init_ctrl {
+ u8 cmd_type;
+ u16 cmd_id;
+ u8 ctrl_id;
+ struct pmu_ap_ctrl_init_params params;
+};
+
+struct pmu_ap_cmd_init_and_enable_ctrl {
+ u8 cmd_type;
+ u16 cmd_id;
+ u8 ctrl_id;
+ struct pmu_ap_ctrl_init_params params;
+};
+
+/*
+ * Structure for KICK_CTRL command
+ */
+struct pmu_ap_cmd_kick_ctrl {
+ u8 cmd_type;
+ u16 cmd_id;
+ u8 ctrl_id;
+
+ u32 skip_count;
+};
+
+/*
+ * Structure for PARAM command
+ */
+struct pmu_ap_cmd_param {
+ u8 cmd_type;
+ u16 cmd_id;
+ u8 ctrl_id;
+
+ u32 data;
+};
+
+/*
+ * Defines for AP commands
+ */
+enum {
+ PMU_AP_CMD_ID_INIT = 0x0 ,
+ PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL,
+ PMU_AP_CMD_ID_ENABLE_CTRL ,
+ PMU_AP_CMD_ID_DISABLE_CTRL ,
+ PMU_AP_CMD_ID_KICK_CTRL ,
+};
+
+/*
+ * AP Command
+ */
+union pmu_ap_cmd {
+ u8 cmd_type;
+ struct pmu_ap_cmd_common cmn;
+ struct pmu_ap_cmd_init init;
+ struct pmu_ap_cmd_init_and_enable_ctrl init_and_enable_ctrl;
+ struct pmu_ap_cmd_enable_ctrl enable_ctrl;
+ struct pmu_ap_cmd_disable_ctrl disable_ctrl;
+ struct pmu_ap_cmd_kick_ctrl kick_ctrl;
+};
+
+/*
+ * Structure for generic AP Message
+ */
+struct pmu_ap_msg_common {
+ u8 msg_type;
+ u16 msg_id;
+};
+
+/*
+ * Structure for INIT_ACK Message
+ */
+struct pmu_ap_msg_init_ack {
+ u8 msg_type;
+ u16 msg_id;
+ u8 ctrl_id;
+ u32 stats_dmem_offset;
+};
+
+/*
+ * Defines for AP messages
+ */
+enum {
+ PMU_AP_MSG_ID_INIT_ACK = 0x0,
+};
+
+/*
+ * AP Message
+ */
+union pmu_ap_msg {
+ u8 msg_type;
+ struct pmu_ap_msg_common cmn;
+ struct pmu_ap_msg_init_ack init_ack;
+};
+
+/* Default Sampling Period of AELPG */
+#define APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US (1000000)
+
+/* Default values of APCTRL parameters */
+#define APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US (100)
+#define APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US (10000)
+#define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000)
+#define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (100)
+
+/*
+ * Disable reason for Adaptive Power Controller
+ */
+enum {
+ APCTRL_DISABLE_REASON_RM_UNLOAD,
+ APCTRL_DISABLE_REASON_RMCTRL,
+};
+
+/*
+ * Adaptive Power Controller
+ */
+struct ap_ctrl {
+ u32 stats_dmem_offset;
+ u32 disable_reason_mask;
+ struct pmu_ap_ctrl_stat stat_cache;
+ u8 b_ready;
+};
+
+/*
+ * Adaptive Power structure
+ *
+ * ap structure provides generic infrastructure to make any power feature
+ * adaptive.
+ */
+struct pmu_ap {
+ u32 supported_mask;
+ struct ap_ctrl ap_ctrl[PMU_AP_CTRL_ID_MAX];
+};
+
+
+enum {
+ GK20A_PMU_DMAIDX_UCODE = 0,
+ GK20A_PMU_DMAIDX_VIRT = 1,
+ GK20A_PMU_DMAIDX_PHYS_VID = 2,
+ GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3,
+ GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4,
+ GK20A_PMU_DMAIDX_RSVD = 5,
+ GK20A_PMU_DMAIDX_PELPG = 6,
+ GK20A_PMU_DMAIDX_END = 7
+};
+
+struct pmu_mem_v0 {
+ u32 dma_base;
+ u8 dma_offset;
+ u8 dma_idx;
+};
+
+struct pmu_mem_v1 {
+ u32 dma_base;
+ u8 dma_offset;
+ u8 dma_idx;
+ u16 fb_size;
+};
+
+struct pmu_dmem {
+ u16 size;
+ u32 offset;
+};
+
+/* Make sure size of this structure is a multiple of 4 bytes */
+struct pmu_cmdline_args_v0 {
+ u32 cpu_freq_hz; /* Frequency of the clock driving PMU */
+ u32 falc_trace_size; /* falctrace buffer size (bytes) */
+ u32 falc_trace_dma_base; /* 256-byte block address */
+ u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */
+ struct pmu_mem_v0 gc6_ctx; /* dmem offset of gc6 context */
+};
+
+struct pmu_cmdline_args_v1 {
+ u32 cpu_freq_hz; /* Frequency of the clock driving PMU */
+ u32 falc_trace_size; /* falctrace buffer size (bytes) */
+ u32 falc_trace_dma_base; /* 256-byte block address */
+ u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */
+ u8 secure_mode;
+ struct pmu_mem_v1 gc6_ctx; /* dmem offset of gc6 context */
+};
+
+#define GK20A_PMU_DMEM_BLKSIZE2 8
+
+#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32
+#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64
+
+struct pmu_ucode_desc {
+ u32 descriptor_size;
+ u32 image_size;
+ u32 tools_version;
+ u32 app_version;
+ char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
+ u32 bootloader_start_offset;
+ u32 bootloader_size;
+ u32 bootloader_imem_offset;
+ u32 bootloader_entry_point;
+ u32 app_start_offset;
+ u32 app_size;
+ u32 app_imem_offset;
+ u32 app_imem_entry;
+ u32 app_dmem_offset;
+ u32 app_resident_code_offset; /* Offset from appStartOffset */
+ u32 app_resident_code_size; /* Exact size of the resident code ( potentially contains CRC inside at the end ) */
+ u32 app_resident_data_offset; /* Offset from appStartOffset */
+	u32 app_resident_data_size;   /* Exact size of the resident data ( potentially contains CRC inside at the end ) */
+ u32 nb_overlays;
+ struct {u32 start; u32 size;} load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
+ u32 compressed;
+};
+
+#define PMU_UNIT_REWIND (0x00)
+#define PMU_UNIT_I2C (0x01)
+#define PMU_UNIT_SEQ (0x02)
+#define PMU_UNIT_PG (0x03)
+#define PMU_UNIT_AVAILABLE1 (0x04)
+#define PMU_UNIT_AVAILABLE2 (0x05)
+#define PMU_UNIT_MEM (0x06)
+#define PMU_UNIT_INIT (0x07)
+#define PMU_UNIT_FBBA (0x08)
+#define PMU_UNIT_DIDLE (0x09)
+#define PMU_UNIT_AVAILABLE3 (0x0A)
+#define PMU_UNIT_AVAILABLE4 (0x0B)
+#define PMU_UNIT_HDCP_MAIN (0x0C)
+#define PMU_UNIT_HDCP_V (0x0D)
+#define PMU_UNIT_HDCP_SRM (0x0E)
+#define PMU_UNIT_NVDPS (0x0F)
+#define PMU_UNIT_DEINIT (0x10)
+#define PMU_UNIT_AVAILABLE5 (0x11)
+#define PMU_UNIT_PERFMON (0x12)
+#define PMU_UNIT_FAN (0x13)
+#define PMU_UNIT_PBI (0x14)
+#define PMU_UNIT_ISOBLIT (0x15)
+#define PMU_UNIT_DETACH (0x16)
+#define PMU_UNIT_DISP (0x17)
+#define PMU_UNIT_HDCP (0x18)
+#define PMU_UNIT_REGCACHE (0x19)
+#define PMU_UNIT_SYSMON (0x1A)
+#define PMU_UNIT_THERM (0x1B)
+#define PMU_UNIT_PMGR (0x1C)
+#define PMU_UNIT_PERF (0x1D)
+#define PMU_UNIT_PCM (0x1E)
+#define PMU_UNIT_RC (0x1F)
+#define PMU_UNIT_NULL (0x20)
+#define PMU_UNIT_LOGGER (0x21)
+#define PMU_UNIT_SMBPBI (0x22)
+#define PMU_UNIT_END (0x23)
+
+#define PMU_UNIT_TEST_START (0xFE)
+#define PMU_UNIT_END_SIM (0xFF)
+#define PMU_UNIT_TEST_END (0xFF)
+
+#define PMU_UNIT_ID_IS_VALID(id) \
+ (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
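+/*
+ * Illustrative checks (editorial note, not part of the original change):
+ * PMU_UNIT_PERFMON (0x12) is valid via (id < PMU_UNIT_END), 0x40 fails both
+ * tests, and PMU_UNIT_END_SIM (0xFF) is accepted via the
+ * (id >= PMU_UNIT_TEST_START) range.
+ */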
+
+#define PMU_DMEM_ALLOC_ALIGNMENT (32)
+#define PMU_DMEM_ALIGNMENT (4)
+
+#define PMU_CMD_FLAGS_PMU_MASK (0xF0)
+
+#define PMU_CMD_FLAGS_STATUS BIT(0)
+#define PMU_CMD_FLAGS_INTR BIT(1)
+#define PMU_CMD_FLAGS_EVENT BIT(2)
+#define PMU_CMD_FLAGS_WATERMARK BIT(3)
+
+struct pmu_hdr {
+ u8 unit_id;
+ u8 size;
+ u8 ctrl_flags;
+ u8 seq_id;
+};
+#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr)
+#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr)
+
+#define PMU_QUEUE_COUNT 5
+
+struct pmu_allocation_v0 {
+ u8 pad[3];
+ u8 fb_mem_use;
+ struct {
+ struct pmu_dmem dmem;
+ struct pmu_mem_v0 fb;
+ } alloc;
+};
+
+struct pmu_allocation_v1 {
+ struct {
+ struct pmu_dmem dmem;
+ struct pmu_mem_v1 fb;
+ } alloc;
+};
+
+enum {
+ PMU_INIT_MSG_TYPE_PMU_INIT = 0,
+};
+
+struct pmu_init_msg_pmu_v0 {
+ u8 msg_type;
+ u8 pad;
+
+ struct {
+ u16 size;
+ u16 offset;
+ u8 index;
+ u8 pad;
+ } queue_info[PMU_QUEUE_COUNT];
+
+ u16 sw_managed_area_offset;
+ u16 sw_managed_area_size;
+};
+
+struct pmu_init_msg_pmu_v1 {
+ u8 msg_type;
+ u8 pad;
+ u16 os_debug_entry_point;
+
+ struct {
+ u16 size;
+ u16 offset;
+ u8 index;
+ u8 pad;
+ } queue_info[PMU_QUEUE_COUNT];
+
+ u16 sw_managed_area_offset;
+ u16 sw_managed_area_size;
+};
+
+union pmu_init_msg_pmu {
+ struct pmu_init_msg_pmu_v0 v0;
+ struct pmu_init_msg_pmu_v1 v1;
+};
+
+struct pmu_init_msg {
+ union {
+ u8 msg_type;
+ struct pmu_init_msg_pmu_v1 pmu_init_v1;
+ struct pmu_init_msg_pmu_v0 pmu_init_v0;
+ };
+};
+
+enum {
+ PMU_PG_ELPG_MSG_INIT_ACK,
+ PMU_PG_ELPG_MSG_DISALLOW_ACK,
+ PMU_PG_ELPG_MSG_ALLOW_ACK,
+ PMU_PG_ELPG_MSG_FREEZE_ACK,
+ PMU_PG_ELPG_MSG_FREEZE_ABORT,
+ PMU_PG_ELPG_MSG_UNFREEZE_ACK,
+};
+
+struct pmu_pg_msg_elpg_msg {
+ u8 msg_type;
+ u8 engine_id;
+ u16 msg;
+};
+
+enum {
+ PMU_PG_STAT_MSG_RESP_DMEM_OFFSET = 0,
+};
+
+struct pmu_pg_msg_stat {
+ u8 msg_type;
+ u8 engine_id;
+ u16 sub_msg_id;
+ u32 data;
+};
+
+enum {
+ PMU_PG_MSG_ENG_BUF_LOADED,
+ PMU_PG_MSG_ENG_BUF_UNLOADED,
+ PMU_PG_MSG_ENG_BUF_FAILED,
+};
+
+struct pmu_pg_msg_eng_buf_stat {
+ u8 msg_type;
+ u8 engine_id;
+ u8 buf_idx;
+ u8 status;
+};
+
+struct pmu_pg_msg {
+ union {
+ u8 msg_type;
+ struct pmu_pg_msg_elpg_msg elpg_msg;
+ struct pmu_pg_msg_stat stat;
+ struct pmu_pg_msg_eng_buf_stat eng_buf_stat;
+ /* TBD: other pg messages */
+ union pmu_ap_msg ap_msg;
+ };
+};
+
+enum {
+ PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
+};
+
+struct pmu_rc_msg_unhandled_cmd {
+ u8 msg_type;
+ u8 unit_id;
+};
+
+struct pmu_rc_msg {
+ u8 msg_type;
+ struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
+};
+
+enum {
+ PMU_PG_CMD_ID_ELPG_CMD = 0,
+ PMU_PG_CMD_ID_ENG_BUF_LOAD,
+ PMU_PG_CMD_ID_ENG_BUF_UNLOAD,
+ PMU_PG_CMD_ID_PG_STAT,
+ PMU_PG_CMD_ID_PG_LOG_INIT,
+ PMU_PG_CMD_ID_PG_LOG_FLUSH,
+ PMU_PG_CMD_ID_PG_PARAM,
+ PMU_PG_CMD_ID_ELPG_INIT,
+ PMU_PG_CMD_ID_ELPG_POLL_CTXSAVE,
+ PMU_PG_CMD_ID_ELPG_ABORT_POLL,
+ PMU_PG_CMD_ID_ELPG_PWR_UP,
+ PMU_PG_CMD_ID_ELPG_DISALLOW,
+ PMU_PG_CMD_ID_ELPG_ALLOW,
+ PMU_PG_CMD_ID_AP,
+ RM_PMU_PG_CMD_ID_PSI,
+ RM_PMU_PG_CMD_ID_CG,
+ PMU_PG_CMD_ID_ZBC_TABLE_UPDATE,
+ PMU_PG_CMD_ID_PWR_RAIL_GATE_DISABLE = 0x20,
+ PMU_PG_CMD_ID_PWR_RAIL_GATE_ENABLE,
+ PMU_PG_CMD_ID_PWR_RAIL_SMU_MSG_DISABLE
+};
+
+enum {
+ PMU_PG_ELPG_CMD_INIT,
+ PMU_PG_ELPG_CMD_DISALLOW,
+ PMU_PG_ELPG_CMD_ALLOW,
+ PMU_PG_ELPG_CMD_FREEZE,
+ PMU_PG_ELPG_CMD_UNFREEZE,
+};
+
+struct pmu_pg_cmd_elpg_cmd {
+ u8 cmd_type;
+ u8 engine_id;
+ u16 cmd;
+};
+
+struct pmu_pg_cmd_eng_buf_load {
+ u8 cmd_type;
+ u8 engine_id;
+ u8 buf_idx;
+ u8 pad;
+ u16 buf_size;
+ u32 dma_base;
+ u8 dma_offset;
+ u8 dma_idx;
+};
+
+enum {
+ PMU_PG_STAT_CMD_ALLOC_DMEM = 0,
+};
+
+struct pmu_pg_cmd_stat {
+ u8 cmd_type;
+ u8 engine_id;
+ u16 sub_cmd_id;
+ u32 data;
+};
+
+struct pmu_pg_cmd {
+ union {
+ u8 cmd_type;
+ struct pmu_pg_cmd_elpg_cmd elpg_cmd;
+ struct pmu_pg_cmd_eng_buf_load eng_buf_load;
+ struct pmu_pg_cmd_stat stat;
+ /* TBD: other pg commands */
+ union pmu_ap_cmd ap_cmd;
+ };
+};
+
+/* PERFMON */
+#define PMU_DOMAIN_GROUP_PSTATE 0
+#define PMU_DOMAIN_GROUP_GPC2CLK 1
+#define PMU_DOMAIN_GROUP_NUM 2
+
+/* TBD: smart strategy */
+#define PMU_PERFMON_PCT_TO_INC 58
+#define PMU_PERFMON_PCT_TO_DEC 23
+
+struct pmu_perfmon_counter {
+ u8 index;
+ u8 flags;
+ u8 group_id;
+ u8 valid;
+ u16 upper_threshold; /* units of 0.01% */
+ u16 lower_threshold; /* units of 0.01% */
+};
+
+#define PMU_PERFMON_FLAG_ENABLE_INCREASE (0x00000001)
+#define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002)
+#define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004)
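+/*
+ * Illustrative use (editorial note, not part of the original change):
+ * pmu_perfmon_start_sampling() sets only ENABLE_DECREASE at the maximum
+ * gpc2clk rate, only ENABLE_INCREASE at the minimum rate, both otherwise,
+ * and always ORs in CLEAR_PREV.
+ */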
+
+/* PERFMON CMD */
+enum {
+ PMU_PERFMON_CMD_ID_START = 0,
+ PMU_PERFMON_CMD_ID_STOP = 1,
+ PMU_PERFMON_CMD_ID_INIT = 2
+};
+
+struct pmu_perfmon_cmd_start_v1 {
+ u8 cmd_type;
+ u8 group_id;
+ u8 state_id;
+ u8 flags;
+ struct pmu_allocation_v1 counter_alloc;
+};
+
+struct pmu_perfmon_cmd_start_v0 {
+ u8 cmd_type;
+ u8 group_id;
+ u8 state_id;
+ u8 flags;
+ struct pmu_allocation_v0 counter_alloc;
+};
+
+struct pmu_perfmon_cmd_stop {
+ u8 cmd_type;
+};
+
+struct pmu_perfmon_cmd_init_v1 {
+ u8 cmd_type;
+ u8 to_decrease_count;
+ u8 base_counter_id;
+ u32 sample_period_us;
+ struct pmu_allocation_v1 counter_alloc;
+ u8 num_counters;
+ u8 samples_in_moving_avg;
+ u16 sample_buffer;
+};
+
+struct pmu_perfmon_cmd_init_v0 {
+ u8 cmd_type;
+ u8 to_decrease_count;
+ u8 base_counter_id;
+ u32 sample_period_us;
+ struct pmu_allocation_v0 counter_alloc;
+ u8 num_counters;
+ u8 samples_in_moving_avg;
+ u16 sample_buffer;
+};
+
+struct pmu_perfmon_cmd {
+ union {
+ u8 cmd_type;
+ struct pmu_perfmon_cmd_start_v0 start_v0;
+ struct pmu_perfmon_cmd_start_v1 start_v1;
+ struct pmu_perfmon_cmd_stop stop;
+ struct pmu_perfmon_cmd_init_v0 init_v0;
+ struct pmu_perfmon_cmd_init_v1 init_v1;
+ };
+};
+
+struct pmu_zbc_cmd {
+ u8 cmd_type;
+ u8 pad;
+ u16 entry_mask;
+};
+
+/* PERFMON MSG */
+enum {
+ PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
+ PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
+ PMU_PERFMON_MSG_ID_INIT_EVENT = 2,
+ PMU_PERFMON_MSG_ID_ACK = 3
+};
+
+struct pmu_perfmon_msg_generic {
+ u8 msg_type;
+ u8 state_id;
+ u8 group_id;
+ u8 data;
+};
+
+struct pmu_perfmon_msg {
+ union {
+ u8 msg_type;
+ struct pmu_perfmon_msg_generic gen;
+ };
+};
+
+
+struct pmu_cmd {
+ struct pmu_hdr hdr;
+ union {
+ struct pmu_perfmon_cmd perfmon;
+ struct pmu_pg_cmd pg;
+ struct pmu_zbc_cmd zbc;
+ } cmd;
+};
+
+struct pmu_msg {
+ struct pmu_hdr hdr;
+ union {
+ struct pmu_init_msg init;
+ struct pmu_perfmon_msg perfmon;
+ struct pmu_pg_msg pg;
+ struct pmu_rc_msg rc;
+ } msg;
+};
+
+#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2
+#define PMU_SHA1_GID_SIGNATURE_SIZE 4
+
+#define PMU_SHA1_GID_SIZE 16
+
+struct pmu_sha1_gid {
+ bool valid;
+ u8 gid[PMU_SHA1_GID_SIZE];
+};
+
+struct pmu_sha1_gid_data {
+ u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
+ u8 gid[PMU_SHA1_GID_SIZE];
+};
+
+#define PMU_COMMAND_QUEUE_HPQ 0 /* write by sw, read by pmu, protected by sw mutex lock */
+#define PMU_COMMAND_QUEUE_LPQ 1 /* write by sw, read by pmu, protected by sw mutex lock */
+#define PMU_COMMAND_QUEUE_BIOS 2 /* read/write by sw/hw, protected by hw pmu mutex, id = 2 */
+#define PMU_COMMAND_QUEUE_SMI 3 /* read/write by sw/hw, protected by hw pmu mutex, id = 3 */
+#define PMU_MESSAGE_QUEUE 4 /* write by pmu, read by sw, accessed by interrupt handler, no lock */
+#define PMU_QUEUE_COUNT 5
+
+enum {
+ PMU_MUTEX_ID_RSVD1 = 0 ,
+ PMU_MUTEX_ID_GPUSER ,
+ PMU_MUTEX_ID_QUEUE_BIOS ,
+ PMU_MUTEX_ID_QUEUE_SMI ,
+ PMU_MUTEX_ID_GPMUTEX ,
+ PMU_MUTEX_ID_I2C ,
+ PMU_MUTEX_ID_RMLOCK ,
+ PMU_MUTEX_ID_MSGBOX ,
+ PMU_MUTEX_ID_FIFO ,
+ PMU_MUTEX_ID_PG ,
+ PMU_MUTEX_ID_GR ,
+ PMU_MUTEX_ID_CLK ,
+ PMU_MUTEX_ID_RSVD6 ,
+ PMU_MUTEX_ID_RSVD7 ,
+ PMU_MUTEX_ID_RSVD8 ,
+ PMU_MUTEX_ID_RSVD9 ,
+ PMU_MUTEX_ID_INVALID
+};
+
+#define PMU_IS_COMMAND_QUEUE(id) \
+ ((id) < PMU_MESSAGE_QUEUE)
+
+#define PMU_IS_SW_COMMAND_QUEUE(id) \
+ (((id) == PMU_COMMAND_QUEUE_HPQ) || \
+ ((id) == PMU_COMMAND_QUEUE_LPQ))
+
+#define PMU_IS_MESSAGE_QUEUE(id) \
+ ((id) == PMU_MESSAGE_QUEUE)
+
+enum {
+ OFLAG_READ = 0,
+ OFLAG_WRITE
+};
+
+#define QUEUE_SET (true)
+#define QUEUE_GET (false)
+
+#define QUEUE_ALIGNMENT (4)
+
+#define PMU_PGENG_GR_BUFFER_IDX_INIT (0)
+#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1)
+#define PMU_PGENG_GR_BUFFER_IDX_FECS (2)
+
+enum {
+ PMU_DMAIDX_UCODE = 0,
+ PMU_DMAIDX_VIRT = 1,
+ PMU_DMAIDX_PHYS_VID = 2,
+ PMU_DMAIDX_PHYS_SYS_COH = 3,
+ PMU_DMAIDX_PHYS_SYS_NCOH = 4,
+ PMU_DMAIDX_RSVD = 5,
+ PMU_DMAIDX_PELPG = 6,
+ PMU_DMAIDX_END = 7
+};
+
+struct pmu_gk20a;
+struct pmu_queue;
+
+struct pmu_queue {
+
+ /* used by hw, for BIOS/SMI queue */
+ u32 mutex_id;
+ u32 mutex_lock;
+ /* used by sw, for LPQ/HPQ queue */
+ struct mutex mutex;
+
+ /* current write position */
+ u32 position;
+ /* physical dmem offset where this queue begins */
+ u32 offset;
+ /* logical queue identifier */
+ u32 id;
+ /* physical queue index */
+ u32 index;
+ /* in bytes */
+ u32 size;
+
+ /* open-flag */
+ u32 oflag;
+ bool opened; /* opened implies locked */
+ bool locked; /* check free space after setting locked but before setting opened */
+};
+
+
+#define PMU_MUTEX_ID_IS_VALID(id) \
+ ((id) < PMU_MUTEX_ID_INVALID)
+
+#define PMU_INVALID_MUTEX_OWNER_ID (0)
+
+struct pmu_mutex {
+ u32 id;
+ u32 index;
+ u32 ref_cnt;
+};
+
+#define PMU_MAX_NUM_SEQUENCES (256)
+#define PMU_SEQ_BIT_SHIFT (5)
+#define PMU_SEQ_TBL_SIZE \
+ (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
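+/*
+ * Illustrative sizing (editorial note, not part of the original change):
+ * with PMU_MAX_NUM_SEQUENCES = 256 and a shift of 5 (assuming 32-bit bitmap
+ * words), pmu_seq_tbl spans 256 >> 5 = 8 unsigned longs, one bit per
+ * sequence.
+ */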
+
+#define PMU_INVALID_SEQ_DESC (~0)
+
+enum {
+ PMU_SEQ_STATE_FREE = 0,
+ PMU_SEQ_STATE_PENDING,
+ PMU_SEQ_STATE_USED,
+ PMU_SEQ_STATE_CANCELLED
+};
+
+struct pmu_payload {
+ struct {
+ void *buf;
+ u32 offset;
+ u32 size;
+ } in, out;
+};
+
+typedef void (*pmu_callback)(struct gk20a *, struct pmu_msg *, void *, u32,
+ u32);
+
+struct pmu_sequence {
+ u8 id;
+ u32 state;
+ u32 desc;
+ struct pmu_msg *msg;
+ union {
+ struct pmu_allocation_v0 in_v0;
+ struct pmu_allocation_v1 in_v1;
+ };
+ union {
+ struct pmu_allocation_v0 out_v0;
+ struct pmu_allocation_v1 out_v1;
+ };
+ u8 *out_payload;
+ pmu_callback callback;
+	void *cb_params;
+};
+
+struct pmu_pg_stats {
+ u64 pg_entry_start_timestamp;
+ u64 pg_ingating_start_timestamp;
+ u64 pg_exit_start_timestamp;
+ u64 pg_ungating_start_timestamp;
+ u32 pg_avg_entry_time_us;
+ u32 pg_ingating_cnt;
+ u32 pg_ingating_time_us;
+ u32 pg_avg_exit_time_us;
+ u32 pg_ungating_count;
+ u32 pg_ungating_time_us;
+ u32 pg_gating_cnt;
+ u32 pg_gating_deny_cnt;
+};
+
+#define PMU_PG_IDLE_THRESHOLD_SIM 1000
+#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM 4000000
+/* TBD: QT or else ? */
+#define PMU_PG_IDLE_THRESHOLD 15000
+#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000
+
+/* state transition :
+ OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
+ ON => OFF is always synchronized */
+#define PMU_ELPG_STAT_OFF 0 /* elpg is off */
+#define PMU_ELPG_STAT_ON 1 /* elpg is on */
+#define PMU_ELPG_STAT_ON_PENDING 2 /* elpg is off, ALLOW cmd has been sent, wait for ack */
+#define PMU_ELPG_STAT_OFF_PENDING 3 /* elpg is on, DISALLOW cmd has been sent, wait for ack */
+#define PMU_ELPG_STAT_OFF_ON_PENDING 4 /* elpg is off, caller has requested on, but ALLOW
+ cmd hasn't been sent due to ENABLE_ALLOW delay */
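+/*
+ * Illustrative sequence (editorial note, not part of the original change):
+ * gk20a_pmu_enable_elpg() moves OFF -> ON_PENDING and posts an ALLOW cmd;
+ * the ELPG message handler is expected to move ON_PENDING -> ON on the
+ * ALLOW ack. gk20a_pmu_disable_elpg() moves ON -> OFF_PENDING, posts a
+ * DISALLOW cmd and waits for the handler to reach OFF.
+ */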
+
+/* Falcon Register index */
+#define PMU_FALCON_REG_R0 (0)
+#define PMU_FALCON_REG_R1 (1)
+#define PMU_FALCON_REG_R2 (2)
+#define PMU_FALCON_REG_R3 (3)
+#define PMU_FALCON_REG_R4 (4)
+#define PMU_FALCON_REG_R5 (5)
+#define PMU_FALCON_REG_R6 (6)
+#define PMU_FALCON_REG_R7 (7)
+#define PMU_FALCON_REG_R8 (8)
+#define PMU_FALCON_REG_R9 (9)
+#define PMU_FALCON_REG_R10 (10)
+#define PMU_FALCON_REG_R11 (11)
+#define PMU_FALCON_REG_R12 (12)
+#define PMU_FALCON_REG_R13 (13)
+#define PMU_FALCON_REG_R14 (14)
+#define PMU_FALCON_REG_R15 (15)
+#define PMU_FALCON_REG_IV0 (16)
+#define PMU_FALCON_REG_IV1 (17)
+#define PMU_FALCON_REG_UNDEFINED (18)
+#define PMU_FALCON_REG_EV (19)
+#define PMU_FALCON_REG_SP (20)
+#define PMU_FALCON_REG_PC (21)
+#define PMU_FALCON_REG_IMB (22)
+#define PMU_FALCON_REG_DMB (23)
+#define PMU_FALCON_REG_CSW (24)
+#define PMU_FALCON_REG_CCR (25)
+#define PMU_FALCON_REG_SEC (26)
+#define PMU_FALCON_REG_CTX (27)
+#define PMU_FALCON_REG_EXCI (28)
+#define PMU_FALCON_REG_RSVD0 (29)
+#define PMU_FALCON_REG_RSVD1 (30)
+#define PMU_FALCON_REG_RSVD2 (31)
+#define PMU_FALCON_REG_SIZE (32)
+
+struct pmu_gk20a {
+
+ struct gk20a *g;
+
+ struct pmu_ucode_desc *desc;
+ struct pmu_mem_desc ucode;
+
+ struct pmu_mem_desc pg_buf;
+ /* TBD: remove this if ZBC seq is fixed */
+ struct pmu_mem_desc seq_buf;
+ bool buf_loaded;
+
+ struct pmu_sha1_gid gid_info;
+
+ struct pmu_queue queue[PMU_QUEUE_COUNT];
+
+ struct pmu_sequence *seq;
+ unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
+ u32 next_seq_desc;
+
+ struct pmu_mutex *mutex;
+ u32 mutex_cnt;
+
+ struct mutex pmu_copy_lock;
+ struct mutex pmu_seq_lock;
+
+ struct gk20a_allocator dmem;
+
+ u32 *ucode_image;
+ bool pmu_ready;
+
+ u32 zbc_save_done;
+
+ u32 stat_dmem_offset;
+
+ bool elpg_ready;
+ u32 elpg_stat;
+ wait_queue_head_t pg_wq;
+
+#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */
+ struct delayed_work elpg_enable; /* deferred elpg enable */
+ struct work_struct pg_init;
+ bool elpg_enable_allow; /* true after init, false after disable, true after delay */
+ struct mutex elpg_mutex; /* protect elpg enable/disable */
+ int elpg_refcnt; /* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
+
+ struct pmu_perfmon_counter perfmon_counter;
+ u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
+
+ bool initialized;
+
+ void (*remove_support)(struct pmu_gk20a *pmu);
+ bool sw_ready;
+ bool perfmon_ready;
+
+ u32 sample_buffer;
+
+ struct mutex isr_mutex;
+ bool zbc_ready;
+ union {
+ struct pmu_cmdline_args_v0 args_v0;
+ struct pmu_cmdline_args_v1 args_v1;
+ };
+};
+
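+/*
+ * Snapshot of PMU bookkeeping that is presumably preserved across a PMU
+ * teardown/re-init cycle (e.g. around railgating). The exact caller is not
+ * shown in this file; the field list simply mirrors the corresponding
+ * members of struct pmu_gk20a above.
+ */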
+struct gk20a_pmu_save_state {
+ struct pmu_sequence *seq;
+ u32 next_seq_desc;
+ struct pmu_mutex *mutex;
+ u32 mutex_cnt;
+ struct pmu_ucode_desc *desc;
+ struct pmu_mem_desc ucode;
+ struct pmu_mem_desc seq_buf;
+ struct pmu_mem_desc pg_buf;
+ struct delayed_work elpg_enable;
+ wait_queue_head_t pg_wq;
+ bool sw_ready;
+ struct work_struct pg_init;
+};
+
+int gk20a_init_pmu_support(struct gk20a *g);
+int gk20a_init_pmu_setup_hw2(struct gk20a *g);
+
+void gk20a_pmu_isr(struct gk20a *g);
+
+/* send a cmd to pmu */
+int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg,
+ struct pmu_payload *payload, u32 queue_id,
+ pmu_callback callback, void* cb_param,
+ u32 *seq_desc, unsigned long timeout);
+
+int gk20a_pmu_enable_elpg(struct gk20a *g);
+int gk20a_pmu_disable_elpg(struct gk20a *g);
+
+void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
+
+int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable);
+
+int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token);
+int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token);
+int gk20a_pmu_destroy(struct gk20a *g);
+int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
+int gk20a_pmu_debugfs_init(struct platform_device *dev);
+void gk20a_pmu_reset_load_counters(struct gk20a *g);
+void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
+ u32 *total_cycles);
+
+#endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c
new file mode 100644
index 000000000000..aea1a80bbcad
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.c
@@ -0,0 +1,91 @@
+/*
+ * GK20A priv ring
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h> /* for usleep_range */
+
+#include "gk20a.h"
+#include "hw_mc_gk20a.h"
+#include "hw_pri_ringmaster_gk20a.h"
+#include "hw_pri_ringstation_sys_gk20a.h"
+#include "hw_trim_gk20a.h"
+
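+/*
+ * Bring the PRIV ring out of reset: clear the gpc2clk bypass divider,
+ * reset the priv ring unit through MC, then issue a ringmaster command and
+ * program the system ring station decode config. The 0x4 / 0x2 writes use
+ * raw values; their decoded meaning is not spelled out in this file.
+ */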
+void gk20a_reset_priv_ring(struct gk20a *g)
+{
+ u32 data;
+
+ if (tegra_platform_is_linsim())
+ return;
+
+ data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
+ data = set_field(data,
+ trim_sys_gpc2clk_out_bypdiv_m(),
+ trim_sys_gpc2clk_out_bypdiv_f(0));
+ gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
+
+ gk20a_reset(g, mc_enable_priv_ring_enabled_f());
+
+ gk20a_writel(g, pri_ringmaster_command_r(), 0x4);
+
+ gk20a_writel(g, pri_ringstation_sys_decode_config_r(), 0x2);
+
+ gk20a_readl(g, pri_ringstation_sys_decode_config_r());
+}
+
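+/*
+ * PRIV ringmaster interrupt handler: log both status registers, reset the
+ * ring if any of the low three status bits are set, then ack the interrupt
+ * through the ringmaster command register and poll (bounded by the retry
+ * count) until the command field reads back as "no command".
+ */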
+void gk20a_priv_ring_isr(struct gk20a *g)
+{
+ u32 status0, status1;
+ u32 cmd;
+ s32 retry = 100;
+
+ if (tegra_platform_is_linsim())
+ return;
+
+ status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
+ status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());
+
+ gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
+ "status1: 0x%08x", status0, status1);
+
+ if (status0 & (0x1 | 0x2 | 0x4)) {
+ gk20a_reset_priv_ring(g);
+ }
+
+ cmd = gk20a_readl(g, pri_ringmaster_command_r());
+ cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
+ pri_ringmaster_command_cmd_ack_interrupt_f());
+ gk20a_writel(g, pri_ringmaster_command_r(), cmd);
+
+ do {
+ cmd = pri_ringmaster_command_cmd_v(
+ gk20a_readl(g, pri_ringmaster_command_r()));
+ usleep_range(20, 40);
+ } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);
+
+ if (retry <= 0)
+ gk20a_warn(dev_from_gk20a(g),
+ "priv ringmaster cmd ack too many retries");
+
+ status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
+ status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());
+
+ gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
+ " status1: 0x%08x", status0, status1);
+}
+
diff --git a/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h
new file mode 100644
index 000000000000..cb9d49c7be07
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/priv_ring_gk20a.h
@@ -0,0 +1,27 @@
+/*
+ * drivers/video/tegra/host/gk20a/priv_ring_gk20a.h
+ *
+ * GK20A PRIV ringmaster
+ *
+ * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __PRIV_RING_GK20A_H__
+#define __PRIV_RING_GK20A_H__
+
+void gk20a_reset_priv_ring(struct gk20a *g);
+void gk20a_priv_ring_isr(struct gk20a *g);
+
+#endif /*__PRIV_RING_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
new file mode 100644
index 000000000000..4a115fb10fac
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
@@ -0,0 +1,704 @@
+/*
+ *
+ * Tegra GK20A GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/bsearch.h>
+#include <linux/nvhost_dbg_gpu_ioctl.h>
+
+#include "gk20a.h"
+#include "gr_gk20a.h"
+#include "dbg_gpu_gk20a.h"
+#include "regops_gk20a.h"
+
+
+
+struct regop_offset_range {
+ u32 base:24;
+ u32 count:8;
+};
+
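+/*
+ * A whitelist entry packs a 24-bit base byte offset and a count of
+ * consecutive 32-bit registers. The comparator below is used with
+ * bsearch(), so the range tables must stay sorted by base offset.
+ */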
+static int regop_bsearch_range_cmp(const void *pkey, const void *pelem)
+{
+ u32 key = *(u32 *)pkey;
+ struct regop_offset_range *prange = (struct regop_offset_range *)pelem;
+ if (key < prange->base)
+ return -1;
+ else if (prange->base <= key && key < (prange->base +
+ (prange->count * 4)))
+ return 0;
+ return 1;
+}
+
+static inline bool linear_search(u32 offset, const u32 *list, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ if (list[i] == offset)
+ return true;
+ return false;
+}
+
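+/*
+ * BAR0 ranges the debugger interface may touch for global (non-context)
+ * register ops; each entry is { base, number of consecutive 32-bit regs }.
+ */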
+static const struct regop_offset_range gk20a_global_whitelist_ranges[] = {
+ { 0x000004f0, 1 },
+ { 0x00001a00, 3 },
+ { 0x0000259c, 1 },
+ { 0x0000280c, 1 },
+ { 0x00009400, 1 },
+ { 0x00009410, 1 },
+ { 0x00020200, 1 },
+ { 0x00022430, 7 },
+ { 0x00022548, 1 },
+ { 0x00100c18, 3 },
+ { 0x00100c84, 1 },
+ { 0x00100cc4, 1 },
+ { 0x00106640, 1 },
+ { 0x0010a0a8, 1 },
+ { 0x0010a4f0, 1 },
+ { 0x0010e064, 1 },
+ { 0x0010e164, 1 },
+ { 0x0010e490, 1 },
+ { 0x00110100, 1 },
+ { 0x00140028, 1 },
+ { 0x001408dc, 1 },
+ { 0x00140a5c, 1 },
+ { 0x001410dc, 1 },
+ { 0x0014125c, 1 },
+ { 0x0017e028, 1 },
+ { 0x0017e8dc, 1 },
+ { 0x0017ea5c, 1 },
+ { 0x0017f0dc, 1 },
+ { 0x0017f25c, 1 },
+ { 0x00180000, 68 },
+ { 0x00180200, 68 },
+ { 0x001a0000, 68 },
+ { 0x001b0000, 68 },
+ { 0x001b0200, 68 },
+ { 0x001b0400, 68 },
+ { 0x001b0600, 68 },
+ { 0x001b4000, 3 },
+ { 0x001b4010, 3 },
+ { 0x001b4020, 3 },
+ { 0x001b4040, 3 },
+ { 0x001b4050, 3 },
+ { 0x001b4060, 16 },
+ { 0x001b40a4, 1 },
+ { 0x001b4100, 6 },
+ { 0x001b4124, 2 },
+ { 0x001b8000, 7 },
+ { 0x001bc000, 7 },
+ { 0x001be000, 7 },
+ { 0x00400500, 1 },
+ { 0x00400700, 1 },
+ { 0x0040415c, 1 },
+ { 0x00405850, 1 },
+ { 0x00405908, 1 },
+ { 0x00405b40, 1 },
+ { 0x00405b50, 1 },
+ { 0x00406024, 1 },
+ { 0x00407010, 1 },
+ { 0x00407808, 1 },
+ { 0x0040803c, 1 },
+ { 0x0040880c, 1 },
+ { 0x00408910, 1 },
+ { 0x00408984, 1 },
+ { 0x004090a8, 1 },
+ { 0x004098a0, 1 },
+ { 0x0041000c, 1 },
+ { 0x00410110, 1 },
+ { 0x00410184, 1 },
+ { 0x00418384, 1 },
+ { 0x004184a0, 1 },
+ { 0x00418604, 1 },
+ { 0x00418680, 1 },
+ { 0x00418714, 1 },
+ { 0x0041881c, 1 },
+ { 0x004188c8, 2 },
+ { 0x00418b04, 1 },
+ { 0x00418c04, 1 },
+ { 0x00418c64, 2 },
+ { 0x00418c88, 1 },
+ { 0x00418cb4, 2 },
+ { 0x00418d00, 1 },
+ { 0x00418d28, 2 },
+ { 0x00418e08, 1 },
+ { 0x00418e1c, 2 },
+ { 0x00418f08, 1 },
+ { 0x00418f20, 2 },
+ { 0x00419000, 1 },
+ { 0x0041900c, 1 },
+ { 0x00419018, 1 },
+ { 0x00419854, 1 },
+ { 0x00419ab0, 1 },
+ { 0x00419ab8, 3 },
+ { 0x00419ac8, 1 },
+ { 0x00419c0c, 1 },
+ { 0x00419c8c, 3 },
+ { 0x00419ca8, 1 },
+ { 0x00419d08, 2 },
+ { 0x00419e00, 1 },
+ { 0x00419e0c, 1 },
+ { 0x00419e14, 2 },
+ { 0x00419e24, 2 },
+ { 0x00419e34, 2 },
+ { 0x00419e44, 4 },
+ { 0x00419ea4, 1 },
+ { 0x00419eb0, 1 },
+ { 0x0041a0a0, 1 },
+ { 0x0041a0a8, 1 },
+ { 0x0041a17c, 1 },
+ { 0x0041a890, 2 },
+ { 0x0041a8a0, 3 },
+ { 0x0041a8b0, 2 },
+ { 0x0041b014, 1 },
+ { 0x0041b0a0, 1 },
+ { 0x0041b0cc, 1 },
+ { 0x0041b0e8, 2 },
+ { 0x0041b1dc, 1 },
+ { 0x0041b1f8, 2 },
+ { 0x0041be14, 1 },
+ { 0x0041bea0, 1 },
+ { 0x0041becc, 1 },
+ { 0x0041bee8, 2 },
+ { 0x0041bfdc, 1 },
+ { 0x0041bff8, 2 },
+ { 0x0041c054, 1 },
+ { 0x0041c2b0, 1 },
+ { 0x0041c2b8, 3 },
+ { 0x0041c2c8, 1 },
+ { 0x0041c40c, 1 },
+ { 0x0041c48c, 3 },
+ { 0x0041c4a8, 1 },
+ { 0x0041c508, 2 },
+ { 0x0041c600, 1 },
+ { 0x0041c60c, 1 },
+ { 0x0041c614, 2 },
+ { 0x0041c624, 2 },
+ { 0x0041c634, 2 },
+ { 0x0041c644, 4 },
+ { 0x0041c6a4, 1 },
+ { 0x0041c6b0, 1 },
+ { 0x00500384, 1 },
+ { 0x005004a0, 1 },
+ { 0x00500604, 1 },
+ { 0x00500680, 1 },
+ { 0x00500714, 1 },
+ { 0x0050081c, 1 },
+ { 0x005008c8, 2 },
+ { 0x00500b04, 1 },
+ { 0x00500c04, 1 },
+ { 0x00500c64, 2 },
+ { 0x00500c88, 1 },
+ { 0x00500cb4, 2 },
+ { 0x00500d00, 1 },
+ { 0x00500d28, 2 },
+ { 0x00500e08, 1 },
+ { 0x00500e1c, 2 },
+ { 0x00500f08, 1 },
+ { 0x00500f20, 2 },
+ { 0x00501000, 1 },
+ { 0x0050100c, 1 },
+ { 0x00501018, 1 },
+ { 0x00501854, 1 },
+ { 0x00501ab0, 1 },
+ { 0x00501ab8, 3 },
+ { 0x00501ac8, 1 },
+ { 0x00501c0c, 1 },
+ { 0x00501c8c, 3 },
+ { 0x00501ca8, 1 },
+ { 0x00501d08, 2 },
+ { 0x00501e00, 1 },
+ { 0x00501e0c, 1 },
+ { 0x00501e14, 2 },
+ { 0x00501e24, 2 },
+ { 0x00501e34, 2 },
+ { 0x00501e44, 4 },
+ { 0x00501ea4, 1 },
+ { 0x00501eb0, 1 },
+ { 0x005020a0, 1 },
+ { 0x005020a8, 1 },
+ { 0x0050217c, 1 },
+ { 0x00502890, 2 },
+ { 0x005028a0, 3 },
+ { 0x005028b0, 2 },
+ { 0x00503014, 1 },
+ { 0x005030a0, 1 },
+ { 0x005030cc, 1 },
+ { 0x005030e8, 2 },
+ { 0x005031dc, 1 },
+ { 0x005031f8, 2 },
+ { 0x00503e14, 1 },
+ { 0x00503ea0, 1 },
+ { 0x00503ecc, 1 },
+ { 0x00503ee8, 2 },
+ { 0x00503fdc, 1 },
+ { 0x00503ff8, 2 },
+ { 0x00504054, 1 },
+ { 0x005042b0, 1 },
+ { 0x005042b8, 3 },
+ { 0x005042c8, 1 },
+ { 0x0050440c, 1 },
+ { 0x0050448c, 3 },
+ { 0x005044a8, 1 },
+ { 0x00504508, 2 },
+ { 0x00504600, 1 },
+ { 0x0050460c, 1 },
+ { 0x00504614, 2 },
+ { 0x00504624, 2 },
+ { 0x00504634, 2 },
+ { 0x00504644, 4 },
+ { 0x005046a4, 1 },
+ { 0x005046b0, 1 },
+};
+static const u32 gk20a_global_whitelist_ranges_count =
+ ARRAY_SIZE(gk20a_global_whitelist_ranges);
+
+/* context */
+
+static const struct regop_offset_range gk20a_context_whitelist_ranges[] = {
+ { 0x0000280c, 1 },
+ { 0x00100cc4, 1 },
+ { 0x00400500, 1 },
+ { 0x00405b40, 1 },
+ { 0x00419000, 1 },
+ { 0x00419c8c, 3 },
+ { 0x00419d08, 2 },
+ { 0x00419e04, 3 },
+ { 0x00419e14, 2 },
+ { 0x00419e24, 2 },
+ { 0x00419e34, 2 },
+ { 0x00419e44, 4 },
+ { 0x00419e58, 6 },
+ { 0x00419e84, 5 },
+ { 0x00419ea4, 1 },
+ { 0x00419eac, 2 },
+ { 0x00419f30, 8 },
+ { 0x0041c48c, 3 },
+ { 0x0041c508, 2 },
+ { 0x0041c604, 3 },
+ { 0x0041c614, 2 },
+ { 0x0041c624, 2 },
+ { 0x0041c634, 2 },
+ { 0x0041c644, 4 },
+ { 0x0041c658, 6 },
+ { 0x0041c684, 5 },
+ { 0x0041c6a4, 1 },
+ { 0x0041c6ac, 2 },
+ { 0x0041c730, 8 },
+ { 0x00501000, 1 },
+ { 0x00501c8c, 3 },
+ { 0x00501d08, 2 },
+ { 0x00501e04, 3 },
+ { 0x00501e14, 2 },
+ { 0x00501e24, 2 },
+ { 0x00501e34, 2 },
+ { 0x00501e44, 4 },
+ { 0x00501e58, 6 },
+ { 0x00501e84, 5 },
+ { 0x00501ea4, 1 },
+ { 0x00501eac, 2 },
+ { 0x00501f30, 8 },
+ { 0x0050448c, 3 },
+ { 0x00504508, 2 },
+ { 0x00504604, 3 },
+ { 0x00504614, 2 },
+ { 0x00504624, 2 },
+ { 0x00504634, 2 },
+ { 0x00504644, 4 },
+ { 0x00504658, 6 },
+ { 0x00504684, 5 },
+ { 0x005046a4, 1 },
+ { 0x005046ac, 2 },
+ { 0x00504730, 8 },
+};
+static const u32 gk20a_context_whitelist_ranges_count =
+ ARRAY_SIZE(gk20a_context_whitelist_ranges);
+
+/* runcontrol */
+static const u32 gk20a_runcontrol_whitelist[] = {
+ 0x00419e10,
+ 0x0041c610,
+ 0x00501e10,
+ 0x00504610,
+};
+static const u32 gk20a_runcontrol_whitelist_count =
+ ARRAY_SIZE(gk20a_runcontrol_whitelist);
+
+static const struct regop_offset_range gk20a_runcontrol_whitelist_ranges[] = {
+ { 0x00419e10, 1 },
+ { 0x0041c610, 1 },
+ { 0x00501e10, 1 },
+ { 0x00504610, 1 },
+};
+static const u32 gk20a_runcontrol_whitelist_ranges_count =
+ ARRAY_SIZE(gk20a_runcontrol_whitelist_ranges);
+
+
+/* quad ctl */
+static const u32 gk20a_qctl_whitelist[] = {
+ 0x00504670,
+ 0x00504674,
+ 0x00504678,
+ 0x0050467c,
+ 0x00504680,
+ 0x00504730,
+ 0x00504734,
+ 0x00504738,
+ 0x0050473c,
+};
+static const u32 gk20a_qctl_whitelist_count =
+ ARRAY_SIZE(gk20a_qctl_whitelist);
+
+static const struct regop_offset_range gk20a_qctl_whitelist_ranges[] = {
+ { 0x00504670, 1 },
+ { 0x00504730, 4 },
+};
+static const u32 gk20a_qctl_whitelist_ranges_count =
+ ARRAY_SIZE(gk20a_qctl_whitelist_ranges);
+
+
+static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
+ u32 *ctx_rd_count, u32 *ctx_wr_count,
+ struct nvhost_dbg_gpu_reg_op *ops,
+ u32 op_count);
+
+
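+/*
+ * Execute a batch of register ops for a debug session: every op is
+ * validated first, global ops are then performed directly via BAR0
+ * reads/writes, and any context ops are handed off in a single call to
+ * gr_gk20a_exec_ctx_ops().
+ */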
+int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *ops,
+ u64 num_ops)
+{
+ int err = 0, i;
+ struct channel_gk20a *ch = NULL;
+ struct gk20a *g = dbg_s->g;
+ /*struct gr_gk20a *gr = &g->gr;*/
+ u32 data32_lo = 0, data32_hi = 0;
+ u32 ctx_rd_count = 0, ctx_wr_count = 0;
+ bool skip_read_lo, skip_read_hi;
+ bool ok;
+
+ gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+ ch = dbg_s->ch;
+
+ ok = validate_reg_ops(dbg_s,
+ &ctx_rd_count, &ctx_wr_count,
+ ops, num_ops);
+ if (!ok) {
+ dev_err(dbg_s->dev, "invalid op(s)");
+ err = -EINVAL;
+ /* each op has its own err/status */
+ goto clean_up;
+ }
+
+ for (i = 0; i < num_ops; i++) {
+ /* if it isn't global then it is done in the ctx ops... */
+ if (ops[i].type != REGOP(TYPE_GLOBAL))
+ continue;
+
+ switch (ops[i].op) {
+
+ case REGOP(READ_32):
+ ops[i].value_hi = 0;
+ ops[i].value_lo = gk20a_readl(g, ops[i].offset);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
+ ops[i].value_lo, ops[i].offset);
+
+ break;
+
+ case REGOP(READ_64):
+ ops[i].value_lo = gk20a_readl(g, ops[i].offset);
+ ops[i].value_hi =
+ gk20a_readl(g, ops[i].offset + 4);
+
+ gk20a_dbg(gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
+ ops[i].value_hi, ops[i].value_lo,
+ ops[i].offset);
+ break;
+
+ case REGOP(WRITE_32):
+ case REGOP(WRITE_64):
+ /* some of this appears wonky/unnecessary but
+ we've kept it for compat with existing
+ debugger code. just in case... */
+ skip_read_lo = skip_read_hi = false;
+ if (ops[i].and_n_mask_lo == ~(u32)0) {
+ data32_lo = ops[i].value_lo;
+ skip_read_lo = true;
+ }
+
+ if ((ops[i].op == REGOP(WRITE_64)) &&
+ (ops[i].and_n_mask_hi == ~(u32)0)) {
+ data32_hi = ops[i].value_hi;
+ skip_read_hi = true;
+ }
+
+ /* read first 32bits */
+ if (unlikely(skip_read_lo == false)) {
+ data32_lo = gk20a_readl(g, ops[i].offset);
+ data32_lo &= ~ops[i].and_n_mask_lo;
+ data32_lo |= ops[i].value_lo;
+ }
+
+ /* if desired, read second 32bits */
+ if ((ops[i].op == REGOP(WRITE_64)) &&
+ !skip_read_hi) {
+ data32_hi = gk20a_readl(g, ops[i].offset + 4);
+ data32_hi &= ~ops[i].and_n_mask_hi;
+ data32_hi |= ops[i].value_hi;
+ }
+
+ /* now update first 32bits */
+ gk20a_writel(g, ops[i].offset, data32_lo);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
+ data32_lo, ops[i].offset);
+ /* if desired, update second 32bits */
+ if (ops[i].op == REGOP(WRITE_64)) {
+ gk20a_writel(g, ops[i].offset + 4, data32_hi);
+ gk20a_dbg(gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ",
+ data32_hi, ops[i].offset + 4);
+
+ }
+
+
+ break;
+
+ /* shouldn't happen as we've already screened */
+ default:
+ BUG();
+ err = -EINVAL;
+ goto clean_up;
+ break;
+ }
+ }
+
+ if (ctx_wr_count | ctx_rd_count) {
+ err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
+ ctx_wr_count, ctx_rd_count);
+ if (err) {
+ dev_warn(dbg_s->dev,
+ "failed to perform ctx ops\n");
+ goto clean_up;
+ }
+ }
+
+ clean_up:
+ gk20a_dbg(gpu_dbg_gpu_dbg, "ret=%d", err);
+ return err;
+
+}
+
+
+static int validate_reg_op_info(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *op)
+{
+ int err = 0;
+
+ op->status = REGOP(STATUS_SUCCESS);
+
+ switch (op->op) {
+ case REGOP(READ_32):
+ case REGOP(READ_64):
+ case REGOP(WRITE_32):
+ case REGOP(WRITE_64):
+ break;
+ default:
+ op->status |= REGOP(STATUS_UNSUPPORTED_OP);
+ /*gk20a_err(dbg_s->dev, "Invalid regops op %d!", op->op);*/
+ err = -EINVAL;
+ break;
+ }
+
+ switch (op->type) {
+ case REGOP(TYPE_GLOBAL):
+ case REGOP(TYPE_GR_CTX):
+ case REGOP(TYPE_GR_CTX_TPC):
+ case REGOP(TYPE_GR_CTX_SM):
+ case REGOP(TYPE_GR_CTX_CROP):
+ case REGOP(TYPE_GR_CTX_ZROP):
+ case REGOP(TYPE_GR_CTX_QUAD):
+ break;
+ /*
+ case NVHOST_DBG_GPU_REG_OP_TYPE_FB:
+ */
+ default:
+ op->status |= REGOP(STATUS_INVALID_TYPE);
+ /*gk20a_err(dbg_s->dev, "Invalid regops type %d!", op->type);*/
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
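+/*
+ * Whitelist policy: global ops are checked against the global range table
+ * and, when a channel is bound to a non-profiler session, also against the
+ * per-context and runcontrol lists; context ops require a bound channel;
+ * quad-control ops use their own small linear list.
+ */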
+static bool check_whitelists(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *op, u32 offset)
+{
+ bool valid = false;
+
+ if (op->type == REGOP(TYPE_GLOBAL)) {
+ /* search global list */
+ valid = !!bsearch(&offset,
+ gk20a_global_whitelist_ranges,
+ gk20a_global_whitelist_ranges_count,
+ sizeof(*gk20a_global_whitelist_ranges),
+ regop_bsearch_range_cmp);
+
+ /* if debug session and channel is bound search context list */
+ if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
+ /* binary search context list */
+ valid = !!bsearch(&offset,
+ gk20a_context_whitelist_ranges,
+ gk20a_context_whitelist_ranges_count,
+ sizeof(*gk20a_context_whitelist_ranges),
+ regop_bsearch_range_cmp);
+ }
+
+ /* if debug session and channel is bound search runcontrol list */
+ if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
+ valid = linear_search(offset,
+ gk20a_runcontrol_whitelist,
+ gk20a_runcontrol_whitelist_count);
+ }
+ } else if (op->type == REGOP(TYPE_GR_CTX)) {
+ /* it's a context-relative op */
+ if (!dbg_s->ch) {
+ gk20a_err(dbg_s->dev, "can't perform ctx regop unless bound");
+ op->status = REGOP(STATUS_UNSUPPORTED_OP);
+ return false;
+ }
+
+ /* binary search context list */
+ valid = !!bsearch(&offset,
+ gk20a_context_whitelist_ranges,
+ gk20a_context_whitelist_ranges_count,
+ sizeof(*gk20a_context_whitelist_ranges),
+ regop_bsearch_range_cmp);
+
+ /* if debug session and channel is bound search runcontrol list */
+ if ((!valid) && (!dbg_s->is_profiler && dbg_s->ch)) {
+ valid = linear_search(offset,
+ gk20a_runcontrol_whitelist,
+ gk20a_runcontrol_whitelist_count);
+ }
+
+ } else if (op->type == REGOP(TYPE_GR_CTX_QUAD)) {
+ valid = linear_search(offset,
+ gk20a_qctl_whitelist,
+ gk20a_qctl_whitelist_count);
+ }
+
+ return valid;
+}
+
+/* note: the op here has already been through validate_reg_op_info */
+static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *op)
+{
+ int err;
+ u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
+ bool valid = false;
+
+ op->status = 0;
+ offset = op->offset;
+
+ /* support only 24-bit 4-byte aligned offsets */
+ if (offset & 0xFF000003) {
+ gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
+ op->status |= REGOP(STATUS_INVALID_OFFSET);
+ return -EINVAL;
+ }
+
+ valid = check_whitelists(dbg_s, op, offset);
+ if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid)
+ valid = check_whitelists(dbg_s, op, offset + 4);
+
+ if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
+ err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
+ op->offset,
+ 1,
+ &buf_offset_lo,
+ &buf_offset_addr,
+ &num_offsets,
+ op->type == REGOP(TYPE_GR_CTX_QUAD),
+ op->quad);
+ if (err) {
+ op->status |= REGOP(STATUS_INVALID_OFFSET);
+ return -EINVAL;
+ }
+ if (!buf_offset_lo) {
+ op->status |= REGOP(STATUS_INVALID_OFFSET);
+ return -EINVAL;
+ }
+ }
+
+ if (!valid) {
+ gk20a_err(dbg_s->dev, "invalid regop offset: 0x%x\n", offset);
+ op->status |= REGOP(STATUS_INVALID_OFFSET);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
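+/*
+ * Validate op type and offset for every entry and, as a side effect, tally
+ * how many context reads and writes the batch contains so the caller knows
+ * whether gr_gk20a_exec_ctx_ops() needs to run.
+ */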
+static bool validate_reg_ops(struct dbg_session_gk20a *dbg_s,
+ u32 *ctx_rd_count, u32 *ctx_wr_count,
+ struct nvhost_dbg_gpu_reg_op *ops,
+ u32 op_count)
+{
+ u32 i;
+ int err;
+ bool ok = true;
+
+ /* keep going until the end so every op can get
+ * a separate error code if needed */
+ for (i = 0; i < op_count; i++) {
+
+ err = validate_reg_op_info(dbg_s, &ops[i]);
+ ok &= !err;
+
+ if (reg_op_is_gr_ctx(ops[i].type)) {
+ if (reg_op_is_read(ops[i].op))
+ (*ctx_rd_count)++;
+ else
+ (*ctx_wr_count)++;
+ }
+
+ err = validate_reg_op_offset(dbg_s, &ops[i]);
+ ok &= !err;
+ }
+
+ gk20a_dbg(gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d\n",
+ *ctx_wr_count, *ctx_rd_count);
+
+ return ok;
+}
+
+/* exported for tools like cyclestats, etc */
+bool is_bar0_global_offset_whitelisted_gk20a(u32 offset)
+{
+ bool valid = !!bsearch(&offset,
+ gk20a_global_whitelist_ranges,
+ gk20a_global_whitelist_ranges_count,
+ sizeof(*gk20a_global_whitelist_ranges),
+ regop_bsearch_range_cmp);
+ return valid;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h
new file mode 100644
index 000000000000..23b4865b8db8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * Tegra GK20A GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __REGOPS_GK20A_H_
+#define __REGOPS_GK20A_H_
+
+int exec_regops_gk20a(struct dbg_session_gk20a *dbg_s,
+ struct nvhost_dbg_gpu_reg_op *ops,
+ u64 num_ops);
+
+/* turn seriously unwieldy names -> something shorter */
+#define REGOP(x) NVHOST_DBG_GPU_REG_OP_##x
+
+
+static inline bool reg_op_is_gr_ctx(u8 type)
+{
+ return type == REGOP(TYPE_GR_CTX) ||
+ type == REGOP(TYPE_GR_CTX_TPC) ||
+ type == REGOP(TYPE_GR_CTX_SM) ||
+ type == REGOP(TYPE_GR_CTX_CROP) ||
+ type == REGOP(TYPE_GR_CTX_ZROP) ||
+ type == REGOP(TYPE_GR_CTX_QUAD);
+}
+static inline bool reg_op_is_read(u8 op)
+{
+ return op == REGOP(READ_32) ||
+ op == REGOP(READ_64);
+}
+
+bool is_bar0_global_offset_whitelisted_gk20a(u32 offset);
+
+#endif /* __REGOPS_GK20A_H_ */
diff --git a/drivers/gpu/nvgpu/gk20a/sim_gk20a.h b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h
new file mode 100644
index 000000000000..5fc8006e202b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/sim_gk20a.h
@@ -0,0 +1,62 @@
+/*
+ * drivers/video/tegra/host/gk20a/sim_gk20a.h
+ *
+ * GK20A sim support
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __SIM_GK20A_H__
+#define __SIM_GK20A_H__
+
+
+struct gk20a;
+struct sim_gk20a {
+ struct gk20a *g;
+ struct resource *reg_mem;
+ void __iomem *regs;
+ struct {
+ struct page *page;
+ void *kvaddr;
+ phys_addr_t phys;
+ } send_bfr, recv_bfr, msg_bfr;
+ u32 send_ring_put;
+ u32 recv_ring_get;
+ u32 recv_ring_put;
+ u32 sequence_base;
+ void (*remove_support)(struct sim_gk20a *);
+};
+
+
+int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index,
+ u32 count, u32 *data);
+
+static inline int gk20a_sim_esc_read_no_sim(struct gk20a *g, char *p,
+ u32 i, u32 c, u32 *d)
+{
+ *d = ~(u32)0;
+ return -1;
+}
+
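+/*
+ * On ASIM platforms this escapes a 32-bit read to the simulator; otherwise
+ * it falls back to the stub above, which returns -1 and all-ones data.
+ */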
+static inline int gk20a_sim_esc_readl(struct gk20a *g, char *p, u32 i, u32 *d)
+{
+ if (tegra_cpu_is_asim())
+ return gk20a_sim_esc_read(g, p, i, sizeof(u32), d);
+
+ return gk20a_sim_esc_read_no_sim(g, p, i, sizeof(u32), d);
+}
+
+
+#endif /*__SIM_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c
new file mode 100644
index 000000000000..da9119798c1f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c
@@ -0,0 +1,142 @@
+/*
+ * drivers/video/tegra/host/gk20a/therm_gk20a.c
+ *
+ * GK20A Therm
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "gk20a.h"
+#include "hw_chiplet_pwr_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_therm_gk20a.h"
+
+static int gk20a_init_therm_reset_enable_hw(struct gk20a *g)
+{
+ return 0;
+}
+
+static int gk20a_init_therm_setup_sw(struct gk20a *g)
+{
+ return 0;
+}
+
+static int gk20a_init_therm_setup_hw(struct gk20a *g)
+{
+ /* program NV_THERM registers */
+ gk20a_writel(g, therm_use_a_r(), NV_THERM_USE_A_INIT);
+ gk20a_writel(g, therm_evt_ext_therm_0_r(),
+ NV_THERM_EVT_EXT_THERM_0_INIT);
+ gk20a_writel(g, therm_evt_ext_therm_1_r(),
+ NV_THERM_EVT_EXT_THERM_1_INIT);
+ gk20a_writel(g, therm_evt_ext_therm_2_r(),
+ NV_THERM_EVT_EXT_THERM_2_INIT);
+
+/*
+ u32 data;
+
+ data = gk20a_readl(g, gr_gpcs_tpcs_l1c_cfg_r());
+ data = set_field(data, gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_m(),
+ gr_gpcs_tpcs_l1c_cfg_blkactivity_enable_enable_f());
+ gk20a_writel(g, gr_gpcs_tpcs_l1c_cfg_r(), data);
+
+ data = gk20a_readl(g, gr_gpcs_tpcs_l1c_pm_r());
+ data = set_field(data, gr_gpcs_tpcs_l1c_pm_enable_m(),
+ gr_gpcs_tpcs_l1c_pm_enable_enable_f());
+ gk20a_writel(g, gr_gpcs_tpcs_l1c_pm_r(), data);
+
+ data = gk20a_readl(g, gr_gpcs_tpcs_sm_pm_ctrl_r());
+ data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(),
+ gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f());
+ data = set_field(data, gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(),
+ gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f());
+ gk20a_writel(g, gr_gpcs_tpcs_sm_pm_ctrl_r(), data);
+
+ data = gk20a_readl(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r());
+ data = set_field(data, gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_m(),
+ gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_blkactivity_enable_enable_f());
+ gk20a_writel(g, gr_gpcs_tpcs_sm_halfctl_ctrl_r(), data);
+
+ data = gk20a_readl(g, gr_gpcs_tpcs_sm_debug_sfe_control_r());
+ data = set_field(data, gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_m(),
+ gr_gpcs_tpcs_sm_debug_sfe_control_blkactivity_enable_enable_f());
+ gk20a_writel(g, gr_gpcs_tpcs_sm_debug_sfe_control_r(), data);
+
+ gk20a_writel(g, therm_peakpower_config6_r(0),
+ therm_peakpower_config6_trigger_cfg_1h_intr_f() |
+ therm_peakpower_config6_trigger_cfg_1l_intr_f());
+
+ gk20a_writel(g, chiplet_pwr_gpcs_config_1_r(),
+ chiplet_pwr_gpcs_config_1_ba_enable_yes_f());
+ gk20a_writel(g, chiplet_pwr_fbps_config_1_r(),
+ chiplet_pwr_fbps_config_1_ba_enable_yes_f());
+
+ data = gk20a_readl(g, therm_config1_r());
+ data = set_field(data, therm_config1_ba_enable_m(),
+ therm_config1_ba_enable_yes_f());
+ gk20a_writel(g, therm_config1_r(), data);
+
+ gk20a_writel(g, gr_gpcs_tpcs_sm_power_throttle_r(), 0x441a);
+
+ gk20a_writel(g, therm_weight_1_r(), 0xd3);
+ gk20a_writel(g, chiplet_pwr_gpcs_weight_6_r(), 0x7d);
+ gk20a_writel(g, chiplet_pwr_gpcs_weight_7_r(), 0xff);
+ gk20a_writel(g, chiplet_pwr_fbps_weight_0_r(), 0x13000000);
+ gk20a_writel(g, chiplet_pwr_fbps_weight_1_r(), 0x19);
+
+ gk20a_writel(g, therm_peakpower_config8_r(0), 0x8);
+ gk20a_writel(g, therm_peakpower_config9_r(0), 0x0);
+
+ gk20a_writel(g, therm_evt_ba_w0_t1h_r(), 0x100);
+
+ gk20a_writel(g, therm_use_a_r(), therm_use_a_ba_w0_t1h_yes_f());
+
+ gk20a_writel(g, therm_peakpower_config1_r(0),
+ therm_peakpower_config1_window_period_2m_f() |
+ therm_peakpower_config1_ba_sum_shift_20_f() |
+ therm_peakpower_config1_window_en_enabled_f());
+
+ gk20a_writel(g, therm_peakpower_config2_r(0),
+ therm_peakpower_config2_ba_threshold_1h_val_f(1) |
+ therm_peakpower_config2_ba_threshold_1h_en_enabled_f());
+
+ gk20a_writel(g, therm_peakpower_config4_r(0),
+ therm_peakpower_config4_ba_threshold_1l_val_f(1) |
+ therm_peakpower_config4_ba_threshold_1l_en_enabled_f());
+*/
+ return 0;
+}
+
+int gk20a_init_therm_support(struct gk20a *g)
+{
+ int err;
+
+ gk20a_dbg_fn("");
+
+ err = gk20a_init_therm_reset_enable_hw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_therm_setup_sw(g);
+ if (err)
+ return err;
+
+ err = gk20a_init_therm_setup_hw(g);
+ if (err)
+ return err;
+
+ return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h
new file mode 100644
index 000000000000..3f67ee124429
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h
@@ -0,0 +1,33 @@
+/*
+ * drivers/video/tegra/host/gk20a/therm_gk20a.h
+ *
+ * GK20A Therm
+ *
+ * Copyright (c) 2011 - 2012, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _NVHOST_THERM_GK20A_H_
+#define _NVHOST_THERM_GK20A_H_
+
+/* priority for EXT_THERM_0 event set to highest */
+#define NV_THERM_EVT_EXT_THERM_0_INIT 0x3000100
+#define NV_THERM_EVT_EXT_THERM_1_INIT 0x2000200
+#define NV_THERM_EVT_EXT_THERM_2_INIT 0x1000300
+/* configures the thermal events that may cause clock slowdown */
+#define NV_THERM_USE_A_INIT 0x7
+
+int gk20a_init_therm_support(struct gk20a *g);
+
+#endif /* _NVHOST_THERM_GK20A_H_ */