author     Ian Wisbon <ian.wisbon@timesys.com>    2011-02-10 17:15:15 -0500
committer  Ian Wisbon <ian.wisbon@timesys.com>    2011-02-14 14:29:06 -0500
commit     14a4057959f8ee0a2249eb2abd64fd6b1f571d98 (patch)
tree       6df655a665a5a56bd6b7cae5e2689fc5a1b6c965 /drivers/mxc/amd-gpu/common/gsl_drawctxt.c
parent     0eb553bf96e2c990d3bfccaa07da0863624c89ab (diff)
Digi Release Code - 02142011 Missing Files Fix   (2.6.31-digi-201102141503)
Diffstat (limited to 'drivers/mxc/amd-gpu/common/gsl_drawctxt.c')
-rw-r--r--   drivers/mxc/amd-gpu/common/gsl_drawctxt.c   1796
1 file changed, 1796 insertions, 0 deletions
diff --git a/drivers/mxc/amd-gpu/common/gsl_drawctxt.c b/drivers/mxc/amd-gpu/common/gsl_drawctxt.c
new file mode 100644
index 000000000000..afa44e618794
--- /dev/null
+++ b/drivers/mxc/amd-gpu/common/gsl_drawctxt.c
@@ -0,0 +1,1796 @@
+/* Copyright (c) 2002,2007-2009, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ */
+
+#include "gsl.h"
+#include "gsl_hal.h"
+#ifdef _LINUX
+#include <asm/div64.h>
+#endif
+
+#ifdef GSL_BLD_YAMATO
+
+//#define DISABLE_SHADOW_WRITES
+
+/*
+//////////////////////////////////////////////////////////////////////////////
+//
+// Memory Map for Register, Constant & Instruction Shadow, and Command Buffers (34.5KB)
+//
+// +---------------------+------------+-------------+---+---------------------+
+// | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow |
+// +---------------------+------------+-------------+---+---------------------+
+// ________________________________/ \___________________
+// / \
+// +--------------+-----------+------+-----------+--------------+--------+
+// | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused |
+// +--------------+-----------+------+-----------+--------------+--------+
+//
+// 8K - ALU Constant Shadow (8K aligned)
+// 4K - H/W Register Shadow (8K aligned)
+// 9K - Command and Vertex Buffers
+// - Indirect command buffer : Const/Reg restore
+// - includes Loop & Bool const shadows
+// - Indirect command buffer : Const/Reg save
+// - Quad vertices & texture coordinates
+// - Indirect command buffer : Gmem save
+// - Indirect command buffer : Gmem restore
+// - Unused (padding to 8KB boundary)
+// <1K - Texture Constant Shadow (768 bytes) (8K aligned)
+// 18K - Shader Instruction Shadow
+// - 6K vertex (32 byte aligned)
+// - 6K pixel (32 byte aligned)
+// - 6K shared (32 byte aligned)
+//
+// Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes
+// 3 DWORDS per DWORD transferred, plus 1 DWORD for the shadow, for a total of
+// 16 bytes per constant. If the texture constants were transferred this way,
+// the Command & Vertex Buffers section would extend past the 16K boundary.
+// By moving the texture constant shadow area to start at 16KB boundary, we
+// only require approximately 40 bytes more memory, but are able to use the
+// LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up
+// context switching.
+//
+// [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool
+// constants would require an additional 8KB each, for alignment.]
+//
+//////////////////////////////////////////////////////////////////////////////
+*/
+
+//////////////////////////////////////////////////////////////////////////////
+// Constants
+//////////////////////////////////////////////////////////////////////////////
+
+
+
+
+#define ALU_CONSTANTS 2048 // DWORDS
+#define NUM_REGISTERS 1024 // DWORDS
+#ifdef DISABLE_SHADOW_WRITES
+ #define CMD_BUFFER_LEN 9216 // DWORDS
+#else
+ #define CMD_BUFFER_LEN 3072 // DWORDS
+#endif
+#define TEX_CONSTANTS (32*6) // DWORDS
+#define BOOL_CONSTANTS 8 // DWORDS
+#define LOOP_CONSTANTS 56 // DWORDS
+#define SHADER_INSTRUCT_LOG2 9U // 2^n == SHADER_INSTRUCTIONS
+
+#if defined(PM4_IM_STORE)
+#define SHADER_INSTRUCT (1<<SHADER_INSTRUCT_LOG2) // 96-bit instructions
+#else
+#define SHADER_INSTRUCT 0
+#endif
+
+// LOAD_CONSTANT_CONTEXT shadow size
+#define LCC_SHADOW_SIZE 0x2000 // 8KB
+
+#define ALU_SHADOW_SIZE LCC_SHADOW_SIZE // 8KB
+#define REG_SHADOW_SIZE 0x1000 // 4KB
+#ifdef DISABLE_SHADOW_WRITES
+ #define CMD_BUFFER_SIZE 0x9000 // 36KB
+#else
+ #define CMD_BUFFER_SIZE 0x3000 // 12KB
+#endif
+#define TEX_SHADOW_SIZE (TEX_CONSTANTS*4) // 768 bytes
+#define SHADER_SHADOW_SIZE (SHADER_INSTRUCT*12)// 6KB
+
+#define REG_OFFSET LCC_SHADOW_SIZE
+#define CMD_OFFSET (REG_OFFSET + REG_SHADOW_SIZE)
+#define TEX_OFFSET (CMD_OFFSET + CMD_BUFFER_SIZE)
+#define SHADER_OFFSET ((TEX_OFFSET + TEX_SHADOW_SIZE + 32) & ~31)
+
+#define CONTEXT_SIZE (SHADER_OFFSET + 3 * SHADER_SHADOW_SIZE)
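+
+// For reference, with the defaults above (PM4_IM_STORE defined and
+// DISABLE_SHADOW_WRITES not defined) these sizes give the following layout
+// within the 8K-aligned gpustate allocation:
+//
+//   REG_OFFSET    = 0x2000
+//   CMD_OFFSET    = 0x2000 + 0x1000 = 0x3000
+//   TEX_OFFSET    = 0x3000 + 0x3000 = 0x6000
+//   SHADER_OFFSET = (0x6000 + 0x300 + 32) & ~31 = 0x6320
+//   CONTEXT_SIZE  = 0x6320 + 3 * 0x1800 = 0xab20 (43808 bytes)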
+
+
+//////////////////////////////////////////////////////////////////////////////
+// temporary work structure
+//////////////////////////////////////////////////////////////////////////////
+
+typedef struct
+{
+ unsigned int *start; // Command & Vertex buffer start
+ unsigned int *cmd; // Next available dword in C&V buffer
+
+ // address of buffers, needed when creating IB1 command buffers.
+ gpuaddr_t bool_shadow; // Address where bool constants are shadowed
+ gpuaddr_t loop_shadow; // Address where loop constants are shadowed
+
+#if defined(PM4_IM_STORE)
+ gpuaddr_t shader_shared; // Address of shared shader instruction shadow
+ gpuaddr_t shader_vertex; // Address of vertex shader instruction shadow
+ gpuaddr_t shader_pixel; // Address of pixel shader instruction shadow
+#endif
+
+ gpuaddr_t reg_values[2]; // Addresses in command buffer where separately handled registers are saved
+ gpuaddr_t chicken_restore;// Address where the TP0_CHICKEN register value is written
+ gpuaddr_t gmem_base; // Base gpu address of GMEM
+}
+ctx_t;
+
+//////////////////////////////////////////////////////////////////////////////
+// Helper function to convert an unsigned int to its IEEE-754 single-precision bit pattern without using the FPU
+//////////////////////////////////////////////////////////////////////////////
+unsigned int uint2float( unsigned int uintval )
+{
+ unsigned int exp = 0;
+ unsigned int frac = 0;
+ unsigned int u = uintval;
+
+ // Handle zero separately
+ if( uintval == 0 ) return 0;
+
+ // Find log2 of u
+ if(u>=0x10000) { exp+=16; u>>=16; }
+ if(u>=0x100 ) { exp+=8; u>>=8; }
+ if(u>=0x10 ) { exp+=4; u>>=4; }
+ if(u>=0x4 ) { exp+=2; u>>=2; }
+ if(u>=0x2 ) { exp+=1; u>>=1; }
+
+ // Calculate fraction
+ frac = ( uintval & ( ~( 1 << exp ) ) ) << ( 23 - exp );
+
+ // Exp is biased by 127 and shifted 23 bits
+ exp = ( exp + 127 ) << 23;
+
+ return ( exp | frac );
+}
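+
+// Worked example: uint2float(5). The log2 search leaves exp = 2, so
+//   frac   = (5 & ~(1 << 2)) << (23 - 2) = 0x00200000
+//   exp    = (2 + 127) << 23             = 0x40800000
+//   result = 0x40a00000, the IEEE-754 bit pattern of 5.0f.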
+
+//////////////////////////////////////////////////////////////////////////////
+// Helper function to divide two unsigned ints and return the quotient as an IEEE-754 single-precision bit pattern
+//////////////////////////////////////////////////////////////////////////////
+unsigned int uintdivide(unsigned int a, unsigned int b)
+{
+#ifdef _LINUX
+ uint64_t a_fixed = a << 16;
+ uint64_t b_fixed = b << 16;
+#else
+ unsigned int a_fixed = a << 16;
+ unsigned int b_fixed = b << 16;
+#endif
+ // Assume the result is 0.fraction
+ unsigned int fraction;
+ unsigned int exp = 126;
+
+ if( b == 0 ) return 0;
+
+#ifdef _LINUX
+ a_fixed = a_fixed << 32;
+ do_div(a_fixed, b_fixed);
+ fraction = (unsigned int)a_fixed;
+#else
+ fraction = ((unsigned int)((((__int64)a_fixed) << 32) / (__int64)b_fixed));
+#endif
+
+ if( fraction == 0 ) return 0;
+
+ // Normalize
+ while( !(fraction & (1<<31)) )
+ {
+ fraction <<= 1;
+ exp--;
+ }
+ // Remove hidden bit
+ fraction <<= 1;
+
+ // Round
+ if( ( fraction & 0x1ff ) > 256 )
+ {
+ int rounded = 0;
+ int i = 9;
+
+ // Do the bit addition
+ while( !rounded )
+ {
+ if( fraction & (1<<i) )
+ {
+ // 1b + 1b = 0b, carry = 1
+ fraction &= ~(1<<i);
+ i++;
+ }
+ else
+ {
+ fraction |= (1<<i);
+ rounded = 1;
+ }
+ }
+ }
+
+ // Use 23 most significant bits for the fraction
+ fraction >>= 9;
+
+ return ( ( exp << 23 ) | fraction );
+}
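+
+// Worked example: uintdivide(1, 2). a_fixed = 0x10000 and b_fixed = 0x20000,
+// so the fixed-point quotient is (0x10000 << 32) / 0x20000 = 0x80000000.
+// Bit 31 is already set, so no normalization occurs; shifting out the hidden
+// bit leaves a zero mantissa, and the result is (126 << 23) | 0 = 0x3f000000,
+// the IEEE-754 bit pattern of 0.5f.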
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// context save (gmem -> sys)
+//////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////
+// pre-compiled vertex shader program
+//
+// attribute vec4 P;
+// void main(void)
+// {
+// gl_Position = P;
+// }
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#define GMEM2SYS_VTX_PGM_LEN 0x12
+
+static const unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = {
+ 0x00011003, 0x00001000, 0xc2000000,
+ 0x00001004, 0x00001000, 0xc4000000,
+ 0x00001005, 0x00002000, 0x00000000,
+ 0x1cb81000, 0x00398a88, 0x00000003,
+ 0x140f803e, 0x00000000, 0xe2010100,
+ 0x14000000, 0x00000000, 0xe2000000
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// pre-compiled fragment shader program
+//
+// precision highp float;
+// uniform vec4 clear_color;
+// void main(void)
+// {
+// gl_FragColor = clear_color;
+// }
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#define GMEM2SYS_FRAG_PGM_LEN 0x0c
+
+static const unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = {
+ 0x00000000, 0x1002c400, 0x10000000,
+ 0x00001003, 0x00002000, 0x00000000,
+ 0x140f8000, 0x00000000, 0x22000000,
+ 0x14000000, 0x00000000, 0xe2000000
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// context restore (sys -> gmem)
+//////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////
+// pre-compiled vertex shader program
+//
+// attribute vec4 position;
+// attribute vec4 texcoord;
+// varying vec4 texcoord0;
+// void main()
+// {
+// gl_Position = position;
+// texcoord0 = texcoord;
+// }
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#define SYS2GMEM_VTX_PGM_LEN 0x18
+
+static const unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = {
+ 0x00052003, 0x00001000, 0xc2000000, 0x00001005,
+ 0x00001000, 0xc4000000, 0x00001006, 0x10071000,
+ 0x20000000, 0x18981000, 0x0039ba88, 0x00000003,
+ 0x12982000, 0x40257b08, 0x00000002, 0x140f803e,
+ 0x00000000, 0xe2010100, 0x140f8000, 0x00000000,
+ 0xe2020200, 0x14000000, 0x00000000, 0xe2000000
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// pre-compiled fragment shader program
+//
+// precision mediump float;
+// uniform sampler2D tex0;
+// varying vec4 texcoord0;
+// void main()
+// {
+// gl_FragColor = texture2D(tex0, texcoord0.xy);
+// }
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#define SYS2GMEM_FRAG_PGM_LEN 0x0f
+
+static const unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = {
+ 0x00011002, 0x00001000, 0xc4000000, 0x00001003,
+ 0x10041000, 0x20000000, 0x10000001, 0x1ffff688,
+ 0x00000002, 0x140f8000, 0x00000000, 0xe2000000,
+ 0x14000000, 0x00000000, 0xe2000000
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// shader texture constants (sysmem -> gmem)
+//////////////////////////////////////////////////////////////////////////////
+
+#define SYS2GMEM_TEX_CONST_LEN 6
+
+static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] =
+{
+ // Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,RFMode=ZeroClamp-1,Dim=1:2d
+ 0x00000002, // Pitch = TBD
+
+ // Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, NearestClamp=1:OGL Mode
+ 0x00000806, // Address[31:12] = TBD
+
+ // Width, Height, EndianSwap=0:None
+ 0, // Width & Height = TBD
+
+ // NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, Mip=2:BaseMap
+ 0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23,
+
+ // VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, Dim3d=0
+ 0,
+
+ // BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0, Dim=1:2d, MipPacking=0
+ 1 << 9 // Mip Address[31:12] = TBD
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// quad for copying GMEM to context shadow
+//////////////////////////////////////////////////////////////////////////////
+
+#define QUAD_LEN 12
+
+static unsigned int gmem_copy_quad[QUAD_LEN] = {
+ 0x00000000, 0x00000000, 0x3f800000,
+ 0x00000000, 0x00000000, 0x3f800000,
+ 0x00000000, 0x00000000, 0x3f800000,
+ 0x00000000, 0x00000000, 0x3f800000
+};
+
+#define TEXCOORD_LEN 8
+
+static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = {
+ 0x00000000, 0x3f800000,
+ 0x3f800000, 0x3f800000,
+ 0x00000000, 0x00000000,
+ 0x3f800000, 0x00000000
+};
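+
+// The tables above hold IEEE-754 bit patterns (0x3f800000 == 1.0f); the
+// position and texture coordinate values are rewritten for each shadow buffer
+// by set_gmem_copy_quad() below, using uint2float() and uintdivide().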
+
+
+//////////////////////////////////////////////////////////////////////////////
+// shader linkage info
+//////////////////////////////////////////////////////////////////////////////
+
+#define SHADER_CONST_ADDR (11 * 6 + 3)
+
+
+//////////////////////////////////////////////////////////////////////////////
+// gmem command buffer length
+//////////////////////////////////////////////////////////////////////////////
+
+#define PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
+
+//////////////////////////////////////////////////////////////////////////////
+// functions
+//////////////////////////////////////////////////////////////////////////////
+
+static void
+config_gmemsize(gmem_shadow_t *shadow, int gmem_size)
+{
+ int w=64, h=64; // 16KB surface, minimum
+
+ // convert from bytes to 32-bit words
+ gmem_size = (gmem_size + 3)/4;
+
+ // find the right surface size, close to a square.
+ while (w * h < gmem_size)
+ if (w < h)
+ w *= 2;
+ else
+ h *= 2;
+
+ shadow->width = w;
+ shadow->pitch = w;
+ shadow->height = h;
+
+ shadow->size = shadow->pitch * shadow->height * 4;
+}
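+
+// For example, a 256KB GMEM (262144 bytes = 65536 words) grows the surface
+// from 64x64 through 128x128 up to 256x256, giving pitch = width = height =
+// 256 and a shadow size of 256 * 256 * 4 = 256KB.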
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned int
+gpuaddr(unsigned int *cmd, gsl_memdesc_t *memdesc)
+{
+ return memdesc->gpuaddr + ((char *)cmd - (char *)memdesc->hostptr);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static void
+create_ib1(gsl_drawctxt_t *drawctxt, unsigned int *cmd, unsigned int *start, unsigned int *end)
+{
+ cmd[0] = PM4_HDR_INDIRECT_BUFFER_PFD;
+ cmd[1] = gpuaddr(start, &drawctxt->gpustate);
+ cmd[2] = end - start;
+}
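+
+// An IB1 command is always three dwords (packet header, GPU address of the
+// command sequence, and its length in dwords), which is why the issuecmds
+// calls in kgsl_drawctxt_switch() below pass a size of 3.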
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned int *
+program_shader(unsigned int *cmds, int vtxfrag, const unsigned int *shader_pgm, int dwords)
+{
+ // load the patched shader program stream (vertex or fragment)
+ *cmds++ = pm4_type3_packet(PM4_IM_LOAD_IMMEDIATE, 2 + dwords);
+ *cmds++ = vtxfrag; // 0=vertex shader, 1=fragment shader
+ *cmds++ = ( (0 << 16) | dwords ); // instruction start & size (in 32-bit words)
+
+ kos_memcpy(cmds, shader_pgm, dwords<<2);
+ cmds += dwords;
+
+ return cmds;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned int *
+reg_to_mem(unsigned int *cmds, gpuaddr_t dst, gpuaddr_t src, int dwords)
+{
+ while (dwords-- > 0)
+ {
+ *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmds++ = src++;
+ *cmds++ = dst;
+ dst += 4;
+ }
+
+ return cmds;
+}
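+
+// Each register copied this way costs three command dwords (packet header,
+// source register, destination address); this is the "3 DWORDS per DWORD
+// transferred" figure quoted in the memory map comment above. Saving the 8
+// boolean constants, for example, emits 24 dwords of commands.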
+
+
+
+#ifdef DISABLE_SHADOW_WRITES
+
+static void build_reg_to_mem_range(unsigned int start, unsigned int end, unsigned int **cmd, gsl_drawctxt_t *drawctxt)
+{
+ unsigned int i = start;
+
+ for(i=start; i<=end; i++)
+ {
+ *(*cmd)++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *(*cmd)++ = i | (1<<30);
+ *(*cmd)++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) + (i-0x2000)*4;
+ }
+}
+
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// chicken restore
+//////////////////////////////////////////////////////////////////////////////
+static unsigned int*
+build_chicken_restore_cmds(gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ unsigned int *start = ctx->cmd;
+ unsigned int *cmds = start;
+
+ *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0;
+
+ *cmds++ = pm4_type0_packet(mmTP0_CHICKEN, 1);
+ ctx->chicken_restore = gpuaddr(cmds, &drawctxt->gpustate);
+ *cmds++ = 0x00000000;
+
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds);
+
+ return cmds;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// context save
+//////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////
+// save h/w regs, alu constants, texture constants, etc. ...
+// requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr
+//////////////////////////////////////////////////////////////////////////////
+
+static void
+build_regsave_cmds(gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ unsigned int *start = ctx->cmd;
+ unsigned int *cmd = start;
+
+#ifdef DISABLE_SHADOW_WRITES
+ // Write HW registers into shadow
+ build_reg_to_mem_range(mmRB_SURFACE_INFO, mmRB_DEPTH_INFO, &cmd, drawctxt);
+ build_reg_to_mem_range(mmCOHER_DEST_BASE_0, mmPA_SC_SCREEN_SCISSOR_BR, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SC_WINDOW_OFFSET, mmPA_SC_WINDOW_SCISSOR_BR, &cmd, drawctxt);
+ build_reg_to_mem_range(mmVGT_MAX_VTX_INDX, mmRB_FOG_COLOR, &cmd, drawctxt);
+ build_reg_to_mem_range(mmRB_STENCILREFMASK_BF, mmPA_CL_VPORT_ZOFFSET, &cmd, drawctxt);
+ build_reg_to_mem_range(mmSQ_PROGRAM_CNTL, mmSQ_WRAPPING_1, &cmd, drawctxt);
+ build_reg_to_mem_range(mmRB_DEPTHCONTROL, mmRB_MODECONTROL, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SU_POINT_SIZE, mmPA_SC_LINE_STIPPLE, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SC_VIZ_QUERY, mmPA_SC_VIZ_QUERY, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SC_LINE_CNTL, mmSQ_PS_CONST, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SC_AA_MASK, mmPA_SC_AA_MASK, &cmd, drawctxt);
+ build_reg_to_mem_range(mmVGT_VERTEX_REUSE_BLOCK_CNTL, mmRB_DEPTH_CLEAR, &cmd, drawctxt);
+ build_reg_to_mem_range(mmRB_SAMPLE_COUNT_CTL, mmRB_COLOR_DEST_MASK, &cmd, drawctxt);
+ build_reg_to_mem_range(mmPA_SU_POLY_OFFSET_FRONT_SCALE, mmPA_SU_POLY_OFFSET_BACK_OFFSET, &cmd, drawctxt);
+
+ // Copy ALU constants
+ cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, mmSQ_CONSTANT_0, ALU_CONSTANTS);
+
+ // Copy Tex constants
+ cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, mmSQ_FETCH_0, TEX_CONSTANTS);
+#else
+ // H/w registers are already shadowed; just need to disable shadowing to prevent corruption.
+ *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
+ *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
+ *cmd++ = 4 << 16; // regs, start=0
+ *cmd++ = 0x0; // count = 0
+
+ // ALU constants are already shadowed; just need to disable shadowing to prevent corruption.
+ *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
+ *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
+ *cmd++ = 0 << 16; // ALU, start=0
+ *cmd++ = 0x0; // count = 0
+
+ // Tex constants are already shadowed; just need to disable shadowing to prevent corruption.
+ *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
+ *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
+ *cmd++ = 1 << 16; // Tex, start=0
+ *cmd++ = 0x0; // count = 0
+#endif
+
+
+
+
+ // Need to handle some of the registers separately
+ *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmd++ = mmSQ_GPR_MANAGEMENT;
+ *cmd++ = ctx->reg_values[0];
+ *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmd++ = mmTP0_CHICKEN;
+ *cmd++ = ctx->reg_values[1];
+
+ // Copy Boolean constants
+ cmd = reg_to_mem(cmd, ctx->bool_shadow, mmSQ_CF_BOOLEANS, BOOL_CONSTANTS);
+
+ // Copy Loop constants
+ cmd = reg_to_mem(cmd, ctx->loop_shadow, mmSQ_CF_LOOP, LOOP_CONSTANTS);
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->reg_save, start, cmd);
+
+ ctx->cmd = cmd;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// copy colour, depth, & stencil buffers from graphics memory to system memory
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned int*
+build_gmem2sys_cmds(gsl_drawctxt_t *drawctxt, ctx_t* ctx, gmem_shadow_t *shadow)
+{
+ unsigned int *cmds = shadow->gmem_save_commands;
+ unsigned int *start = cmds;
+
+ // Store TP0_CHICKEN register
+ *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmds++ = mmTP0_CHICKEN;
+ if( ctx )
+ *cmds++ = ctx->chicken_restore;
+ else
+ cmds++;
+
+ *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0;
+
+ // Set TP0_CHICKEN to zero
+ *cmds++ = pm4_type0_packet(mmTP0_CHICKEN, 1);
+ *cmds++ = 0x00000000;
+
+ // --------------
+ // program shader
+ // --------------
+
+ // load shader vtx constants ... 5 dwords
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
+ *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
+ *cmds++ = 0;
+ *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; // valid(?) vtx constant flag & addr
+ *cmds++ = 0x00000030; // limit = 12 dwords
+
+ // Invalidate L2 cache to make sure vertices are updated
+ *cmds++ = pm4_type0_packet(mmTC_CNTL_STATUS, 1);
+ *cmds++ = 0x1;
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
+ *cmds++ = PM4_REG(mmVGT_MAX_VTX_INDX);
+ *cmds++ = 0x00ffffff; //mmVGT_MAX_VTX_INDX
+ *cmds++ = 0x0; //mmVGT_MIN_VTX_INDX
+ *cmds++ = 0x00000000; //mmVGT_INDX_OFFSET
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SC_AA_MASK);
+ *cmds++ = 0x0000ffff; //mmPA_SC_AA_MASK
+
+
+ // load the patched vertex shader stream
+ cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);
+
+ // Load the patched fragment shader stream
+ cmds = program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);
+
+ // SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmSQ_PROGRAM_CNTL);
+ *cmds++ = 0x10010001;
+ *cmds++ = 0x00000008;
+
+
+ // --------------
+ // resolve
+ // --------------
+
+ // PA_CL_VTE_CNTL
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_CL_VTE_CNTL);
+ *cmds++ = 0x00000b00; // disable X/Y/Z transforms, X/Y/Z are premultiplied by W
+
+ // change colour buffer to RGBA8888, MSAA = 1, and matching pitch
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmRB_SURFACE_INFO);
+ *cmds++ = drawctxt->context_gmem_shadow.pitch; // GMEM pitch is equal to context GMEM shadow pitch
+
+ // RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, Base=gmem_base
+ if( ctx )
+ {
+ KOS_ASSERT((ctx->gmem_base & 0xFFF) == 0); // gmem base assumed 4K aligned.
+ *cmds++ = (COLORX_8_8_8_8 << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | ctx->gmem_base;
+ }
+ else
+ cmds++;
+
+ // disable Z
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_DEPTHCONTROL);
+ *cmds++ = 0;
+
+ // set mmPA_SU_SC_MODE_CNTL
+ // Front_ptype = draw triangles
+ // Back_ptype = draw triangles
+ // Provoking vertex = last
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SU_SC_MODE_CNTL);
+ *cmds++ = 0x00080240;
+
+ // set the scissor to the extents of the draw surface
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_SC_SCREEN_SCISSOR_TL);
+ *cmds++ = (0 << 16) | 0;
+ *cmds++ = (drawctxt->context_gmem_shadow.height << 16) | drawctxt->context_gmem_shadow.width;
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_SC_WINDOW_SCISSOR_TL);
+ *cmds++ = (unsigned int) ((1U << 31) | (0 << 16) | 0);
+ *cmds++ = (drawctxt->context_gmem_shadow.height << 16) | drawctxt->context_gmem_shadow.width;
+
+ // load the viewport so that z scale = clear depth and z offset = 0.0f
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_CL_VPORT_ZSCALE);
+ *cmds++ = 0xbf800000; // -1.0f
+ *cmds++ = 0x0;
+
+ // load the COPY state
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 6);
+ *cmds++ = PM4_REG(mmRB_COPY_CONTROL);
+ *cmds++ = 0; // RB_COPY_CONTROL
+
+ *cmds++ = (shadow->gmemshadow.gpuaddr+shadow->offset*4) & 0xfffff000; // RB_COPY_DEST_BASE
+
+ *cmds++ = shadow->pitch >> 5; // RB_COPY_DEST_PITCH
+ *cmds++ = 0x0003c058; // Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither,MaskWrite:R=G=B=A=1
+
+ {
+ // Calculate the new offset based on the adjusted base
+ unsigned int addr = (shadow->gmemshadow.gpuaddr+shadow->offset*4);
+ unsigned int offset = (addr-(addr&0xfffff000))/4;
+
+ kos_assert( (offset & 0xfffff000) == 0 ); // Make sure we stay in offsetx field.
+
+ *cmds++ = offset;
+ }
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_MODECONTROL);
+ *cmds++ = 0x6; // EDRAM copy
+
+ // queue the draw packet
+ *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2);
+ *cmds++ = 0; // viz query info.
+ *cmds++ = 0x00030088; // PrimType=RectList, NumIndices=3, SrcSel=AutoIndex
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, shadow->gmem_save, start, cmds);
+
+ return cmds;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// context restore
+//////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////
+// copy colour, depth, & stencil buffers from system memory to graphics memory
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned int*
+build_sys2gmem_cmds(gsl_drawctxt_t *drawctxt, ctx_t* ctx, gmem_shadow_t *shadow)
+{
+ unsigned int *cmds = shadow->gmem_restore_commands;
+ unsigned int *start = cmds;
+
+ // Store TP0_CHICKEN register
+ *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmds++ = mmTP0_CHICKEN;
+ if( ctx )
+ *cmds++ = ctx->chicken_restore;
+ else
+ cmds++;
+
+ *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0;
+
+ // Set TP0_CHICKEN to zero
+ *cmds++ = pm4_type0_packet(mmTP0_CHICKEN, 1);
+ *cmds++ = 0x00000000;
+
+ // ----------------
+ // shader constants
+ // ----------------
+
+ // vertex buffer constants
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 7);
+
+ *cmds++ = (0x1 << 16) | (9 * 6);
+ *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; // valid(?) vtx constant flag & addr
+ *cmds++ = 0x00000030; // limit = 12 dwords
+ *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; // valid(?) vtx constant flag & addr
+ *cmds++ = 0x00000020; // limit = 8 dwords
+ *cmds++ = 0;
+ *cmds++ = 0;
+
+ // Invalidate L2 cache to make sure vertices and texture coordinates are updated
+ *cmds++ = pm4_type0_packet(mmTC_CNTL_STATUS, 1);
+ *cmds++ = 0x1;
+
+ // load the patched vertex shader stream
+ cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);
+
+ // Load the patched fragment shader stream
+ cmds = program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);
+
+ // SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmSQ_PROGRAM_CNTL);
+ *cmds++ = 0x10030002;
+ *cmds++ = 0x00000008;
+
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SC_AA_MASK);
+ *cmds++ = 0x0000ffff; //mmPA_SC_AA_MASK
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SC_VIZ_QUERY);
+ *cmds++ = 0x0; //mmPA_SC_VIZ_QUERY
+
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_COLORCONTROL);
+ *cmds++ = 0x00000c20; // RB_COLORCONTROL
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
+ *cmds++ = PM4_REG(mmVGT_MAX_VTX_INDX);
+ *cmds++ = 0x00ffffff; //mmVGT_MAX_VTX_INDX
+ *cmds++ = 0x0; //mmVGT_MIN_VTX_INDX
+ *cmds++ = 0x00000000; //mmVGT_INDX_OFFSET
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmVGT_VERTEX_REUSE_BLOCK_CNTL);
+ *cmds++ = 0x00000002; //mmVGT_VERTEX_REUSE_BLOCK_CNTL
+ *cmds++ = 0x00000002; //mmVGT_OUT_DEALLOC_CNTL
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmSQ_INTERPOLATOR_CNTL);
+ //*cmds++ = 0x0000ffff; //mmSQ_INTERPOLATOR_CNTL
+ *cmds++ = 0xffffffff; //mmSQ_INTERPOLATOR_CNTL
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SC_AA_CONFIG);
+ *cmds++ = 0x00000000; //mmPA_SC_AA_CONFIG
+
+
+ // set mmPA_SU_SC_MODE_CNTL
+ // Front_ptype = draw triangles
+ // Back_ptype = draw triangles
+ // Provoking vertex = last
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_SU_SC_MODE_CNTL);
+ *cmds++ = 0x00080240;
+
+ // texture constants
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
+ *cmds++ = (0x1 << 16) | (0 * 6);
+ kos_memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN<<2);
+ cmds[0] |= (shadow->pitch >> 5) << 22;
+ cmds[1] |= shadow->gmemshadow.gpuaddr;
+ cmds[2] |= (shadow->width+shadow->offset_x-1) | (shadow->height+shadow->offset_y-1) << 13;
+ cmds += SYS2GMEM_TEX_CONST_LEN;
+
+ // change colour buffer to RGBA8888, MSAA = 1, and matching pitch
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmRB_SURFACE_INFO);
+ *cmds++ = drawctxt->context_gmem_shadow.pitch; // GMEM pitch is equal to context GMEM shadow pitch
+
+
+ // RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, Base=gmem_base
+ if( ctx )
+ *cmds++ = (COLORX_8_8_8_8 << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | ctx->gmem_base;
+ else
+ cmds++;
+
+ // RB_DEPTHCONTROL
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_DEPTHCONTROL);
+ *cmds++ = 0; // disable Z
+
+
+ // set the scissor to the extents of the draw surface
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_SC_SCREEN_SCISSOR_TL);
+ *cmds++ = (0 << 16) | 0;
+ *cmds++ = (drawctxt->context_gmem_shadow.height << 16) | drawctxt->context_gmem_shadow.width;
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_SC_WINDOW_SCISSOR_TL);
+ *cmds++ = (unsigned int) ((1U << 31) | (shadow->gmem_offset_y << 16) | shadow->gmem_offset_x);
+ *cmds++ = (drawctxt->context_gmem_shadow.height << 16) | drawctxt->context_gmem_shadow.width;
+
+ // PA_CL_VTE_CNTL
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmPA_CL_VTE_CNTL);
+ *cmds++ = 0x00000b00; // disable X/Y/Z transforms, X/Y/Z are premultiplied by W
+
+ // load the viewport so that z scale = clear depth and z offset = 0.0f
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmPA_CL_VPORT_ZSCALE);
+ *cmds++ = 0xbf800000;
+ *cmds++ = 0x0;
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_COLOR_MASK);
+ *cmds++ = 0x0000000f; // R = G = B = 1:enabled
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_COLOR_DEST_MASK);
+ *cmds++ = 0xffffffff;
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
+ *cmds++ = PM4_REG(mmSQ_WRAPPING_0);
+ *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000;
+
+ *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
+ *cmds++ = PM4_REG(mmRB_MODECONTROL);
+ *cmds++ = 0x4; // draw pixels with color and depth/stencil component
+
+ // queue the draw packet
+ *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2);
+ *cmds++ = 0; // viz query info.
+ *cmds++ = 0x00030088; // PrimType=RectList, NumIndices=3, SrcSel=AutoIndex
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
+
+ return cmds;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// restore h/w regs, alu constants, texture constants, etc. ...
+//////////////////////////////////////////////////////////////////////////////
+
+static unsigned *
+reg_range(unsigned int *cmd, unsigned int start, unsigned int end)
+{
+ *cmd++ = PM4_REG(start); // h/w regs, start addr
+ *cmd++ = end - start + 1; // count
+ return cmd;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static void
+build_regrestore_cmds(gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ unsigned int *start = ctx->cmd;
+ unsigned int *cmd = start;
+
+
+ //*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ //*cmd++ = 0;
+
+ // H/W Registers
+ cmd++; // deferred pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, ???);
+#ifdef DISABLE_SHADOW_WRITES
+ *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; // Force mismatch
+#else
+ *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
+#endif
+
+ cmd = reg_range(cmd, mmRB_SURFACE_INFO, mmPA_SC_SCREEN_SCISSOR_BR);
+ cmd = reg_range(cmd, mmPA_SC_WINDOW_OFFSET, mmPA_SC_WINDOW_SCISSOR_BR);
+ cmd = reg_range(cmd, mmVGT_MAX_VTX_INDX, mmPA_CL_VPORT_ZOFFSET);
+ cmd = reg_range(cmd, mmSQ_PROGRAM_CNTL, mmSQ_WRAPPING_1);
+ cmd = reg_range(cmd, mmRB_DEPTHCONTROL, mmRB_MODECONTROL);
+ cmd = reg_range(cmd, mmPA_SU_POINT_SIZE, mmPA_SC_VIZ_QUERY/*mmVGT_ENHANCE*/);
+ cmd = reg_range(cmd, mmPA_SC_LINE_CNTL, mmRB_COLOR_DEST_MASK);
+ cmd = reg_range(cmd, mmPA_SU_POLY_OFFSET_FRONT_SCALE, mmPA_SU_POLY_OFFSET_BACK_OFFSET);
+
+ // Now that we know how many register blocks we have, we can compute the command length
+ start[0] = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, (cmd-start)-1);
+#ifdef DISABLE_SHADOW_WRITES
+ start[2] |= (0<<24) | (4 << 16); // Disable shadowing.
+#else
+ start[2] |= (1<<24) | (4 << 16); // Enable shadowing for the entire register block.
+#endif
+
+ // Need to handle some of the registers separately
+ *cmd++ = pm4_type0_packet(mmSQ_GPR_MANAGEMENT, 1);
+ ctx->reg_values[0] = gpuaddr(cmd, &drawctxt->gpustate);
+ *cmd++ = 0x00040400;
+
+ *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmd++ = 0;
+ *cmd++ = pm4_type0_packet(mmTP0_CHICKEN, 1);
+ ctx->reg_values[1] = gpuaddr(cmd, &drawctxt->gpustate);
+ *cmd++ = 0x00000000;
+
+ // ALU Constants
+ *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
+ *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
+#ifdef DISABLE_SHADOW_WRITES
+ *cmd++ = (0<<24) | (0<<16) | 0; // Disable shadowing
+#else
+ *cmd++ = (1<<24) | (0<<16) | 0;
+#endif
+ *cmd++ = ALU_CONSTANTS;
+
+
+ // Texture Constants
+ *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
+ *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
+#ifdef DISABLE_SHADOW_WRITES
+ *cmd++ = (0<<24) | (1<<16) | 0; // Disable shadowing
+#else
+ *cmd++ = (1<<24) | (1<<16) | 0;
+#endif
+ *cmd++ = TEX_CONSTANTS;
+
+
+ // Boolean Constants
+ *cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + BOOL_CONSTANTS);
+ *cmd++ = (2<<16) | 0;
+
+ // the next BOOL_CONSTANTS dwords are the shadow area for the boolean constants.
+ ctx->bool_shadow = gpuaddr(cmd, &drawctxt->gpustate);
+ cmd += BOOL_CONSTANTS;
+
+
+ // Loop Constants
+ *cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + LOOP_CONSTANTS);
+ *cmd++ = (3<<16) | 0;
+
+ // the next LOOP_CONSTANTS dwords are the shadow area for the loop constants.
+ ctx->loop_shadow = gpuaddr(cmd, &drawctxt->gpustate);
+ cmd += LOOP_CONSTANTS;
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
+
+ ctx->cmd = cmd;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// quad for saving/restoring gmem
+//////////////////////////////////////////////////////////////////////////////
+
+static void set_gmem_copy_quad( gmem_shadow_t* shadow )
+{
+ unsigned int tex_offset[2];
+
+ // set vertex buffer values
+
+ gmem_copy_quad[1] = uint2float( shadow->height + shadow->gmem_offset_y );
+ gmem_copy_quad[3] = uint2float( shadow->width + shadow->gmem_offset_x );
+ gmem_copy_quad[4] = uint2float( shadow->height + shadow->gmem_offset_y );
+ gmem_copy_quad[9] = uint2float( shadow->width + shadow->gmem_offset_x );
+
+ gmem_copy_quad[0] = uint2float( shadow->gmem_offset_x );
+ gmem_copy_quad[6] = uint2float( shadow->gmem_offset_x );
+ gmem_copy_quad[7] = uint2float( shadow->gmem_offset_y );
+ gmem_copy_quad[10] = uint2float( shadow->gmem_offset_y );
+
+ tex_offset[0] = uintdivide( shadow->offset_x, (shadow->offset_x+shadow->width) );
+ tex_offset[1] = uintdivide( shadow->offset_y, (shadow->offset_y+shadow->height) );
+
+ gmem_copy_texcoord[0] = gmem_copy_texcoord[4] = tex_offset[0];
+ gmem_copy_texcoord[5] = gmem_copy_texcoord[7] = tex_offset[1];
+
+ // copy quad data to vertex buffer
+ kos_memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2);
+
+ // copy tex coord data to tex coord buffer
+ kos_memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, TEXCOORD_LEN << 2);
+}
+
+
+static void
+build_quad_vtxbuff(gsl_drawctxt_t *drawctxt, ctx_t *ctx, gmem_shadow_t* shadow)
+{
+ unsigned int *cmd = ctx->cmd;
+
+ // quad vertex buffer location
+ shadow->quad_vertices.hostptr = cmd;
+ shadow->quad_vertices.gpuaddr = gpuaddr(cmd, &drawctxt->gpustate);
+ cmd += QUAD_LEN;
+
+ // tex coord buffer location (in GPU space)
+ shadow->quad_texcoords.hostptr = cmd;
+ shadow->quad_texcoords.gpuaddr = gpuaddr(cmd, &drawctxt->gpustate);
+
+
+ cmd += TEXCOORD_LEN;
+
+ set_gmem_copy_quad(shadow);
+
+
+ ctx->cmd = cmd;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+static void
+build_shader_save_restore_cmds(gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ unsigned int *cmd = ctx->cmd;
+ unsigned int *save, *restore, *fixup;
+#if defined(PM4_IM_STORE)
+ unsigned int *startSizeVtx, *startSizePix, *startSizeShared;
+#endif
+ unsigned int *partition1;
+ unsigned int *shaderBases, *partition2;
+
+#if defined(PM4_IM_STORE)
+ // compute vertex, pixel and shared instruction shadow GPU addresses
+ ctx->shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
+ ctx->shader_pixel = ctx->shader_vertex + SHADER_SHADOW_SIZE;
+ ctx->shader_shared = ctx->shader_pixel + SHADER_SHADOW_SIZE;
+#endif
+
+
+ //-------------------------------------------------------------------
+ // restore shader partitioning and instructions
+ //-------------------------------------------------------------------
+
+ restore = cmd; // start address
+
+ // Invalidate Vertex & Pixel instruction code address and sizes
+ *cmd++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1);
+ *cmd++ = 0x00000300; // 0x100 = Vertex, 0x200 = Pixel
+
+ // Restore previous shader vertex & pixel instruction bases.
+ *cmd++ = pm4_type3_packet(PM4_SET_SHADER_BASES, 1);
+ shaderBases = cmd++; // TBD #5: shader bases (from fixup)
+
+ // write the shader partition information to a scratch register
+ *cmd++ = pm4_type0_packet(mmSQ_INST_STORE_MANAGMENT, 1);
+ partition1 = cmd++; // TBD #4a: partition info (from save)
+
+#if defined(PM4_IM_STORE)
+ // load vertex shader instructions from the shadow.
+ *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
+ *cmd++ = ctx->shader_vertex + 0x0; // 0x0 = Vertex
+ startSizeVtx = cmd++; // TBD #1: start/size (from save)
+
+ // load pixel shader instructions from the shadow.
+ *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
+ *cmd++ = ctx->shader_pixel + 0x1; // 0x1 = Pixel
+ startSizePix = cmd++; // TBD #2: start/size (from save)
+
+ // load shared shader instructions from the shadow.
+ *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
+ *cmd++ = ctx->shader_shared + 0x2; // 0x2 = Shared
+ startSizeShared = cmd++; // TBD #3: start/size (from save)
+#endif
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd);
+
+
+ //-------------------------------------------------------------------
+ // fixup SET_SHADER_BASES data
+ //
+ // since self-modifying PM4 code is being used here, a separate
+ // command buffer is used for this fixup operation, to ensure the
+ // commands are not read by the PM4 engine before the data fields
+ // have been written.
+ //-------------------------------------------------------------------
+
+ fixup = cmd; // start address
+
+ // write the shader partition information to a scratch register
+ *cmd++ = pm4_type0_packet(mmSCRATCH_REG2, 1);
+ partition2 = cmd++; // TBD #4b: partition info (from save)
+
+ // mask off unused bits, then OR with shader instruction memory size
+ *cmd++ = pm4_type3_packet(PM4_REG_RMW, 3);
+ *cmd++ = mmSCRATCH_REG2;
+ *cmd++ = 0x0FFF0FFF; // AND off invalid bits.
+ *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2-5U) << 29); // OR in instruction memory size
+
+ // write the computed value to the SET_SHADER_BASES data field
+ *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmd++ = mmSCRATCH_REG2;
+ *cmd++ = gpuaddr(shaderBases, &drawctxt->gpustate); // TBD #5: shader bases (to restore)
+
+ // create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd);
+
+
+ //-------------------------------------------------------------------
+ // save shader partitioning and instructions
+ //-------------------------------------------------------------------
+
+ save = cmd; // start address
+
+ *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmd++ = 0;
+
+ // Fetch the SQ_INST_STORE_MANAGMENT register value,
+ // Store the value in the data fields of the SET_CONSTANT commands above.
+ *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmd++ = mmSQ_INST_STORE_MANAGMENT;
+ *cmd++ = gpuaddr(partition1, &drawctxt->gpustate); // TBD #4a: partition info (to restore)
+ *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
+ *cmd++ = mmSQ_INST_STORE_MANAGMENT;
+ *cmd++ = gpuaddr(partition2, &drawctxt->gpustate); // TBD #4b: partition info (to fixup)
+
+#if defined(PM4_IM_STORE)
+ // Store the vertex shader instructions
+ *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
+ *cmd++ = ctx->shader_vertex + 0x0; // 0x0 = Vertex
+ *cmd++ = gpuaddr(startSizeVtx, &drawctxt->gpustate); // TBD #1: start/size (to restore)
+
+ // store the pixel shader instructions
+ *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
+ *cmd++ = ctx->shader_pixel + 0x1; // 0x1 = Pixel
+ *cmd++ = gpuaddr(startSizePix, &drawctxt->gpustate); // TBD #2: start/size (to restore)
+
+ // Store the shared shader instructions
+ *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
+ *cmd++ = ctx->shader_shared + 0x2; // 0x2 = Shared
+ *cmd++ = gpuaddr(startSizeShared, &drawctxt->gpustate); // TBD #3: start/size (to restore)
+#endif
+
+ *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
+ *cmd++ = 0;
+
+
+
+ // Create indirect buffer command for above command sequence
+ create_ib1(drawctxt, drawctxt->shader_save, save, cmd);
+
+
+ ctx->cmd = cmd;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// create buffers for saving/restoring registers and constants
+//////////////////////////////////////////////////////////////////////////////
+
+static int
+create_gpustate_shadow(gsl_device_t *device, gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ gsl_flags_t flags;
+
+ flags = (GSL_MEMFLAGS_CONPHYS | GSL_MEMFLAGS_ALIGN8K);
+ KGSL_DEBUG(GSL_DBGFLAGS_DUMPX, flags = (GSL_MEMFLAGS_EMEM | GSL_MEMFLAGS_ALIGN8K));
+
+ // allocate memory to allow HW to save sub-blocks for efficient context save/restore
+ if (kgsl_sharedmem_alloc0(device->id, flags, CONTEXT_SIZE, &drawctxt->gpustate) != GSL_SUCCESS)
+ return GSL_FAILURE;
+
+ drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
+
+ // Blank out h/w register, constant, and command buffer shadows.
+ kgsl_sharedmem_set0(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
+
+ // set-up command and vertex buffer pointers
+ ctx->cmd = ctx->start = (unsigned int *) ((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
+
+ // build indirect command buffers to save & restore regs/constants
+ build_regrestore_cmds(drawctxt, ctx);
+ build_regsave_cmds(drawctxt, ctx);
+
+ build_shader_save_restore_cmds(drawctxt, ctx);
+
+ return GSL_SUCCESS;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Allocate GMEM shadow buffer
+//////////////////////////////////////////////////////////////////////////////
+static int
+allocate_gmem_shadow_buffer(gsl_device_t *device, gsl_drawctxt_t *drawctxt)
+{
+ // allocate memory for GMEM shadow
+ if (kgsl_sharedmem_alloc0(device->id, (GSL_MEMFLAGS_CONPHYS | GSL_MEMFLAGS_ALIGN8K),
+ drawctxt->context_gmem_shadow.size, &drawctxt->context_gmem_shadow.gmemshadow) != GSL_SUCCESS)
+ return GSL_FAILURE;
+
+ // blank out gmem shadow.
+ kgsl_sharedmem_set0(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, drawctxt->context_gmem_shadow.size);
+
+ return GSL_SUCCESS;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// create GMEM save/restore specific stuff
+//////////////////////////////////////////////////////////////////////////////
+
+static int
+create_gmem_shadow(gsl_device_t *device, gsl_drawctxt_t *drawctxt, ctx_t *ctx)
+{
+ unsigned int i;
+ config_gmemsize(&drawctxt->context_gmem_shadow, device->gmemspace.sizebytes);
+ ctx->gmem_base = device->gmemspace.gpu_base;
+
+ if( drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW )
+ {
+ if( allocate_gmem_shadow_buffer(device, drawctxt) != GSL_SUCCESS )
+ return GSL_FAILURE;
+ }
+ else
+ {
+ kos_memset( &drawctxt->context_gmem_shadow.gmemshadow, 0, sizeof( gsl_memdesc_t ) );
+ }
+
+ // build quad vertex buffer
+ build_quad_vtxbuff(drawctxt, ctx, &drawctxt->context_gmem_shadow);
+
+ // build TP0_CHICKEN register restore command buffer
+ ctx->cmd = build_chicken_restore_cmds(drawctxt, ctx);
+
+ // build indirect command buffers to save & restore gmem
+ drawctxt->context_gmem_shadow.gmem_save_commands = ctx->cmd;
+ ctx->cmd = build_gmem2sys_cmds(drawctxt, ctx, &drawctxt->context_gmem_shadow);
+ drawctxt->context_gmem_shadow.gmem_restore_commands = ctx->cmd;
+ ctx->cmd = build_sys2gmem_cmds(drawctxt, ctx, &drawctxt->context_gmem_shadow);
+
+ for( i = 0; i < GSL_MAX_GMEM_SHADOW_BUFFERS; i++ )
+ {
+ // build quad vertex buffer
+ build_quad_vtxbuff(drawctxt, ctx, &drawctxt->user_gmem_shadow[i]);
+
+ // build indirect command buffers to save & restore gmem
+ drawctxt->user_gmem_shadow[i].gmem_save_commands = ctx->cmd;
+ ctx->cmd = build_gmem2sys_cmds(drawctxt, ctx, &drawctxt->user_gmem_shadow[i]);
+
+ drawctxt->user_gmem_shadow[i].gmem_restore_commands = ctx->cmd;
+ ctx->cmd = build_sys2gmem_cmds(drawctxt, ctx, &drawctxt->user_gmem_shadow[i]);
+ }
+
+ return GSL_SUCCESS;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// init draw context
+//////////////////////////////////////////////////////////////////////////////
+
+int
+kgsl_drawctxt_init(gsl_device_t *device)
+{
+ return (GSL_SUCCESS);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// close draw context
+//////////////////////////////////////////////////////////////////////////////
+
+int
+kgsl_drawctxt_close(gsl_device_t *device)
+{
+ return (GSL_SUCCESS);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// create a new drawing context
+//////////////////////////////////////////////////////////////////////////////
+
+int
+kgsl_drawctxt_create(gsl_device_t* device, gsl_context_type_t type, unsigned int *drawctxt_id, gsl_flags_t flags)
+{
+ gsl_drawctxt_t *drawctxt;
+ int index;
+ ctx_t ctx;
+
+ kgsl_device_active(device);
+
+ if (device->drawctxt_count >= GSL_CONTEXT_MAX)
+ {
+ return (GSL_FAILURE);
+ }
+
+ // find a free context slot
+ index = 0;
+ while (index < GSL_CONTEXT_MAX)
+ {
+ if (device->drawctxt[index].flags == CTXT_FLAGS_NOT_IN_USE)
+ break;
+
+ index++;
+ }
+
+ if (index >= GSL_CONTEXT_MAX)
+ {
+ return (GSL_FAILURE);
+ }
+
+ drawctxt = &device->drawctxt[index];
+
+ kos_memset( &drawctxt->context_gmem_shadow, 0, sizeof( gmem_shadow_t ) );
+
+ drawctxt->pid = GSL_CALLER_PROCESSID_GET();
+ drawctxt->flags = CTXT_FLAGS_IN_USE;
+ drawctxt->type = type;
+
+ device->drawctxt_count++;
+
+ // create context shadows, when not running in safe mode
+ if (!(device->flags & GSL_FLAGS_SAFEMODE))
+ {
+ if (create_gpustate_shadow(device, drawctxt, &ctx) != GSL_SUCCESS)
+ {
+ kgsl_drawctxt_destroy(device, index);
+ return (GSL_FAILURE);
+ }
+
+ // Save the shader instruction memory on context switching
+ drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
+
+ if(!(flags & GSL_CONTEXT_NO_GMEM_ALLOC))
+ drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
+
+ // Clear out user defined GMEM shadow buffer structs
+ kos_memset( drawctxt->user_gmem_shadow, 0, sizeof(gmem_shadow_t)*GSL_MAX_GMEM_SHADOW_BUFFERS );
+
+ // create gmem shadow
+ if (create_gmem_shadow(device, drawctxt, &ctx) != GSL_SUCCESS)
+ {
+ kgsl_drawctxt_destroy(device, index);
+ return (GSL_FAILURE);
+ }
+
+
+ KOS_ASSERT(ctx.cmd - ctx.start <= CMD_BUFFER_LEN);
+ }
+
+ *drawctxt_id = index;
+
+ return (GSL_SUCCESS);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// destroy a drawing context
+//////////////////////////////////////////////////////////////////////////////
+
+int
+kgsl_drawctxt_destroy(gsl_device_t* device, unsigned int drawctxt_id)
+{
+ gsl_drawctxt_t *drawctxt;
+
+ drawctxt = &device->drawctxt[drawctxt_id];
+
+ if (drawctxt->flags != CTXT_FLAGS_NOT_IN_USE)
+ {
+ // deactivate context
+ if (device->drawctxt_active == drawctxt)
+ {
+ // no need to save GMEM or shader, the context is being destroyed.
+ drawctxt->flags &= ~(CTXT_FLAGS_GMEM_SAVE | CTXT_FLAGS_SHADER_SAVE);
+
+ kgsl_drawctxt_switch(device, GSL_CONTEXT_NONE, 0);
+ }
+
+ device->ftbl.device_idle(device, GSL_TIMEOUT_DEFAULT);
+
+ // destroy state shadow, if allocated
+ if (drawctxt->flags & CTXT_FLAGS_STATE_SHADOW)
+ kgsl_sharedmem_free0(&drawctxt->gpustate, GSL_CALLER_PROCESSID_GET());
+
+
+ // destroy gmem shadow, if allocated
+ if (drawctxt->context_gmem_shadow.gmemshadow.size > 0)
+ {
+ kgsl_sharedmem_free0(&drawctxt->context_gmem_shadow.gmemshadow, GSL_CALLER_PROCESSID_GET());
+ drawctxt->context_gmem_shadow.gmemshadow.size = 0;
+ }
+
+ drawctxt->flags = CTXT_FLAGS_NOT_IN_USE;
+ drawctxt->pid = 0;
+
+ device->drawctxt_count--;
+ KOS_ASSERT(device->drawctxt_count >= 0);
+ }
+
+ return (GSL_SUCCESS);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Binds a user specified buffer as GMEM shadow area
+//
+// gmem_rect: defines the rectangle that is copied from GMEM. X and Y
+// coordinates need to be multiples of 8 after conversion to 32bpp.
+// X, Y, width, and height need to be at 32-bit boundary to avoid
+// rounding.
+//
+// shadow_x & shadow_y: Position in GMEM shadow buffer where the contents of
+// gmem_rect is copied. Both must be multiples of 8 after
+// conversion to 32bpp. They also need to be at 32-bit
+// boundary to avoid rounding.
+//
+// shadow_buffer: Description of the GMEM shadow buffer. bpp must be
+// 8, 16, 32, 64, or 128. enabled indicates whether the buffer
+// is used (0 or 1); all other buffer parameters are ignored
+// when enabled=0.
+//
+// buffer_id: Two different buffers can be defined. Use buffer IDs 0 and 1.
+//
+//
+//////////////////////////////////////////////////////////////////////////////
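+// Illustrative call (hypothetical values): to shadow the top-left 256x256
+// pixels of a 32bpp surface into the origin of a user buffer, where
+// pixel_ratio = 1 and every coordinate is a multiple of 8 and 32-bit aligned:
+//
+//   gsl_rect_t rect;
+//   rect.x = 0; rect.y = 0; rect.width = 256; rect.height = 256;
+//   // desc is a gsl_buffer_desc_t with enabled = 1, bpp = 32 and
+//   // stride_bytes a multiple of 4
+//   kgsl_drawctxt_bind_gmem_shadow(device_id, drawctxt_id, &rect, 0, 0,
+//                                  &desc, 0);
+//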
+KGSL_API int kgsl_drawctxt_bind_gmem_shadow(gsl_deviceid_t device_id, unsigned int drawctxt_id, const gsl_rect_t* gmem_rect, unsigned int shadow_x, unsigned int shadow_y, const gsl_buffer_desc_t* shadow_buffer, unsigned int buffer_id)
+{
+ gsl_device_t *device;
+ gsl_drawctxt_t *drawctxt;
+ gmem_shadow_t *shadow; // Shadow struct being modified
+ unsigned int i;
+
+ GSL_API_MUTEX_LOCK();
+
+ device = &gsl_driver.device[device_id-1]; // device_id is 1 based
+
+ drawctxt = &device->drawctxt[drawctxt_id];
+
+ shadow = &drawctxt->user_gmem_shadow[buffer_id];
+
+ if( !shadow_buffer->enabled )
+ {
+ // Disable shadow
+ shadow->gmemshadow.size = 0;
+ }
+ else
+ {
+ // Binding to a buffer
+ unsigned int width, height, gmem_x, gmem_y, gmem_width, gmem_height, pixel_ratio;
+
+ KOS_ASSERT(shadow_buffer->stride_bytes%4 == 0);
+
+ // Convert to 32bpp pixel units
+ if( shadow_buffer->bpp <= 32 )
+ {
+ KOS_ASSERT(32%shadow_buffer->bpp==0);
+ pixel_ratio = 32/shadow_buffer->bpp;
+ KOS_ASSERT(gmem_rect->x%pixel_ratio==0); // Needs to be at 32bit boundary
+ gmem_x = gmem_rect->x/pixel_ratio;
+ KOS_ASSERT(gmem_x%8==0); // Needs to be a multiple of 8
+ KOS_ASSERT(gmem_rect->y%pixel_ratio==0); // Needs to be at 32bit boundary
+ gmem_y = gmem_rect->y/pixel_ratio;
+ KOS_ASSERT(gmem_y%8==0); // Needs to be a multiple of 8
+ KOS_ASSERT(gmem_rect->width%pixel_ratio==0); // Needs to be at 32bit boundary
+ gmem_width = gmem_rect->width/pixel_ratio;
+ KOS_ASSERT(gmem_rect->height%pixel_ratio==0); // Needs to be at 32bit boundary
+ gmem_height = gmem_rect->height/pixel_ratio;
+ KOS_ASSERT(shadow_x%pixel_ratio==0); // Needs to be at 32bit boundary
+ shadow_x = shadow_x/pixel_ratio;
+ KOS_ASSERT(shadow_x%8==0); // Needs to be a multiple of 8
+ KOS_ASSERT(shadow_y%pixel_ratio==0); // Needs to be at 32bit boundary
+ shadow_y = shadow_y/pixel_ratio;
+ KOS_ASSERT(shadow_y%8==0); // Needs to be a multiple of 8
+ }
+ else
+ {
+ KOS_ASSERT(shadow_buffer->bpp==64 || shadow_buffer->bpp==128);
+ pixel_ratio = shadow_buffer->bpp/32;
+ gmem_x = gmem_rect->x*pixel_ratio;
+ KOS_ASSERT(gmem_x%8==0); // Needs to be a multiple of 8
+ gmem_y = gmem_rect->y*pixel_ratio;
+ KOS_ASSERT(gmem_y%8==0); // Needs to be a multiple of 8
+ gmem_width = gmem_rect->width*pixel_ratio;
+ gmem_height = gmem_rect->height*pixel_ratio;
+ shadow_x = shadow_x*pixel_ratio;
+ KOS_ASSERT(shadow_x%8==0); // Needs to be a multiple of 8
+ shadow_y = shadow_y*pixel_ratio;
+ KOS_ASSERT(shadow_y%8==0); // Needs to be a multiple of 8
+ }
+
+ KOS_ASSERT( buffer_id >= 0 && buffer_id < GSL_MAX_GMEM_SHADOW_BUFFERS );
+
+ width = gmem_width < drawctxt->context_gmem_shadow.width ? gmem_width : drawctxt->context_gmem_shadow.width;
+ height = gmem_height < drawctxt->context_gmem_shadow.height ? gmem_height : drawctxt->context_gmem_shadow.height;
+
+ drawctxt->user_gmem_shadow[buffer_id].width = width;
+ drawctxt->user_gmem_shadow[buffer_id].height = height;
+ drawctxt->user_gmem_shadow[buffer_id].pitch = shadow_buffer->stride_bytes/4;
+
+ kos_memcpy( &drawctxt->user_gmem_shadow[buffer_id].gmemshadow, &shadow_buffer->data, sizeof( gsl_memdesc_t ) );
+ // Calculate offset
+ drawctxt->user_gmem_shadow[buffer_id].offset = (int)shadow_buffer->stride_bytes/4*((int)shadow_y-(int)gmem_y)+(int)shadow_x-(int)gmem_x;
+
+ drawctxt->user_gmem_shadow[buffer_id].offset_x = shadow_x;
+ drawctxt->user_gmem_shadow[buffer_id].offset_y = shadow_y;
+ drawctxt->user_gmem_shadow[buffer_id].gmem_offset_x = gmem_x;
+ drawctxt->user_gmem_shadow[buffer_id].gmem_offset_y = gmem_y;
+
+ drawctxt->user_gmem_shadow[buffer_id].size = drawctxt->user_gmem_shadow[buffer_id].gmemshadow.size;
+
+ // Modify quad vertices
+ set_gmem_copy_quad(shadow);
+
+ // Modify commands
+ build_gmem2sys_cmds(drawctxt, NULL, shadow);
+ build_sys2gmem_cmds(drawctxt, NULL, shadow);
+
+ // Release context GMEM shadow if found
+ if (drawctxt->context_gmem_shadow.gmemshadow.size > 0)
+ {
+ kgsl_sharedmem_free0(&drawctxt->context_gmem_shadow.gmemshadow, GSL_CALLER_PROCESSID_GET());
+ drawctxt->context_gmem_shadow.gmemshadow.size = 0;
+ }
+ }
+
+ // Enable GMEM shadowing if we have any of the user buffers enabled
+ drawctxt->flags &= ~CTXT_FLAGS_GMEM_SHADOW;
+ for( i = 0; i < GSL_MAX_GMEM_SHADOW_BUFFERS; i++ )
+ {
+ if( drawctxt->user_gmem_shadow[i].gmemshadow.size > 0 )
+ {
+ drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
+ }
+ }
+
+ GSL_API_MUTEX_UNLOCK();
+
+ return (GSL_SUCCESS);
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// switch drawing contexts
+//////////////////////////////////////////////////////////////////////////////
+
+void
+kgsl_drawctxt_switch(gsl_device_t *device, gsl_drawctxt_t *drawctxt, gsl_flags_t flags)
+{
+ gsl_drawctxt_t *active_ctxt = device->drawctxt_active;
+
+ if (drawctxt != GSL_CONTEXT_NONE)
+ {
+ if(0) // flags & GSL_CONTEXT_SAVE_GMEM )
+ {
+ // Set the flag in context so that the save is done when this context is switched out.
+ drawctxt->flags |= CTXT_FLAGS_GMEM_SAVE;
+ }
+ else
+ {
+ // Remove GMEM saving flag from the context
+ drawctxt->flags &= ~CTXT_FLAGS_GMEM_SAVE;
+ }
+ }
+
+ // already current?
+ if (active_ctxt == drawctxt)
+ {
+ return;
+ }
+
+ // save old context, when not running in safe mode
+ if (active_ctxt != GSL_CONTEXT_NONE && !(device->flags & GSL_FLAGS_SAFEMODE))
+ {
+ // save registers and constants.
+ kgsl_ringbuffer_issuecmds(device, 0, active_ctxt->reg_save, 3, active_ctxt->pid);
+
+ if (active_ctxt->flags & CTXT_FLAGS_SHADER_SAVE)
+ {
+ // save shader partitioning and instructions.
+ kgsl_ringbuffer_issuecmds(device, 1, active_ctxt->shader_save, 3, active_ctxt->pid);
+
+ // fixup shader partitioning parameter for SET_SHADER_BASES.
+ kgsl_ringbuffer_issuecmds(device, 0, active_ctxt->shader_fixup, 3, active_ctxt->pid);
+
+ active_ctxt->flags |= CTXT_FLAGS_SHADER_RESTORE;
+ }
+
+ if (active_ctxt->flags & CTXT_FLAGS_GMEM_SHADOW && active_ctxt->flags & CTXT_FLAGS_GMEM_SAVE )
+ {
+ // save gmem. (note: changes shader. shader must already be saved.)
+
+ unsigned int i, numbuffers = 0;
+
+ for( i = 0; i < GSL_MAX_GMEM_SHADOW_BUFFERS; i++ )
+ {
+ if( active_ctxt->user_gmem_shadow[i].gmemshadow.size > 0 )
+ {
+ kgsl_ringbuffer_issuecmds(device, 1, active_ctxt->user_gmem_shadow[i].gmem_save, 3, active_ctxt->pid);
+
+ // Restore TP0_CHICKEN
+ kgsl_ringbuffer_issuecmds(device, 0, active_ctxt->chicken_restore, 3, active_ctxt->pid);
+ numbuffers++;
+ }
+ }
+ if( numbuffers == 0 )
+ {
+ // No user defined buffers -> use context default
+ kgsl_ringbuffer_issuecmds(device, 1, active_ctxt->context_gmem_shadow.gmem_save, 3, active_ctxt->pid);
+ // Restore TP0_CHICKEN
+ kgsl_ringbuffer_issuecmds(device, 0, active_ctxt->chicken_restore, 3, active_ctxt->pid);
+ }
+
+ active_ctxt->flags |= CTXT_FLAGS_GMEM_RESTORE;
+ }
+ }
+
+ device->drawctxt_active = drawctxt;
+
+ // restore new context, when not running in safe mode
+ if (drawctxt != GSL_CONTEXT_NONE && !(device->flags & GSL_FLAGS_SAFEMODE))
+ {
+ KGSL_DEBUG(GSL_DBGFLAGS_DUMPX, KGSL_DEBUG_DUMPX(BB_DUMP_MEMWRITE, drawctxt->gpustate.gpuaddr, (unsigned int)drawctxt->gpustate.hostptr, LCC_SHADOW_SIZE + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE , "kgsl_drawctxt_switch"));
+
+ // restore gmem. (note: changes shader. shader must not already be restored.)
+ if (drawctxt->flags & CTXT_FLAGS_GMEM_RESTORE)
+ {
+ unsigned int i, numbuffers = 0;
+
+ for( i = 0; i < GSL_MAX_GMEM_SHADOW_BUFFERS; i++ )
+ {
+ if( drawctxt->user_gmem_shadow[i].gmemshadow.size > 0 )
+ {
+ kgsl_ringbuffer_issuecmds(device, 1, drawctxt->user_gmem_shadow[i].gmem_restore, 3, drawctxt->pid);
+
+ // Restore TP0_CHICKEN
+ kgsl_ringbuffer_issuecmds(device, 0, drawctxt->chicken_restore, 3, drawctxt->pid);
+ numbuffers++;
+ }
+ }
+ if( numbuffers == 0 )
+ {
+ // No user defined buffers -> use context default
+ kgsl_ringbuffer_issuecmds(device, 1, drawctxt->context_gmem_shadow.gmem_restore, 3, drawctxt->pid);
+ // Restore TP0_CHICKEN
+ kgsl_ringbuffer_issuecmds(device, 0, drawctxt->chicken_restore, 3, drawctxt->pid);
+ }
+
+ drawctxt->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
+ }
+
+ // restore registers and constants.
+ kgsl_ringbuffer_issuecmds(device, 0, drawctxt->reg_restore, 3, drawctxt->pid);
+
+ // restore shader instructions & partitioning.
+ if (drawctxt->flags & CTXT_FLAGS_SHADER_RESTORE)
+ {
+ kgsl_ringbuffer_issuecmds(device, 0, drawctxt->shader_restore, 3, drawctxt->pid);
+ }
+ }
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// destroy all drawing contexts
+//////////////////////////////////////////////////////////////////////////////
+int
+kgsl_drawctxt_destroyall(gsl_device_t *device)
+{
+ int i;
+ gsl_drawctxt_t *drawctxt;
+
+ for (i = 0; i < GSL_CONTEXT_MAX; i++)
+ {
+ drawctxt = &device->drawctxt[i];
+
+ if (drawctxt->flags != CTXT_FLAGS_NOT_IN_USE)
+ {
+ // destroy state shadow, if allocated
+ if (drawctxt->flags & CTXT_FLAGS_STATE_SHADOW)
+ kgsl_sharedmem_free0(&drawctxt->gpustate, GSL_CALLER_PROCESSID_GET());
+
+ // destroy gmem shadow, if allocated
+ if (drawctxt->context_gmem_shadow.gmemshadow.size > 0)
+ {
+ kgsl_sharedmem_free0(&drawctxt->context_gmem_shadow.gmemshadow, GSL_CALLER_PROCESSID_GET());
+ drawctxt->context_gmem_shadow.gmemshadow.size = 0;
+ }
+
+ drawctxt->flags = CTXT_FLAGS_NOT_IN_USE;
+
+ device->drawctxt_count--;
+ KOS_ASSERT(device->drawctxt_count >= 0);
+ }
+ }
+
+ return (GSL_SUCCESS);
+}
+
+#endif