/* KallistiOS 0.6

   ta.c
   (c)2000 Dan Potter
   
*/

static char id[] = "KOS $Id: ta.c,v 1.9 2000/11/12 01:14:59 bard Exp $";

#include <kallisti/stdtypes.h>
#include <kallisti/video.h>
#include <kallisti/ta.h>
#include <kallisti/irq.h>

/*

This module handles access to the Tile Accelerator, also known as the
common access point of the 3D chip of the DC. Currently it supports
only one mode of access, which is the direct-to-TA mode using two
lists (one opaque, one translucent). This support will be improved
and expanded in the future.

This module makes use of the PVR interrupts to handle responsive page flipping
in the rendering process. Make sure that IRQ9 is enabled or a call to
ta_finish_frame() will freeze your program.

Note: This module currently only works if you've set the 640x480x16bit
interlaced video mode. VGA mode, 320x240, etc, will not work! Additionally,
inviting the TA routines into your program will render the normal frame
buffer access defunct unless you know where to access it and set the
start address appropriately.

WARNING: Do not disable interrupts and then call ta_finish_frame(). This
will cause a deadlock condition and, unless you're in a separate thread,
the kernel will halt.

Many thanks to Maiwe for his input, which has clarified a lot of the
buffer setup stuff, and for ta-intro.txt that explains the poly registration
process.

*/

/* Global background data structure; this is used during the rendering process.
   ta_hw_init() fills in the defaults, ta_finish_frame() copies it onto the
   end of the TA's list each frame, and the service ABI exports the pointer. */
static ta_bkg_poly ta_bkg_internal;
ta_bkg_poly *ta_bkg = &ta_bkg_internal;


/* TA buffers structure: keeps track of buffers for each frame.
   ta_state holds two of these, one per page. The values are offsets rather
   than pointers: ta_create_buffers() adds tile_matrix to 0xa5000000 before
   writing, and the others are handed to PVR registers or vid_set_start(). */
typedef struct {
	uint32		vertex, vertex_size;		/* Vertex buffer */
	uint32		poly_buf, poly_buf_size;	/* Poly buffers, size */
	uint32		poly_bufs[5];			/* Poly buffers (of each type), in the order
							   opaque polys, opaque mods, trans polys,
							   trans mods, punch-thru; 0x80000000 when
							   the list is not active */
	uint32		tile_matrix, tile_matrix_size;	/* Tile matrix, size */
	uint32		frame, frame_size;		/* Output frame buffer, size */
} ta_buffers_t;

/* TA status structure: svcmpx friendly =)
   Filled in by ta_allocate_buffers() and read by the register setup and
   render routines. */
typedef struct {
	ta_buffers_t	buffers[2];			/* Frame TA buffer pointers */
	uint32		texture_base;			/* Start of texture RAM */
	uint32		lists;				/* Active lists bitmask */
	uint32		list_mask;			/* Active lists register mask */
	uint32		poly_buf_ind;			/* Individual poly type pb size */
	uint32		poly_buf_size;			/* Poly buffer size per tile, in bytes */
	int		w, h;				/* Screen width, height */
	int		tw, th;				/* Screen tile width, height (w/32, h/32) */
	uint32		tsize_const;			/* Screen tile size constant: (th-1)<<16 | (tw-1) */
	float		zclip;				/* Z clip plane */
	uint32		pclip_left, pclip_right;	/* X clip constants */
	uint32		pclip_top, pclip_bottom;	/* Y clip constants */
	uint32		pclip_x, pclip_y;		/* Combined clip constants: (right<<16 | left),
							   (bottom<<16 | top) */
} ta_state_t;

/* TA lists that you can activate; these are bit flags and may be OR'd
   together for the "lists" argument of ta_allocate_buffers() */
#define TA_LIST_OPAQUE_POLYS	1
#define TA_LIST_TRANS_POLYS	2
#define TA_LIST_OPAQUE_MODS	4
#define TA_LIST_TRANS_MODS	8
#define TA_LIST_PUNCH_THRU	16

/* Polygon buffer sizes; ta_allocate_buffers() multiplies the value by 4
   to get a per-tile size in bytes */
#define TA_POLYBUF_0		0
#define TA_POLYBUF_8		8
#define TA_POLYBUF_16		16
#define TA_POLYBUF_32		32

/* TA state structure */
ta_state_t ta_state;

/* Current page; toggled by ta_finish_frame(). The interrupt handler shows
   this page and renders into the other one. */
int ta_curpage = 0;

/* 3d-specific parameters; these are all about rendering and nothing
   to do with setting up the video, although these do currently assume
   a 640x480x16bit screen. Some stuff in here is still unknown.

   The table is a flat list of (register offset, value) pairs which
   set_regs() writes in order through SETREG, so the order of the entries
   is the order the hardware sees them. Note that the fog colour registers
   (0x80b0/0x80b4) appear twice with the same values. */
static uint32 three_d_parameters[] = {
	0x80a8, 0x15d1c951,	/* M (Unknown magic value) */
	0x80a0, 0x00000020,	/* M */
	0x8008, 0x00000000,	/* TA out of reset */
	0x8048, 0x00000009,	/* alpha config */
	0x8068, 0x02800000,	/* pixel clipping x */
	0x806c, 0x01e00000,	/* pixel clipping y */
	0x8110, 0x00093f39,	/* M */
	0x8098, 0x00800408,	/* M */
	0x804c, 0x000000a0,	/* display align (640*2)/8 */
	0x8078, 0x3f800000,	/* polygon culling (1.0f) */
	0x8084, 0x00000000,	/* M */
	0x8030, 0x00000101,	/* M */
	0x80b0, 0x007f7f7f,	/* Fog table color */
	0x80b4, 0x007f7f7f,	/* Fog vertex color */
	0x80c0, 0x00000000,	/* color clamp min */
	0x80bc, 0xffffffff,	/* color clamp max */
	0x8080, 0x00000007,	/* M */
	0x8074, 0x00000001,	/* cheap shadow */
	0x807c, 0x0027df77,	/* M */
	0x8008, 0x00000001,	/* TA reset */
	0x8008, 0x00000000,	/* TA out of reset */
	0x80e4, 0x00000000,	/* stride width */
	0x6884, 0x00000000,	/* Disable all interrupt events */
	0x6930, 0x00000000,
	0x6938, 0x00000000,
	0x6900, 0xffffffff,	/* Clear all pending int events */
	0x6908, 0xffffffff,
	0x6930, 0x00000088,	/* Re-enable some events */
	0x6938, 0x00000000,
	0x80b8, 0x0000ff07,	/* fog density */
	0x80b4, 0x007f7f7f,	/* fog vertex color */
	0x80b0, 0x007f7f7f	/* fog table color */
};

/* We wait for vertical blank (to make it nicer looking) and then
   set these screen parameters. Same layout as three_d_parameters:
   (register offset, value) pairs written in order by set_regs(). */
uint32 scrn_parameters[] = {
	0x80cc, 0x00150104,	/* M */
	0x80d4, 0x007e0345,	/* horizontal border */
	0x80e0, 0x07d6c63f,	/* sync control */
	0x80c8, 0x03450000,	/* set to same as border H in 80d4 */
	0x8068, 0x027f0000,	/* (X resolution - 1) << 16 */
	0x806c, 0x01df0000,	/* (Y resolution - 1) << 16 */
	0x804c, 0x000000a0,	/* display align */
	0x8118, 0x00008040,	/* M */
	0x80f4, 0x00000401,	/* anti-aliasing */
	0x8048, 0x00000009,	/* alpha config */
	0x7814, 0x00000000,	/* More interrupt control stuff (so it seems) */
	0x7834, 0x00000000,
	0x7854, 0x00000000,
	0x7874, 0x00000000,
	0x78bc, 0x4659404f,
	0x8040, 0x00000000	/* border color */
};

/* Int handler stuff.
   reg6900/reg6908 hold the values the interrupt handler last read from
   registers 0x6900/0x6908. list_complete is the handshake flag: set to 1
   by ta_finish_frame(), cleared by int_handler() once it has started the
   render. */
static vuint32 reg6900, reg6908;
static vuint32 list_complete;
static void int_handler(uint32 type, uint32 code);

/* Accessors for PVR registers, addressed by byte offset from 0xa05f0000.
   Change the #if to 1 to route every access through a logging function
   so register traffic can be traced while debugging. */
#if 0
void SETREG(uint32 n, uint32 v) {
	printf("%08lx <- %08lx\r\n", n, v);
	*((vuint32*)(0xa05f0000+(n))) = (v);
}

uint32 GETREG(uint32 n) {
	uint32 value = *((vuint32*)(0xa05f0000+(n)));
	printf("%08lx -> %08lx\r\n", n, value);
	return value;
}
#else
/* The whole expansion is parenthesized so each macro behaves as a single
   expression no matter what operators surround the call site. */
#define SETREG(n, v) (*((vuint32*)(0xa05f0000+(n))) = (v))
#define GETREG(n) (*((vuint32*)(0xa05f0000+(n))))
#endif

/* Allocate TA buffers given a set of parameters

This is a little bit confusing so I'll clarify here:
- The registration process takes place into the buffer which is currently
  being displayed to the user. This is ok since registration doesn't affect
  the output display.
- The rendering process takes place into the buffer which is not being
  displayed. This is also ok since the user can't see this taking place.

So the "frame" that goes with a given set of buffers is not actually the frame
where that data will be rendered, it's the view frame that goes along with
registration into that buffer.

Parameters:
  w, h              screen size in pixels
  lists             OR of TA_LIST_* flags for the lists to activate
  vertex_list_size  size of each vertex buffer, in bytes
  poly_buf_size     one of the TA_POLYBUF_* constants

Everything is stored into the global ta_state. Note that while the "#if 1"
section below is enabled, the per-page buffer addresses and texture_base
computed by the loop are overwritten with fixed values.

*/
#define TA_ALIGN 128
#define TA_ALIGN_MASK (TA_ALIGN-1)
static void ta_allocate_buffers(int w, int h, uint32 lists,
		uint32 vertex_list_size, uint32 poly_buf_size) {
	int		i;
	uint32		outaddr = 0, polybuf, sconst;
	ta_buffers_t	*buf;

	/* Set screen sizes */
	ta_state.w = w; ta_state.h = h;
	ta_state.tw = w/32; ta_state.th = h/32;
	ta_state.tsize_const = ((ta_state.th-1)<<16) | ((ta_state.tw-1)<<0);
	ta_state.zclip = 0.2f;
	ta_state.pclip_left = 0; ta_state.pclip_right = 640-1;
	ta_state.pclip_top = 0; ta_state.pclip_bottom = 480-1;
	ta_state.pclip_x = (ta_state.pclip_right << 16) | (ta_state.pclip_left);
	ta_state.pclip_y = (ta_state.pclip_bottom << 16) | (ta_state.pclip_top);
	
	/* Look at active lists and figure out how much to allocate
	   for each poly type */
	ta_state.lists = lists;
	ta_state.poly_buf_size = poly_buf_size * 4;		/* in bytes */
	ta_state.poly_buf_ind = ta_state.poly_buf_size * ta_state.tw * ta_state.th;
	polybuf = 0;
	ta_state.list_mask = 1<<20;
	switch(poly_buf_size) {
		case TA_POLYBUF_0: sconst = 0; break;
		case TA_POLYBUF_8: sconst = 1; break;
		case TA_POLYBUF_16: sconst = 2; break;
		case TA_POLYBUF_32: sconst = 3; break;
		/* An unrecognized size must not leave sconst indeterminate;
		   fall back to the same code as TA_POLYBUF_0. */
		default: sconst = 0; break;
	}
	if (lists & TA_LIST_OPAQUE_POLYS) {
		polybuf += ta_state.poly_buf_ind;
		ta_state.list_mask |= sconst << 0;
	}
	if (lists & TA_LIST_TRANS_POLYS) {
		polybuf += ta_state.poly_buf_ind;
		ta_state.list_mask |= sconst << 8;
	}
	if (lists & TA_LIST_OPAQUE_MODS) {
		polybuf += ta_state.poly_buf_ind;
		ta_state.list_mask |= sconst << 4;
	}
	if (lists & TA_LIST_TRANS_MODS) {
		polybuf += ta_state.poly_buf_ind;
		ta_state.list_mask |= sconst << 12;
	}
	if (lists & TA_LIST_PUNCH_THRU) {
		polybuf += ta_state.poly_buf_ind;
		ta_state.list_mask |= sconst << 16;
	}
	

	/* Initialize each buffer set */
	for (i=0; i<2; i++) {
		uint32 polybuf_alloc;
		
		buf = ta_state.buffers + i;
	
		/* Vertex buffer */
		buf->vertex = outaddr;
		buf->vertex_size = vertex_list_size;
		outaddr += buf->vertex_size;
	
		/* N-byte align */
		if (outaddr & TA_ALIGN_MASK) outaddr += TA_ALIGN - (outaddr & TA_ALIGN_MASK);

		/* Polygon buffers: poly_buf ends up "polybuf" bytes before the
		   end of the region, and the per-list buffers are carved out
		   of that tail in a fixed order. */
		buf->poly_buf_size = 0x50580 + polybuf;
		outaddr += buf->poly_buf_size;
		polybuf_alloc = buf->poly_buf = outaddr - polybuf;
		
		if (lists & TA_LIST_OPAQUE_POLYS) {
			buf->poly_bufs[0] = polybuf_alloc;
			polybuf_alloc += ta_state.poly_buf_ind;
		}
		else
			buf->poly_bufs[0] = 0x80000000;

		if (lists & TA_LIST_OPAQUE_MODS) {
			buf->poly_bufs[1] = polybuf_alloc;
			polybuf_alloc += ta_state.poly_buf_ind;
		}
		else
			buf->poly_bufs[1] = 0x80000000;

		if (lists & TA_LIST_TRANS_POLYS) {
			buf->poly_bufs[2] = polybuf_alloc;
			polybuf_alloc += ta_state.poly_buf_ind;
		}
		else
			buf->poly_bufs[2] = 0x80000000;

		if (lists & TA_LIST_TRANS_MODS) {
			buf->poly_bufs[3] = polybuf_alloc;
			polybuf_alloc += ta_state.poly_buf_ind;
		}
		else
			buf->poly_bufs[3] = 0x80000000;

		if (lists & TA_LIST_PUNCH_THRU) {
			buf->poly_bufs[4] = polybuf_alloc;
			polybuf_alloc += ta_state.poly_buf_ind;
		}
		else
			buf->poly_bufs[4] = 0x80000000;
		/* NOTE(review): outaddr was already advanced by poly_buf_size
		   above, so this second advance looks like it reserves the
		   region twice -- confirm before relying on the computed
		   layout (it is currently overridden below). */
		outaddr += buf->poly_buf_size;
	
		/* N-byte align */
		if (outaddr & TA_ALIGN_MASK) outaddr += TA_ALIGN - (outaddr & TA_ALIGN_MASK);

		/* TA Matrix: 18 header words plus 6 words per tile */
		buf->tile_matrix = outaddr;
		buf->tile_matrix_size = (18+6*ta_state.tw*ta_state.th)*4;
		outaddr += buf->tile_matrix_size;
	
		/* N-byte align */
		if (outaddr & TA_ALIGN_MASK) outaddr += TA_ALIGN - (outaddr & TA_ALIGN_MASK);
		
		/* Output buffer (2 bytes per pixel) */
		buf->frame = outaddr;
		buf->frame_size = w*h*2;
		outaddr += buf->frame_size;
		
		/* N-byte align */
		if (outaddr & TA_ALIGN_MASK) outaddr += TA_ALIGN - (outaddr & TA_ALIGN_MASK);
	}

	/* Texture ram is whatever is left */
	ta_state.texture_base = outaddr;
	/* printf("Texture RAM begins at %08lx\r\n", outaddr); */
	
#if 1
	/* REMOVE THIS ONCE THE NEW CODE IS FULLY WORKING */
	/* Just patch in values from old TA module for now */
	buf = ta_state.buffers + 0;
	buf->vertex = 0;
	buf->vertex_size = 0x10e740;
	buf->poly_buf = 0x15ed80;
	buf->poly_buf_size = 0x50580;
	buf->poly_bufs[0] = buf->poly_buf;
	buf->poly_bufs[2] = buf->poly_buf + 0x4b00;
	buf->poly_bufs[1] = buf->poly_bufs[3] = buf->poly_bufs[4] = 0x80000000;
	buf->tile_matrix = 0x168380;
	buf->tile_matrix_size = 0x97c38;
	buf->frame = 0x600000;
	buf->frame_size = ta_state.w*ta_state.h*2;
	
	buf = ta_state.buffers + 1;
	buf->vertex = 0x400000;
	buf->vertex_size = 0x10e740;
	buf->poly_buf = 0x55ed80;
	buf->poly_buf_size = 0x50580;
	buf->poly_bufs[0] = buf->poly_buf;
	buf->poly_bufs[2] = buf->poly_buf + 0x4b00;
	buf->poly_bufs[1] = buf->poly_bufs[3] = buf->poly_bufs[4] = 0x80000000;
	buf->tile_matrix = 0x568380;
	buf->tile_matrix_size = 0x97c38;
	buf->frame = 0x200000;
	buf->frame_size = ta_state.w*ta_state.h*2;
	
	ta_state.texture_base = 0x700000;
#endif

	/* printf("Init TA buffers:\r\n");
	for (i=0; i<2; i++) {		
		buf = ta_state.buffers+i;
		printf("  vertex/vertex_size: %08lx/%08lx\r\n", buf->vertex, buf->vertex_size);
		printf("  poly_buf/poly_buf_size: %08lx/%08lx\r\n", buf->poly_buf, buf->poly_buf_size);
		printf("  poly_bufs: %08lx %08lx %08lx %08lx %08lx\r\n",
			buf->poly_bufs[0],buf->poly_bufs[1],buf->poly_bufs[2],buf->poly_bufs[3],buf->poly_bufs[4]);
		printf("  tile_matrix/tile_matrix_size: %08lx/%08lx\r\n", buf->tile_matrix, buf->tile_matrix_size);
		printf("  frame/frame_size: %08lx/%08lx\r\n", buf->frame, buf->frame_size);
	} */
	
	/* printf("lists/list_mask %08lx/%08lx\r\n", ta_state.lists, ta_state.list_mask);
	printf("poly_buf_ind/poly_buf_size %08lx/%08lx\r\n", ta_state.poly_buf_ind, ta_state.poly_buf_size);
	printf("w/h = %d/%d, tw/th = %d/%d\r\n", ta_state.w, ta_state.h,
		ta_state.tw, ta_state.th);
	printf("zclip %08lx\r\n", *((uint32*)&ta_state.zclip));
	printf("pclip_left/right %08lx/%08lx\r\n", ta_state.pclip_left, ta_state.pclip_right);
	printf("pclip_top/bottom %08lx/%08lx\r\n", ta_state.pclip_top, ta_state.pclip_bottom); */
}


/* Fill the fog table registers (offsets 0x8200 up to, but not including,
   0x8400) with a descending ramp that starts at 0xfffe and drops by 0x101
   per entry. We don't use fog right now but it's part of the proper
   setup. */
static void ta_fog_init() {
	uint32	reg = 0x8200;
	uint32	entry = 0xfffe;

	while (reg < 0x8400) {
		SETREG(reg, entry);
		entry -= 0x101;
		reg += 4;
	}
}

/* Fill Tile Matrix buffers. This function takes a base address and sets up
   the TA rendering structures there. Each tile of the screen (32x32) receives
   a small buffer space. */
static void ta_create_buffers(int which) {
	int		x, y;
	uint32		*vr = (uint32*)0xa5000000;
	ta_buffers_t	*buf = ta_state.buffers + which;
	uint32		pbs = ta_state.poly_buf_size;
	uint32		strbase, bufbase;

	strbase = buf->tile_matrix;
	bufbase = buf->poly_buf;
	
	/* Header of zeros */
	vr += buf->tile_matrix/4;
	for (x=0; x<0x48; x+=4)
		*vr++ = 0;
		
	/* Initial init tile */
	vr[0] = 0x10000000;
	vr[1] = 0x80000000;
	vr[2] = 0x80000000;
	vr[3] = 0x80000000;
	vr[4] = 0x80000000;
	vr[5] = 0x80000000;
	vr += 6;
	
	/* Now the main tile matrix */
	printf("Using poly buffers %08lx/%08lx/%08lx/%08lx/%08lx\r\n",
		buf->poly_bufs[0],buf->poly_bufs[1],buf->poly_bufs[2],buf->poly_bufs[3],buf->poly_bufs[4]);
	for (x=0; x<ta_state.tw; x++) {
		for (y=0; y<ta_state.th; y++) {
			/*printf("%d,%d -> %08lx/%08lx\r\n", x, y,
				buf->poly_bufs[0]+pbs*ta_state.tw*y+pbs*x,
				buf->poly_bufs[2]+pbs*ta_state.tw*y+pbs*x);*/

			/* Control word */
			vr[0] = (y << 8) | (x << 2);

			/* Opaque poly buffer */
			vr[1] = buf->poly_bufs[0] + pbs*ta_state.tw*y + pbs*x;
			
			/* Opaque volume mod buffer */
			vr[2] = buf->poly_bufs[1] + pbs*ta_state.tw*y + pbs*x;
			
			/* Translucent poly buffer */
			vr[3] = buf->poly_bufs[2] + pbs*ta_state.tw*y + pbs*x;
			
			/* Translucent volume mod buffer */
			vr[4] = buf->poly_bufs[3] + pbs*ta_state.tw*y + pbs*x;
			
			/* Punch-thru poly buffer */
			vr[5] = buf->poly_bufs[4] + pbs*ta_state.tw*y + pbs*x;
			vr += 6;
		}
	}
	vr[-6] |= 1<<31;

	/* Must skip over zeroed header for actual usage */
	buf->tile_matrix += 0x48;
}

/* Take a series of register/value pairs and set the values.

   values  array laid out as offset0, value0, offset1, value1, ...
   cnt     number of array ELEMENTS (not pairs)

   Pairs are written in array order. If cnt is odd the trailing unpaired
   element is ignored instead of reading past the end of the array. */
static void set_regs(uint32 *values, uint32 cnt) {
	uint32 i;

	for (i=0; i+1<cnt; i+=2)
		SETREG(values[i], values[i+1]);
}

/* Point the TA's registration at buffer set "which". Once this returns,
   all registration and setup operations reference that buffer set. The
   register writes below must stay in this order. */
static void ta_set_reg_target(int which) {
	ta_buffers_t	*b = &ta_state.buffers[which];

	/* Reset registration mode */
	SETREG(0x8008, 1);
	SETREG(0x8008, 0);

	/* Set buffer pointers */
	SETREG(0x8124, b->poly_buf);
	SETREG(0x812c, b->poly_buf - b->poly_buf_size);
	SETREG(0x8128, b->vertex);
	SETREG(0x8130, b->vertex + b->vertex_size);
	SETREG(0x8164, b->poly_buf);

	/* Tile count: (H/32-1) << 16 | (W/32-1) */
	SETREG(0x813c, ta_state.tsize_const);

	/* List enables */
	SETREG(0x8140, ta_state.list_mask);

	/* Confirm settings, then read the register back */
	SETREG(0x8144, 0x80000000);
	(void)GETREG(0x8144);
}

/* Select a target view buffer; after this completes, the user will be looking
   at the frame buffer of the requested frame. */
static void ta_set_view_target(int which) {
	vid_set_start(ta_state.buffers[which].frame);
}

/* Begin the rendering process from the given registration source,
   into the given destination frame buffer. "which" selects the buffer set
   whose tile matrix, vertex buffer and frame are programmed; the final
   register write kicks off the render. */
static void ta_render_target(int which) {
	ta_buffers_t *buf = ta_state.buffers + which;
	/* The Z clip value has to be written to the register as its raw bit
	   pattern. A union is used for that instead of casting the float's
	   address to uint32*, which violates C's aliasing rules. */
	union { float f; uint32 u; } zclip_bits;

	/* Calculate background value for below */
	/* Small side note: during setup, the value is originally
	   0x01203000... I'm thinking that the upper word signifies
	   the length of the background plane list in dwords
	   shifted up by 4. */
	uint32 taend = 0x01000000 | ((GETREG(0x8138) - buf->vertex) << 1);

	zclip_bits.f = ta_state.zclip;
	
	/* Finish up rendering the current frame (into the other buffer) */
	SETREG(0x802c, buf->tile_matrix);
	SETREG(0x8020, buf->vertex);
	SETREG(0x8060, buf->frame);
	SETREG(0x808c, taend);			/* Bkg plane location */
	SETREG(0x8088, zclip_bits.u);
	SETREG(0x8068, ta_state.pclip_x);
	SETREG(0x806c, ta_state.pclip_y);
	SETREG(0x804c, (ta_state.w*2)/8);
	SETREG(0x8048, 0x00000009);		/* Alpha mode */
	SETREG(0x8014, 0xffffffff);		/* Start render */
}

/* Prepare the TA for page flipped 3D.

   Resets the TA, lays out the VRAM buffers, blanks the display while the
   register tables are loaded, builds the tile matrices for both buffer
   sets, and finally unblanks with buffer set 1 as both the view and the
   registration target. The steps are order-dependent. */
static void ta_hdwr_init() {
	/* Fully reset TA */
	SETREG(0x8008, 0xffffffff);
	SETREG(0x8008, 0);

	/* Allocate VRAM space for all PVR structures */
	ta_allocate_buffers(640, 480, TA_LIST_OPAQUE_POLYS | TA_LIST_TRANS_POLYS,
		512*1024, TA_POLYBUF_16);

	/* Blank screen and reset display enable */
	SETREG(0x80e8, GETREG(0x80e8) | 8);	/* Blank */
	SETREG(0x8044, GETREG(0x8044) & ~1);	/* Display disable */

	/* Start with a non-primed interrupt */
	list_complete = 0;

	/* Hook the PVR interrupt */
	irq_set_handler(EXC_IRQ9, int_handler);

	/* Clear out video memory */
	vid_empty();

	/* Setup basic 3D parameters. The element count is derived from the
	   element size rather than a hard-coded 4. */
	set_regs(three_d_parameters,
		sizeof(three_d_parameters)/sizeof(three_d_parameters[0]));
	ta_fog_init();

	/* Set screen mode parameters */
	vid_waitvbl();
	set_regs(scrn_parameters,
		sizeof(scrn_parameters)/sizeof(scrn_parameters[0]));

	/* Point at the second set of buffer structures, 
	   and build said structures. */
	ta_set_reg_target(1);
	ta_create_buffers(1);

	/* Now setup the first frame */
	ta_set_reg_target(0);
	ta_create_buffers(0);

	/* Point back at the second output buffer */
	ta_set_view_target(1);
	ta_set_reg_target(1);

	/* Set starting render output addresses */
	SETREG(0x8060, ta_state.buffers[1].frame);	/* render output address */
	SETREG(0x8064, ta_state.buffers[0].frame);	/* ? */
	
	/* Unblank screen and set display enable */
	SETREG(0x80e8, GETREG(0x80e8) & ~8);	/* Unblank */
	SETREG(0x8044, GETREG(0x8044) | 1);	/* Display enable */

	/* Set current page */
	ta_curpage = 0;
}

/* Turn off TA -- including turning off pesky interrupts.
   The interrupt enables are zeroed before the handler is unhooked, then
   the display is cleared and its start address set back to offset 0. */
static void ta_hdwr_shutdown() {
	/* Disable all PVR interrupts */
	SETREG(0x6930, 0);
	SETREG(0x6938, 0);
	irq_set_handler(EXC_IRQ9, NULL);
	
	/* Clear out the main display buffer and switch back */
	vid_clear(0,0,0);
	vid_set_start(0x00000000);
}

/* Word-wise copy: moves bytes/4 32-bit words from src to dest. Any
   remainder of bytes that does not fill a whole word is not copied, and
   a zero or negative count copies nothing. */
static void copy4(uint32 *dest, uint32 *src, int bytes) {
	int n;
	int words = bytes / 4;

	for (n = 0; n < words; n++)
		dest[n] = src[n];
}

/* Send a store queue full of data to the TA */
void ta_send_queue(void *sql, int size) {
	vuint32 *regs = (uint32*)0xff000038;

	/* Set store queue destination == tile accelerator */
	regs[0] = regs[1] = 0x10;

	/* Post the first queue */
	copy4((uint32*)0xe0000000, (uint32*)sql, size);
	asm("mov	#0xe0,r0");
	asm("shll16	r0");
	asm("shll8	r0");
	asm("pref	@r0");

	/* If there was a second queue... */
	if (size == 64) {
		asm("mov	#0xe0,r0");
		asm("shll16	r0");
		asm("shll8	r0");
		asm("or		#0x20,r0");
		asm("pref	@r0");
	}
}

/* Begin the rendering process for one frame: writes all ones to the
   register at 0xa05f6900 to clear every pending event. */
void ta_begin_render() {
	*((vuint32*)0xa05f6900) = 0xffffffff;
}

/* Commit a polygon header to the TA. Exactly 32 bytes are read from
   polyhdr and sent through ta_send_queue(). */
void ta_commit_poly_hdr(void *polyhdr) {
	ta_send_queue(polyhdr, 32);
}

/* Commit a vertex to the TA. size is the vertex length in bytes and is
   passed straight to ta_send_queue(), which only flushes a second
   32-byte queue when size is exactly 64. */
void ta_commit_vertex(void *vertex, int size) {
	ta_send_queue(vertex, size);
}

/* Commit an end-of-list to the TA: a 32-byte block of zeros. */
void ta_commit_eol() {
	uint32	eol[8];
	int	i;

	for (i=0; i<8; i++)
		eol[i] = 0;

	ta_send_queue(eol, 32);
}

/* Finish rendering a frame; this assumes you have written
   a completed display list to the TA. It sets everything up and
   waits for the next vertical blank period to switch buffers.

   Sequence:
   1. Busy-wait (up to 100 jiffies) for bit 0x80 in reg6900, the copy of
      register 0x6900 that int_handler() maintains.
   2. Append the background plane data to the end of the TA's list.
   3. Set list_complete and busy-wait (up to 100 jiffies) for
      int_handler() to clear it again.
   4. Retarget registration at the current page and flip ta_curpage.

   Both waits depend on int_handler() running, hence the interrupt
   requirement described at the top of the file. */
void ta_finish_frame() {
	int		i, ticks;
	uint32		taend;
	vuint32	*vrl = (vuint32 *)0xa5000000;
	uint32		*bkgdata = (uint32*)ta_bkg;

	/* Wait for TA to finish munching data */
	/* NOTE(review): reg6900 is overwritten on every interrupt, so the
	   0x80 bit looks like it could be lost if another interrupt arrives
	   before this loop samples it -- confirm. */
	ticks = jiffies + 100;
	while (!(reg6900 & 0x80) && (jiffies < ticks))
		;
	/* NOTE(review): jiffies is re-read here, so a wait that succeeded
	   right at the deadline may still be reported as a timeout. */
	if (jiffies >= ticks)
		printf("Timeout waiting for TA-complete\r\n");

	/* Throw the background data on the end of the TA's list */
	/* Copies 0x40 bytes (16 words) starting at ta_bkg.
	   NOTE(review): verify that ta_bkg_poly is at least 0x40 bytes;
	   also note that the word at offset 0x40 is never written, only
	   the one at 0x44. */
	taend = GETREG(0x8138);
	for (i=0; i<0x40; i+=4)
		vrl[(i+taend)/4] = bkgdata[i/4];
	vrl[(0x44+taend)/4] = 0;	/* not sure if this is required */
	
	/* Mark ready for render */
	list_complete = 1;
	
	/* Wait for render-done signal from interrupt */
	/*printf("MAIN: Waiting for list_complete == 0\r\n");*/
	ticks = jiffies + 100;
	while (list_complete != 0 && (jiffies < ticks))
		;
	if (jiffies >= ticks)
		printf("Timeout waiting for list-complete\r\n");

	/* Switch registration buffers */
	ta_set_reg_target(ta_curpage);

	/* Swap out pages */
	ta_curpage ^= 1;
}

/* Fill in a polygon header for an untextured (colour only) polygon.
   "translucent" selects between the two sets of flag values; everything
   else in the header is the same for both. This is pretty incomplete
   right now but it's better than having to do it by hand. */
void ta_poly_hdr_col(poly_hdr_t *target, int translucent) {
	/* Only flags1 and flags3 differ between the two variants */
	if (translucent) {
		target->flags1 = 0x82840012;
		target->flags3 = 0x949004c0;
	} else {
		target->flags1 = 0x80870012;
		target->flags3 = 0x20800440;
	}

	target->flags2 = 0x90800000;
	target->flags4 = 0x00000000;
	target->dummy1 = target->dummy2
		= target->dummy3 = target->dummy4 = 0xffffffff;
}

/* Build a polygon header for a textured polygon.

   target         header to fill in
   translucent    non-zero selects the translucent flag values
   textureformat  texture format code, placed in bits 26 and up of flags4;
                  TA_NO_TEXTURE falls back to ta_poly_hdr_col()
   tw, th         texture width/height in texels; 8, 16, ... 1024 are
                  converted to the codes 0..7. Any other value is used
                  unconverted (NOTE(review): probably not intended).
   textureaddr    texture offset relative to ta_state.texture_base
   filtering      non-zero sets the 0x2000 bit in flags3

   The dummy words are set to 0xffffffff exactly as ta_poly_hdr_col()
   does, so that no uninitialized caller memory ends up in the 32 bytes
   that ta_commit_poly_hdr() sends. */
void ta_poly_hdr_txr(poly_hdr_t *target, int translucent,
		int textureformat, int tw, int th, uint32 textureaddr,
		int filtering) {
	int i, ts = 8, n = 3;
	uint32 fmtbits;

	/* Take into account texture base */
	textureaddr += ta_state.texture_base;
	
	if (textureformat == TA_NO_TEXTURE) {
		ta_poly_hdr_col(target, translucent);
		return;
	}
	
	/* Convert sizes to size codes; bit 0 of n means "width still
	   unconverted", bit 1 the same for height */
	for (i=0; i<8 && n; i++) {
		if ((n&1) && tw == ts) {
			tw = i;
			n &= ~1;
		}
		if ((n&2) && th == ts) {
			th = i;
			n &= ~2;
		}
		ts <<= 1;
	}

	/* Shift as unsigned: a signed left shift that overflows is
	   undefined behaviour */
	fmtbits = (uint32)textureformat << 26;
	
	if (!translucent) {
		target->flags1 = 0x8084001a;
		target->flags2 = 0x90800000;
		target->flags3 = 0x20800440 | (tw << 3) | th;
		if (filtering)
			target->flags3 |= 0x2000;
		target->flags4 = fmtbits | (textureaddr >> 3);
	} else {
		target->flags1 = 0x8284001a;
		target->flags2 = 0x92800000;
		target->flags3 = 0x949004c0 | (tw << 3) | th;
		if (filtering)
			target->flags3 |= 0x2000;
		target->flags4 = fmtbits | (textureaddr >> 3);
	}

	target->dummy1 = target->dummy2
		= target->dummy3 = target->dummy4 = 0xffffffff;
}

/* Load texture data into the PVR ram.

   dest  texture offset, resolved through ta_texture_map()
   src   source data
   size  length in bytes; the copy is done in 32-bit words and a partial
         trailing word is rounded up to a full one */
void ta_load_texture(uint32 dest, void *src, int size) {
	uint32 *out = ta_texture_map(dest);
	uint32 *in = (uint32*)src;
	int words = size / 4;

	if (size % 4)
		words++;

	for (; words > 0; words--)
		*out++ = *in++;
}

/* Return a pointer to write to the texture ram directly.
   The result is 0xa4000000 + loc + ta_state.texture_base, i.e. loc is an
   offset relative to the start of texture RAM. */
/* Danger, DANGER WILL ROBINSON: Compiling this with -O2 makes
   it "optimize out" the addition! Unless we take special steps.. 
   Bug in GCC? */
/* NOTE(review): the intermediate "final" variable is presumably the
   "special step" mentioned above -- do not fold it into the return
   statement without checking the generated code. */
void *ta_texture_map(uint32 loc) {
	uint32 final = 0xa4000000 + loc + ta_state.texture_base;
	return (void *)final;
}



/* PVR interrupt handler; the way things are setup, we're gonna get
   one of these for each full vertical refresh and at the completion
   of TA data acceptance. The timing here is pretty critical. We need
   to flip pages during a vertical blank, and then signal to the program
   that it's ok to start playing with TA registers again.

   type and code are not used. The handler snapshots registers 0x6900 and
   0x6908 into reg6900/reg6908 and writes the same values back to clear
   the pending events. When ta_finish_frame() has set list_complete and
   the 0x08 bit is set in the snapshot, it flips the displayed page,
   starts the render and clears list_complete. */
static void int_handler(uint32 type, uint32 code) {
	vuint32 *regs = (vuint32*)0xa05f0000;

	/* Get events and clear all pending */
	/* NOTE(review): each interrupt replaces the previous snapshot, so
	   bits from an earlier interrupt are gone by the time a later one
	   has been handled. */
	reg6900 = regs[0x6900/4];
	reg6908 = regs[0x6908/4];
	regs[0x6900/4] = reg6900;
	regs[0x6908/4] = reg6908;

	/* Vertical blank is happening, and a list is complete */
	if ((list_complete != 0) && (reg6900 & 0x08)) {
		/* Switch view address to the "good" buffer */
		ta_set_view_target(ta_curpage);

		/* Finish up rendering the current frame (into the other buffer) */
		ta_render_target(ta_curpage^1);
		
		/* Mark render completion */
		list_complete = 0;
	}
}


/* Do TA hardware init: load the default background plane into *ta_bkg
   (three vertices at depth 0.2 with colour 0xff000000) and then bring up
   the 3D hardware. */
void ta_hw_init() {
	/* Setup a default hardware background plane */
	ta_bkg->flags1 = 0x90800000;
	ta_bkg->flags2 = 0x20800440;
	ta_bkg->dummy = 0;
	ta_bkg->x1 = 0.0f;
	ta_bkg->y1 = 480.0f;
	ta_bkg->z1 = 0.2f;
	ta_bkg->argb1 = 0xff000000;
	ta_bkg->x2 = 0.0f;
	ta_bkg->y2 = 0.0f;
	ta_bkg->z2 = 0.2f;	/* second vertex depth (was mistakenly z3) */
	ta_bkg->argb2 = 0xff000000;
	ta_bkg->x3 = 640.0f;
	ta_bkg->y3 = 480.0f;
	ta_bkg->z3 = 0.2f;
	ta_bkg->argb3 = 0xff000000;

	/* Initialize 3D hardware */
	ta_hdwr_init();
}

/* Public counterpart of ta_hw_init(): shuts the 3D hardware down by
   delegating to ta_hdwr_shutdown(). */
void ta_hw_shutdown() {
	/* De-init 3D hardware */
	ta_hdwr_shutdown();
}


/* Setup a service */
#include <kallisti/svcmpx.h>
#include <kallisti/abi/ta.h>
/* ABI table handed to the service multiplexer */
static abi_ta_t tabi;

/* Build the "ta" ABI table from this module's public entry points and
   register it with svcmpx. */
static void ta_svc_init() {
	/* Start from a clean slate so unset slots are zero */
	memset(&tabi, 0, sizeof(tabi));

	/* If you enable the pseudo-broken new TA alloc code, change this
	   to ABI_MAKE_VER(1,0,1)! */
	tabi.hdr.version = ABI_MAKE_VER(1,0,0);

	/* Hardware bring-up / tear-down */
	tabi.hw_init		= ta_hw_init;
	tabi.hw_shutdown	= ta_hw_shutdown;

	/* Per-frame submission */
	tabi.begin_render	= ta_begin_render;
	tabi.send_queue		= ta_send_queue;
	tabi.commit_poly_hdr	= ta_commit_poly_hdr;
	tabi.commit_vertex	= ta_commit_vertex;
	tabi.commit_eol		= ta_commit_eol;
	tabi.finish_frame	= ta_finish_frame;

	/* Header builders */
	tabi.poly_hdr_col	= ta_poly_hdr_col;
	tabi.poly_hdr_txr	= ta_poly_hdr_txr;

	/* Texture access */
	tabi.load_texture	= ta_load_texture;
	tabi.texture_map	= ta_texture_map;

	/* Background plane data */
	tabi.bkg		= ta_bkg;

	svcmpx_add_handler("ta", &tabi);
}


/* Program init: bring up the TA hardware first, then publish the "ta"
   service so the ABI table is only exposed once the hardware is ready. */
void ta_init() {
	ta_hw_init();

	/* Setup a service */
	ta_svc_init();
}

/* Program shutdown: the reverse of ta_init(). The service is removed
   before the hardware is shut down. */
void ta_shutdown() {
	/* Take out service */
	svcmpx_remove_handler("ta");

	ta_hw_shutdown();
}




