Created
August 22, 2019 22:20
-
-
Save mattgodbolt/6d010089f7693cdbac1165c471338881 to your computer and use it in GitHub Desktop.
A bit of Red Dog: Superior Firepower's rendering system. SH4 Assembly, circa 1998
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; All new, all-singing, all-dancing textured scape renderer! | |
; SH-4 assembly (Hitachi assembler syntax). This file holds three support
; routines - PrepareVertex, OutputVertex, OutputAlpha - and the exported
; entry point _texturedStripRasteriserClipped.
; PUSH, POP, CMPZ and DEC_XYZW are macros that are not defined in this file;
; presumably they come from the include below - confirm.
.CPU SH4 | |
.OUTPUT DBG=DWARF | |
.INCLUDE "..\RedDog.pre" | |
; Prototype: StripHeader *texturedStripRasteriserClipped | |
; (StripPos *v, StripEntry *strip, Uint32 nStrip, ModelContext *context); | |
; The four arguments arrive in R4-R7 in prototype order, which is why
; vertTab/strip/nStrip/context are bound to R4/R5/R6/R7 below.
; Register allocation: | |
; Names marked [olap] are extra aliases for a physical register that already
; has a name; the code relies on only one alias being live at a time.
temp: .REG R0 ; temporary register, R0 is most accessible | |
retVal: .REG R0 ; r0 is also the return value | |
lightingBuf: .REG R1 ; the lighting buffer | |
curVertex: .REG R2 ; the number of the vertex in the 3-vert round-robin buffer | |
pkm: .REG R3 ; where to stick the next vertex, TA-wise | |
vertTab: .REG R4 ; address of the vertex table | |
strip: .REG R5 ; the strip pointer | |
nStrip: .REG R6 ; number of vertices left in this strip | |
context: .REG R7 ; pointer to the context | |
v: .REG R8 ; pointer to current vertex position | |
pcw: .REG R8 ; [olap] paramcontrolword | |
outcode: .REG R9 ; current outcode | |
temp2: .REG R10 ; another temporary register | |
diff: .REG R10 ; [olap] difference in outcoding | |
v0: .REG R10 ; [olap] vertex 0 pointer in clipping | |
mask: .REG R11 ; used as a mask, and a temporary register | |
v1: .REG R11 ; [olap] vertex 1 pointer in clipping | |
uv_u: .REG R12 ; u value | |
abs_w: .REG R12 ; [olap] absolute binary W | |
v2: .REG R12 ; [olap] vertex 2 pointer in clipping | |
uv_v: .REG R13 ; v value | |
t: .REG R13 ; [olap] used in outcoding | |
counter: .REG R13 ; [olap] used in clipping | |
col: .REG R14 ; colour value | |
temp3: .REG R14 ; [olap] more temp action | |
addrV1: .REG R14 ; [olap] second vertex pointer passed to OutputAlpha | |
; Floating point madness | |
; DEC_XYZW is an external macro; judging by the names used later it presumably
; declares vertX/vertY/vertZ/vertW on FR4-FR7 plus the pair names vertXY/vertZW
; and the vector name vert - confirm against its definition.
DEC_XYZW vert, 4, 5, 6, 7 ; vertex position | |
; X/Y/Z/W are further aliases for the same FR4-FR7.
X: .FREG FR4 | |
Y: .FREG FR5 | |
Z: .FREG FR6 | |
W: .FREG FR7 ; W is the last component (FR7), not the first | |
midX: .FREG FR8 ; screen midpoint X | |
midY: .FREG FR9 ; screen midpoint Y | |
mulX: .FREG FR10 ; screen size X | |
mulY: .FREG FR11 ; screen size Y | |
; rW and alpha must be FR0: FMAC always takes its first multiplicand from FR0.
rW: .FREG FR0 | |
alpha: .FREG FR0 ; [olap] alpha value | |
rWnearVal: .FREG FR1 ; rW at camera plane | |
U: .FREG FR2 | |
V: .FREG FR3 | |
; Offsets: | |
; Byte offsets into one prepared-vertex work buffer, as filled in by
; PrepareVertex. The main routine places the three buffers 64 bytes apart.
_X: .EQU 0 | |
_Y: .EQU 4 | |
_Z: .EQU 8 | |
_W: .EQU 12 | |
_U: .EQU 16 | |
_V: .EQU 20 | |
_preColour: .EQU 24 | |
_dynColour: .EQU 28 | |
_outcode: .EQU 32 | |
_rW: .EQU 36 | |
_scrY: .EQU 40 | |
_scrX: .EQU 44 | |
_padding: .EQU 48 | |
.SECTION PSCAPERAST, CODE, ALIGN=16 | |
; Outcode description: | |
; bit | meaning | |
; -----+----------- | |
; 0 | Off near | |
; 1 | Off left | |
; 2 | Off right | |
; 3 | Off top | |
; 4 | Off bottom | |
; 'pad' marks scheduling gaps; with the NOP commented out it expands to
; nothing, so it emits no code.
.MACRO pad | |
; NOP no nops for speed | |
.ENDM | |
; curVertex bits: | |
; lower 2 bits are the current vertex | |
; bit 3 is the parity, if set then the triangle is the 'right way around' | |
; bit 4 is the continuation flag, if clear then the previous two vertices need to be sent before 'this' | |
; (the code tests these as masks 1<<3 and 1<<4)
; Subroutine: Prepares a vertex read from strip into the buffer pointed to | |
; by R0 | |
; Corrupts: everything barring the pointers | |
; In:  R0    = destination work buffer (see the _X.._scrX offsets)
;      strip = next strip entry: a longword vertex index followed by a
;              longword holding U in the top 16 bits and V in the bottom 16
;      vertTab, lightingBuf, midX/midY, mulX/mulY as set up by the caller
;      XMTRX is used as-is; it is not loaded anywhere in this file
; Out: buffer holds transformed X,Y,Z,W, U, V, prelit and dynamic colour,
;      outcode, 1/W and projected screen X/Y; R0 is back at the buffer start
;      strip has advanced by 8 bytes (the +32/-32 below is only for the PREF)
; Clobbers as coded: R8-R14, FR0, FR2-FR7, FPUL and the T bit
.ALIGN 16 | |
PrepareVertex: | |
MOV.L @strip+, temp2 ; read in the vertex number | |
pad | |
pad | |
pad | |
MOV temp2, v ; move into the v register | |
SHLL2 v ; v = index * 4, used as a byte offset into the vertex table | |
ADD vertTab, v ; v now points at the vertex in question | |
FLDI1 vertW ; vertW all set for the transformation | |
FMOV.S @v+, vertX ; read in the X position | |
MOV #-1, mask ; get mask=0xffffffff | |
FMOV.S @v+, vertY ; read in Y position | |
SHLR mask ; mask is now 0x7fffffff | |
FMOV.S @v+, vertZ ; read in the Z position | |
ADD lightingBuf, temp2 ; temp2 = lightingBuf + unscaled index: address of the dynamic lighting entry | |
MOV.L @strip+, uv_u ; read the UV value from strip | |
pad | |
FTRV XMTRX, vert ; transform the vertex [5-7 cyc lat] | |
MOV.L @temp2, temp2 ; read the dynamic lighting value | |
FLDI1 rW ; get 1 in rW ready for reciprocation | |
ADD #32, strip ; move strip on for a prefetch | |
MOV.L temp2, @(_dynColour, R0); store in the dynamic colour | |
EXTU.W uv_u, uv_v ; clear the top 16 bits of uv to get v into v | |
MOV.L @v+, col ; fourth longword of the source vertex: the prelit colour
SHLR16 uv_u ; move u down to clear the bottom bits | |
SHLL16 uv_v ; shift v back up == VVVV0000 | |
FLDI0 U ; load U ready for the comparison | |
MOV.L col, @(_preColour, R0) ; store in the prelit colour | |
SHLL16 uv_u ; shift U back up == UUUU0000 | |
MOV.L uv_v, @(_V, R0) ; store V | |
pad | |
MOV.L uv_u, @(_U, R0) ; store U | |
ADD #_U, R0 ; move R0 past the point data | |
; FTRV ready here | |
; Outcoding is done on the raw float bit patterns in integer registers:
; |X| > |W| and |Y| > |W| are tested by subtracting sign-masked bit patterns.
FLDS W, FPUL ; get bin(W) | |
FCMP/GT Z, U ; T = (0 > Z), i.e. Z < 0 (U holds 0.0) [4 cyc lat] | |
STS FPUL, abs_w ; abs_w is bin(W) | |
pad | |
FLDS X, FPUL ; load X into FPUL ready for bin(X) | |
AND mask, abs_w ; get abs(bin(W)) | |
STS FPUL, t ; get bin(X) into t | |
MOV abs_w, diff ; diff = abs_w | |
FLDS Y, FPUL ; load Y into FPUL for bin(Y) | |
MOV t, temp3 ; blimey another temporary register | |
AND mask, temp3 ; temp3 is abs(bin(X)) | |
FDIV W, rW ; rW = 1.0 / W, left running while the integer outcoding continues | |
MOVT outcode ; bit 0 is now Z < 0 (near clip bit); nothing since the FCMP has touched T | |
PREF @strip ; prefetch the next strip information | |
SHLL t ; T bit is top bit of bin(X) | |
FMOV.S W, @-R0 ; store W | |
MOVT t ; store T into the t | |
FMOV.S Z, @-R0 ; store Z | |
SUB temp3, diff ; diff = abs_w - abs(bin(X)) | |
FMOV.S Y, @-R0 ; store Y | |
SHLL diff ; T bit = sign(abs_w - abs(bin(X))) | |
STS FPUL, temp3 ; temp3 is bin(Y) | |
ADD #1, t ; t is now 1 + sign of X | |
FMOV.S X, @-R0 ; store X (R0 is back at the buffer start) | |
MOVT diff ; diff = T bit == sign(abs_w - abs(bin(X))) | |
FMUL mulX, X ; X = X * screen size X | |
SHLD t, diff ; diff = sign bit << t, i.e. 0, 2 or 4 | |
FMUL mulY, Y ; Y = Y * screen size Y | |
OR diff, outcode ; outcode |= 0, 2 or 4 depending of X outcode | |
pad | |
MOV temp3, t ; get a copy of bin(y) | |
AND mask, temp3 ; temp3 = abs(bin(y)) | |
SHLL t ; get top bit into t == sign of y | |
FMOV.S midX, U ; U is mid X ready for the FMAC | |
MOVT t ; t == 1 or 0 depending on sgn (y) | |
FMOV.S midY, V ; V is mid Y ready for the FMAC | |
MOV abs_w, diff ; move abs_w into diff | |
SUB temp3, diff ; diff = abs_w - abs(bin(y)) | |
SHLL diff ; T bit = sign(abs_w - abs(bin(y))) | |
FMAC rW, X, U ; U = midX + X * screen size * rW | |
ADD #3, t ; t = 3 or 4 | |
FMAC rW, Y, V ; V = midY + Y * screen size * rW | |
MOVT diff ; diff = T bit = sign(abs_w - abs(bin(y))) | |
pad | |
ADD #_rW, R0 ; point R0 at 1/w's place | |
pad | |
SHLD t, diff ; diff = sign(abs_w - abs(bin(y))) << t, i.e. 0, 8 or 16 | |
FMOV.S rW, @R0 ; store 1/w | |
OR diff, outcode ; outcode is now complete - huzzah! | |
pad | |
ADD #12, R0 ; R0 = base + 48, just past _scrX, ready for two pre-decrement stores | |
FMOV.S U, @-R0 ; store transformed X | |
FMOV.S V, @-R0 ; store transformed Y | |
ADD #-_scrY, R0 ; move R0 back to the beginning | |
ADD #-32, strip ; move strip back to whence it came | |
RTS ; return, and as a slot: | |
MOV.L outcode, @(_outcode, R0); [slot] store the outcode back | |
; Support routine: Output a vertex | |
; vertex pointed to by r0, output is pkm | |
; leaves pkm pointing at the PCW, and with SQ requiring dispatch | |
; corrupts only FP and temp3 | |
; sets pcw to be 0xe0000000 | |
; returns (curVertex+temp3 % 3) in R0 | |
; In:  R0 = prepared vertex buffer, pkm = 32-byte output slot,
;      temp3 = amount to add to the vertex number for the returned index
; Output slot as written here (the PCW longword at +0 is left to the caller):
;      +4 screen X, +8 screen Y, +12 1/w, +16 U, +20 V,
;      +24 prelit colour, +28 dynamic colour
; The FP registers are used purely as 32-bit movers; the two colour words
; pass through them untouched. temp3 is left holding 3.
.ALIGN 16 | |
OutputVertex: | |
ADD #_U, R0 ; point r0 at the U value | |
ADD #32, pkm ; point at the end of the vertex | |
FMOV.S @R0+, U ; read U | |
FMOV.S @R0+, V ; read V | |
MOV #H'E, pcw ; pcw = 0xe | |
FMOV.S @R0+, vertX ; read prelighting | |
SHLL16 pcw ; pcw = 0xe<<16 | |
FMOV.S @R0, vertY ; read dynamic lighting | |
ADD #8, R0 ; skip over to 1/w | |
FMOV.S @R0+, vertZ ; read rW | |
SHLL8 pcw ; pcw = 0xe<<24 | |
FMOV.S @R0+, rW ; read screen Y (offset _scrY = 40) | |
SHLL2 pcw ; pcw = 0xe<<26 | |
FMOV.S @R0+, vertW ; read screen X (offset _scrX = 44) | |
SHLL2 pcw ; pcw = 0xe<<28 == 0xe0000000 | |
FMOV.S vertY, @-pkm ; store dynamic lighting | |
MOV curVertex, R0 ; get curvertex | |
FMOV.S vertX, @-pkm ; store prelighting | |
AND #3, R0 ; just get the vertex number | |
FMOV.S V, @-pkm ; store V | |
ADD temp3, R0 ; curVertex + temp3 | |
MOV #3, temp3 ; check 3 | |
FMOV.S U, @-pkm ; store U | |
CMP/GE temp3, R0 ; check for curVertex overflow | |
FMOV.S vertZ, @-pkm ; store 1/w | |
BF/S ?ok ; is it OK to skip | |
FMOV.S rW, @-pkm ; store screen Y [slot] | |
ADD #-3, R0 ; wrap R0 back into 0..2 (subtract 3) | |
?ok FMOV.S vertW, @-pkm ; store screen X | |
RTS | |
ADD #-4, pkm ; [slot] reset pkm to point at pcw | |
; Support routine: Output the vertex where the edge between R0 and addrV1 | |
; crosses Z = 0 (the plane outcode bit 0 is tested against). | |
; Corrupts all FP, PCW | |
; In:  R0 = first prepared vertex (v0), addrV1 = second prepared vertex (v1),
;      pkm = 32-byte output slot
; Out: slot filled at +4..+31 in the same layout OutputVertex uses (screen X,
;      screen Y, 1/w = rWnearVal, U, V, prelit colour, dynamic colour); the
;      PCW at +0 is left to the caller and pkm ends up back at the slot start
; alpha = v1.z / (v1.z - v0.z); every attribute is v1 + alpha * (v0 - v1).
; outcode, temp2 and t are saved and restored around the colour loop; R0 and
; addrV1 are advanced and not restored.
OutputAlpha: ; first read Z for both to get the alpha calculation done first | |
ADD #_Z, R0 ; point R0 at v0.z | |
pad | |
ADD #_Z, addrV1 ; point addrV1 at v1.z | |
FMOV.S @R0, Z ; read v0.z into Z | |
FMOV.S @addrV1, alpha ; read v1.z into alpha | |
ADD #-_Z, R0 ; put r0 back to the beginning | |
FMOV.S @R0+, X ; read v0.x into X | |
ADD #-_Z, addrV1 ; move addrV1 back to the beginning also | |
FMOV alpha, W ; take a copy of v1.z into W | |
FSUB Z, W ; W = v1.z - v0.z | |
FMOV.S @R0+, Y ; read v0.y into Y | |
ADD #8, R0 ; move R0 to point at the UV values | |
FMOV.S @addrV1+, U ; U = v1.X | |
pad | |
FDIV W, alpha ; alpha = v1.z / (v1.z - v0.z) in about 12 cycle's time | |
FMOV.S @addrV1+, V ; V = v1.Y | |
FMOV X, W ; W is temporarily v0.X | |
FSUB U, W ; W = v0.X - v1.X | |
FMOV Y, Z ; Z is temporarily v0.Y | |
FSUB V, Z ; Z = v0.Y - v1.Y | |
; stall for 10 cycles here - travesty! | |
ADD #8, addrV1 ; move on to the uvs | |
FMAC alpha, Z, V ; V = v1.Y + alpha * (v0.Y - v1.Y) | |
FMAC alpha, W, U ; U = v1.X + alpha * (v0.X - v1.X) | |
pad | |
pad | |
pad | |
FMUL rWnearVal, V ; perspectivize Y | |
pad | |
FMUL rWnearVal, U ; perspectivize X | |
ADD #16, pkm ; move pkm to just past the 1/w field | |
FMUL mulY, V ; scale to fit on screen in Y | |
FMOV.S @R0+, X ; X is v0.u | |
FMUL mulX, U ; scale to fit on screen in X | |
FMOV.S @R0+, Y ; Y is v0.v | |
FADD midY, V ; V is a screen pos at last too | |
pad | |
FADD midX, U ; U is a screen pos at last | |
FMOV.S rWnearVal, @-pkm ; store 1/near clip value | |
FMOV.S V, @-pkm ; store Y value | |
pad | |
FMOV.S U, @-pkm ; store X value | |
;stall | |
FMOV.S @addrV1+, U ; read v1.U into U | |
FMOV.S @addrV1+, V ; read v1.V into V | |
FMOV X, Z ; copy v0.U into Z | |
FSUB U, Z ; Z = v0.U - v1.U | |
FMOV Y, W ; copy v0.V into W | |
FSUB V, W ; W = v0.V - v1.V | |
pad | |
pad | |
FMAC alpha, Z, U ; U = v1.U + alpha * (v0.U - v1.U) | |
FMAC alpha, W, V ; V = v1.V + alpha * (v0.V - v1.V) | |
ADD #20, pkm ; move pkm to the end of the UV area | |
pad | |
pad | |
FMOV.S V, @-pkm ; store interpolated V at +20
FMOV.S U, @-pkm ; store interpolated U at +16
; Colour interpolation: both colour words are blended one byte at a time,
; highest address first, and reassembled in 'outcode' by shifting in 8 bits
; per byte. The dynamic colour (+28) is done first, then the prelit (+24).
PUSH outcode | |
PUSH temp2 | |
ADD #16, pkm ; point at end of colour | |
PUSH t | |
MOV #4, temp2 ; loop counter is 4 | |
ADD #7, R0 ; move to end of colour | |
ADD #7, addrV1 ; move to end of colour | |
MOV #0, outcode ; start with an empty accumulator | |
?colourLoop MOV.B @R0, pcw ; read a byte of colour into pcw | |
ADD #-1, R0 ; move down | |
EXTU.B pcw, pcw ; zero those top bits | |
MOV.B @addrV1, t ; read a second byte of colour | |
ADD #-1, addrV1 ; move down | |
LDS pcw, FPUL ; FPUL madness | |
EXTU.B t, t ; zero those top bits | |
FLOAT FPUL, X ; X is colour 1 | |
SHLL8 outcode ; make room for this byte in the accumulator
CMP/EQ t, pcw ; are they the same? | |
LDS t, FPUL ; FPUL madness again | |
BT ?sameColours ; identical bytes need no blend: pcw already holds the result
FLOAT FPUL, Y ; Y is colour 2 | |
FSUB Y, X ; X = (colour 1 - colour 2) | |
FMAC alpha, X, Y ; Y = colour 2 + (colour 1 - colour 2) * alpha | |
FTRC Y, FPUL ; FPUL is the colour | |
STS FPUL, pcw | |
?sameColours DT temp2 | |
BF/S ?colourLoop | |
OR pcw, outcode ; [slot] merge this byte into the accumulator | |
MOV.L outcode, @-pkm ; store the finished colour word
; have we done? | |
; pkm is at +28 after the first word and +24 after the second, so the low
; three address bits tell the two passes apart (relies on the slot base being
; at least 8-byte aligned). No re-zeroing of the accumulator is needed for
; the second pass: four SHLL8 shift every old bit out.
MOV #7, t | |
TST t, pkm | |
BT ?hoorayTheEnd | |
BRA ?colourLoop | |
MOV #4, temp2 ; [slot] reload the byte counter for the second colour word | |
?hoorayTheEnd | |
POP t | |
POP temp2 | |
POP outcode | |
RTS | |
ADD #-24, pkm ; [slot] reset pkm to the beginning | |
.EXPORT _texturedStripRasteriserClipped | |
.ALIGN 16 | |
; Entry point. Arguments: R4 = vertTab, R5 = strip (already pointing at the
; first vertex entry), R6 = nStrip, R7 = context.
; Context fields as read in this file: +0 pkm, +4 material, +8 midX,
; +12 midY, +16 lightingBuf, +20 mulX, +24 mulY, +28 rWnearVal.
; Returns in R0 the strip pointer rewound to the last strip header read.
; Stack use: R8-R14 and PR saved, then 192 bytes for three 64-byte vertex
; work buffers at R15+0, R15+64 and R15+128.
_texturedStripRasteriserClipped: | |
; Enormous stacking action | |
PREF @strip ; start by prefetching the strip information | |
PUSH R14 | |
PUSH R13 | |
PUSH R12 | |
PUSH R11 | |
PUSH R10 | |
PUSH R9 | |
PUSH R8 | |
STS.L PR,@-R15 ; end of stacking | |
MOV.L @context, pkm ; read pkm from context | |
ADD #8, context ; skip the pkm word just read and the material number | |
FMOV.S @context+, midX ; read midX | |
MOV #96, R0 ; 192 does not fit a signed 8-bit immediate, so build it as 96 + 96
FMOV.S @context+, midY ; read midY | |
ADD #96, R0 ; R0 is now 192 | |
MOV.L @context+, lightingBuf ; read lightingBuf | |
SUB R0, R15 ; rewind stack enough for three vertex buffers | |
FMOV.S @context+, mulX ; read mulX | |
MOV #2, curVertex ; the next vertex read goes into buffer 2; parity and continuation bits clear
FMOV.S @context+, mulY | |
ADD #-2, nStrip ; the first two vertices make no triangle: nStrip now counts triangles
FMOV.S @context, rWnearVal | |
ADD #-7*4, context ; rewind context | |
; Read in the first two vertices : (nStrip must be pre-decremented and curVertex 2) | |
?bigLoop: | |
BSR PrepareVertex ; vertex #1 | |
MOV R15, R0 ; buffer @sp+0 [slot] | |
MOV R15, R0 ; vertex #2 | |
BSR PrepareVertex ; buffer @sp+64 | |
ADD #64, R0 ; [slot] | |
; Optional statistics, assembled only when COUNT_GEOMETRY is 1: adds the
; triangle count of this strip to the longword a_nDrawn points at.
.AIF \&COUNT_GEOMETRY EQ 1 | |
MOV.L #a_nDrawn, temp2 | |
MOV.L @temp2, temp2 | |
MOV.L @temp2, R0 | |
ADD nStrip, R0 | |
MOV.L R0, @temp2 | |
.AENDI | |
; Per-triangle loop: prepare one more vertex into the round-robin slot,
; advance curVertex, then classify the triangle held in the three buffers as
; fully off screen, near-clipped or fully drawable.
?loop ; Read in another vertex at the current vertex position | |
MOV curVertex, R0 ; r0 = curVertex | |
AND #3, R0 ; ensure only vertex number is around | |
SHLL8 R0 ; r0 = curVertex * 256 | |
pad | |
SHLR2 R0 ; r0 = curVertex * 64 | |
BSR PrepareVertex ; prepare that vertex | |
ADD R15, R0 ; [slot] r0 is address of new vertex now | |
; increment curVertex, and toggle parity, ugly bit of code | |
MOV curVertex, R0 | |
AND #3, R0 | |
ADD #1, R0 | |
CMP/EQ #3, R0 | |
BF ?ok | |
MOV #0, R0 ; wrap 3 -> 0
?ok MOV #-4,t ; not 3 | |
AND t, curVertex ; keep the flag bits, drop the old vertex number
OR curVertex, R0 ; curVertex incremented | |
XOR #(1<<3), R0 ; toggle parity | |
MOV R0, curVertex | |
; Now check the outcode; outcode will be A|B|C, temp A&B&C | |
MOV.L @(_outcode, R15), outcode ; outcode is A | |
MOV R15, temp3 | |
MOV outcode, temp ; temp is going to be the AND version | |
ADD #64, temp3 | |
MOV.L @(_outcode, temp3), temp2 ; temp2 is B | |
ADD #64, temp3 | |
MOV.L @(_outcode, temp3), temp3 ; temp3 is C | |
OR temp2, outcode ; outcode |= B | |
AND temp2, temp ; temp &= B | |
OR temp3, outcode ; outcode |= C | |
AND temp3, temp ; temp &= C | |
; CMPZ is an external macro; the branch below relies on it setting T when
; the register is zero.
CMPZ temp ; is temp zero; if not, we're totally offscreen | |
pad | |
BF ?offScreen ; branch if not zero to off screen | |
MOV outcode, temp | |
TST #1, temp ; T = 1 when no vertex has the near bit set | |
pad | |
BF ?clipPoly ; if nearclipped, go ahead and clip it | |
; Triangle needs no near clipping. Either extend the strip already being
; emitted (?continueStrip) or start a new one by sending all three vertices,
; preceded by one extra vertex when the parity bit says the winding is wrong.
?onScreen ; check to see if this is the first polygon | |
MOV #(1<<4), R0 | |
TST R0, curVertex ; check continuation bit | |
pad | |
BF ?continueStrip ; if continuation bit is 1, continue the strip | |
MOV #(1<<3), R0 | |
TST R0, curVertex ; check parity bit | |
pad | |
BF ?parityOK ; correct parity, so carry on | |
; we're going to output a dummy vertex of curVertex + 1 | |
MOV curVertex, R0 ; r0 = curVertex | |
AND #3, R0 ; ensure only vertex number is around | |
ADD #1, R0 ; + 1 | |
CMP/EQ #3, R0 ; ovf? | |
BF ?parityovfOK | |
MOV #0, R0 | |
?parityovfOK | |
SHLL8 R0 ; r0 = curVertex+1 * 256 | |
pad | |
SHLR2 R0 ; r0 = curVertex+1 * 64 | |
BSR OutputVertex ; output the dummy vertex (R0 result and temp3 are not used here)
ADD R15, R0 ; [slot] point r0 in the right place | |
MOV pcw, @pkm ; store e0000000 (no size suffix on this MOV; the sibling stores use MOV.L)
pad | |
PREF @pkm | |
ADD #32, pkm | |
?parityOK | |
; Now we have to output the three vertices of the first triangle in a strip | |
; That is, the vertices at curVertex, curVertex+1 and curVertex+2 | |
; OutputVertex hands back (curVertex + temp3) mod 3 in R0, which is used to
; address the next buffer without recomputing the wrap.
MOV curVertex, R0 ; r0 = curVertex | |
AND #3, R0 ; ensure only vertex number is around | |
SHLL8 R0 ; r0 = curVertex * 256 | |
pad | |
MOV #1, temp3 ; add one | |
pad | |
SHLR2 R0 ; r0 = curVertex * 64 | |
BSR OutputVertex ; and output vertex 0 of the triangle | |
ADD R15, R0 ; [slot] point r0 in the right place | |
; store in the pcw and dispatch | |
MOV.L pcw, @pkm ; store e0000000 | |
SHLL8 R0 ; R0 is (curVertex+1)<<8 | |
PREF @pkm ; SQ blast! | |
ADD #32, pkm ; move on | |
MOV #2, temp3 ; add two this time | |
pad | |
SHLR2 R0 ; R0 is (curVertex+1)<<6 | |
BSR OutputVertex ; output vertex 1 | |
ADD R15, R0 ; [slot] move to the right place | |
; store in the pcw and dispatch | |
MOV.L pcw, @pkm ; store e0000000 | |
SHLL8 R0 ; R0 is (curVertex+2)<<8 | |
PREF @pkm ; SQ blast! | |
ADD #32, pkm ; move on | |
SHLR2 R0 ; R0 is (curVertex+2)<<6 | |
BSR OutputVertex ; output vertex 2 | |
ADD R15, R0 ; [slot] move to the right place | |
; leave the vertex hanging in the pipe, as we may be able to extend it, we may not, you never know | |
; (its PCW is not written yet: e0000000 if the strip continues, f0000000 if not)
MOV #(1<<4), R0 ; get continuation bit | |
OR R0, curVertex ; curVertex has the continuation bit set | |
DT nStrip | |
BF ?loop | |
?endStripCont ; if we got here, then we have to terminate the poly in the current pipe | |
; pcw should be 0xe0000000 | |
SHAR pcw ; e0000000 -> f0000000 | |
MOV.L pcw, @pkm ; store terminator | |
PREF @pkm ; SQ blast | |
ADD #32, pkm ; move on to next | |
; End of one strip: read the next strip header (vertex count, material).
; Keep going only if the count is non-zero and the material matches the one
; in the context; otherwise write pkm back, unwind and return.
?end | |
MOV.L @strip+, nStrip ; read num strip | |
MOV.L @strip+, temp2 ; read material | |
MOV.L @(4, context), temp3 ; read should be material | |
CMPZ nStrip ; is the vertex count zero? | |
BT ?reallyTheEnd | |
ADD #-2, nStrip ; same pre-decrement as at entry
CMP/EQ temp2, temp3 ; is it the same material? | |
BT/S ?bigLoop ; yes? wicked! | |
MOV #2, curVertex ; reset curVertex [slot] | |
?reallyTheEnd MOV.L pkm, @context ; store back the pkm | |
MOV.L #192, R0 ; 192 = 3*64 | |
ADD R0, R15 ; stack sortout | |
LDS.L @R15+, PR ; unstacking action | |
MOV strip, R0 ; get the return value | |
POP R8 | |
ADD #-8, R0 ; rewind the retVal over the two header longwords just read
POP R9 | |
POP R10 | |
POP R11 | |
POP R12 | |
POP R13 | |
RTS | |
POP R14 ; [slot] last restore happens in the RTS delay slot
; the polygon is offscreen, DT and reloop, | |
; If a strip was in progress, its pending vertex is closed off with an
; f0000000 PCW first. ?noFlush is also the common rejoin point for both
; clipping paths.
?offScreen ; Check for some flushing action | |
MOV #(1<<4), R0 | |
TST R0, curVertex ; check continuation bit | |
pad | |
MOV.L #H'F0000000, pcw ; loaded before the branch; does not disturb T
BT ?noFlush ; no need to flush | |
MOV.L pcw, @pkm ; store terminator | |
PREF @pkm ; SQ blast | |
ADD #32, pkm ; move on to next | |
MOV #(1<<4), R0 | |
XOR R0, curVertex ; clear continuation bit | |
?noFlush DT nStrip | |
BF ?loop | |
BRA ?end ; else end | |
NOP | |
; We've already outputted some vertices, and this is onscreen too, | |
; so SQ the pending vertex, and then output curVertex + 2 (== curVertex-1) | |
; curVertex has already been advanced in ?loop, so (curVertex + 2) mod 3 is
; the buffer that was just prepared.
?continueStrip MOV.L #H'e0000000, pcw ; get e* | |
MOV curVertex, R0 | |
AND #3, R0 | |
pad | |
ADD #2, R0 ; add two | |
pad | |
MOV #2, temp2 | |
MOV.L pcw, @pkm ; PCW previous vertex | |
CMP/GT temp2, R0 ; r0 > temp2 ? : ovf? | |
PREF @pkm ; SQ blast previous vertex | |
BF/S ?contovfOK | |
ADD #32, pkm ; [slot] step to the next output slot on both paths
ADD #-3, R0 ; get R0 ok | |
?contovfOK SHLL8 R0 | |
SHLR2 R0 ; get vert*64 | |
BSR OutputVertex ; output that vertex | |
ADD R15, R0 ; [slot] address add | |
DT nStrip ; finished strip? | |
BT ?endStripCont ; yes?...we need to flush | |
BRA ?loop ; no, lets go round again | |
NOP | |
; Time to clip the triangle[cur, cur+1, cur+2] | |
; Set up the vertex pointers correspondingly | |
; Any strip in progress is terminated first. v0/v1/v2 then become absolute
; buffer addresses, with the first two swapped when the parity bit is clear,
; and the count of near-clipped vertices selects the triangle or quad path.
?clipPoly: | |
MOV #(1<<4), R0 | |
TST R0, curVertex ; check continuation bit | |
pad | |
MOV.L #H'F0000000, pcw | |
BT ?noFlush2 ; no need to flush | |
MOV.L pcw, @pkm ; store terminator | |
PREF @pkm ; SQ blast | |
ADD #32, pkm ; move on to next | |
XOR R0, curVertex ; clear continuation bit (R0 still holds 1<<4)
?noFlush2 | |
MOV curVertex, R0 ; r0 is curVertex | |
TST #(1<<3), R0 ; parity even or odd? | |
BF/S ?evenParity ; parity bit set: take the vertices in natural order
; The next instruction is the delay slot, so it runs on both paths.
?oddParity: AND #3, R0 ; get R0 as just current vertex number | |
; If we got here, then we need v0=cur+1, v1=cur, v2=cur+2 | |
MOV R0, v1 | |
SHLL8 v1 ; v1 = R0*256 | |
SHLR2 v1 ; v1 = R0*64 | |
pad | |
ADD R15, v1 ; v1 = address of buffer cur | |
ADD #1, R0 ; move R0 on | |
CMP/EQ #3, R0 ; overflow? | |
BF ?skipOddOvf1 | |
MOV #0, R0 | |
?skipOddOvf1 | |
MOV R0, v0 ; v0 = buffer number cur+1 | |
SHLL8 v0 | |
SHLR2 v0 ; v0 = (cur+1) * 64 | |
ADD R15, v0 ; v0 is address of buffer cur+1 | |
ADD #1, R0 ; move R0 on | |
CMP/EQ #3, R0 ; ovf? | |
BF ?lastVertexAddr ; no, get last vertex into v2 | |
BT ?lastVertexOvf ; yes, flip round and get into v2 | |
; poly is in the right order, v0=0,v1=1,v2=2 | |
?evenParity | |
MOV R0, v0 | |
SHLL8 v0 ; v0 = R0*256 | |
SHLR2 v0 ; v0 = R0*64 | |
pad | |
ADD R15, v0 ; v0 = address of vertex 0 | |
ADD #1, R0 ; move R0 on | |
CMP/EQ #3, R0 ; overflow? | |
BF ?skipEvenOvf1 | |
MOV #0, R0 | |
?skipEvenOvf1 | |
MOV R0, v1 ; v1 = vertex 1 | |
SHLL8 v1 | |
SHLR2 v1 ; v1 = vertex1 * 64 | |
ADD R15, v1 ; v1 is address of vertex 1 | |
ADD #1, R0 ; move R0 on | |
CMP/EQ #3, R0 ; ovf? | |
BF ?lastVertexAddr ; no ovf | |
?lastVertexOvf MOV #0, R0 ; reset to 0 | |
?lastVertexAddr MOV R0, v2 | |
SHLL8 v2 ; v2 = 256*v2 | |
SHLR2 v2 | |
ADD R15, v2 ; v2 now points in the right place | |
; lets see whether this clips to a triangle or a quad | |
; a triangle can be blasted straight to the TA, but a quad needs | |
; temporary buffering and the vertices sent in 0132 order, annoyingly | |
; a triangle clips to a quad IFF only one vertex is off the near | |
; the case where all points are off the near has already been handled, so either | |
; one or two vertices are offscreen: a XOR will tell us the oddness or evenness | |
MOV.L @(_outcode, v0), outcode; outcode for v0 | |
MOV.L @(_outcode, v1), temp ; outcode for v1 | |
MOV.L @(_outcode, v2), v ; outcode for v2 | |
XOR temp, outcode ; outcode = v0^v1 | |
XOR v, outcode ; outcode = v0^v1^v2 | |
SHLR outcode ; get bottom bit into T bit | |
BF ?clipToTri ; bit clear: two vertices are near-clipped, result is a triangle | |
BRA ?clipToQuad ; bit set: exactly one vertex is near-clipped, result is a quad
NOP | |
; We've ascertained the output will be a triangle, so do each edge | |
; Walk v0, edge v0-v1, v1, edge v1-v2, v2, edge v2-v0. A vertex is emitted
; when its near bit is clear, an OutputAlpha vertex when the two ends of an
; edge differ in their near bit. 'counter' starts at 3 and is decremented per
; emitted vertex; the one that takes it to zero gets f0000000 instead of
; e0000000. Rejoins the main loop at ?noFlush.
?clipToTri: | |
MOV @(_outcode, v0), outcode; get v0's outcode | |
MOV #3, counter ; set up the vertex counter | |
pad | |
MOV outcode, R0 | |
SHLR R0 ; get T as the nearclip bit | |
BT ?v0OffScreen | |
BSR OutputVertex ; Output v0 | |
MOV v0, R0 ; [slot] point r0 at v0 | |
MOV.L pcw, @pkm ; store PCW | |
PREF @pkm ; sq blast | |
ADD #32, pkm ; move along | |
DT counter ; counter can't possibly go to zero here, so no check | |
?v0OffScreen MOV @(_outcode, v1), temp ; read v1's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode ; outcode = v0 ^ v1 | |
SHLR outcode ; get bottom bit into T | |
BF/S ?no0to1alpha ; if one onscreen and not the other, need to output alpha vertex | |
MOV temp, outcode ; [slot] put v1's outcode into 'outcode' | |
; We need to output the 0-1 alpha vertex | |
MOV v0, R0 ; r0 points to the first vertex | |
BSR OutputAlpha | |
MOV v1, addrV1 ; [slot] addrv1 is the second vertex | |
DT counter ; can't go to zero here | |
MOV.L #H'e0000000, pcw ; OutputAlpha leaves pcw trashed, so reload it
MOV.L pcw, @pkm ; store the e0000000 | |
PREF @pkm | |
ADD #32, pkm ; blast and move on | |
?no0to1alpha MOV outcode, R0 ; get outcode of v1 into R0 | |
SHLR R0 ; get clip bit into T | |
BT ?v1OffScreen ; is v1 off screen? | |
BSR OutputVertex | |
MOV v1, R0 ; [slot] output v1 | |
DT counter ; is this the last vertex? | |
BF ?notTheLast | |
SHAR pcw ; e0000000 to f0000000 | |
?notTheLast MOV.L pcw, @pkm ; store PCW | |
PREF @pkm ; sq blast | |
ADD #32, pkm ; move along | |
?v1OffScreen MOV @(_outcode, v2), temp ; read v2's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode ; get v1^v2 | |
SHLR outcode ; T bit action | |
BF/S ?no1to2alpha ; no need to output an alpha vertex then | |
MOV temp,outcode ; [slot] get v2's outcode ready for the next bit | |
; we Need to output a v1-v2 alpha | |
MOV v1, R0 | |
BSR OutputAlpha | |
MOV v2, addrV1 ; [s] output alpha value | |
MOV.L #H'E0000000, pcw | |
DT counter ; check to see if this is the last | |
BF ?notTheLast2 | |
SHAR pcw ; e0000000 -> f0000000 | |
?notTheLast2 MOV.L pcw, @pkm | |
PREF @pkm | |
ADD #32, pkm | |
?no1to2alpha MOV outcode, R0 ; get v2's outcode into R0 | |
SHLR R0 ; T= offscreenness of v2 | |
BT ?v2OffScreen | |
BSR OutputVertex | |
MOV v2, R0 ; [slot] output v2 | |
DT counter ; is this the last vertex? | |
BF ?notTheLast3 | |
SHAR pcw ; e0000000 to f0000000 | |
?notTheLast3 MOV.L pcw, @pkm ; store PCW | |
PREF @pkm ; sq blast | |
ADD #32, pkm ; move along | |
?v2OffScreen MOV @(_outcode, v0), temp ; read v0's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode | |
pad | |
SHLR outcode ; check v0^v2 | |
BF ?noFlushjumper ; no more to do? Loop all the way back! | |
; we Need to output a v2-v0 alpha | |
MOV v2, R0 | |
BSR OutputAlpha | |
MOV v0, addrV1 ; [s] output alpha value | |
MOV.L #H'f0000000, pcw ; definitely the last | |
MOV.L pcw, @pkm | |
PREF @pkm | |
BRA ?noFlush | |
ADD #32, pkm ; [slot] | |
; Trampoline: the conditional branch above goes through this BRA to reach
; ?noFlush.
?noFlushjumper | |
BRA ?noFlush | |
NOP | |
; ////////////////////////////////////////////////////////////////////////////// | |
; Quad case (one vertex near-clipped). The same edge walk as ?clipToTri is
; run with pkm temporarily pointed at ?QuadBuffer, which collects four
; 32-byte vertices without PCWs. They are then copied to the real pkm in the
; order 0, 1, 3, 2 with the PCW patched into the first longword of each;
; the last one gets f0000000. Rejoins the main loop at ?noFlush.
.ALIGN 16 | |
?clipToQuad | |
MOVA ?QuadBuffer, R0 | |
PUSH pkm ; the real output pointer is parked on the stack meanwhile
MOV R0, pkm ; point pkm at a buffer for now | |
MOV @(_outcode, v0), outcode; get v0's outcode | |
MOV outcode, R0 | |
SHLR R0 ; get T as the nearclip bit | |
BT ?v0OffScreenQ | |
BSR OutputVertex ; Output v0 | |
MOV v0, R0 ; [slot] point r0 at v0 | |
ADD #32, pkm | |
?v0OffScreenQ MOV @(_outcode, v1), temp ; read v1's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode ; outcode = v0 ^ v1 | |
SHLR outcode ; get bottom bit into T | |
BF/S ?no0to1alphaQ ; if one onscreen and not the other, need to output alpha vertex | |
MOV temp, outcode ; [slot] put v1's outcode into 'outcode' | |
; We need to output the 0-1 alpha vertex | |
MOV v0, R0 ; r0 points to the first vertex | |
BSR OutputAlpha | |
MOV v1, addrV1 ; [slot] addrv1 is the second vertex | |
ADD #32, pkm | |
?no0to1alphaQ MOV outcode, R0 ; get outcode of v1 into R0 | |
SHLR R0 ; get clip bit into T | |
BT ?v1OffScreenQ ; is v1 off screen? | |
BSR OutputVertex | |
MOV v1, R0 ; [slot] output v1 | |
ADD #32, pkm | |
?v1OffScreenQ MOV @(_outcode, v2), temp ; read v2's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode ; get v1^v2 | |
SHLR outcode ; T bit action | |
BF/S ?no1to2alphaQ ; no need to output an alpha vertex then | |
MOV temp,outcode ; [slot] get v2's outcode ready for the next bit | |
; we Need to output a v1-v2 alpha | |
MOV v1, R0 | |
BSR OutputAlpha | |
MOV v2, addrV1 ; [s] output alpha value | |
ADD #32, pkm | |
?no1to2alphaQ MOV outcode, R0 ; get v2's outcode into R0 | |
SHLR R0 ; T= offscreenness of v2 | |
BT ?v2OffScreenQ | |
BSR OutputVertex | |
MOV v2, R0 ; [slot] output v2 | |
ADD #32, pkm | |
?v2OffScreenQ MOV @(_outcode, v0), temp ; read v0's outcode | |
pad | |
pad | |
pad | |
XOR temp, outcode | |
pad | |
SHLR outcode ; check v0^v2 | |
BF ?endQuadClip ; no more to do? Loop all the way back! | |
; we Need to output a v2-v0 alpha | |
MOV v2, R0 | |
BSR OutputAlpha | |
MOV v0, addrV1 ; [s] output alpha value | |
;no need ADD #32, pkm | |
; Now to output the vertices in 0132 order | |
?endQuadClip POP pkm | |
MOVA ?QuadBuffer, R0 ; get true address in r0 | |
MOV.L #H'E0000000, pcw ; pcw is 0xe0000000 | |
FSCHG ; switch FMOV to 64-bit pair transfers for the copy
; Each vertex below is copied as four 8-byte moves. The first pair also
; carries the unused longword at +0, which is then overwritten with the PCW.
; first vertex : | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
MOV.L pcw, @pkm ; overwrite the first longword with the PCW | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
ADD #8, pkm | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
PREF @pkm ; dispatch; pkm is still inside this vertex's 32-byte slot
ADD #8, pkm | |
; second vertex | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
MOV.L pcw, @pkm ; overwrite the first longword with the PCW | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
ADD #8, pkm | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
PREF @pkm | |
ADD #8, pkm | |
ADD #8*4, R0 ; skip to fourth vertex | |
; third vertex sent is buffer entry 3 | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
MOV.L pcw, @pkm ; overwrite the first longword with the PCW | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
ADD #8, pkm | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
PREF @pkm | |
ADD #8, pkm | |
ADD #-8*4*2, R0 ; rewind back to third vertex | |
SHAR pcw ; e0000000 -> f0000000 | |
; last vertex sent is buffer entry 2 | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
MOV.L pcw, @pkm ; overwrite the first longword with the terminating PCW | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
ADD #8, pkm | |
FMOV.D @R0+, vertXY | |
FMOV.D @R0+, vertZW | |
FMOV.D vertXY, @pkm | |
ADD #8, pkm | |
FMOV.D vertZW, @pkm | |
PREF @pkm | |
FSCHG ; back to single-precision FMOV transfers
BRA ?noFlush ; and loop back | |
ADD #8, pkm ; [slot] finish stepping pkm past the last vertex
; Scratch area for ?clipToQuad: 8*4 longwords = 128 bytes = four 32-byte
; vertices. It is a single static buffer, so the quad path cannot safely run
; re-entrantly. No other .SECTION directive appears in this file, so this is
; presumably writable storage inside the code section.
.ALIGN 16 | |
?QuadBuffer .RES.L 8*4 | |
; Only with COUNT_GEOMETRY = 1: a_nDrawn holds the address of the external
; counter _nDrawn updated in ?bigLoop.
.AIF \&COUNT_GEOMETRY EQ 1 | |
.IMPORT _nDrawn | |
a_nDrawn: .DATA.L _nDrawn | |
.AENDI | |
.END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment