Two9A · February 22, 2024 08:15
diff --git a/pongwars.s b/pongwars.s
 ;--------------------------------------------------------------------------
 ; C64 reimplementation of Pongwars
 ; Imran Nazar, 2024
 ; References:
 ; - Source: vnglst: https://hachyderm.io/@vnglst/111828811496422610
 ; - C64 memory map: https://sta.c64.org/cbm64mem.html
 ; - SID RNG: https://stackoverflow.com/questions/44937759
 ; - Raster interrupt: https://codebase64.org/doku.php?id=base:introduction_to_raster_irqs
 ; - Sprite design: https://cpstest.us/minecraft-circle-generator/
 ; - Sprite positioning:
 ;   https://dustlayer.com/vic-ii/2013/4/28/vic-ii-for-beginners-part-5-bringing-sprites-in-shape
 ;   https://www.lemon64.com/forum/viewtopic.php?t=60341
 ; - PRG loader: https://michaelcmartin.github.io/Ophis/book/x72.html
 ; - Make process: https://codebase64.org/doku.php?id=base:tools_for_putting_files_into_a_.d64_image
 ; - Sine table generator: Dr LUT, https://github.com/ppelikan/drlut
 ; - Comparison branching: http://www.6502.org/tutorials/compare_instructions.html
 ; - Signed subtraction: https://www.righto.com/2012/12/the-6502-overflow-flag-explained.html
 ; - BASIC number conversion: https://www.lemon64.com/forum/viewtopic.php?t=67576

 ; Program layout and work areas
 PROGSTART = $0801   ; Origin of the assembled program (BASIC stub first)
 BLOCKBUF = $CE00    ; Blockfield: 20 rows of 20 bytes, each 0 or 1
 BLOCKPTR = $03      ; Two-byte zero-page pointer, mostly into BLOCKBUF
 COLORBUF = $D832    ; Where render copies the blockfield to (row stride 40)
 COLORPTR = $05      ; Two-byte zero-page pointer into COLORBUF
 BALL0 = $D9         ; Ball 0 state, 5 bytes: theta, X low, X high, Y low, Y high
 BALL1 = $DE         ; Ball 1 state, same layout as BALL0
 SCRATCH = $92       ; Holds Y while get_blk_val borrows the register
 CNT0 = $FB          ; Two-byte count of blocks holding 0
 CNT1 = $FD          ; Two-byte count of blocks holding non-zero
 FLAG = $FF          ; Bit flags recording edge bounces within a step
 FPBUF = $0101       ; Where render reads the digit string left by FOUT
 SPRPTR = $07F8      ; Data pointers for sprite 0 and (at +1) sprite 1
 SCRRAM = $0400      ; Screen RAM base, 40 bytes per row

 ; ROM routines and hardware registers
 MEMPORT = $0001     ; Not referenced by the code in this file
 IRQ = $0314         ; Two-byte vector that init points at frame
 INT2FP = $B391      ; BASIC ROM: called with A = high byte, Y = low byte
 FOUT = $BDDD        ; BASIC ROM: called after INT2FP to produce a string
 VIC_SPR0X = $D000   ; Sprite 0 X position, low 8 bits
 VIC_SPR0Y = $D001   ; Sprite 0 Y position
 VIC_SPR1X = $D002   ; Sprite 1 X position, low 8 bits
 VIC_SPR1Y = $D003   ; Sprite 1 Y position
 VIC_SPRXHI = $D010  ; Ninth X bit of each sprite, one bit per sprite
 VIC_SPREN = $D015   ; Sprite enable bits, one bit per sprite
 VIC_SPR0COL = $D027 ; Sprite 0 colour
 VIC_SPR1COL = $D028 ; Sprite 1 colour
 SID_3_FREQ = $D40E  ; SID voice 3 frequency, two bytes
 SID_3_CTRL = $D412  ; SID voice 3 control register
 SID_3_OUT = $D41B   ; SID voice 3 output, read by rand

    processor 6502
    org PROGSTART
 ; BASIC stub: the single line "10 SYS 2064", so that RUN enters main
 loader:
    word _loader_end, $000A     ; Link to the next line, then line number 10
    byte $9E, " 2064", $00      ; SYS token, its argument, end-of-line marker
 _loader_end:
    word $0000                  ; Null link: no further BASIC lines
    word $0000                  ; Two more bytes, which place main at $0810 = 2064

 ; main: Entry point reached from the SYS above
 main:
    jsr init
    jmp *                       ; Spin here; the work happens in the IRQ handler

 ;--------------------------------------------------------------------------
 ; init: Set up the blockfield, sprites, RNG; blank screen
 ; Runs once, from main. Uses A, X, Y and BLOCKPTR as scratch. Its last job
 ; points the IRQ vector at frame, which drives the game from then on.
 ;--------------------------------------------------------------------------
 init:
    subroutine
    ; Job one: Blank the screen with spaces (not @'s)
    ; We'll use BLOCKPTR temporarily, as we don't need it yet
    ; Four whole pages are written, which also covers the sprite pointers
    ; at SPRPTR; job three sets those afterwards
    lda #<SCRRAM
    sta BLOCKPTR
    lda #>SCRRAM
    sta BLOCKPTR + 1
    lda #32
    ldx #4
    ldy #0
 .blank:
    sta (BLOCKPTR),y
    dey
    bne .blank          ; Y runs 0, 255, 254 .. 1: a full page
    inc BLOCKPTR + 1
    dex
    bne .blank

    ; Then we want to fill in block characters for the blockfield
    ; The status of the blocks will be represented by color
    ; The base is one short of the first cell because Y counts 20 down to 1
    lda #<(SCRRAM + 49)
    sta BLOCKPTR
    lda #>(SCRRAM + 49)
    sta BLOCKPTR + 1
    ldx #20
 .screen:
    lda #160        ; Reverse-video space (filled block)
    ldy #20
 .scrrow:
    sta (BLOCKPTR),y
    dey
    bne .scrrow

    ; Advance by 40 for each row of 20 we want to fill in
    clc             ; Don't rely on whatever carry we were entered with
    lda BLOCKPTR
    adc #40
    sta BLOCKPTR
    lda BLOCKPTR + 1
    adc #0
    sta BLOCKPTR + 1
    dex
    bne .screen

    ; Job two: Set up the blockfield at BLOCKBUF
    ; We start by setting a pointer to BLOCKBUF
    lda #<BLOCKBUF
    sta BLOCKPTR
    lda #>BLOCKBUF
    sta BLOCKPTR + 1

    ; Fill BLOCKBUF with 40 runs of alternating 0 and 1
    ; Each run is 10 bytes, so every row of 20 is half 0 and half 1
    lda #0
    ldx #40
 .fill:
    ldy #9
 .fill_row:
    sta (BLOCKPTR),y
    dey
    bpl .fill_row
    pha             ; Keep the fill value while A does pointer arithmetic
    clc
    lda BLOCKPTR
    adc #10
    sta BLOCKPTR
    lda BLOCKPTR + 1
    adc #0
    sta BLOCKPTR + 1
    pla
    eor #1          ; Next run uses the other value
    dex
    bne .fill

    ; Job three: Initialise the two ball sprites
    ; posdata holds 10 bytes (5 per ball), and BALL1 directly follows
    ; BALL0, so one copy of indices 9..0 fills both
    ldx #9
 .sprpos:
    lda posdata,x
    sta BALL0,x
    dex
    bpl .sprpos

    ; They use the same data, so we need to refer to it only once
    lda #(sprdata / 64)
    sta SPRPTR
    sta SPRPTR + 1

    ; Sprite 0 is white, 1 is black
    lda #1
    sta VIC_SPR0COL
    lda #0
    sta VIC_SPR1COL
    lda #%00000011
    sta VIC_SPREN

    ; Job four: Init the SID's voice waveform to use as an RNG
    lda #$ff
    sta SID_3_FREQ
    sta SID_3_FREQ + 1
    lda #%10000000
    sta SID_3_CTRL

    ; Job five and final: Hook the IRQ vector
    ; No raster compare is programmed here; frame simply runs on every
    ; IRQ that is dispatched through this vector
    sei                 ; No IRQ may fire while the vector is half-written
    lda #<frame
    sta IRQ
    lda #>frame
    sta IRQ + 1
    cli

    rts

 ;--------------------------------------------------------------------------
 ; Helper functions
 ;--------------------------------------------------------------------------

 ; add_u16_s8: Add 8-bit signed to 16-bit unsigned
 ; The addend is added to the low byte as-is; the high byte then takes a
 ; carry (addend >= 0) or a borrow (addend < 0) from that addition.
 ; @param A The 8-bit signed
 ; @param Y Zero-page pointer to the 16-bit unsigned
 ; @return Changed value in zero-page
 ; @clobbers A
 ; @preserves Y
 add_u16_s8:
    subroutine
    clc
    and #$ff            ; Refresh N from A; the carry stays clear
    bmi .borrow

    ; Non-negative addend: pass the carry up into the high byte
    adc 0,y
    sta 0,y
    lda 1,y
    adc #0
    sta 1,y
    rts

 .borrow:
    ; Negative addend: a clear carry after the low byte means a borrow,
    ; which is exactly what sbc #0 takes off the high byte
    adc 0,y
    sta 0,y
    lda 1,y
    sbc #0
    sta 1,y
    rts

 ; get_blk_val: Get the block buffer value at the given coordinates
 ; Leaves BLOCKPTR pointing at the block, so that flipblk can be called
 ; straight afterwards. Z is set on return when the block holds 0.
 ; @param X X-coord (0-159)
 ; @param Y Y-coord (0-159)
 ; @return A The value of the block covering this coord
 ; @clobbers A, BLOCKPTR, SCRATCH
 ; @preserves X, Y
 get_blk_val:
    ; We need to reduce the coordinates to block-level (8x8)
    ; and then calculate Y * 20 + X
    ; This works out to ((Y >> 3) * 20) + (X >> 3)
    ; = (Y >> 3 << 4) + (Y >> 3 << 2) + (X >> 3)
    ; = (Y&0xF8 << 1) + (Y&0xF8 >> 1) + (X >> 3)

    subroutine

    ; (Y&0xF8 << 1)
    lda #0
    sta BLOCKPTR + 1
    tya
    and #$f8
    asl
    sta BLOCKPTR
    rol BLOCKPTR + 1    ; Catch the bit shifted out of the low byte

    ; + (Y&0xF8 >> 1)
    tya
    and #$f8
    lsr
    clc
    adc BLOCKPTR
    sta BLOCKPTR
    lda BLOCKPTR + 1
    adc #0
    sta BLOCKPTR + 1

    ; + (X >> 3)
    txa
    lsr
    lsr
    lsr
    clc
    adc BLOCKPTR
    sta BLOCKPTR
    lda BLOCKPTR + 1
    adc #0
    sta BLOCKPTR + 1

    ; Translate the calculated index into BLOCKBUF
    ; This is one 16-bit add: the carry out of the low byte must reach
    ; the high byte, so that BLOCKBUF need not sit on a page boundary
    clc
    lda #<BLOCKBUF
    adc BLOCKPTR
    sta BLOCKPTR
    lda #>BLOCKBUF
    adc BLOCKPTR + 1
    sta BLOCKPTR + 1

    ; And read from there
    sty SCRATCH
    ldy #0
    lda (BLOCKPTR),y
    ldy SCRATCH

    and #$ff            ; Reloading Y changed the flags; make Z reflect A
    rts

 ; rand: Read a value from the SID noise, and limit to (-8..+7)
 ; The low four bits of the voice 3 output (0..15) are moved down by 8, so
 ; negative results come back in two's complement ($F8..$FF)
 ; @return A The random number
 ; @clobbers A
 rand:
    lda SID_3_OUT
    and #$0f        ; Keep 0..15
    sec
    sbc #8          ; Re-centre on zero
    rts

 ; flipx_0: Reflect the angle of ball 0 in the X-axis (256-theta)
 ; The stored result is rand - theta in 8-bit arithmetic: the mirrored
 ; heading, nudged by the -8..+7 that rand returns
 ; @clobbers A
 flipx_0:
    jsr rand
    sec
    sbc BALL0       ; A = jitter - theta (mod 256)
    sta BALL0
    rts

 ; flipy_0: Reflect the angle of ball 0 in the Y-axis (128-theta)
 ; The stored result is rand + 128 - theta in 8-bit arithmetic: the
 ; mirrored heading, nudged by the -8..+7 that rand returns
 ; @clobbers A
 flipy_0:
    jsr rand
    eor #$80        ; Flipping bit 7 adds 128 modulo 256
    sec
    sbc BALL0
    sta BALL0
    rts

 ; flipx_1: Reflect the angle of ball 1 in the X-axis (256-theta)
 ; The stored result is rand - theta in 8-bit arithmetic: the mirrored
 ; heading, nudged by the -8..+7 that rand returns
 ; @clobbers A
 flipx_1:
    jsr rand
    sec
    sbc BALL1       ; A = jitter - theta (mod 256)
    sta BALL1
    rts

 ; flipy_1: Reflect the angle of ball 1 in the Y-axis (128-theta)
 ; The stored result is rand + 128 - theta in 8-bit arithmetic: the
 ; mirrored heading, nudged by the -8..+7 that rand returns
 ; @clobbers A
 flipy_1:
    jsr rand
    eor #$80        ; Flipping bit 7 adds 128 modulo 256
    sec
    sbc BALL1
    sta BALL1
    rts

 ; flipblk: Invert the block pointed to by BLOCKPTR
 ; Toggles bit 0 of the byte, so 0 becomes 1 and 1 becomes 0. The callers
 ; in step0/step1 rely on get_blk_val having just set BLOCKPTR.
 ; @clobbers A, Y
 flipblk:
    ldy #0          ; Offset 0: the byte BLOCKPTR itself points at
    lda (BLOCKPTR),y
    eor #%00000001
    sta (BLOCKPTR),y
    rts

 ;--------------------------------------------------------------------------
 ; frame: Interrupt service routine
 ; Installed in the IRQ vector by init. Each call advances ball 0, then
 ; ball 1, redraws everything, and finally chains to $EA31.
 ; NOTE(review): $EA31 is presumably the KERNAL's regular IRQ handler, and
 ; the KERNAL presumably saves A/X/Y itself before jumping through the
 ; vector -- confirm; if so, the save/restore here is redundant (but
 ; balanced, so harmless).
 ;--------------------------------------------------------------------------
 frame:
    ; Save A, X, Y
    pha
    txa
    pha
    tya
    pha

    jsr step0
    jsr step1
    jsr render

    ; Restore in reverse order: Y, X, A
    pla
    tay
    pla
    tax
    pla
    jmp $EA31

 ;--------------------------------------------------------------------------
 ; step0: Perform calculations for the next run, ball 0
 ; Moves the ball one step along its heading, bounces it off the edges of
 ; the field, and otherwise probes four points on its outline against the
 ; blockfield. For ball 0 a non-zero block is a hit: the block is flipped
 ; and the heading mirrored.
 ; @clobbers A, X, Y, FLAG, BLOCKPTR, SCRATCH
 ;--------------------------------------------------------------------------
 step0:
    subroutine
    lda #0
    sta FLAG

    ; x += cos(theta)
    ; The cosine is read from the sine table a quarter turn (64) ahead;
    ; X wraps at 256, just as the table does
    clc
    lda BALL0
    adc #64
    tax
    lda sindata,x
    ldy #(BALL0 + 1)
    jsr add_u16_s8

    ; y += sin(theta)
    lda BALL0
    tay
    lda sindata,y
    ldy #(BALL0 + 3)
    jsr add_u16_s8

    ; Bounce detection: edges of the field
    ; If the ball is about to hit an X-edge of the field...
    ; One unsigned compare covers both edges: a position that has gone
    ; below zero wraps round to a large value, which is also >= 152
    lda BALL0 + 2
    cmp #152
    bcc .nobounce_x

    ; Flag up an X-reflection
    lda FLAG
    eor #1
    sta FLAG
    jsr flipy_0
 .nobounce_x:

    ; If the ball is about to hit a Y-edge of the field...
    lda BALL0 + 4
    cmp #152
    bcc .nobounce_y

    ; Flag up a Y-reflection
    lda FLAG
    eor #2
    sta FLAG
    jsr flipx_0

    ; If we bounced off the edge of the field, we don't then
    ; want to check possibly out-of-bounds boundaries
 .nobounce_y:
    lda FLAG
    and #3
    bne .end

    ; Bounce detection: boundary with the other side
    ; Left edge first: (0,4) on the ball
    clc
    ldx BALL0 + 2
    lda BALL0 + 4
    adc #4
    tay
    jsr get_blk_val
    beq .nohit_l
    jsr flipblk
    jsr flipy_0
 .nohit_l:

    ; Then the right edge: (7,4) on the ball
    clc
    lda BALL0 + 2
    adc #7
    tax
    lda BALL0 + 4
    clc
    adc #4
    tay
    jsr get_blk_val
    beq .nohit_r
    jsr flipblk
    jsr flipy_0
 .nohit_r:

    ; Then the top: (4,0) on the ball
    clc
    lda BALL0 + 2
    adc #4
    tax
    ldy BALL0 + 4
    jsr get_blk_val
    beq .nohit_t
    jsr flipblk
    jsr flipx_0
 .nohit_t:

    ; Finally the bottom: (4,7) on the ball
    clc
    lda BALL0 + 2
    adc #4
    tax
    clc
    lda BALL0 + 4
    adc #7
    tay
    jsr get_blk_val
    beq .nohit_b
    jsr flipblk
    jsr flipx_0
 .nohit_b:

 .end:
    rts

 ;--------------------------------------------------------------------------
 ; step1: Perform calculations for the next run, ball 1
 ; Moves the ball one step along its heading, bounces it off the edges of
 ; the field, and otherwise probes four points on its outline against the
 ; blockfield. For ball 1 a zero block is a hit: the block is flipped and
 ; the heading mirrored.
 ; @clobbers A, X, Y, FLAG, BLOCKPTR, SCRATCH
 ;--------------------------------------------------------------------------
 step1:
    subroutine
    lda #0
    sta FLAG

    ; x += cos(theta)
    ; The cosine is read from the sine table a quarter turn (64) ahead;
    ; X wraps at 256, just as the table does
    clc
    lda BALL1
    adc #64
    tax
    lda sindata,x
    ldy #(BALL1 + 1)
    jsr add_u16_s8

    ; y += sin(theta)
    lda BALL1
    tay
    lda sindata,y
    ldy #(BALL1 + 3)
    jsr add_u16_s8

    ; Bounce detection: edges of the field
    ; If the ball is about to hit an X-edge of the field...
    ; One unsigned compare covers both edges: a position that has gone
    ; below zero wraps round to a large value, which is also >= 152
    lda BALL1 + 2
    cmp #152
    bcc .nobounce_x

    ; Flag up an X-reflection
    lda FLAG
    eor #1
    sta FLAG
    jsr flipy_1
 .nobounce_x:

    ; If the ball is about to hit a Y-edge of the field...
    lda BALL1 + 4
    cmp #152
    bcc .nobounce_y

    ; Flag up a Y-reflection
    lda FLAG
    eor #2
    sta FLAG
    jsr flipx_1

    ; If we bounced off the edge of the field, we don't then
    ; want to check possibly out-of-bounds boundaries
 .nobounce_y:
    lda FLAG
    and #3
    bne .end

    ; Bounce detection: boundary with the other side
    ; Left edge first: (0,4) on the ball
    clc
    ldx BALL1 + 2
    lda BALL1 + 4
    adc #4
    tay
    jsr get_blk_val
    bne .nohit_l
    jsr flipblk
    jsr flipy_1
 .nohit_l:

    ; Then the right edge: (7,4) on the ball
    clc
    lda BALL1 + 2
    adc #7
    tax
    lda BALL1 + 4
    clc
    adc #4
    tay
    jsr get_blk_val
    bne .nohit_r
    jsr flipblk
    jsr flipy_1
 .nohit_r:

    ; Then the top: (4,0) on the ball
    clc
    lda BALL1 + 2
    adc #4
    tax
    ldy BALL1 + 4
    jsr get_blk_val
    bne .nohit_t
    jsr flipblk
    jsr flipx_1
 .nohit_t:

    ; Finally the bottom: (4,7) on the ball
    clc
    lda BALL1 + 2
    adc #4
    tax
    clc
    lda BALL1 + 4
    adc #7
    tay
    jsr get_blk_val
    bne .nohit_b
    jsr flipblk
    jsr flipx_1
 .nohit_b:

 .end:
    rts

 ;--------------------------------------------------------------------------
 ; render: Draw the blockfield, position sprites
 ; Copies the 20x20 blockfield to COLORBUF, counting the blocks on each
 ; side as it goes, places both ball sprites, and prints the two counts
 ; on screen row 22.
 ; @clobbers A, X, Y, BLOCKPTR, COLORPTR, CNT0, CNT1
 ;--------------------------------------------------------------------------
 render:
    subroutine
    ; Set up two pointers: we're copying from BLOCKBUF to COLORBUF
    lda #<BLOCKBUF
    sta BLOCKPTR
    lda #>BLOCKBUF
    sta BLOCKPTR + 1
    lda #<COLORBUF
    sta COLORPTR
    lda #>COLORBUF
    sta COLORPTR + 1

    lda #0
    sta CNT0
    sta CNT0 + 1
    sta CNT1
    sta CNT1 + 1

    ; Copying 20 rows of 20
    ; While we're copying, count up blocks on each side
    ldx #20
 .copy:
    ldy #19
 .copy_row:
    lda (BLOCKPTR),y
    sta (COLORPTR),y    ; A store leaves the flags alone: Z still describes A
    bne .copy_cnt1
 .copy_cnt0:
    ; inc does not produce a carry, so the high byte is bumped when the
    ; low byte wraps round to zero
    inc CNT0
    bne .copy_next
    inc CNT0 + 1
    jmp .copy_next
 .copy_cnt1:
    inc CNT1
    bne .copy_next
    inc CNT1 + 1
 .copy_next:
    dey
    bpl .copy_row

    ; Importantly, we want to advance 20 in the blockfield for each row
    clc
    lda BLOCKPTR
    adc #20
    sta BLOCKPTR
    lda BLOCKPTR + 1
    adc #0
    sta BLOCKPTR + 1

    ; But 40 in the color RAM, to reach the next line onscreen
    clc
    lda COLORPTR
    adc #40
    sta COLORPTR
    lda COLORPTR + 1
    adc #0
    sta COLORPTR + 1

    dex
    bne .copy

    ; And now position the two ball sprites
    ; Top-left of the visible screen is sprite coordinate (24, 50)
    ; And our field starts at (80, 8) on the visible screen
    ; So we want to add (104, 58)
    clc
    lda BALL0 + 2
    adc #104
    sta VIC_SPR0X
    lda VIC_SPRXHI
    and #%11111110
    bcc .spr0_xhi        ; C still holds the carry out of the +104
    ora #%00000001
 .spr0_xhi:
    sta VIC_SPRXHI
    clc                  ; Keep the X carry out of the Y sum
    lda BALL0 + 4
    adc #58
    sta VIC_SPR0Y

    clc
    lda BALL1 + 2
    adc #104
    sta VIC_SPR1X
    lda VIC_SPRXHI
    and #%11111101
    bcc .spr1_xhi
    ora #%00000010
 .spr1_xhi:
    sta VIC_SPRXHI
    clc
    lda BALL1 + 4
    adc #58
    sta VIC_SPR1Y

    ; Finally, print out the calculated counts of each side
    ; We use BASIC's int-to-floating-point and fp-to-string routines
    ; The string is read from FPBUF up to its zero terminator
    ldy CNT0
    lda CNT0 + 1
    jsr INT2FP
    jsr FOUT
    ldx #0
 .cnt0_print:
    lda FPBUF,x
    beq .cnt0_end
    sta SCRRAM + 22 * 40 + 10,x
    inx
    jmp .cnt0_print
 .cnt0_end:
    ; And print out a space after
    lda #32
    sta SCRRAM + 22 * 40 + 10,x

    ; The white side needs to be printed "backwards", so we
    ; calculate the length of the string first
    ldy CNT1
    lda CNT1 + 1
    jsr INT2FP
    jsr FOUT
    ldx #0
 .cnt1_len:
    lda FPBUF,x
    beq .cnt1_lenend
    inx
    jmp .cnt1_len
 .cnt1_lenend:

    ; With the length in X, copy the digits last-to-first, so that the
    ; final digit lands in column 29
    ldy #30
 .cnt1_print:
    dex
    bmi .cnt1_end
    dey
    lda FPBUF,x
    sta SCRRAM + 22 * 40,y
    jmp .cnt1_print
 .cnt1_end:
    ; And print a space before, to wipe out any extraneous digit
    dey
    lda #32
    sta SCRRAM + 22 * 40,y

    rts

 ;--------------------------------------------------------------------------
 ; Data section
 ;--------------------------------------------------------------------------

 ; Initial position data for the balls
 ; Five bytes per ball, copied to BALL0 and BALL1 by init, in the order:
 ; theta, X low byte, X high byte, Y low byte, Y high byte
 posdata:
    ; White ball: Theta 32, X 40.0, Y 60.0
    byte 32, 0, 40, 0, 60
    ; Black ball: Theta 160, X 120.0, Y 100.0
    byte 160, 0, 120, 0, 100

 ; Sprite data for the balls
 ; 21 rows of 3 bytes; only the first 8 rows of the first byte column have
 ; bits set, forming an 8x8 disc. Aligned to 64 so that init can store
 ; sprdata / 64 as the sprite pointer.
    align 64
 sprdata:
    byte %00111100, %00000000, %00000000
    byte %01111110, %00000000, %00000000
    byte %11111111, %00000000, %00000000
    byte %11111111, %00000000, %00000000
    byte %11111111, %00000000, %00000000
    byte %11111111, %00000000, %00000000
    byte %01111110, %00000000, %00000000
    byte %00111100, %00000000, %00000000
    ; The remaining 13 rows are empty
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000
    byte %00000000, %00000000, %00000000

    ; And finally the sine table
    ; 256 signed bytes covering one full turn, ranging from $81 (-127) to
    ; $7F (+127). step0/step1 index it with theta for the sine and with
    ; theta + 64 for the cosine.
    align 256
 sindata:
    hex 00 03 06 09 0C 10 13 16 19 1C 
    hex 1F 22 25 28 2B 2E 31 33 36 39 
    hex 3C 3F 41 44 47 49 4C 4E 51 53 
    hex 55 58 5A 5C 5E 60 62 64 66 68 
    hex 6A 6B 6D 6F 70 71 73 74 75 76 
    hex 78 79 7A 7A 7B 7C 7D 7D 7E 7E 
    hex 7E 7F 7F 7F 7F 7F 7F 7F 7E 7E 
    hex 7E 7D 7D 7C 7B 7A 7A 79 78 76 
    hex 75 74 73 71 70 6F 6D 6B 6A 68 
    hex 66 64 62 60 5E 5C 5A 58 55 53 
    hex 51 4E 4C 49 47 44 41 3F 3C 39 
    hex 36 33 31 2E 2B 28 25 22 1F 1C 
    hex 19 16 13 10 0C 09 06 03 00 FD 
    hex FA F7 F4 F0 ED EA E7 E4 E1 DE 
    hex DB D8 D5 D2 CF CD CA C7 C4 C1 
    hex BF BC B9 B7 B4 B2 AF AD AB A8 
    hex A6 A4 A2 A0 9E 9C 9A 98 96 95 
    hex 93 91 90 8F 8D 8C 8B 8A 88 87 
    hex 86 86 85 84 83 83 82 82 82 81 
    hex 81 81 81 81 81 81 82 82 82 83 
    hex 83 84 85 86 86 87 88 8A 8B 8C 
    hex 8D 8F 90 91 93 95 96 98 9A 9C 
    hex 9E A0 A2 A4 A6 A8 AB AD AF B2 
    hex B4 B7 B9 BC BF C1 C4 C7 CA CD 
    hex CF D2 D5 D8 DB DE E1 E4 E7 EA 
    hex ED F0 F4 F7 FA FD
	;--------------------------------------------------------------------------
	; C64 reimplementation of Pongwars
	; Imran Nazar, 2024
	; References:
	; - Source: vnglst: https://hachyderm.io/@vnglst/111828811496422610
	; - C64 memory map: https://sta.c64.org/cbm64mem.html
	; - SID RNG: https://stackoverflow.com/questions/44937759
	; - Raster interrupt: https://codebase64.org/doku.php?id=base:introduction_to_raster_irqs
	; - Sprite design: https://cpstest.us/minecraft-circle-generator/
	; - Sprite positioning:
	; https://dustlayer.com/vic-ii/2013/4/28/vic-ii-for-beginners-part-5-bringing-sprites-in-shape
	; https://www.lemon64.com/forum/viewtopic.php?t=60341
	; - PRG loader: https://michaelcmartin.github.io/Ophis/book/x72.html
	; - Make process: https://codebase64.org/doku.php?id=base:tools_for_putting_files_into_a_.d64_image
	; - Sine table generator: Dr LUT, https://github.com/ppelikan/drlut
	; - Comparison branching: http://www.6502.org/tutorials/compare_instructions.html
	; - Signed subtraction: https://www.righto.com/2012/12/the-6502-overflow-flag-explained.html
	; - BASIC number conversion: https://www.lemon64.com/forum/viewtopic.php?t=67576

	; Program layout and work areas
	PROGSTART = $0801 ; Origin of the assembled program (BASIC stub first)
	BLOCKBUF = $CE00 ; Blockfield: 20 rows of 20 bytes, each 0 or 1
	BLOCKPTR = $03 ; Two-byte zero-page pointer, mostly into BLOCKBUF
	COLORBUF = $D832 ; Where render copies the blockfield to (row stride 40)
	COLORPTR = $05 ; Two-byte zero-page pointer into COLORBUF
	BALL0 = $D9 ; Ball 0 state, 5 bytes: theta, X low, X high, Y low, Y high
	BALL1 = $DE ; Ball 1 state, same layout as BALL0
	SCRATCH = $92 ; Holds Y while get_blk_val borrows the register
	CNT0 = $FB ; Two-byte count of blocks holding 0
	CNT1 = $FD ; Two-byte count of blocks holding non-zero
	FLAG = $FF ; Bit flags recording edge bounces within a step
	FPBUF = $0101 ; Where render reads the digit string left by FOUT
	SPRPTR = $07F8 ; Data pointers for sprite 0 and (at +1) sprite 1
	SCRRAM = $0400 ; Screen RAM base, 40 bytes per row

	; ROM routines and hardware registers
	MEMPORT = $0001 ; Not referenced by the code in this file
	IRQ = $0314 ; Two-byte vector that init points at frame
	INT2FP = $B391 ; BASIC ROM: called with A = high byte, Y = low byte
	FOUT = $BDDD ; BASIC ROM: called after INT2FP to produce a string
	VIC_SPR0X = $D000 ; Sprite 0 X position, low 8 bits
	VIC_SPR0Y = $D001 ; Sprite 0 Y position
	VIC_SPR1X = $D002 ; Sprite 1 X position, low 8 bits
	VIC_SPR1Y = $D003 ; Sprite 1 Y position
	VIC_SPRXHI = $D010 ; Ninth X bit of each sprite, one bit per sprite
	VIC_SPREN = $D015 ; Sprite enable bits, one bit per sprite
	VIC_SPR0COL = $D027 ; Sprite 0 colour
	VIC_SPR1COL = $D028 ; Sprite 1 colour
	SID_3_FREQ = $D40E ; SID voice 3 frequency, two bytes
	SID_3_CTRL = $D412 ; SID voice 3 control register
	SID_3_OUT = $D41B ; SID voice 3 output, read by rand

	processor 6502
	org PROGSTART
	; BASIC stub: the single line "10 SYS 2064", so that RUN enters main
	loader:
	word _loader_end, $000A ; Link to the next line, then line number 10
	byte $9E, " 2064", $00 ; SYS token, its argument, end-of-line marker
	_loader_end:
	word $0000 ; Null link: no further BASIC lines
	word $0000 ; Two more bytes, which place main at $0810 = 2064

	; main: Entry point reached from the SYS above
	main:
	jsr init
	jmp * ; Spin here; the work happens in the IRQ handler

	;--------------------------------------------------------------------------
	; init: Set up the blockfield, sprites, RNG; blank screen
	; Runs once, from main. Uses A, X, Y and BLOCKPTR as scratch. Its last job
	; points the IRQ vector at frame, which drives the game from then on.
	;--------------------------------------------------------------------------
	init:
	subroutine
	; Job one: Blank the screen with spaces (not @'s)
	; We'll use BLOCKPTR temporarily, as we don't need it yet
	; Four whole pages are written, which also covers the sprite pointers
	; at SPRPTR; job three sets those afterwards
	lda #<SCRRAM
	sta BLOCKPTR
	lda #>SCRRAM
	sta BLOCKPTR + 1
	lda #32
	ldx #4
	ldy #0
	.blank:
	sta (BLOCKPTR),y
	dey
	bne .blank ; Y runs 0, 255, 254 .. 1: a full page
	inc BLOCKPTR + 1
	dex
	bne .blank

	; Then we want to fill in block characters for the blockfield
	; The status of the blocks will be represented by color
	; The base is one short of the first cell because Y counts 20 down to 1
	lda #<(SCRRAM + 49)
	sta BLOCKPTR
	lda #>(SCRRAM + 49)
	sta BLOCKPTR + 1
	ldx #20
	.screen:
	lda #160 ; Reverse-video space (filled block)
	ldy #20
	.scrrow:
	sta (BLOCKPTR),y
	dey
	bne .scrrow

	; Advance by 40 for each row of 20 we want to fill in
	clc ; Don't rely on whatever carry we were entered with
	lda BLOCKPTR
	adc #40
	sta BLOCKPTR
	lda BLOCKPTR + 1
	adc #0
	sta BLOCKPTR + 1
	dex
	bne .screen

	; Job two: Set up the blockfield at BLOCKBUF
	; We start by setting a pointer to BLOCKBUF
	lda #<BLOCKBUF
	sta BLOCKPTR
	lda #>BLOCKBUF
	sta BLOCKPTR + 1

	; Fill BLOCKBUF with 40 runs of alternating 0 and 1
	; Each run is 10 bytes, so every row of 20 is half 0 and half 1
	lda #0
	ldx #40
	.fill:
	ldy #9
	.fill_row:
	sta (BLOCKPTR),y
	dey
	bpl .fill_row
	pha ; Keep the fill value while A does pointer arithmetic
	clc
	lda BLOCKPTR
	adc #10
	sta BLOCKPTR
	lda BLOCKPTR + 1
	adc #0
	sta BLOCKPTR + 1
	pla
	eor #1 ; Next run uses the other value
	dex
	bne .fill

	; Job three: Initialise the two ball sprites
	; posdata holds 10 bytes (5 per ball), and BALL1 directly follows
	; BALL0, so one copy of indices 9..0 fills both
	ldx #9
	.sprpos:
	lda posdata,x
	sta BALL0,x
	dex
	bpl .sprpos

	; They use the same data, so we need to refer to it only once
	lda #(sprdata / 64)
	sta SPRPTR
	sta SPRPTR + 1

	; Sprite 0 is white, 1 is black
	lda #1
	sta VIC_SPR0COL
	lda #0
	sta VIC_SPR1COL
	lda #%00000011
	sta VIC_SPREN

	; Job four: Init the SID's voice waveform to use as an RNG
	lda #$ff
	sta SID_3_FREQ
	sta SID_3_FREQ + 1
	lda #%10000000
	sta SID_3_CTRL

	; Job five and final: Hook the IRQ vector
	; No raster compare is programmed here; frame simply runs on every
	; IRQ that is dispatched through this vector
	sei ; No IRQ may fire while the vector is half-written
	lda #<frame
	sta IRQ
	lda #>frame
	sta IRQ + 1
	cli

	rts

	;--------------------------------------------------------------------------
	; Helper functions
	;--------------------------------------------------------------------------

	; add_u16_s8: Add 8-bit signed to 16-bit unsigned
	; The addend is added to the low byte as-is; the high byte then takes a
	; carry (addend >= 0) or a borrow (addend < 0) from that addition.
	; @param A The 8-bit signed
	; @param Y Zero-page pointer to the 16-bit unsigned
	; @return Changed value in zero-page
	; @clobbers A
	; @preserves Y
	add_u16_s8:
	subroutine
	clc
	and #$ff ; Refresh N from A; the carry stays clear
	bmi .borrow

	; Non-negative addend: pass the carry up into the high byte
	adc 0,y
	sta 0,y
	lda 1,y
	adc #0
	sta 1,y
	rts

	.borrow:
	; Negative addend: a clear carry after the low byte means a borrow,
	; which is exactly what sbc #0 takes off the high byte
	adc 0,y
	sta 0,y
	lda 1,y
	sbc #0
	sta 1,y
	rts

	; get_blk_val: Get the block buffer value at the given coordinates
	; Leaves BLOCKPTR pointing at the block, so that flipblk can be called
	; straight afterwards. Z is set on return when the block holds 0.
	; @param X X-coord (0-159)
	; @param Y Y-coord (0-159)
	; @return A The value of the block covering this coord
	; @clobbers A, BLOCKPTR, SCRATCH
	; @preserves X, Y
	get_blk_val:
	; We need to reduce the coordinates to block-level (8x8)
	; and then calculate Y * 20 + X
	; This works out to ((Y >> 3) * 20) + (X >> 3)
	; = (Y >> 3 << 4) + (Y >> 3 << 2) + (X >> 3)
	; = (Y&0xF8 << 1) + (Y&0xF8 >> 1) + (X >> 3)

	subroutine

	; (Y&0xF8 << 1)
	lda #0
	sta BLOCKPTR + 1
	tya
	and #$f8
	asl
	sta BLOCKPTR
	rol BLOCKPTR + 1 ; Catch the bit shifted out of the low byte

	; + (Y&0xF8 >> 1)
	tya
	and #$f8
	lsr
	clc
	adc BLOCKPTR
	sta BLOCKPTR
	lda BLOCKPTR + 1
	adc #0
	sta BLOCKPTR + 1

	; + (X >> 3)
	txa
	lsr
	lsr
	lsr
	clc
	adc BLOCKPTR
	sta BLOCKPTR
	lda BLOCKPTR + 1
	adc #0
	sta BLOCKPTR + 1

	; Translate the calculated index into BLOCKBUF
	; This is one 16-bit add: the carry out of the low byte must reach
	; the high byte, so that BLOCKBUF need not sit on a page boundary
	clc
	lda #<BLOCKBUF
	adc BLOCKPTR
	sta BLOCKPTR
	lda #>BLOCKBUF
	adc BLOCKPTR + 1
	sta BLOCKPTR + 1

	; And read from there
	sty SCRATCH
	ldy #0
	lda (BLOCKPTR),y
	ldy SCRATCH

	and #$ff ; Reloading Y changed the flags; make Z reflect A
	rts

	; rand: Read a value from the SID noise, and limit to (-8..+7)
	; The low four bits of the voice 3 output (0..15) are moved down by 8, so
	; negative results come back in two's complement ($F8..$FF)
	; @return A The random number
	; @clobbers A
	rand:
	lda SID_3_OUT
	and #$0f ; Keep 0..15
	sec
	sbc #8 ; Re-centre on zero
	rts

	; flipx_0: Reflect the angle of ball 0 in the X-axis (256-theta)
	; The stored result is rand - theta in 8-bit arithmetic: the mirrored
	; heading, nudged by the -8..+7 that rand returns
	; @clobbers A
	flipx_0:
	jsr rand
	sec
	sbc BALL0 ; A = jitter - theta (mod 256)
	sta BALL0
	rts

	; flipy_0: Reflect the angle of ball 0 in the Y-axis (128-theta)
	; The stored result is rand + 128 - theta in 8-bit arithmetic: the
	; mirrored heading, nudged by the -8..+7 that rand returns
	; @clobbers A
	flipy_0:
	jsr rand
	eor #$80 ; Flipping bit 7 adds 128 modulo 256
	sec
	sbc BALL0
	sta BALL0
	rts

	; flipx_1: Reflect the angle of ball 1 in the X-axis (256-theta)
	; The stored result is rand - theta in 8-bit arithmetic: the mirrored
	; heading, nudged by the -8..+7 that rand returns
	; @clobbers A
	flipx_1:
	jsr rand
	sec
	sbc BALL1 ; A = jitter - theta (mod 256)
	sta BALL1
	rts

	; flipy_1: Reflect the angle of ball 1 in the Y-axis (128-theta)
	; The stored result is rand + 128 - theta in 8-bit arithmetic: the
	; mirrored heading, nudged by the -8..+7 that rand returns
	; @clobbers A
	flipy_1:
	jsr rand
	eor #$80 ; Flipping bit 7 adds 128 modulo 256
	sec
	sbc BALL1
	sta BALL1
	rts

	; flipblk: Invert the block pointed to by BLOCKPTR
	; Toggles bit 0 of the byte, so 0 becomes 1 and 1 becomes 0. The callers
	; in step0/step1 rely on get_blk_val having just set BLOCKPTR.
	; @clobbers A, Y
	flipblk:
	ldy #0 ; Offset 0: the byte BLOCKPTR itself points at
	lda (BLOCKPTR),y
	eor #%00000001
	sta (BLOCKPTR),y
	rts

	;--------------------------------------------------------------------------
	; frame: Interrupt service routine
	; Installed in the IRQ vector by init. Each call advances ball 0, then
	; ball 1, redraws everything, and finally chains to $EA31.
	; NOTE(review): $EA31 is presumably the KERNAL's regular IRQ handler, and
	; the KERNAL presumably saves A/X/Y itself before jumping through the
	; vector -- confirm; if so, the save/restore here is redundant (but
	; balanced, so harmless).
	;--------------------------------------------------------------------------
	frame:
	; Save A, X, Y
	pha
	txa
	pha
	tya
	pha

	jsr step0
	jsr step1
	jsr render

	; Restore in reverse order: Y, X, A
	pla
	tay
	pla
	tax
	pla
	jmp $EA31

	;--------------------------------------------------------------------------
	; step0: Perform calculations for the next run, ball 0
	; Moves the ball one step along its heading, bounces it off the edges of
	; the field, and otherwise probes four points on its outline against the
	; blockfield. For ball 0 a non-zero block is a hit: the block is flipped
	; and the heading mirrored.
	; @clobbers A, X, Y, FLAG, BLOCKPTR, SCRATCH
	;--------------------------------------------------------------------------
	step0:
	subroutine
	lda #0
	sta FLAG

	; x += cos(theta)
	; The cosine is read from the sine table a quarter turn (64) ahead;
	; X wraps at 256, just as the table does
	clc
	lda BALL0
	adc #64
	tax
	lda sindata,x
	ldy #(BALL0 + 1)
	jsr add_u16_s8

	; y += sin(theta)
	lda BALL0
	tay
	lda sindata,y
	ldy #(BALL0 + 3)
	jsr add_u16_s8

	; Bounce detection: edges of the field
	; If the ball is about to hit an X-edge of the field...
	; One unsigned compare covers both edges: a position that has gone
	; below zero wraps round to a large value, which is also >= 152
	lda BALL0 + 2
	cmp #152
	bcc .nobounce_x

	; Flag up an X-reflection
	lda FLAG
	eor #1
	sta FLAG
	jsr flipy_0
	.nobounce_x:

	; If the ball is about to hit a Y-edge of the field...
	lda BALL0 + 4
	cmp #152
	bcc .nobounce_y

	; Flag up a Y-reflection
	lda FLAG
	eor #2
	sta FLAG
	jsr flipx_0

	; If we bounced off the edge of the field, we don't then
	; want to check possibly out-of-bounds boundaries
	.nobounce_y:
	lda FLAG
	and #3
	bne .end

	; Bounce detection: boundary with the other side
	; Left edge first: (0,4) on the ball
	clc
	ldx BALL0 + 2
	lda BALL0 + 4
	adc #4
	tay
	jsr get_blk_val
	beq .nohit_l
	jsr flipblk
	jsr flipy_0
	.nohit_l:

	; Then the right edge: (7,4) on the ball
	clc
	lda BALL0 + 2
	adc #7
	tax
	lda BALL0 + 4
	clc
	adc #4
	tay
	jsr get_blk_val
	beq .nohit_r
	jsr flipblk
	jsr flipy_0
	.nohit_r:

	; Then the top: (4,0) on the ball
	clc
	lda BALL0 + 2
	adc #4
	tax
	ldy BALL0 + 4
	jsr get_blk_val
	beq .nohit_t
	jsr flipblk
	jsr flipx_0
	.nohit_t:

	; Finally the bottom: (4,7) on the ball
	clc
	lda BALL0 + 2
	adc #4
	tax
	clc
	lda BALL0 + 4
	adc #7
	tay
	jsr get_blk_val
	beq .nohit_b
	jsr flipblk
	jsr flipx_0
	.nohit_b:

	.end:
	rts

	;--------------------------------------------------------------------------
	; step1: Perform calculations for the next run, ball 1
	; Moves the ball one step along its heading, bounces it off the edges of
	; the field, and otherwise probes four points on its outline against the
	; blockfield. For ball 1 a zero block is a hit: the block is flipped and
	; the heading mirrored.
	; @clobbers A, X, Y, FLAG, BLOCKPTR, SCRATCH
	;--------------------------------------------------------------------------
	step1:
	subroutine
	lda #0
	sta FLAG

	; x += cos(theta)
	; The cosine is read from the sine table a quarter turn (64) ahead;
	; X wraps at 256, just as the table does
	clc
	lda BALL1
	adc #64
	tax
	lda sindata,x
	ldy #(BALL1 + 1)
	jsr add_u16_s8

	; y += sin(theta)
	lda BALL1
	tay
	lda sindata,y
	ldy #(BALL1 + 3)
	jsr add_u16_s8

	; Bounce detection: edges of the field
	; If the ball is about to hit an X-edge of the field...
	; One unsigned compare covers both edges: a position that has gone
	; below zero wraps round to a large value, which is also >= 152
	lda BALL1 + 2
	cmp #152
	bcc .nobounce_x

	; Flag up an X-reflection
	lda FLAG
	eor #1
	sta FLAG
	jsr flipy_1
	.nobounce_x:

	; If the ball is about to hit a Y-edge of the field...
	lda BALL1 + 4
	cmp #152
	bcc .nobounce_y

	; Flag up a Y-reflection
	lda FLAG
	eor #2
	sta FLAG
	jsr flipx_1

	; If we bounced off the edge of the field, we don't then
	; want to check possibly out-of-bounds boundaries
	.nobounce_y:
	lda FLAG
	and #3
	bne .end

	; Bounce detection: boundary with the other side
	; Left edge first: (0,4) on the ball
	clc
	ldx BALL1 + 2
	lda BALL1 + 4
	adc #4
	tay
	jsr get_blk_val
	bne .nohit_l
	jsr flipblk
	jsr flipy_1
	.nohit_l:

	; Then the right edge: (7,4) on the ball
	clc
	lda BALL1 + 2
	adc #7
	tax
	lda BALL1 + 4
	clc
	adc #4
	tay
	jsr get_blk_val
	bne .nohit_r
	jsr flipblk
	jsr flipy_1
	.nohit_r:

	; Then the top: (4,0) on the ball
	clc
	lda BALL1 + 2
	adc #4
	tax
	ldy BALL1 + 4
	jsr get_blk_val
	bne .nohit_t
	jsr flipblk
	jsr flipx_1
	.nohit_t:

	; Finally the bottom: (4,7) on the ball
	clc
	lda BALL1 + 2
	adc #4
	tax
	clc
	lda BALL1 + 4
	adc #7
	tay
	jsr get_blk_val
	bne .nohit_b
	jsr flipblk
	jsr flipx_1
	.nohit_b:

	.end:
	rts

	;--------------------------------------------------------------------------
	; render: Draw the blockfield, position sprites
	; Copies the 20x20 blockfield to COLORBUF, counting the blocks on each
	; side as it goes, places both ball sprites, and prints the two counts
	; on screen row 22.
	; @clobbers A, X, Y, BLOCKPTR, COLORPTR, CNT0, CNT1
	;--------------------------------------------------------------------------
	render:
	subroutine
	; Set up two pointers: we're copying from BLOCKBUF to COLORBUF
	lda #<BLOCKBUF
	sta BLOCKPTR
	lda #>BLOCKBUF
	sta BLOCKPTR + 1
	lda #<COLORBUF
	sta COLORPTR
	lda #>COLORBUF
	sta COLORPTR + 1

	lda #0
	sta CNT0
	sta CNT0 + 1
	sta CNT1
	sta CNT1 + 1

	; Copying 20 rows of 20
	; While we're copying, count up blocks on each side
	ldx #20
	.copy:
	ldy #19
	.copy_row:
	lda (BLOCKPTR),y
	sta (COLORPTR),y ; A store leaves the flags alone: Z still describes A
	bne .copy_cnt1
	.copy_cnt0:
	; inc does not produce a carry, so the high byte is bumped when the
	; low byte wraps round to zero
	inc CNT0
	bne .copy_next
	inc CNT0 + 1
	jmp .copy_next
	.copy_cnt1:
	inc CNT1
	bne .copy_next
	inc CNT1 + 1
	.copy_next:
	dey
	bpl .copy_row

	; Importantly, we want to advance 20 in the blockfield for each row
	clc
	lda BLOCKPTR
	adc #20
	sta BLOCKPTR
	lda BLOCKPTR + 1
	adc #0
	sta BLOCKPTR + 1

	; But 40 in the color RAM, to reach the next line onscreen
	clc
	lda COLORPTR
	adc #40
	sta COLORPTR
	lda COLORPTR + 1
	adc #0
	sta COLORPTR + 1

	dex
	bne .copy

	; And now position the two ball sprites
	; Top-left of the visible screen is sprite coordinate (24, 50)
	; And our field starts at (80, 8) on the visible screen
	; So we want to add (104, 58)
	clc
	lda BALL0 + 2
	adc #104
	sta VIC_SPR0X
	lda VIC_SPRXHI
	and #%11111110
	bcc .spr0_xhi ; C still holds the carry out of the +104
	ora #%00000001
	.spr0_xhi:
	sta VIC_SPRXHI
	clc ; Keep the X carry out of the Y sum
	lda BALL0 + 4
	adc #58
	sta VIC_SPR0Y

	clc
	lda BALL1 + 2
	adc #104
	sta VIC_SPR1X
	lda VIC_SPRXHI
	and #%11111101
	bcc .spr1_xhi
	ora #%00000010
	.spr1_xhi:
	sta VIC_SPRXHI
	clc
	lda BALL1 + 4
	adc #58
	sta VIC_SPR1Y

	; Finally, print out the calculated counts of each side
	; We use BASIC's int-to-floating-point and fp-to-string routines
	; The string is read from FPBUF up to its zero terminator
	ldy CNT0
	lda CNT0 + 1
	jsr INT2FP
	jsr FOUT
	ldx #0
	.cnt0_print:
	lda FPBUF,x
	beq .cnt0_end
	sta SCRRAM + 22 * 40 + 10,x
	inx
	jmp .cnt0_print
	.cnt0_end:
	; And print out a space after
	lda #32
	sta SCRRAM + 22 * 40 + 10,x

	; The white side needs to be printed "backwards", so we
	; calculate the length of the string first
	ldy CNT1
	lda CNT1 + 1
	jsr INT2FP
	jsr FOUT
	ldx #0
	.cnt1_len:
	lda FPBUF,x
	beq .cnt1_lenend
	inx
	jmp .cnt1_len
	.cnt1_lenend:

	; With the length in X, copy the digits last-to-first, so that the
	; final digit lands in column 29
	ldy #30
	.cnt1_print:
	dex
	bmi .cnt1_end
	dey
	lda FPBUF,x
	sta SCRRAM + 22 * 40,y
	jmp .cnt1_print
	.cnt1_end:
	; And print a space before, to wipe out any extraneous digit
	dey
	lda #32
	sta SCRRAM + 22 * 40,y

	rts

	;--------------------------------------------------------------------------
	; Data section
	;--------------------------------------------------------------------------

	; Initial position data for the balls
	; Five bytes per ball, copied to BALL0 and BALL1 by init, in the order:
	; theta, X low byte, X high byte, Y low byte, Y high byte
	posdata:
	; White ball: Theta 32, X 40.0, Y 60.0
	byte 32, 0, 40, 0, 60
	; Black ball: Theta 160, X 120.0, Y 100.0
	byte 160, 0, 120, 0, 100

	; Sprite data for the balls
	; 21 rows of 3 bytes; only the first 8 rows of the first byte column have
	; bits set, forming an 8x8 disc. Aligned to 64 so that init can store
	; sprdata / 64 as the sprite pointer.
	align 64
	sprdata:
	byte %00111100, %00000000, %00000000
	byte %01111110, %00000000, %00000000
	byte %11111111, %00000000, %00000000
	byte %11111111, %00000000, %00000000
	byte %11111111, %00000000, %00000000
	byte %11111111, %00000000, %00000000
	byte %01111110, %00000000, %00000000
	byte %00111100, %00000000, %00000000
	; The remaining 13 rows are empty
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000
	byte %00000000, %00000000, %00000000

	; And finally the sine table
	; 256 signed bytes covering one full turn, ranging from $81 (-127) to
	; $7F (+127). step0/step1 index it with theta for the sine and with
	; theta + 64 for the cosine.
	align 256
	sindata:
	hex 00 03 06 09 0C 10 13 16 19 1C
	hex 1F 22 25 28 2B 2E 31 33 36 39
	hex 3C 3F 41 44 47 49 4C 4E 51 53
	hex 55 58 5A 5C 5E 60 62 64 66 68
	hex 6A 6B 6D 6F 70 71 73 74 75 76
	hex 78 79 7A 7A 7B 7C 7D 7D 7E 7E
	hex 7E 7F 7F 7F 7F 7F 7F 7F 7E 7E
	hex 7E 7D 7D 7C 7B 7A 7A 79 78 76
	hex 75 74 73 71 70 6F 6D 6B 6A 68
	hex 66 64 62 60 5E 5C 5A 58 55 53
	hex 51 4E 4C 49 47 44 41 3F 3C 39
	hex 36 33 31 2E 2B 28 25 22 1F 1C
	hex 19 16 13 10 0C 09 06 03 00 FD
	hex FA F7 F4 F0 ED EA E7 E4 E1 DE
	hex DB D8 D5 D2 CF CD CA C7 C4 C1
	hex BF BC B9 B7 B4 B2 AF AD AB A8
	hex A6 A4 A2 A0 9E 9C 9A 98 96 95
	hex 93 91 90 8F 8D 8C 8B 8A 88 87
	hex 86 86 85 84 83 83 82 82 82 81
	hex 81 81 81 81 81 81 82 82 82 83
	hex 83 84 85 86 86 87 88 8A 8B 8C
	hex 8D 8F 90 91 93 95 96 98 9A 9C
	hex 9E A0 A2 A4 A6 A8 AB AD AF B2
	hex B4 B7 B9 BC BF C1 C4 C7 CA CD
	hex CF D2 D5 D8 DB DE E1 E4 E7 EA
	hex ED F0 F4 F7 FA FD