Skip to content

Commit

Permalink
Make most 16-bit macros take single args instead of pair args
Browse files Browse the repository at this point in the history
eg. instead of:
	LongAdd H,L, B,C, H,L
we just do:
	LongAdd HL, BC, HL

We use the new HIGH and LOW builtin functions to make this possible
  • Loading branch information
ekimekim committed Dec 30, 2017
1 parent 60adc00 commit 777db49
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 31 deletions.
4 changes: 2 additions & 2 deletions graphics.asm
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ GraphicsTryWriteTile::

ld A, D
add D
LongAddToA TileQueueInfo >> 8,TileQueueInfo & $ff, H,L ; HL = TileQueueInfo + 2 * D
LongAddToA TileQueueInfo, HL ; HL = TileQueueInfo + 2 * D
; HL = length of D'th queue

ld A, [HL+] ; A = length of queue, HL = addr of head
Expand Down Expand Up @@ -312,7 +312,7 @@ T_GraphicsWriteTile::
GraphicsWriteSprite::
rla
rla ; Shift A left twice, ie. A = 4 * A. Note the rotate is equiv to shift because A < 64.
LongAddToA WorkingSprites >> 8,WorkingSprites & $ff, H,L ; HL = WorkingSprites + A
LongAddToA WorkingSprites, HL ; HL = WorkingSprites + A
; In order to avoid a half-written sprite from being drawn, we ensure no draw will occur
; until we are done. We do this by clearing the dirty flag regardless of whether it was set.
; We know this won't be overwritten because we've disabled switching when calling this function.
Expand Down
40 changes: 30 additions & 10 deletions include/longcalc.asm
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ LongLoad: MACRO
ENDM

; Add 16-bit reg pairs or immediates \1\2 and \3\4, putting result in \5\6, which may be the same as either.
; \1 and \2 may also be indirect immediates.
; Cannot use AF. Clobbers A. Sets or resets carry as per normal add.
; \1\2 and \5\6 may be indirect immediates.
; Note: In the case where \1\2 and \5\6 are HL and \3\4 are BC or DE, you should use "ADD HL, rr" instead.
LongAdd: MACRO
LongAddParts: MACRO
ld A, \2
add \4
ld \6, A
Expand All @@ -20,16 +21,28 @@ LongAdd: MACRO
ld \5, A
ENDM

; Convenience wrapper around LongAddParts that takes whole 16-bit operands:
; \1 and \2 are 16-bit immediates or reg pairs, and \3 receives the sum (\3 = \1 + \2).
; Each operand is split into its halves with HIGH()/LOW() and forwarded as (high, low).
; NOTE(review): LongAddParts loads into the halves of \3, so \3 presumably has to be
; a reg pair rather than an immediate - confirm.
LongAdd: MACRO
LongAddParts HIGH(\1),LOW(\1), HIGH(\2),LOW(\2), HIGH(\3),LOW(\3)
ENDM

; Add 16-bit reg pair or immediate \1\2 to A, putting result in \3\4, which may be the same as \1\2.
; Clobbers A. Sets or resets carry as per normal add.
LongAddToA: MACRO
LongAddToAParts: MACRO ; args are (high, low) halves: \1,\2 = addend, \3,\4 = destination
add \2 ; A = A + low half of addend; the carry from this add is consumed by the adc below
ld \4, A ; store low byte of the result
ld A, 0 ; this can't be xor A because that would reset carry
adc \1 ; A = 0 + high half of addend + carry from the low-byte add
ld \3, A ; store high byte of the result
ENDM

; Convenience wrapper around LongAddToAParts that takes whole 16-bit operands:
; \1 is a 16-bit immediate or reg pair, and \2 receives the sum (\2 = A + \1).
; \2 may be the same pair as \1 (eg. "LongAddToA HL, HL" adds A to HL in place).
; Each operand is split into its halves with HIGH()/LOW() and forwarded as (high, low).
LongAddToA: MACRO
LongAddToAParts HIGH(\1),LOW(\1), HIGH(\2),LOW(\2)
ENDM

; An alternate approach to LongAdd, suitable for very small const in-place addition to a 16-bit reg.
; (compared to a LongAdd \1,\2,\1, faster for abs(\2) <= 4 and smaller for <= 8)
; (only faster than 'ld \1, immediate' for abs(\2) <= 1 and smaller for <= 3)
Expand Down Expand Up @@ -67,35 +80,42 @@ LongSub: MACRO
; Shift 16-bit reg pair \1\2 (not AF) left once. Sets carry as per normal shift.
; This corresponds to doubling the (unsigned) value.
; Note: If you simply want to double HL, "ADD HL, HL" is faster but has different flag effects.
LongShiftL: MACRO
LongShiftLParts: MACRO ; \1 = high half, \2 = low half of the pair being shifted
sla \2 ; shift low half left; its old top bit lands in carry
rl \1 ; rotate high half left through carry, pulling that bit into bit 0
ENDM
; Shift the 16-bit reg pair \1 left once.
; Wrapper that splits \1 with HIGH()/LOW() and forwards the halves to LongShiftLParts.
LongShiftL: MACRO
LongShiftLParts HIGH(\1), LOW(\1)
ENDM

; Shift 16-bit reg pair \1\2 (not AF) right once. Highest order bit in result is 0.
; This corresponds to halving the (unsigned) value, rounding down.
; Sets carry flag true if there was a remainder.
LongShiftR: MACRO
LongShiftRParts: MACRO ; \1 = high half, \2 = low half of the pair being shifted
srl \1 ; logical shift of high half right; its old bit 0 lands in carry
rr \2 ; rotate low half right through carry, pulling that bit into bit 7
ENDM
; Shift the 16-bit reg pair \1 right once.
; Wrapper that splits \1 with HIGH()/LOW() and forwards the halves to LongShiftRParts.
LongShiftR: MACRO
LongShiftRParts HIGH(\1), LOW(\1)
ENDM


; Multiply 16-bit reg pair \1\2 by 8-bit immediate \3, adding result to reg pair \4\5.
; Multiply 16-bit reg pair \1 by 8-bit immediate \2, adding result to reg pair \3,
; ie. \3 += \1 * \2.
; The result pair MUST NOT be the same as the input pair.
; Overflow is undefined - you must ensure your maximum value * \3 < 65536.
; Overflow is undefined - you must ensure your maximum value * \2 < 65536.
; This is considerably fast because it's fully unrolled and hard-codes the multiplier,
; so it can straight up omit any steps that aren't needed for that number.
; Clobbers A, \1, \2
; Clobbers A, \1
MultiplyConst16: MACRO ; shift-and-add multiply, unrolled at assembly time
_N SET \3
_N SET \2 ; _N tracks the multiplier bits that are still to be processed
REPT 8 ; one pass per bit of the 8-bit multiplier
IF _N & 1 > 0 ; lowest remaining bit is set: add the (already shifted) input into the result
LongAdd \4,\5, \1,\2, \4,\5
LongAdd \3, \1, \3
ENDC
_N SET _N >> 1 ; discard the bit that was just handled
IF _N > 0 ; set bits remain: double the input so it lines up with the next bit
LongShiftL \1,\2
LongShiftL \1
ENDC
ENDR
ENDM
10 changes: 6 additions & 4 deletions include/ring.asm
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ RingLen: MACRO
; Clobbers A, H, L.
; Does NOT check if ring is full! Behaviour in that case is undefined.
RingPushNoCheck: MACRO
LongAdd 0,[(\1) + ring_head], (((\1)+ring_data) >> 8),(((\1)+ring_data) & $ff), H,L ; HL = \1 + ring_data + (value of ring_head) = addr of ring_head'th element of ring_data
ld A, [(\1) + ring_head]
LongAddToA (\1)+ring_data, HL ; HL = \1 + ring_data + (value of ring_head) = addr of ring_head'th element of ring_data
ld [HL], \3
ld A, [(\1) + ring_head]
inc A
Expand All @@ -67,7 +68,7 @@ RingPush: MACRO
ld \4, A ; store new head for safekeeping.
RepointStruct HL, ring_tail, ring_head
ld A, [HL] ; it would be faster to update head now, but this breaks interrupt-safety
LongAddToA ((\1)+ring_data) >> 8,((\1)+ring_data) & $ff, H,L ; HL = \1 + ring_data + head index
LongAddToA (\1)+ring_data, HL ; HL = \1 + ring_data + head index
ld [HL], \3
ld HL, (\1) + ring_head
ld [HL], \4 ; update head
Expand All @@ -89,7 +90,8 @@ _RingPopHL: MACRO
; Clobbers A, H, L.
; Does NOT check if ring is empty! Behaviour in that case is undefined.
RingPopNoCheck: MACRO
LongAdd 0,[(\1) + ring_tail], (((\1)+ring_data) >> 8),(((\1)+ring_data) & $ff), H,L ; HL = \1 + ring_data + (value of ring_tail) = addr of ring_tail'th element of ring_data
ld A, [(\1) + ring_tail]
LongAddToA (\1)+ring_data, HL ; HL = \1 + ring_data + (value of ring_tail) = addr of ring_tail'th element of ring_data
_RingPopHL \1, \2, \3
ENDM

Expand All @@ -104,7 +106,7 @@ RingPop: MACRO
jr z, .end\@ ; if no items, finish with z flag set
ld A, [HL+] ; A = tail
RepointStruct HL, ring_tail + 1, ring_data
LongAddToA H,L, H,L ; HL += tail index
LongAddToA HL, HL ; HL += tail index
_RingPopHL \1, \2, \3
or $ff ; unset z, which may be set
.end\@
Expand Down
2 changes: 1 addition & 1 deletion main.asm
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ Fib:
ld H, D
ld L, E
pop DE
LongAdd D,E, H,L, D,E ; DE += HL, ie. DE = Fib(n-1) + Fib(n-2)
LongAdd DE, HL, DE ; DE += HL, ie. DE = Fib(n-1) + Fib(n-2)
inc B
inc B ; return B to initial value
call T_TaskYield ; demonstrate yielding. Fib(B) should equal DE.
Expand Down
5 changes: 3 additions & 2 deletions malloc.asm
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ DynMemAlloc::
ld A, B
inc A ; A = desired chunk length
ld [HL+], A ; set this chunk's length to the desired length, set HL to this chunk + 1
LongAdd H,L, 0,B, H,L ; HL += B, HL = chunk + 1, B = chunk length - 1, so HL + B = next chunk
ld A, B
LongAddToA HL, HL ; HL += B, HL = chunk + 1, B = chunk length - 1, so HL + B = next chunk
ld [HL], C ; C = excess bytes = length of new chunk
inc HL
ld A, $ff
Expand All @@ -94,7 +95,7 @@ DynMemAlloc::
.nomatch
; C = chunk length - 1, HL points at chunk_owner = chunk start + 1, so HL + C = next chunk
ld A, C
LongAddToA H,L, H,L ; HL += C
LongAddToA HL, HL ; HL += C
.start
ld A, [HL+] ; A = chunk length, HL points at chunk_owner
dec A ; A = chunk length - 1, set Z if A = 1 (end of range), wraps to 255 if A = 0 (ie. 256)
Expand Down
12 changes: 6 additions & 6 deletions tasks.asm
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ TaskNewWithID:

; fill in the task struct
ld A, B
LongAddToA ((TaskList+task_sp) >> 8),((TaskList+task_sp) & $ff), H,L ; HL = TaskList + B + task_sp = &(TaskList[B].task_sp)
LongAddToA TaskList+task_sp, HL ; HL = TaskList + B + task_sp = &(TaskList[B].task_sp)
ld A, D
ld [HL+], A
ld A, E
Expand Down Expand Up @@ -123,7 +123,7 @@ TaskNewDynStack::
; HL points to the base of the new stack, but stacks grow down,
; we want to give the top of the stack
ld A, DYN_MEM_STACK_SIZE
LongAddToA H,L, H,L ; HL += stack size
LongAddToA HL, HL ; HL += stack size
jr TaskNewWithID ; tail call


Expand Down Expand Up @@ -160,7 +160,7 @@ TaskSave::
ld B, H
ld C, L ; BC = SP
ld A, [CurrentTask]
LongAddToA ((TaskList+task_sp) >> 8),((TaskList+task_sp) & $ff), H,L ; HL = TaskList + CurrentTask + task_sp = &(TaskList[CurrentTask].task_sp)
LongAddToA TaskList+task_sp, HL ; HL = TaskList + CurrentTask + task_sp = &(TaskList[CurrentTask].task_sp)
; Save SP to task struct
ld A, B
ld [HL+], A
Expand All @@ -178,7 +178,7 @@ TaskSave::
; Takes task ID to load in A.
TaskLoad::
ld [CurrentTask], A
LongAddToA ((TaskList+task_sp) >> 8),((TaskList+task_sp) & $ff), H,L ; HL = TaskList + A + task_sp = &(TaskList[A].task_sp)
LongAddToA TaskList+task_sp, HL ; HL = TaskList + A + task_sp = &(TaskList[A].task_sp)
; BC = [HL] = stored stack pointer
ld A, [HL+]
ld B, A
Expand Down Expand Up @@ -262,7 +262,7 @@ T_EnableSwitch::
; Clobbers A, HL
T_SetROMBank::
ld A, [CurrentTask]
LongAddToA ((TaskList+task_rombank) >> 8),((TaskList+task_rombank) & $ff), H,L ; HL = TaskList + CurrentTask + task_rombank = &(TaskList[CurrentTask].task_rombank)
LongAddToA TaskList+task_rombank, HL ; HL = TaskList + CurrentTask + task_rombank = &(TaskList[CurrentTask].task_rombank)
ld [HL], C
ld A, C
ld [CurrentROMBank], A
Expand All @@ -276,7 +276,7 @@ T_SetROMBank::
; Clobbers A, HL
T_SetRAMBank::
ld A, [CurrentTask]
LongAddToA ((TaskList+task_rambank) >> 8),((TaskList+task_rambank) & $ff), H,L ; HL = TaskList + CurrentTask + task_rambank = &(TaskList[CurrentTask].task_rambank)
LongAddToA TaskList+task_rambank, HL ; HL = TaskList + CurrentTask + task_rambank = &(TaskList[CurrentTask].task_rambank)
ld [HL], C
ld A, C
ld [CurrentRAMBank], A
Expand Down
2 changes: 1 addition & 1 deletion tasks/clock.asm
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ TaskClockGetHMS:
; 100 * HL / 1024 === 25 * HL / 256 -> multiply by 25 then take top byte (H)
push DE
ld DE, 0
MultiplyConst16 H,L, 25, D,E ; DE = HL * 25
MultiplyConst16 HL, 25, DE ; DE = HL * 25
ld H, D
pop DE
; Now H = 100ths of a second
Expand Down
12 changes: 7 additions & 5 deletions waiters.asm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ WaiterWait::
.notlesser
RepointStruct HL, waiter_min_task, 0
call WaiterDeterminant ; DE = determinant
LongAdd 0,[CurrentTask], HIGH(TaskList+task_waiter),LOW(TaskList+task_waiter), H,L ; HL = &TaskList[Current Task].task_waiter
ld A, [CurrentTask]
LongAddToA TaskList+task_waiter, HL ; HL = &TaskList[Current Task].task_waiter
ld A, D
ld [HL+], A
ld [HL], E ; task_waiter = DE
Expand Down Expand Up @@ -54,7 +55,8 @@ _WaiterWake::
ld B, [HL] ; B = min task
dec A ; A = ff
ld [HL], A ; set min task id to ff, waiter is now cleared
LongAddToA HIGH(TaskList+task_sp),LOW(TaskList+task_sp), H,L ; HL = &TaskList[min task].task_sp
ld A, B
LongAddToA TaskList+task_sp, HL ; HL = &TaskList[min task].task_sp
; Starting at min task and proceeding until either we wake count tasks, or we hit end of task list.
; C contains things left to find, B contains current task id (stop when we hit MAX_TASKS * TASK_SIZE),
; DE is determinant to compare to and HL is our pointer.
Expand All @@ -64,7 +66,7 @@ _WaiterWake::
ld A, [HL+]
and A
jr nz, .valid
LongAdd H,L, 0,TASK_SIZE-1, H,L ; HL += TASK_SIZE - 1
LongAdd HL, TASK_SIZE-1, HL ; HL += TASK_SIZE - 1
jr .skip
.valid
; Advance to task_waiter
Expand Down Expand Up @@ -103,7 +105,7 @@ _WaiterWake::
WaiterDeterminant:
ld D, H
ld E, L
LongShiftR D,E ; DE = HL >> 1
LongShiftR DE ; DE = HL >> 1
ld A, D
and $f0 ; grab top 3 bits of address
cp %01100000 ; top 3 bits == 110 (z) means WRAM, < 110 (c) means SRAM, > (neither) means HRAM
Expand All @@ -128,7 +130,7 @@ WaiterDeterminant:
ld D, A ; DE = 10bb baaa aaaa aaaa
ret
.sram
LongShiftR D,E ; DE = 00aa aaaa aaaa aaaa
LongShiftR DE ; DE = 00aa aaaa aaaa aaaa
ld A, D
and %00000111
ld D, A ; mask out top 5 bits of D, DE = 0000 0aaa aaaa aaaa
Expand Down

0 comments on commit 777db49

Please sign in to comment.