Handwritten asm: bcmp, bcopy, bzero (#1817)

* Handwritten asm: bcmp, bcopy, bzero

* consistent and

* Consistent add
This commit is contained in:
Tharo 2025-08-16 14:07:25 +01:00 committed by GitHub
parent 086fc5635e
commit 28b60fc00c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 364 additions and 3 deletions

View File

@ -447,6 +447,8 @@ $(BUILD_DIR)/src/libultra/libc/%.o: OPTFLAGS := -O2
$(BUILD_DIR)/src/libultra/gu/%.o: OPTFLAGS := -O2
$(BUILD_DIR)/src/libultra/rmon/%.o: OPTFLAGS := -O2
$(BUILD_DIR)/src/libultra/libc/%.o: ASOPTFLAGS := -O2
$(BUILD_DIR)/src/boot/libu64/%.o: OPTFLAGS := -O2
$(BUILD_DIR)/src/boot/libc/%.o: OPTFLAGS := -O2

View File

@ -69,7 +69,7 @@ beginseg
include "$(BUILD_DIR)/src/libultra/os/thread.o"
include "$(BUILD_DIR)/src/libultra/os/destroythread.o"
include "$(BUILD_DIR)/src/libultra/voice/voicecheckresult.o"
include "$(BUILD_DIR)/asm/boot/bzero.text.o"
include "$(BUILD_DIR)/src/libultra/libc/bzero.o"
include "$(BUILD_DIR)/src/libultra/io/motor.o"
include "$(BUILD_DIR)/src/libultra/io/siacs.o"
include "$(BUILD_DIR)/src/libultra/io/controller.o"
@ -140,7 +140,7 @@ beginseg
include "$(BUILD_DIR)/src/libultra/gu/position.o"
include "$(BUILD_DIR)/src/libultra/io/epirawdma.o"
include "$(BUILD_DIR)/src/libultra/io/sptaskyielded.o"
include "$(BUILD_DIR)/asm/boot/bcmp.text.o"
include "$(BUILD_DIR)/src/libultra/libc/bcmp.o"
include "$(BUILD_DIR)/src/libultra/os/gettime.o"
include "$(BUILD_DIR)/src/libultra/gu/rotate.o"
include "$(BUILD_DIR)/src/libultra/os/setglobalintmask.o"
@ -157,7 +157,7 @@ beginseg
include "$(BUILD_DIR)/src/libultra/os/getcompare.o"
include "$(BUILD_DIR)/src/libultra/io/dpgetstat.o"
include "$(BUILD_DIR)/src/libultra/io/dpsetstat.o"
include "$(BUILD_DIR)/asm/boot/bcopy.text.o"
include "$(BUILD_DIR)/src/libultra/libc/bcopy.o"
include "$(BUILD_DIR)/src/libultra/os/resetglobalintmask.o"
include "$(BUILD_DIR)/src/libultra/io/pfsdeletefile.o"
include "$(BUILD_DIR)/src/libultra/gu/ortho.o"

89
src/libultra/libc/bcmp.s Normal file
View File

@ -0,0 +1,89 @@
#include "PR/asm.h"
#include "PR/regdef.h"
.text
/*
 * bcmp: compare two byte ranges for equality.
 *
 * In:   a0 = first buffer, a1 = second buffer, a2 = length in bytes
 * Out:  v0 = 0 if every compared byte matched, 1 otherwise
 * Uses: v0, v1, a0-a3, t8 (a0/a1/a2 are modified)
 *
 * Strategy: lengths below 0x10 are compared byte by byte. Otherwise the
 * buffers are compared a word at a time, after first consuming enough
 * leading bytes to word-align one of the pointers.
 *
 * NOTE(review): no ".set noreorder" is visible in this file, so the code
 * reads as plain sequential order with the assembler responsible for
 * branch delay slots - confirm against the LEAF macro in PR/asm.h.
 * NOTE(review): the lwl-at-0 / lwr-at-3 pairing used below is the
 * big-endian form of an unaligned word load - confirm the target is
 * big-endian.
 */
LEAF(bcmp)
/* v0 = a0 ^ a1; the low two bits tell whether both pointers share the same alignment */
xor v0, a0, a1
/* short compare (signed test on the length): go straight to the byte loop */
blt a2, 0x10, bytecmp
and v0, v0, 3
/* t8 = (-a0) & 3 = number of bytes from a0 up to the next word boundary */
negu t8, a0
/* pointers are misaligned relative to each other: handled separately */
bnez v0, unaligncmp
and t8, t8, 3
/* the leading t8 bytes are handled here, so remove them from the count */
subu a2, a2, t8
/* already word aligned: nothing to peel off */
beqz t8, wordcmp
/* make v0 and v1 start out identical so that the register bytes lwl leaves untouched compare equal */
move v0, v1
/* load the leading partial word of each buffer */
lwl v0, (a0)
lwl v1, (a1)
addu a0, a0, t8
addu a1, a1, t8
bne v0, v1, cmpne
wordcmp:
/* a3 = byte count covered by whole words, a2 = leftover tail bytes */
and a3, a2, ~3
subu a2, a2, a3
beqz a3, bytecmp
/* a3 becomes the a0 value at which the word loop stops */
addu a3, a3, a0
1:
/* both pointers are word aligned here: compare one word per iteration */
lw v0, (a0)
lw v1, (a1)
addu a0, a0, 4
addu a1, a1, 4
bne v0, v1, cmpne
bne a0, a3, 1b
/* finish with the tail bytes */
b bytecmp
unaligncmp:
/* a3 = (-a1) & 3 = number of bytes until a1 is word aligned */
negu a3, a1
and a3, a3, 3
subu a2, a2, a3
beqz a3, partaligncmp
/* a3 becomes the a0 value at which the byte loop stops */
addu a3, a3, a0
1:
/* compare single bytes until a1 reaches a word boundary */
lbu v0, (a0)
lbu v1, (a1)
addu a0, a0, 1
addu a1, a1, 1
bne v0, v1, cmpne
bne a0, a3, 1b
partaligncmp:
/* a1 is word aligned, a0 is not: split the count into whole words and tail bytes */
and a3, a2, ~3
subu a2, a2, a3
beqz a3, bytecmp
addu a3, a3, a0
1:
/* unaligned word load from a0 (lwl/lwr pair) against an aligned word load from a1 */
lwl v0, (a0)
lwr v0, 3(a0)
lw v1, (a1)
addu a0, a0, 4
addu a1, a1, 4
bne v0, v1, cmpne
bne a0, a3, 1b
bytecmp:
/* a3 = a0 value at which the byte loop stops; a2 <= 0 means nothing is left to compare */
addu a3, a2, a0
blez a2, cmpdone
1:
lbu v0, (a0)
lbu v1, (a1)
addu a0, a0, 1
addu a1, a1, 1
bne v0, v1, cmpne
bne a0, a3, 1b
cmpdone:
/* all compared bytes matched: return 0 */
move v0, zero
jr ra
cmpne:
/* a difference was found: return 1 */
li v0, 1
jr ra
END(bcmp)

211
src/libultra/libc/bcopy.s Normal file
View File

@ -0,0 +1,211 @@
#include "PR/asm.h"
#include "PR/regdef.h"
.text
/*
 * bcopy: copy a byte range, choosing the copy direction so that
 * overlapping source and destination ranges are handled.
 *
 * In:   a0 = source, a1 = destination, a2 = length in bytes
 * Out:  v0 = original destination pointer (saved in a3 on entry)
 * Uses: v0, v1, a0-a3, t0-t5 (a0/a1/a2 are modified)
 *
 * Strategy: copy backwards (from the end) only when the destination
 * starts inside the source range, otherwise copy forwards. In either
 * direction, when the length is at least 16 and both pointers share the
 * same low two address bits, the pointers are first brought to a word
 * boundary and the bulk is moved in 32-, 16- and 4-byte steps; everything
 * else is moved one byte at a time.
 *
 * NOTE(review): no ".set noreorder" is visible in this file, so the code
 * reads as plain sequential order with the assembler responsible for
 * branch delay slots - confirm against the LEAF macro in PR/asm.h.
 * NOTE(review): the pointer comparisons (blt/bge) are signed and the
 * address sums use "add", which raises an overflow exception on signed
 * overflow, rather than "addu" - presumably fine for the address ranges
 * in use; confirm.
 */
LEAF(bcopy)
/* remember the destination for the return value */
move a3, a1
/* nothing to do for a zero length or when source and destination coincide */
beqz a2, ret
beq a0, a1, ret
/* destination below source: a forward copy never overwrites unread source bytes */
blt a1, a0, goforwards
/* v0 = one past the end of the source range */
add v0, a0, a2
/* destination at or past the end of the source: the ranges do not overlap */
bge a1, v0, goforwards
/* destination starts inside the source range: copy from the end */
b gobackwards
goforwards:
/* short copies go byte by byte */
blt a2, 0x10, forwards_bytecopy
/* word copies are only possible when both pointers share the same low two bits */
and v0, a0, 3
and v1, a1, 3
beq v0, v1, forwalignable
forwards_bytecopy:
/* also the tail handler for the word paths: copy the remaining a2 bytes upward */
beqz a2, ret
/* v1 = source address at which the loop stops */
addu v1, a0, a2
99:
lb v0, (a0)
addu a0, a0, 1
sb v0, (a1)
addu a1, a1, 1
bne a0, v1, 99b
ret:
/* return the original destination pointer */
move v0, a3
jr ra
forwalignable:
/* v0 = a0 & 3; copy 4 - v0 leading bytes (none when v0 is 0) to reach a word boundary */
beqz v0, forwards_32
beq v0, 1, forw_copy3
beq v0, 2, forw_copy2
/* v0 == 3: a single byte reaches the boundary */
lb v0, (a0)
addu a0, a0, 1
sb v0, (a1)
addu a1, a1, 1
addu a2, a2, -1
b forwards_32
forw_copy2:
/* v0 == 2: one halfword reaches the boundary */
lh v0, (a0)
addu a0, a0, 2
sh v0, (a1)
addu a1, a1, 2
addu a2, a2, -2
b forwards_32
forw_copy3:
/* v0 == 1: one byte, then a halfword at the now even address */
lb v0, (a0)
lh v1, 1(a0)
addu a0, a0, 3
sb v0, (a1)
sh v1, 1(a1)
addu a1, a1, 3
addu a2, a2, -3
forwards:
forwards_32:
/* move 32 bytes per iteration: all eight loads are issued before the stores */
blt a2, 32, forwards_16
lw v0, 0(a0)
lw v1, 4(a0)
lw t0, 8(a0)
lw t1, 12(a0)
lw t2, 16(a0)
lw t3, 20(a0)
lw t4, 24(a0)
lw t5, 28(a0)
addu a0, a0, 32
sw v0, 0(a1)
sw v1, 4(a1)
sw t0, 8(a1)
sw t1, 12(a1)
sw t2, 16(a1)
sw t3, 20(a1)
sw t4, 24(a1)
sw t5, 28(a1)
addu a1, a1, 32
addu a2, a2, -32
b forwards_32
forwards_16:
/* fewer than 32 bytes left: move 16 bytes per iteration */
blt a2, 16, forwards_4
lw v0, 0(a0)
lw v1, 4(a0)
lw t0, 8(a0)
lw t1, 12(a0)
addu a0, a0, 16
sw v0, 0(a1)
sw v1, 4(a1)
sw t0, 8(a1)
sw t1, 12(a1)
addu a1, a1, 16
addu a2, a2, -16
b forwards_16
forwards_4:
/* fewer than 16 bytes left: move one word per iteration, then finish with the byte loop */
blt a2, 4, forwards_bytecopy
lw v0, 0(a0)
addu a0, a0, 4
sw v0, 0(a1)
addu a1, a1, 4
addu a2, a2, -4
b forwards_4
gobackwards:
/* point a0 and a1 one past the end of their ranges and work downward */
add a0, a0,a2
add a1, a1,a2
/* short copies go byte by byte */
blt a2, 16, backwards_bytecopy
/* word copies are only possible when both end pointers share the same low two bits */
and v0, a0, 0x3
and v1, a1, 0x3
beq v0, v1, backalignable
backwards_bytecopy:
/* also the tail handler for the word paths: copy the remaining a2 bytes downward */
beqz a2, ret
/* step both pointers onto the last byte still to be copied */
addu a0, a0, -1
addu a1, a1, -1
/* v1 = source address at which the loop stops (one below the first byte) */
subu v1, a0,a2
99:
lb v0, 0(a0)
addu a0, a0, -1
sb v0, 0(a1)
addu a1, a1, -1
bne a0, v1,99b
/* return the original destination pointer */
move v0, a3
jr ra
backalignable:
/* v0 = end pointer & 3; copy that many trailing bytes (none when 0) to reach a word boundary */
beqz v0, backwards
beq v0, 3, back_copy3
beq v0, 2, back_copy2
/* v0 == 1: a single byte reaches the boundary */
lb v0, -1(a0)
addu a0, a0, -1
sb v0, -1(a1)
addu a1, a1, -1
addu a2, a2, -1
b backwards
back_copy2:
/* v0 == 2: one halfword reaches the boundary */
lh v0, -2(a0)
addu a0, a0, -2
sh v0, -2(a1)
addu a1, a1, -2
addu a2, a2, -2
b backwards
back_copy3:
/* v0 == 3: the last byte plus the halfword below it */
lb v0, -1(a0)
lh v1, -3(a0)
addu a0, a0, -3
sb v0, -1(a1)
sh v1, -3(a1)
addu a1, a1, -3
addu a2, a2, -3
backwards:
backwards_32:
/* move 32 bytes per iteration, highest word first: all eight loads are issued before the stores */
blt a2, 32, backwards_16
lw v0, -4(a0)
lw v1, -8(a0)
lw t0, -12(a0)
lw t1, -16(a0)
lw t2, -20(a0)
lw t3, -24(a0)
lw t4, -28(a0)
lw t5, -32(a0)
addu a0, a0, -32
sw v0, -4(a1)
sw v1, -8(a1)
sw t0, -12(a1)
sw t1, -16(a1)
sw t2, -20(a1)
sw t3, -24(a1)
sw t4, -28(a1)
sw t5, -32(a1)
addu a1, a1, -32
addu a2, a2, -32
b backwards_32
backwards_16:
/* fewer than 32 bytes left: move 16 bytes per iteration */
blt a2, 16, backwards_4
lw v0, -4(a0)
lw v1, -8(a0)
lw t0, -12(a0)
lw t1, -16(a0)
addu a0, a0, -16
sw v0, -4(a1)
sw v1, -8(a1)
sw t0, -12(a1)
sw t1, -16(a1)
addu a1, a1, -16
addu a2, a2, -16
b backwards_16
backwards_4:
/* fewer than 16 bytes left: move one word per iteration, then finish with the byte loop */
blt a2, 4, backwards_bytecopy
lw v0, -4(a0)
addu a0, a0, -4
sw v0, -4(a1)
addu a1, a1, -4
addu a2, a2, -4
b backwards_4
END(bcopy)

59
src/libultra/libc/bzero.s Normal file
View File

@ -0,0 +1,59 @@
#include "PR/asm.h"
#include "PR/regdef.h"
.text
/*
 * bzero: fill a byte range with zeros.
 *
 * In:   a0 = start of the range, a1 = length in bytes
 * Out:  nothing is placed in v0
 * Uses: v1, a0, a1, a3 (a0/a1 are modified)
 *
 * Strategy: lengths below 0xC are zeroed byte by byte. Otherwise the
 * start is brought to a word boundary, then the range is zeroed in
 * blocks of 0x20 bytes, then single words, then any remaining bytes.
 *
 * NOTE(review): no ".set noreorder" is visible in this file, so the code
 * reads as plain sequential order with the assembler responsible for
 * branch delay slots - confirm against the LEAF macro in PR/asm.h.
 */
LEAF(bzero)
/* v1 = (-a0) & 3 = number of bytes from a0 up to the next word boundary */
negu v1, a0
/* short fill (signed test on the length): go straight to the byte loop */
blt a1, 0xC, bytezero
and v1, v1, 3
/* the leading v1 bytes are handled here, so remove them from the count */
subu a1, a1, v1
/* already word aligned: nothing to peel off */
beqz v1, blkzero
/* a single partial-word store zeroes the leading unaligned bytes */
/* NOTE(review): relies on swl at (a0) covering a0 up to the end of its word, i.e. big-endian - confirm */
swl zero, (a0)
addu a0, a0, v1
blkzero:
/* a3 = remaining count rounded down to a multiple of 0x20 */
and a3, a1, ~(0x20 - 1)
/* a1 keeps the remainder; if a3 is zero, fewer than 0x20 bytes are left */
subu a1, a1, a3
beqz a3, wordzero
/* zero in blocks of 0x20 at a time; a3 becomes the a0 value at which the loop stops */
addu a3, a3, a0
1:
sw zero, 0(a0)
sw zero, 4(a0)
sw zero, 8(a0)
sw zero, 12(a0)
/* the pointer is advanced mid-block, so the second half uses negative offsets */
addu a0, a0, 0x20
sw zero, -16(a0)
sw zero, -12(a0)
sw zero, -8(a0)
sw zero, -4(a0)
bne a0, a3, 1b
wordzero:
/* a3 = remaining count rounded down to a multiple of 4 */
and a3, a1, ~3
/* a1 keeps the remainder; if a3 is zero, fewer than 4 bytes are left */
subu a1, a1, a3
beqz a3, bytezero
/* zero one word at a time; a3 becomes the a0 value at which the loop stops */
addu a3, a3, a0
1:
addu a0, a0, 4
sw zero, -4(a0)
bne a0, a3, 1b
bytezero:
/* nothing left to zero when the remaining count is <= 0 */
blez a1, zerodone
/* zero one byte at a time; a1 becomes the a0 value at which the loop stops */
addu a1, a1, a0
1:
addu a0, a0, 1
sb zero, -1(a0)
bne a0, a1, 1b
zerodone:
jr ra
END(bzero)