/*
 * tp/libs/dolphin/mtx/vec.c
 *
 * (291 lines, 7.7 KiB, C)
 */

#include "dolphin/mtx/vec.h"
#include "math.h"
/*
 * Symbolic aliases for floating-point register names. They are used as
 * operands by the inline assembly in this file (FP2..FP7 appear in the
 * code visible here).
 * NOTE(review): R_RET and FP8..FP13 are not referenced by the visible code;
 * R_RET presumably names the float return register - confirm.
 */
#define R_RET fp1
#define FP2 fp2
#define FP3 fp3
#define FP4 fp4
#define FP5 fp5
#define FP6 fp6
#define FP7 fp7
#define FP8 fp8
#define FP9 fp9
#define FP10 fp10
#define FP11 fp11
#define FP12 fp12
#define FP13 fp13
/* 80347090-803470B4 3419D0 0024+00 1/1 103/103 679/679 .text PSVECAdd */
/*
 * Component-wise vector addition with paired-single instructions:
 * ret = vec1 + vec2.
 *
 * vec1, vec2: input vectors (only read).
 * ret:        receives the sum.
 *
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): offsets 0 and 8 assume Vec is three consecutive f32 values
 * (x, y, z); Vec is declared in vec.h, which is not visible here. The last
 * psq_l/psq_st operand selects quantization register 0, presumably set up
 * for plain floats - confirm.
 */
asm void PSVECAdd(const register Vec* vec1, const register Vec* vec2, register Vec* ret) {
#ifdef __MWERKS__ // clang-format off
nofralloc;
psq_l FP2, 0(vec1), 0, 0; /* FP2 = pair at vec1+0 (x, y) */
psq_l FP4, 0(vec2), 0, 0; /* FP4 = pair at vec2+0 (x, y) */
ps_add FP6, FP2, FP4; /* add both lanes */
psq_st FP6, 0(ret), 0, 0; /* store the x/y pair */
psq_l FP3, 8(vec1), 1, 0; /* W=1: load a single value at vec1+8 (z) */
psq_l FP5, 8(vec2), 1, 0; /* W=1: load a single value at vec2+8 (z) */
ps_add FP7, FP3, FP5;
psq_st FP7, 8(ret), 1, 0; /* W=1: store only the first lane (z) */
blr
#endif // clang-format on
}
/* 803470B4-803470D8 3419F4 0024+00 0/0 60/60 59/59 .text PSVECSubtract */
/*
 * Component-wise vector subtraction with paired-single instructions:
 * ret = vec1 - vec2.
 *
 * vec1: minuend (only read).
 * vec2: subtrahend (only read).
 * ret:  receives the difference.
 *
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): offsets 0 and 8 assume Vec is three consecutive f32 values
 * (x, y, z); Vec is declared in vec.h, which is not visible here. The last
 * psq_l/psq_st operand selects quantization register 0, presumably set up
 * for plain floats - confirm.
 */
asm void PSVECSubtract(const register Vec* vec1, const register Vec* vec2, register Vec* ret) {
#ifdef __MWERKS__ // clang-format off
nofralloc;
psq_l FP2, 0(vec1), 0, 0; /* FP2 = pair at vec1+0 (x, y) */
psq_l FP4, 0(vec2), 0, 0; /* FP4 = pair at vec2+0 (x, y) */
ps_sub FP6, FP2, FP4; /* vec1 - vec2 in both lanes */
psq_st FP6, 0(ret), 0, 0; /* store the x/y pair */
psq_l FP3, 8(vec1), 1, 0; /* W=1: load a single value at vec1+8 (z) */
psq_l FP5, 8(vec2), 1, 0; /* W=1: load a single value at vec2+8 (z) */
ps_sub FP7, FP3, FP5;
psq_st FP7, 8(ret), 1, 0; /* W=1: store only the first lane (z) */
blr
#endif // clang-format on
}
/* 803470D8-803470F4 341A18 001C+00 0/0 58/58 101/101 .text PSVECScale */
/*
 * Multiplies every component of src by a scalar and writes the result to
 * dst, using paired-single instructions.
 *
 * src:   input vector (only read).
 * dst:   receives the scaled vector.
 * scale: scalar factor.
 *
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): the assembly reads the factor from f1 directly rather than
 * through the `scale` name; presumably f1 is where the calling convention
 * places the first float argument - confirm. Offsets 0 and 8 assume Vec is
 * three consecutive f32 values (x, y, z).
 */
asm void PSVECScale(register const Vec* src, register Vec* dst, register f32 scale) {
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l f0, 0(src), 0, 0 /* f0 = pair at src+0 (x, y) */
psq_l f2, 8(src), 1, 0 /* W=1: f2 = single value at src+8 (z) */
ps_muls0 f0, f0, f1 /* both lanes of f0 times lane 0 of f1 */
psq_st f0, 0(dst), 0, 0 /* store the x/y pair */
ps_muls0 f0, f2, f1 /* z times lane 0 of f1 */
psq_st f0, 8(dst), 1, 0 /* W=1: store only the first lane (z) */
blr
#endif // clang-format on
}
/*
 * Portable C version of vector scaling: dst = src * scale.
 *
 * src:   input vector (only read).
 * dst:   receives the scaled vector; may be the same object as src, since
 *        each component is read once before the matching write.
 * scale: scalar factor applied to every component.
 *
 * The previous body ignored `scale` and multiplied by 1 / |src| instead,
 * i.e. it normalized the vector. This version applies the factor, matching
 * what PSVECScale does with its scale argument.
 */
void C_VECScale(const Vec* src, Vec* dst, f32 scale) {
    dst->x = src->x * scale;
    dst->y = src->y * scale;
    dst->z = src->z * scale;
}
/* 803470F4-80347138 341A34 0044+00 2/2 16/16 0/0 .text PSVECNormalize */
/*
 * Scales vec1 by an approximation of 1 / sqrt(x*x + y*y + z*z) and stores
 * the result in ret.
 *
 * vec1: input vector (only read).
 * ret:  receives the scaled vector.
 *
 * The reciprocal square root comes from the frsqrte estimate, refined with
 * one Newton-Raphson step: e' = (e * 0.5) * (3 - e * e * sum).
 * There is no check for a zero-length input in this routine.
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): offsets 0 and 8 assume Vec is three consecutive f32 values
 * (x, y, z); Vec is declared in vec.h, which is not visible here.
 */
void PSVECNormalize(const register Vec* vec1, register Vec* ret) {
#ifdef __MWERKS__ // clang-format off
register f32 half = 0.5f;
register f32 three = 3.0f;
register f32 xx_zz, xx_yy;
register f32 square_sum;
register f32 ret_sqrt;
register f32 n_0, n_1;
asm {
psq_l FP2, 0(vec1), 0, 0; /* FP2 = (x, y) */
ps_mul xx_yy, FP2, FP2; /* (x*x, y*y) */
psq_l FP3, 8(vec1), 1, 0; /* W=1: FP3 lane 0 = z */
ps_madd xx_zz, FP3, FP3, xx_yy; /* lane 0 = z*z + x*x */
ps_sum0 square_sum, xx_zz, FP3, xx_yy; /* lane 0 = (z*z + x*x) + y*y */
frsqrte ret_sqrt, square_sum; /* e = estimate of 1/sqrt(sum) */
fmuls n_0, ret_sqrt, ret_sqrt; /* e*e */
fmuls n_1, ret_sqrt, half; /* e*0.5 */
fnmsubs n_0, n_0, square_sum, three; /* 3 - e*e*sum */
fmuls ret_sqrt, n_0, n_1; /* refined reciprocal square root */
ps_muls0 FP2, FP2, ret_sqrt; /* scale x and y */
psq_st FP2, 0(ret), 0, 0;
ps_muls0 FP3, FP3, ret_sqrt; /* scale z */
psq_st FP3, 8(ret), 1, 0; /* W=1: store only the first lane (z) */
}
#endif // clang-format on
}
/* 80347138-80347150 341A78 0018+00 0/0 140/140 727/727 .text PSVECSquareMag */
/*
 * Returns the squared length of v: x*x + y*y + z*z.
 *
 * v: input vector (only read).
 *
 * The result is left in f1 when the routine returns.
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): offsets 0 and 8 assume Vec is three consecutive f32 values
 * (x, y, z); Vec is declared in vec.h, which is not visible here.
 */
asm f32 PSVECSquareMag(register const Vec* v){
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l f0, 0(v), 0, 0 /* f0 = (x, y) */
ps_mul f0, f0, f0 /* (x*x, y*y) */
lfs f1, 8(v) /* f1 = z */
ps_madd f1, f1, f1, f0 /* lane 0 = z*z + x*x */
ps_sum0 f1, f1, f0, f0 /* lane 0 = (z*z + x*x) + y*y */
blr
#endif // clang-format on
}
/* 80347150-80347194 341A90 0044+00 0/0 24/24 0/0 .text PSVECMag */
/*
 * Returns the length of v, sqrt(x*x + y*y + z*z), computed as
 * sum * rsqrt(sum), where rsqrt is the frsqrte estimate refined with one
 * Newton-Raphson step.
 *
 * v: input vector (only read).
 *
 * When the squared length compares equal to zero the routine branches to
 * __exit and returns that value without taking the reciprocal square root.
 *
 * NOTE(review): the asm blocks are guarded by __MWERKS__ but the return
 * statement is not, so other compilers would return an uninitialized
 * square_mag. The constant loads sit between the asm blocks; that placement
 * presumably matters for the generated instruction order - confirm before
 * moving them.
 */
f32 PSVECMag(const register Vec* v) {
register f32 v_xy, v_zz, square_mag;
register f32 ret_mag, n_0, n_1;
register f32 three, half, zero;
half = 0.5f;
#ifdef __MWERKS__ // clang-format off
asm {
psq_l v_xy, 0(v), 0, 0 /* v_xy = (x, y) */
ps_mul v_xy, v_xy, v_xy /* (x*x, y*y) */
lfs v_zz, 8(v) /* v_zz = z */
fsubs zero, half, half /* zero = 0.5 - 0.5 */
ps_madd square_mag, v_zz, v_zz, v_xy /* lane 0 = z*z + x*x */
ps_sum0 square_mag, square_mag, v_xy, v_xy /* lane 0 = (z*z + x*x) + y*y */
fcmpu cr0, square_mag, zero
beq- __exit /* squared length equals zero: return it as-is */
frsqrte ret_mag, square_mag /* e = estimate of 1/sqrt(sum) */
}
#endif // clang-format on
three = 3.0f;
#ifdef __MWERKS__ // clang-format off
asm {
fmuls n_0, ret_mag, ret_mag /* e*e */
fmuls n_1, ret_mag, half /* e*0.5 */
fnmsubs n_0, n_0, square_mag, three /* 3 - e*e*sum */
fmuls ret_mag, n_0, n_1 /* refined reciprocal square root */
fmuls square_mag, square_mag, ret_mag /* sum * rsqrt(sum) = sqrt(sum) */
__exit:
}
#endif // clang-format on
return square_mag;
}
/* 80347194-803471B4 341AD4 0020+00 2/2 39/39 15/15 .text PSVECDotProduct */
/*
 * Returns the dot product of two vectors: x1*x2 + y1*y2 + z1*z2.
 *
 * vec1, vec2: input vectors (only read).
 *
 * The result is written to f1.
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): the assembly addresses the inputs through r3 and r4 rather
 * than the parameter names; presumably those are the registers holding
 * vec1 and vec2 under the calling convention - confirm. Offsets 0 and 4
 * assume Vec is three consecutive f32 values (x, y, z).
 */
asm f32 PSVECDotProduct(const register Vec* vec1, const register Vec* vec2) {
#ifdef __MWERKS__ // clang-format off
nofralloc;
psq_l f2, 4(r3), 0, 0 /* qr0; f2 = pair at r3+4 (y1, z1) */
psq_l f3, 4(r4), 0, 0 /* qr0; f3 = pair at r4+4 (y2, z2) */
ps_mul f2, f2, f3 /* (y1*y2, z1*z2) */
psq_l f5, 0(r3), 0, 0 /* qr0; f5 = pair at r3+0 (x1, y1) */
psq_l f4, 0(r4), 0, 0 /* qr0; f4 = pair at r4+0 (x2, y2) */
ps_madd f3, f5, f4, f2 /* lane 0 = x1*x2 + y1*y2 */
ps_sum0 f1, f3, f2, f2 /* lane 0 = (x1*x2 + y1*y2) + z1*z2 */
blr
#endif // clang-format on
}
/* 803471B4-803471F0 341AF4 003C+00 0/0 20/20 3/3 .text PSVECCrossProduct */
/*
 * Computes the cross product a x b and stores it in axb:
 *   axb.x = a.y*b.z - a.z*b.y
 *   axb.y = a.z*b.x - a.x*b.z
 *   axb.z = a.x*b.y - a.y*b.x
 *
 * a, b: input vectors (only read).
 * axb:  receives the result.
 *
 * All loads from a and b happen before the first store to axb.
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): the second lanes of the ps_mul/ps_msub results rely on lfs
 * having placed the loaded value in both lanes of f2/f3 - confirm against
 * the CPU manual. Offsets 0, 4 and 8 assume Vec is three consecutive f32
 * values (x, y, z).
 */
asm void PSVECCrossProduct(register const Vec* a, register const Vec* b, register Vec* axb) {
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l f1, 0(b), 0, 0 /* f1 = (b.x, b.y) */
lfs f2, 8(a) /* f2 = a.z */
psq_l f0, 0(a), 0, 0 /* f0 = (a.x, a.y) */
ps_merge10 f6, f1, f1 /* f6 = (b.y, b.x) */
lfs f3, 8(b) /* f3 = b.z */
ps_mul f4, f1, f2 /* (b.x*a.z, b.y*a.z) */
ps_muls0 f7, f1, f0 /* (b.x*a.x, b.y*a.x) */
ps_msub f5, f0, f3, f4 /* (a.x*b.z - b.x*a.z, a.y*b.z - b.y*a.z) */
ps_msub f8, f0, f6, f7 /* lane 1 = a.y*b.x - b.y*a.x */
ps_merge11 f9, f5, f5 /* lane 0 = a.y*b.z - b.y*a.z (result x) */
ps_merge01 f10, f5, f8 /* (a.x*b.z - b.x*a.z, a.y*b.x - b.y*a.x) */
psq_st f9, 0(axb), 1, 0 /* W=1: store x only */
ps_neg f10, f10 /* flip signs to get (result y, result z) */
psq_st f10, 4(axb), 0, 0 /* store the y/z pair */
blr
#endif // clang-format on
}
/* 803471F0-803472C8 341B30 00D8+00 0/0 1/1 0/0 .text C_VECHalfAngle */
/*
 * Builds the half-angle vector of a and b: both inputs are negated and
 * normalized, then summed; the sum is normalized when it has non-zero
 * length, otherwise it is written out unchanged.
 *
 * a, b: input vectors (only read).
 * half: receives the result.
 */
void C_VECHalfAngle(const Vec* a, const Vec* b, Vec* half) {
    Vec negA;
    Vec negB;
    Vec sum;

    /* Reversed, unit-length copy of a. */
    negA.x = -a->x;
    negA.y = -a->y;
    negA.z = -a->z;
    VECNormalize(&negA, &negA);

    /* Reversed, unit-length copy of b. */
    negB.x = -b->x;
    negB.y = -b->y;
    negB.z = -b->z;
    VECNormalize(&negB, &negB);

    VECAdd(&negA, &negB, &sum);

    /* Skip normalization unless the squared length is strictly positive. */
    if (!(VECDotProduct(&sum, &sum) > 0.0f)) {
        *half = sum;
        return;
    }

    VECNormalize(&sum, half);
}
/* 803472C8-8034739C 341C08 00D4+00 0/0 1/1 11/11 .text C_VECReflect */
/*
 * Reflects src about normal and writes the normalized result to dst.
 *
 * src:    incoming vector (only read); it is negated and normalized first.
 * normal: surface normal (only read); a normalized copy is used.
 * dst:    receives the unit-length reflection. It is written only after
 *         both inputs have been copied into locals.
 */
void C_VECReflect(const Vec* src, const Vec* normal, Vec* dst) {
    Vec toSource;
    Vec unitNormal;
    f32 cosAngle;

    /* Unit vector pointing back along the incoming direction. */
    toSource.x = -src->x;
    toSource.y = -src->y;
    toSource.z = -src->z;
    VECNormalize(&toSource, &toSource);

    VECNormalize(normal, &unitNormal);

    cosAngle = VECDotProduct(&toSource, &unitNormal);

    /* Per component: n * 2 * cos - toSource. */
    dst->x = unitNormal.x * 2.0f * cosAngle - toSource.x;
    dst->y = unitNormal.y * 2.0f * cosAngle - toSource.y;
    dst->z = unitNormal.z * 2.0f * cosAngle - toSource.z;

    VECNormalize(dst, dst);
}
/* 8034739C-803473C4 341CDC 0028+00 0/0 107/107 446/446 .text PSVECSquareDistance */
/*
 * Returns the squared distance between a and b:
 * dx*dx + dy*dy + dz*dz, where d = a - b.
 *
 * a, b: input points (only read).
 *
 * The result is left in f1 when the routine returns.
 * The body is only compiled when __MWERKS__ is defined.
 * NOTE(review): offsets 0 and 4 assume Vec is three consecutive f32 values
 * (x, y, z); Vec is declared in vec.h, which is not visible here.
 */
asm f32 PSVECSquareDistance(register const Vec* a, register const Vec* b){
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l f0, 4(a), 0, 0 /* f0 = (a.y, a.z) */
psq_l f1, 4(b), 0, 0 /* f1 = (b.y, b.z) */
ps_sub f2, f0, f1 /* f2 = (dy, dz) */
psq_l f0, 0(a), 0, 0 /* f0 = (a.x, a.y) */
psq_l f1, 0(b), 0, 0 /* f1 = (b.x, b.y) */
ps_mul f2, f2, f2 /* (dy*dy, dz*dz) */
ps_sub f0, f0, f1 /* f0 = (dx, dy) */
ps_madd f1, f0, f0, f2 /* lane 0 = dx*dx + dy*dy */
ps_sum0 f1, f1, f2, f2 /* lane 0 = (dx*dx + dy*dy) + dz*dz */
blr
#endif // clang-format on
}
/* 803473C4-80347418 341D04 0054+00 0/0 4/4 0/0 .text PSVECDistance */
/*
 * Returns the distance between a and b. The squared distance is formed
 * first and then multiplied by its reciprocal square root (frsqrte estimate
 * refined with one Newton-Raphson step).
 *
 * a, b: input points (only read).
 *
 * When the squared distance compares equal to zero the routine branches to
 * `exit` and returns that value without taking the reciprocal square root.
 *
 * NOTE(review): the whole body, including the return statement, sits inside
 * the __MWERKS__ guard, so other compilers see an empty non-void function.
 * NOTE(review): f0-f2 are named directly next to compiler-allocated
 * register variables; presumably the compiler is expected not to place
 * half_c/three_c/dist in those registers - confirm. The constant loads sit
 * between the asm blocks; that placement presumably matters for the
 * generated instruction order.
 */
f32 PSVECDistance(register const Vec* a, register const Vec* b) {
register f32 half_c;
register f32 three_c;
register f32 dist;
#ifdef __MWERKS__ // clang-format off
asm {
psq_l f0, 4(a), 0, 0 /* qr0; f0 = (a.y, a.z) */
psq_l f1, 4(b), 0, 0 /* qr0; f1 = (b.y, b.z) */
ps_sub f2, f0, f1 /* f2 = (dy, dz) */
psq_l f0, 0(a), 0, 0 /* qr0; f0 = (a.x, a.y) */
psq_l f1, 0(b), 0, 0 /* qr0; f1 = (b.x, b.y) */
ps_mul f2, f2, f2 /* (dy*dy, dz*dz) */
ps_sub f0, f0, f1 /* f0 = (dx, dy) */
}
half_c = 0.5f;
asm {
ps_madd dist, f0, f0, f2 /* lane 0 = dx*dx + dy*dy */
fsubs f0, half_c, half_c /* f0 = 0.5 - 0.5 */
ps_sum0 dist, dist, f2, f2 /* lane 0 = (dx*dx + dy*dy) + dz*dz */
fcmpu cr0, f0, dist
beq exit /* squared distance equals zero: return it as-is */
}
three_c = 3.0f;
asm {
frsqrte f0, dist /* e = estimate of 1/sqrt(dist) */
fmuls f2, f0, f0 /* e*e */
fmuls f0, f0, half_c /* e*0.5 */
fnmsubs f2, f2, dist, three_c /* 3 - e*e*dist */
fmuls f0, f2, f0 /* refined reciprocal square root */
fmuls dist, dist, f0 /* dist * rsqrt(dist) = sqrt(dist) */
exit:
}
return dist;
#endif // clang-format on
}