From 5a958d10aadf0fe30204016db7d1ef419ce859e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Sat, 8 Jan 2022 10:59:05 +0100 Subject: [PATCH] Havok: Add more hkVector4f functions --- data/uking_functions.csv | 2 +- lib/hkStubs/CMakeLists.txt | 1 + .../Common/Base/Math/Matrix/hkTransformf.h | 6 + .../Common/Base/Math/Vector/hkSimdFloat32.h | 7 + .../Common/Base/Math/Vector/hkVector4f.h | 129 ++++++---- .../Common/Base/Math/Vector/hkVector4f.inl | 230 ++++++++++++++++++ lib/hkStubs/Havok/Common/Base/Math/hkMath.h | 4 + .../Dynamics/World/hkpWorldCinfo.h | 6 +- .../RigidBody/Shape/physCapsuleShape.cpp | 6 +- .../RigidBody/Shape/physCapsuleShape.h | 2 +- 10 files changed, 331 insertions(+), 62 deletions(-) create mode 100644 lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl diff --git a/data/uking_functions.csv b/data/uking_functions.csv index e80304c1..5c8cf9f0 100644 --- a/data/uking_functions.csv +++ b/data/uking_functions.csv @@ -83582,7 +83582,7 @@ Address,Quality,Size,Name 0x0000007100fabcdc,O,000008,_ZNK4ksys4phys11CapsuleBody8getShapeEv 0x0000007100fabce4,U,000252, 0x0000007100fabde0,U,000160, -0x0000007100fabe80,O,000192,_ZN4ksys4phys11CapsuleBody14sub_7100FABE80EPN4sead7Vector3IfEES5_RK10hkVector4f +0x0000007100fabe80,O,000192,_ZN4ksys4phys11CapsuleBody14sub_7100FABE80EPN4sead7Vector3IfEES5_RK12hkTransformf 0x0000007100fabf40,U,000204, 0x0000007100fac00c,U,000092, 0x0000007100fac068,U,000008, diff --git a/lib/hkStubs/CMakeLists.txt b/lib/hkStubs/CMakeLists.txt index 495e717e..39da1fef 100644 --- a/lib/hkStubs/CMakeLists.txt +++ b/lib/hkStubs/CMakeLists.txt @@ -27,6 +27,7 @@ add_library(hkStubs OBJECT Havok/Common/Base/Math/Vector/hkSimdReal.h Havok/Common/Base/Math/Vector/hkVector4.h Havok/Common/Base/Math/Vector/hkVector4f.h + Havok/Common/Base/Math/Vector/hkVector4f.inl Havok/Common/Base/Math/Vector/hkVector4Comparison.h Havok/Common/Base/Math/Vector/hkVector4fComparison.h diff --git 
a/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h b/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h index 2cbcad81..cbc0fbb1 100644 --- a/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h +++ b/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h @@ -5,6 +5,12 @@ class hkTransformf { public: + hkRotationf& getRotation() { return m_rotation; } + const hkRotationf& getRotation() const { return m_rotation; } + + hkVector4f& getTranslation() { return m_translation; } + const hkVector4f& getTranslation() const { return m_translation; } + hkRotationf m_rotation; hkVector4f m_translation; }; diff --git a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h index 5aa8ee5a..87f2c901 100644 --- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h +++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h @@ -2,7 +2,14 @@ #include +using hkSimdFloat32Parameter = class hkSimdFloat32; + class hkSimdFloat32 { public: + hkSimdFloat32() = default; + hkSimdFloat32(float x) : m_real(x) {} // NOLINT(google-explicit-constructor) + operator float() const { return val(); } // NOLINT(google-explicit-constructor) + hkFloat32 val() const { return m_real; } + hkFloat32 m_real; }; diff --git a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h index 6efb3a53..ded5078f 100644 --- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h +++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h @@ -1,32 +1,102 @@ #pragma once +#include #include #include #include -#ifdef __aarch64__ -#include -#define HK_VECTOR4F_AARCH64_NEON +#ifndef HK_MATH_H +#error "Include or hkBase.h" #endif using hkVector4fParameter = const class hkVector4f&; using hkVector4fComparisonParameter = const class hkVector4fComparison&; +class hkMatrix3f; +class hkTransformf; + class hkVector4f { public: HK_DECLARE_CLASS_ALLOCATOR(hkVector4f) + // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,modernize-use-equals-default) HK_FORCE_INLINE hkVector4f() {} HK_FORCE_INLINE hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w = 0); + HK_FORCE_INLINE hkVector4f(const hkVector4f& other); - // This prevents hkVector4f from being passed in registers. - // NOLINTNEXTLINE(modernize-use-equals-default) - HK_FORCE_INLINE hkVector4f(const hkVector4f& other) : v(other.v) {} + // ========== Vector initialization HK_FORCE_INLINE void set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w = 0); HK_FORCE_INLINE void setAll(hkFloat32 x); - void sub_7100FABE80(const hkVector4f&, const hkVector4f&); + // ========== Vector operations + + HK_FORCE_INLINE void add(hkVector4fParameter a); + HK_FORCE_INLINE void sub(hkVector4fParameter a); + HK_FORCE_INLINE void mul(hkVector4fParameter a); + HK_FORCE_INLINE void div(hkVector4fParameter a); + + HK_FORCE_INLINE void setAdd(hkVector4fParameter a, hkVector4fParameter b); + HK_FORCE_INLINE void setSub(hkVector4fParameter a, hkVector4fParameter b); + HK_FORCE_INLINE void setMul(hkVector4fParameter a, hkVector4fParameter b); + HK_FORCE_INLINE void setDiv(hkVector4fParameter a, hkVector4fParameter b); + + HK_FORCE_INLINE void mul(hkSimdFloat32Parameter a); + HK_FORCE_INLINE void setMul(hkVector4fParameter a, hkSimdFloat32Parameter r); + HK_FORCE_INLINE void setMul(hkSimdFloat32Parameter r, hkVector4fParameter a); + HK_FORCE_INLINE void setAdd(hkVector4fParameter a, hkSimdFloat32Parameter b); + HK_FORCE_INLINE void setSub(hkVector4fParameter a, hkSimdFloat32Parameter b); + + HK_FORCE_INLINE void setReciprocal(hkVector4fParameter a); + HK_FORCE_INLINE void setSqrt(hkVector4fParameter a); + HK_FORCE_INLINE void setSqrtInverse(hkVector4fParameter a); + + HK_FORCE_INLINE void addMul(hkVector4fParameter a, hkVector4fParameter b); + HK_FORCE_INLINE void subMul(hkVector4fParameter a, hkVector4fParameter b); + + HK_FORCE_INLINE void setAddMul(hkVector4fParameter a, hkVector4fParameter 
b, + hkVector4fParameter c); + HK_FORCE_INLINE void setSubMul(hkVector4fParameter a, hkVector4fParameter b, + hkVector4fParameter c); + + HK_FORCE_INLINE void addMul(hkVector4fParameter a, hkSimdFloat32Parameter r); + HK_FORCE_INLINE void addMul(hkSimdFloat32Parameter r, hkVector4fParameter a); + HK_FORCE_INLINE void subMul(hkVector4fParameter a, hkSimdFloat32Parameter r); + HK_FORCE_INLINE void subMul(hkSimdFloat32Parameter r, hkVector4fParameter a); + + HK_FORCE_INLINE void setAddMul(hkVector4fParameter a, hkVector4fParameter b, + hkSimdFloat32Parameter r); + HK_FORCE_INLINE void setSubMul(hkVector4fParameter a, hkVector4fParameter b, + hkSimdFloat32Parameter r); + + HK_FORCE_INLINE void setCross(hkVector4fParameter a, hkVector4fParameter b); + HK_FORCE_INLINE void setInterpolate(hkVector4fParameter v0, hkVector4fParameter v1, + hkSimdFloat32Parameter t); + + // ========== Comparisons + + // ========== Sign, comparisons, clamping + + // ========== Matrix operations (out-of-line) + + void setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b); + void setTransformedPos(const hkTransformf& a, const hkVector4f& pos); + + // ========== Matrix operations (inline) + + HK_FORCE_INLINE void _setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b); + HK_FORCE_INLINE void _setTransformedPos(const hkTransformf& a, hkVector4fParameter b); + + // ========== Length and normalization + + // ========== Misc + + // ========== Component access + + hkSimdFloat32 operator()(int i) const { return v[i]; } + hkSimdFloat32 operator[](int i) const { return v[i]; } + + // ========== Load/store /// Store N floats to out. 
template @@ -34,48 +104,3 @@ public: m128 v; }; - -inline hkVector4f::hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; -} - -inline void hkVector4f::set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; -} - -inline void hkVector4f::setAll(hkReal x) { - v = {x, x, x, x}; -} - -template -inline void hkVector4f::store(hkFloat32* out) const { - static_assert(1 <= N && N <= 4, "invalid N"); -#ifdef HK_VECTOR4F_AARCH64_NEON - switch (N) { - case 1: - vst1q_lane_f32(out, v, 0); - break; - case 2: - vst1_f32(out, vget_low_f32(v)); - break; - case 3: - vst1_f32(out, vget_low_f32(v)); - vst1q_lane_f32(out + 2, v, 2); - break; - case 4: - vst1q_f32(out, v); - break; - default: - break; - } -#else - for (int i = 0; i < N; ++i) - p[i] = v[i]; -#endif -} diff --git a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl new file mode 100644 index 00000000..428314e3 --- /dev/null +++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl @@ -0,0 +1,230 @@ +#pragma once + +#ifdef __aarch64__ +#include +#define HK_VECTOR4F_AARCH64_NEON +#else +#include +#endif + +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) +inline hkVector4f::hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) { + set(x, y, z, w); +} + +inline hkVector4f::hkVector4f(const hkVector4f& other) = default; + +inline void hkVector4f::set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) { + v[0] = x; + v[1] = y; + v[2] = z; + v[3] = w; +} + +inline void hkVector4f::setAll(hkReal x) { + v = {x, x, x, x}; +} + +inline void hkVector4f::add(hkVector4fParameter a) { + setAdd(*this, a); +} + +inline void hkVector4f::sub(hkVector4fParameter a) { + setSub(*this, a); +} + +inline void hkVector4f::mul(hkVector4fParameter a) { + setMul(*this, a); +} + +inline void hkVector4f::div(hkVector4fParameter a) { + setDiv(*this, a); 
+} + +inline void hkVector4f::setAdd(hkVector4fParameter a, hkVector4fParameter b) { + v = a.v + b.v; +} + +inline void hkVector4f::setSub(hkVector4fParameter a, hkVector4fParameter b) { + v = a.v - b.v; +} + +inline void hkVector4f::setMul(hkVector4fParameter a, hkVector4fParameter b) { + v = a.v * b.v; +} + +inline void hkVector4f::setDiv(hkVector4fParameter a, hkVector4fParameter b) { + v = a.v / b.v; +} + +inline void hkVector4f::mul(hkSimdFloat32Parameter a) { + setMul(*this, a); +} + +inline void hkVector4f::setMul(hkVector4fParameter a, hkSimdFloat32Parameter r) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vmulq_n_f32(a.v, r); +#else + v = a.v * r.val(); +#endif +} + +inline void hkVector4f::setMul(hkSimdFloat32Parameter r, hkVector4fParameter a) { + setMul(a, r); +} + +inline void hkVector4f::setAdd(hkVector4fParameter a, hkSimdFloat32Parameter b) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vaddq_f32(a.v, vdupq_n_f32(b)); +#else + v = a.v + b.val(); +#endif +} + +inline void hkVector4f::setSub(hkVector4fParameter a, hkSimdFloat32Parameter b) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vsubq_f32(a.v, vdupq_n_f32(b)); +#else + v = a.v - b.val(); +#endif +} + +inline void hkVector4f::setReciprocal(hkVector4fParameter a) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vrecpeq_f32(a.v); +#else + for (int i = 0; i < 4; ++i) + v[i] = 1.0f / a[i]; +#endif +} + +inline void hkVector4f::setSqrt(hkVector4fParameter a) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vsqrtq_f32(a.v); +#else + for (int i = 0; i < 4; ++i) + v[i] = std::sqrt(a[i]); +#endif +} + +inline void hkVector4f::setSqrtInverse(hkVector4fParameter a) { +#ifdef HK_VECTOR4F_AARCH64_NEON + v = vrsqrteq_f32(a.v); +#else + for (int i = 0; i < 4; ++i) + v[i] = 1.0f / std::sqrt(a[i]); +#endif +} + +inline void hkVector4f::addMul(hkVector4fParameter a, hkVector4fParameter b) { + setAddMul(*this, a, b); +} + +inline void hkVector4f::subMul(hkVector4fParameter a, hkVector4fParameter b) { + setSubMul(*this, a, b); +} + +inline void 
hkVector4f::setAddMul(hkVector4fParameter a, hkVector4fParameter b, + hkVector4fParameter c) { + v = a.v + b.v * c.v; +} + +inline void hkVector4f::setSubMul(hkVector4fParameter a, hkVector4fParameter b, + hkVector4fParameter c) { + v = a.v - b.v * c.v; +} + +inline void hkVector4f::addMul(hkVector4fParameter a, hkSimdFloat32Parameter r) { + setAddMul(*this, a, r); +} + +inline void hkVector4f::addMul(hkSimdFloat32Parameter r, hkVector4fParameter a) { + addMul(a, r); +} + +inline void hkVector4f::subMul(hkVector4fParameter a, hkSimdFloat32Parameter r) { + setSubMul(*this, a, r); +} + +inline void hkVector4f::subMul(hkSimdFloat32Parameter r, hkVector4fParameter a) { + subMul(a, r); +} + +inline void hkVector4f::setAddMul(hkVector4fParameter a, hkVector4fParameter b, + hkSimdFloat32Parameter r) { + m128 rr{r, r, r, r}; + v = a.v + rr * b.v; +} + +inline void hkVector4f::setSubMul(hkVector4fParameter a, hkVector4fParameter b, + hkSimdFloat32Parameter r) { + m128 rr{r, r, r, r}; + v = a.v - rr * b.v; +} + +inline void hkVector4f::setCross(hkVector4fParameter a, hkVector4fParameter b) { + // z = a[0] * b[1] - b[0] * a[1] + // x = a[1] * b[2] - b[1] * a[2] + // y = a[2] * b[0] - b[2] * a[0] + // ---- ---- ---- ---- + // a bb b aa + + auto cross0 = a.v * __builtin_shufflevector(b.v, b.v, 1, 2, 0, 3); + auto cross1 = b.v * __builtin_shufflevector(a.v, a.v, 1, 2, 0, 3); + auto diff = cross0 - cross1; + v = __builtin_shufflevector(diff, diff, 1, 2, 0, 3); +} + +inline void hkVector4f::setInterpolate(hkVector4fParameter v0, hkVector4fParameter v1, + hkSimdFloat32Parameter t) { + // v = v0 + t * (v1 - v0) + hkVector4f diff; + diff.setSub(v1, v0); + setAddMul(v0, diff, t); +} + +inline void hkVector4f::_setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b) { +#ifdef HK_VECTOR4F_AARCH64_NEON + auto col0 = vmulq_laneq_f32(a.m_col0.v, b.v, 0); + auto col1 = vmulq_laneq_f32(a.m_col1.v, b.v, 1); + auto col2 = vmulq_laneq_f32(a.m_col2.v, b.v, 2); + v = col0 + col1 + col2; +#else + 
setMul(a.m_col0, b[0]); + addMul(a.m_col1, b[1]); + addMul(a.m_col2, b[2]); +#endif +} + +inline void hkVector4f::_setTransformedPos(const hkTransformf& a, hkVector4fParameter b) { + hkVector4f t; + t._setRotatedDir(a.getRotation(), b); + setAdd(t, a.getTranslation()); +} + +template +inline void hkVector4f::store(hkFloat32* out) const { + static_assert(1 <= N && N <= 4, "invalid N"); +#ifdef HK_VECTOR4F_AARCH64_NEON + switch (N) { + case 1: + vst1q_lane_f32(out, v, 0); + break; + case 2: + vst1_f32(out, vget_low_f32(v)); + break; + case 3: + vst1_f32(out, vget_low_f32(v)); + vst1q_lane_f32(out + 2, v, 2); + break; + case 4: + vst1q_f32(out, v); + break; + default: + break; + } +#else + for (int i = 0; i < N; ++i) + out[i] = v[i]; +#endif +} diff --git a/lib/hkStubs/Havok/Common/Base/Math/hkMath.h b/lib/hkStubs/Havok/Common/Base/Math/hkMath.h index a0333434..12d948d3 100644 --- a/lib/hkStubs/Havok/Common/Base/Math/hkMath.h +++ b/lib/hkStubs/Havok/Common/Base/Math/hkMath.h @@ -1,5 +1,7 @@ #pragma once +#define HK_MATH_H + #include #include @@ -9,3 +11,5 @@ #include #include #include + +#include diff --git a/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h b/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h index 5e35453f..957a2cb9 100644 --- a/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h +++ b/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h @@ -1,11 +1,7 @@ #pragma once -#include -#include -#include #include -#include -#include +#include #include #include #include diff --git a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp index bacc005f..4eba6781 100644 --- a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp +++ b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp @@ -95,15 +95,15 @@ const hkpShape* CapsuleBody::getShape() const { } void CapsuleBody::sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb, - const 
hkVector4& rb_vec) { + const hkTransformf& rb_vec) { if (veca != nullptr) { hkVector4 tmp; - tmp.sub_7100FABE80(rb_vec, hkVector4(vertex_a.x, vertex_a.y, vertex_a.z)); + tmp.setTransformedPos(rb_vec, hkVector4(vertex_a.x, vertex_a.y, vertex_a.z)); tmp.store<3>(veca->e.data()); } if (vecb != nullptr) { hkVector4 tmp; - tmp.sub_7100FABE80(rb_vec, hkVector4(vertex_b.x, vertex_b.y, vertex_b.z)); + tmp.setTransformedPos(rb_vec, hkVector4(vertex_b.x, vertex_b.y, vertex_b.z)); tmp.store<3>(vecb->e.data()); } } diff --git a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h index 1fc2f50f..5d60c487 100644 --- a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h +++ b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h @@ -49,7 +49,7 @@ struct CapsuleBody { bool setRadius(f32 r); bool setVertices(const sead::Vector3f& va, const sead::Vector3f& vb); f32 getVolume() const; - void sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb, const hkVector4& rb_vec); + void sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb, const hkTransformf& rb_vec); void setMaterialMask(const MaterialMask& mask); sead::Vector3f vertex_a;