Havok: Add more hkVector4f functions

2022-01-08 10:59:05 +01:00 · 2022-01-08 10:59:05 +01:00 · 5a958d10aa
parent a526afbdb6
commit 5a958d10aa
10 changed files with 331 additions and 62 deletions
--- a/data/uking_functions.csv
+++ b/data/uking_functions.csv
@ -83582,7 +83582,7 @@ Address,Quality,Size,Name
 0x0000007100fabcdc,O,000008,_ZNK4ksys4phys11CapsuleBody8getShapeEv
 0x0000007100fabce4,U,000252,
 0x0000007100fabde0,U,000160,
-0x0000007100fabe80,O,000192,_ZN4ksys4phys11CapsuleBody14sub_7100FABE80EPN4sead7Vector3IfEES5_RK10hkVector4f
+0x0000007100fabe80,O,000192,_ZN4ksys4phys11CapsuleBody14sub_7100FABE80EPN4sead7Vector3IfEES5_RK12hkTransformf
 0x0000007100fabf40,U,000204,
 0x0000007100fac00c,U,000092,
 0x0000007100fac068,U,000008,
--- a/lib/hkStubs/CMakeLists.txt
+++ b/lib/hkStubs/CMakeLists.txt
@ -27,6 +27,7 @@ add_library(hkStubs OBJECT
  Havok/Common/Base/Math/Vector/hkSimdReal.h
  Havok/Common/Base/Math/Vector/hkVector4.h
  Havok/Common/Base/Math/Vector/hkVector4f.h
+  Havok/Common/Base/Math/Vector/hkVector4f.inl
  Havok/Common/Base/Math/Vector/hkVector4Comparison.h
  Havok/Common/Base/Math/Vector/hkVector4fComparison.h

--- a/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h
+++ b/lib/hkStubs/Havok/Common/Base/Math/Matrix/hkTransformf.h
@ -5,6 +5,12 @@

 class hkTransformf {
 public:
+    hkRotationf& getRotation() { return m_rotation; }  // mutable access to the rotation part
+    const hkRotationf& getRotation() const { return m_rotation; }
+
+    hkVector4f& getTranslation() { return m_translation; }  // mutable access to the translation part
+    const hkVector4f& getTranslation() const { return m_translation; }
+
    hkRotationf m_rotation;
    hkVector4f m_translation;
 };
--- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h
+++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkSimdFloat32.h
@ -2,7 +2,14 @@

 #include <Havok/Common/Base/Types/hkBaseTypes.h>

+using hkSimdFloat32Parameter = class hkSimdFloat32;
+
 class hkSimdFloat32 {
 public:
+    hkSimdFloat32() = default;  // m_real has no initializer, so default-initialization leaves it unset
+    hkSimdFloat32(float x) : m_real(x) {}     // NOLINT(google-explicit-constructor)
+    operator float() const { return val(); }  // NOLINT(google-explicit-constructor)
+    hkFloat32 val() const { return m_real; }  // the wrapped scalar value
+
    hkFloat32 m_real;
 };
--- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h
+++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h
@ -1,32 +1,102 @@
 #pragma once

+#include <Havok/Common/Base/Math/Vector/hkSimdFloat32.h>
 #include <Havok/Common/Base/Memory/Router/hkMemoryRouter.h>
 #include <Havok/Common/Base/Types/hkBaseDefs.h>
 #include <Havok/Common/Base/Types/hkBaseTypes.h>

-#ifdef __aarch64__
-#include <arm_neon.h>
-#define HK_VECTOR4F_AARCH64_NEON
+#ifndef HK_MATH_H
+#error "Include <Havok/Common/Base/Math/hkMath.h> or hkBase.h"
 #endif

 using hkVector4fParameter = const class hkVector4f&;
 using hkVector4fComparisonParameter = const class hkVector4fComparison&;

+class hkMatrix3f;
+class hkTransformf;
+
 class hkVector4f {
 public:
    HK_DECLARE_CLASS_ALLOCATOR(hkVector4f)

+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,modernize-use-equals-default)
    HK_FORCE_INLINE hkVector4f() {}
    HK_FORCE_INLINE hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w = 0);
+    HK_FORCE_INLINE hkVector4f(const hkVector4f& other);

-    // This prevents hkVector4f from being passed in registers.
-    // NOLINTNEXTLINE(modernize-use-equals-default)
-    HK_FORCE_INLINE hkVector4f(const hkVector4f& other) : v(other.v) {}
+    // ========== Vector initialization

    HK_FORCE_INLINE void set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w = 0);
    HK_FORCE_INLINE void setAll(hkFloat32 x);

-    void sub_7100FABE80(const hkVector4f&, const hkVector4f&);
+    // ========== Vector operations
+
+    HK_FORCE_INLINE void add(hkVector4fParameter a);
+    HK_FORCE_INLINE void sub(hkVector4fParameter a);
+    HK_FORCE_INLINE void mul(hkVector4fParameter a);
+    HK_FORCE_INLINE void div(hkVector4fParameter a);
+
+    HK_FORCE_INLINE void setAdd(hkVector4fParameter a, hkVector4fParameter b);
+    HK_FORCE_INLINE void setSub(hkVector4fParameter a, hkVector4fParameter b);
+    HK_FORCE_INLINE void setMul(hkVector4fParameter a, hkVector4fParameter b);
+    HK_FORCE_INLINE void setDiv(hkVector4fParameter a, hkVector4fParameter b);
+
+    HK_FORCE_INLINE void mul(hkSimdFloat32Parameter a);
+    HK_FORCE_INLINE void setMul(hkVector4fParameter a, hkSimdFloat32Parameter r);
+    HK_FORCE_INLINE void setMul(hkSimdFloat32Parameter r, hkVector4fParameter a);
+    HK_FORCE_INLINE void setAdd(hkVector4fParameter a, hkSimdFloat32Parameter b);
+    HK_FORCE_INLINE void setSub(hkVector4fParameter a, hkSimdFloat32Parameter b);
+
+    HK_FORCE_INLINE void setReciprocal(hkVector4fParameter a);
+    HK_FORCE_INLINE void setSqrt(hkVector4fParameter a);
+    HK_FORCE_INLINE void setSqrtInverse(hkVector4fParameter a);
+
+    HK_FORCE_INLINE void addMul(hkVector4fParameter a, hkVector4fParameter b);
+    HK_FORCE_INLINE void subMul(hkVector4fParameter a, hkVector4fParameter b);
+
+    HK_FORCE_INLINE void setAddMul(hkVector4fParameter a, hkVector4fParameter b,
+                                   hkVector4fParameter c);
+    HK_FORCE_INLINE void setSubMul(hkVector4fParameter a, hkVector4fParameter b,
+                                   hkVector4fParameter c);
+
+    HK_FORCE_INLINE void addMul(hkVector4fParameter a, hkSimdFloat32Parameter r);
+    HK_FORCE_INLINE void addMul(hkSimdFloat32Parameter r, hkVector4fParameter a);
+    HK_FORCE_INLINE void subMul(hkVector4fParameter a, hkSimdFloat32Parameter r);
+    HK_FORCE_INLINE void subMul(hkSimdFloat32Parameter r, hkVector4fParameter a);
+
+    HK_FORCE_INLINE void setAddMul(hkVector4fParameter a, hkVector4fParameter b,
+                                   hkSimdFloat32Parameter r);
+    HK_FORCE_INLINE void setSubMul(hkVector4fParameter a, hkVector4fParameter b,
+                                   hkSimdFloat32Parameter r);
+
+    HK_FORCE_INLINE void setCross(hkVector4fParameter a, hkVector4fParameter b);
+    HK_FORCE_INLINE void setInterpolate(hkVector4fParameter v0, hkVector4fParameter v1,
+                                        hkSimdFloat32Parameter t);
+
+    // ========== Comparisons
+
+    // ========== Sign, comparisons, clamping
+
+    // ========== Matrix operations (out-of-line)
+
+    void setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b);
+    void setTransformedPos(const hkTransformf& a, const hkVector4f& pos);
+
+    // ========== Matrix operations (inline)
+
+    HK_FORCE_INLINE void _setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b);
+    HK_FORCE_INLINE void _setTransformedPos(const hkTransformf& a, hkVector4fParameter b);
+
+    // ========== Length and normalization
+
+    // ========== Misc
+
+    // ========== Component access
+
+    hkSimdFloat32 operator()(int i) const { return v[i]; }
+    hkSimdFloat32 operator[](int i) const { return v[i]; }
+
+    // ========== Load/store

    /// Store N floats to out.
    template <int N>
@ -34,48 +104,3 @@ public:

    m128 v;
 };
-
-inline hkVector4f::hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) {
-    v[0] = x;
-    v[1] = y;
-    v[2] = z;
-    v[3] = w;
-}
-
-inline void hkVector4f::set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) {
-    v[0] = x;
-    v[1] = y;
-    v[2] = z;
-    v[3] = w;
-}
-
-inline void hkVector4f::setAll(hkReal x) {
-    v = {x, x, x, x};
-}
-
-template <int N>
-inline void hkVector4f::store(hkFloat32* out) const {
-    static_assert(1 <= N && N <= 4, "invalid N");
-#ifdef HK_VECTOR4F_AARCH64_NEON
-    switch (N) {
-    case 1:
-        vst1q_lane_f32(out, v, 0);
-        break;
-    case 2:
-        vst1_f32(out, vget_low_f32(v));
-        break;
-    case 3:
-        vst1_f32(out, vget_low_f32(v));
-        vst1q_lane_f32(out + 2, v, 2);
-        break;
-    case 4:
-        vst1q_f32(out, v);
-        break;
-    default:
-        break;
-    }
-#else
-    for (int i = 0; i < N; ++i)
-        p[i] = v[i];
-#endif
-}
--- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl
+++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl
@ -0,0 +1,230 @@
+#pragma once
+
+#ifdef __aarch64__
+#include <arm_neon.h>
+#define HK_VECTOR4F_AARCH64_NEON
+#else
+#include <cmath>
+#endif
+
+// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
+inline hkVector4f::hkVector4f(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) {
+    set(x, y, z, w);  // delegate to set(), which assigns all four lanes
+}
+
+inline hkVector4f::hkVector4f(const hkVector4f& other) = default;  // defaulted after its in-class declaration, hence user-provided (not trivial)
+
+inline void hkVector4f::set(hkFloat32 x, hkFloat32 y, hkFloat32 z, hkFloat32 w) {
+    v[0] = x;  // lanes are written one at a time, in x, y, z, w order
+    v[1] = y;
+    v[2] = z;
+    v[3] = w;
+}
+
+inline void hkVector4f::setAll(hkFloat32 x) {  // hkFloat32, as in the in-class declaration
+    v = {x, x, x, x};  // broadcast x to all four lanes
+}
+
+inline void hkVector4f::add(hkVector4fParameter a) {
+    setAdd(*this, a);  // in-place: this = this + a
+}
+
+inline void hkVector4f::sub(hkVector4fParameter a) {
+    setSub(*this, a);  // in-place: this = this - a
+}
+
+inline void hkVector4f::mul(hkVector4fParameter a) {
+    setMul(*this, a);  // in-place: this = this * a, component-wise
+}
+
+inline void hkVector4f::div(hkVector4fParameter a) {
+    setDiv(*this, a);  // in-place: this = this / a, component-wise
+}
+
+inline void hkVector4f::setAdd(hkVector4fParameter a, hkVector4fParameter b) {
+    v = a.v + b.v;  // this = a + b on all four lanes
+}
+
+inline void hkVector4f::setSub(hkVector4fParameter a, hkVector4fParameter b) {
+    v = a.v - b.v;  // this = a - b on all four lanes
+}
+
+inline void hkVector4f::setMul(hkVector4fParameter a, hkVector4fParameter b) {
+    v = a.v * b.v;  // this = a * b, component-wise (not a dot or cross product)
+}
+
+inline void hkVector4f::setDiv(hkVector4fParameter a, hkVector4fParameter b) {
+    v = a.v / b.v;  // this = a / b, component-wise; no check for zero lanes in b
+}
+
+inline void hkVector4f::mul(hkSimdFloat32Parameter a) {
+    setMul(*this, a);  // in-place: forwards to the (vector, scalar) overload with *this as the vector
+}
+
+inline void hkVector4f::setMul(hkVector4fParameter a, hkSimdFloat32Parameter r) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vmulq_n_f32(a.v, r);  // this = a * r: scale the operand a, not the old value of *this
+#else
+    v = a.v * r.val();  // same result through the compiler's vector-by-scalar multiply
+#endif
+}
+
+inline void hkVector4f::setMul(hkSimdFloat32Parameter r, hkVector4fParameter a) {
+    setMul(a, r);  // forwards to the (vector, scalar) overload; only the argument order differs
+}
+
+inline void hkVector4f::setAdd(hkVector4fParameter a, hkSimdFloat32Parameter b) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vaddq_f32(a.v, vdupq_n_f32(b));  // this = a + b, with the scalar b broadcast to every lane
+#else
+    v = a.v + b.val();  // reads the operand a, not the old value of *this
+#endif
+}
+
+inline void hkVector4f::setSub(hkVector4fParameter a, hkSimdFloat32Parameter b) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vsubq_f32(a.v, vdupq_n_f32(b));  // this = a - b, with the scalar b broadcast to every lane
+#else
+    v = a.v - b.val();  // reads the operand a, not the old value of *this
+#endif
+}
+
+inline void hkVector4f::setReciprocal(hkVector4fParameter a) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vrecpeq_f32(a.v);  // NEON reciprocal *estimate*; no refinement step is applied here
+#else
+    for (int i = 0; i < 4; ++i)
+        v[i] = 1.0f / a[i];  // portable path: a real division per lane, so results can differ from NEON
+#endif
+}
+
+inline void hkVector4f::setSqrt(hkVector4fParameter a) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vsqrtq_f32(a.v);  // square root of each of the four lanes
+#else
+    for (int i = 0; i < 4; ++i)
+        v[i] = std::sqrt(a[i]);  // a[i] converts to float, selecting the float overload
+#endif
+}
+
+inline void hkVector4f::setSqrtInverse(hkVector4fParameter a) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    v = vrsqrteq_f32(a.v);  // NEON reciprocal-square-root *estimate*; no refinement step is applied here
+#else
+    for (int i = 0; i < 4; ++i)
+        v[i] = 1.0f / std::sqrt(a[i]);  // portable path: computed exactly, so results can differ from NEON
+#endif
+}
+
+inline void hkVector4f::addMul(hkVector4fParameter a, hkVector4fParameter b) {
+    setAddMul(*this, a, b);  // in-place: this = this + a * b, component-wise
+}
+
+inline void hkVector4f::subMul(hkVector4fParameter a, hkVector4fParameter b) {
+    setSubMul(*this, a, b);  // in-place: this = this - a * b, component-wise
+}
+
+inline void hkVector4f::setAddMul(hkVector4fParameter a, hkVector4fParameter b,
+                                  hkVector4fParameter c) {
+    v = a.v + b.v * c.v;  // this = a + b * c, component-wise
+}
+
+inline void hkVector4f::setSubMul(hkVector4fParameter a, hkVector4fParameter b,
+                                  hkVector4fParameter c) {
+    v = a.v - b.v * c.v;  // this = a - b * c, component-wise
+}
+
+inline void hkVector4f::addMul(hkVector4fParameter a, hkSimdFloat32Parameter r) {
+    setAddMul(*this, a, r);  // in-place: this = this + r * a
+}
+
+inline void hkVector4f::addMul(hkSimdFloat32Parameter r, hkVector4fParameter a) {
+    addMul(a, r);  // forwards to the (vector, scalar) overload; only the argument order differs
+}
+
+inline void hkVector4f::subMul(hkVector4fParameter a, hkSimdFloat32Parameter r) {
+    setSubMul(*this, a, r);  // in-place: this = this - r * a
+}
+
+inline void hkVector4f::subMul(hkSimdFloat32Parameter r, hkVector4fParameter a) {
+    subMul(a, r);  // forwards to the (vector, scalar) overload; only the argument order differs
+}
+
+inline void hkVector4f::setAddMul(hkVector4fParameter a, hkVector4fParameter b,
+                                  hkSimdFloat32Parameter r) {
+    m128 rr{r, r, r, r};  // broadcast the scalar r to all four lanes
+    v = a.v + rr * b.v;   // this = a + r * b
+}
+
+inline void hkVector4f::setSubMul(hkVector4fParameter a, hkVector4fParameter b,
+                                  hkSimdFloat32Parameter r) {
+    m128 rr{r, r, r, r};  // broadcast the scalar r to all four lanes
+    v = a.v - rr * b.v;   // this = a - r * b
+}
+
+inline void hkVector4f::setCross(hkVector4fParameter a, hkVector4fParameter b) {
+    // Cross product a x b. The difference below holds (z, x, y, w) in lanes 0..3:
+    //   z = a[0] * b[1] - b[0] * a[1]
+    //   x = a[1] * b[2] - b[1] * a[2]
+    //   y = a[2] * b[0] - b[2] * a[0]
+    // (w = a[3] * b[3] - b[3] * a[3]); the final shuffle rotates it back to (x, y, z, w).
+
+    auto cross0 = a.v * __builtin_shufflevector(b.v, b.v, 1, 2, 0, 3);  // a * (b.y, b.z, b.x, b.w)
+    auto cross1 = b.v * __builtin_shufflevector(a.v, a.v, 1, 2, 0, 3);  // b * (a.y, a.z, a.x, a.w)
+    auto diff = cross0 - cross1;
+    v = __builtin_shufflevector(diff, diff, 1, 2, 0, 3);
+}
+
+inline void hkVector4f::setInterpolate(hkVector4fParameter v0, hkVector4fParameter v1,
+                                       hkSimdFloat32Parameter t) {
+    // Linear interpolation: this = v0 + t * (v1 - v0); t is not clamped to [0, 1].
+    hkVector4f diff;
+    diff.setSub(v1, v0);
+    setAddMul(v0, diff, t);
+}
+
+inline void hkVector4f::_setRotatedDir(const hkMatrix3f& a, hkVector4fParameter b) {
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    auto col0 = vmulq_laneq_f32(a.m_col0.v, b.v, 0);  // column 0 scaled by b.x (lanes come from b)
+    auto col1 = vmulq_laneq_f32(a.m_col1.v, b.v, 1);  // column 1 scaled by b.y
+    auto col2 = vmulq_laneq_f32(a.m_col2.v, b.v, 2);  // column 2 scaled by b.z
+    v = col0 + col1 + col2;  // this = a * b; b's w lane does not contribute
+#else
+    auto col0 = a.m_col0.v * b[0].val();  // every lane of b is read before v is written,
+    auto col1 = a.m_col1.v * b[1].val();  // so the result stays correct if b aliases *this
+    v = col0 + col1 + a.m_col2.v * b[2].val();
+#endif
+}
+
+inline void hkVector4f::_setTransformedPos(const hkTransformf& a, hkVector4fParameter b) {
+    hkVector4f t;  // scratch vector for the rotated direction; written by the next call
+    t._setRotatedDir(a.getRotation(), b);  // apply the rotation part of the transform to b
+    setAdd(t, a.getTranslation());  // then add the translation part
+}
+
+template <int N>
+inline void hkVector4f::store(hkFloat32* out) const {
+    static_assert(1 <= N && N <= 4, "invalid N");  // rejects any other N at compile time
+#ifdef HK_VECTOR4F_AARCH64_NEON
+    switch (N) {  // N is a compile-time constant
+    case 1:
+        vst1q_lane_f32(out, v, 0);  // lane 0 only
+        break;
+    case 2:
+        vst1_f32(out, vget_low_f32(v));  // lanes 0 and 1
+        break;
+    case 3:
+        vst1_f32(out, vget_low_f32(v));  // lanes 0 and 1...
+        vst1q_lane_f32(out + 2, v, 2);  // ...then lane 2; out[3] is never written
+        break;
+    case 4:
+        vst1q_f32(out, v);  // all four lanes
+        break;
+    default:
+        break;  // not reachable for any N accepted by the static_assert
+    }
+#else
+    for (int i = 0; i < N; ++i)
+        out[i] = v[i];  // portable path: copy the first N lanes one by one
+#endif
+}
--- a/lib/hkStubs/Havok/Common/Base/Math/hkMath.h
+++ b/lib/hkStubs/Havok/Common/Base/Math/hkMath.h
@ -1,5 +1,7 @@
 #pragma once

+#define HK_MATH_H
+
 #include <Havok/Common/Base/Types/hkBaseTypes.h>

 #include <Havok/Common/Base/Math/Matrix/hkMatrix3.h>
@ -9,3 +11,5 @@
 #include <Havok/Common/Base/Math/Vector/hkSimdReal.h>
 #include <Havok/Common/Base/Math/Vector/hkVector4.h>
 #include <Havok/Common/Base/Math/Vector/hkVector4Comparison.h>
+
+#include <Havok/Common/Base/Math/Vector/hkVector4f.inl>
--- a/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h
+++ b/lib/hkStubs/Havok/Physics2012/Dynamics/World/hkpWorldCinfo.h
@ -1,11 +1,7 @@
 #pragma once

-#include <Havok/Common/Base/Math/Vector/hkVector4.h>
-#include <Havok/Common/Base/Memory/Router/hkMemoryRouter.h>
-#include <Havok/Common/Base/Object/hkReferencedObject.h>
 #include <Havok/Common/Base/Types/Geometry/Aabb/hkAabb.h>
-#include <Havok/Common/Base/Types/hkBaseTypes.h>
-#include <Havok/Common/Base/Types/hkRefPtr.h>
+#include <Havok/Common/Base/hkBase.h>
 #include <Havok/Physics2012/Collide/Filter/hkpCollisionFilter.h>
 #include <Havok/Physics2012/Collide/Filter/hkpConvexListFilter.h>
 #include <Havok/Physics2012/Dynamics/World/Memory/hkpWorldMemoryAvailableWatchDog.h>
--- a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp
+++ b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.cpp
@ -95,15 +95,15 @@ const hkpShape* CapsuleBody::getShape() const {
 }

 void CapsuleBody::sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb,
-                                 const hkVector4& rb_vec) {
+                                 const hkTransformf& rb_vec) {  // NOTE(review): a transform, not a vector; the name rb_vec looks stale
    if (veca != nullptr) {
        hkVector4 tmp;
-        tmp.sub_7100FABE80(rb_vec, hkVector4(vertex_a.x, vertex_a.y, vertex_a.z));
+        tmp.setTransformedPos(rb_vec, hkVector4(vertex_a.x, vertex_a.y, vertex_a.z));  // vertex_a passed through rb_vec (w defaults to 0)
        tmp.store<3>(veca->e.data());
    }
    if (vecb != nullptr) {
        hkVector4 tmp;
-        tmp.sub_7100FABE80(rb_vec, hkVector4(vertex_b.x, vertex_b.y, vertex_b.z));
+        tmp.setTransformedPos(rb_vec, hkVector4(vertex_b.x, vertex_b.y, vertex_b.z));  // vertex_b passed through rb_vec (w defaults to 0)
        tmp.store<3>(vecb->e.data());
    }
 }
--- a/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h
+++ b/src/KingSystem/Physics/RigidBody/Shape/physCapsuleShape.h
@ -49,7 +49,7 @@ struct CapsuleBody {
    bool setRadius(f32 r);
    bool setVertices(const sead::Vector3f& va, const sead::Vector3f& vb);
    f32 getVolume() const;
-    void sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb, const hkVector4& rb_vec);
+    void sub_7100FABE80(sead::Vector3f* veca, sead::Vector3f* vecb, const hkTransformf& rb_vec);
    void setMaterialMask(const MaterialMask& mask);

    sead::Vector3f vertex_a;