diff --git a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h
index fbc674ba..dbba2bc8 100644
--- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h
+++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.h
@@ -105,6 +105,18 @@ public:
     // ========== Length and normalization
 
+    // Dot product of the first N components of this vector and a (N is 2, 3 or 4).
+    template <int N>
+    HK_FORCE_INLINE hkSimdFloat32 dot(hkVector4fParameter a) const;
+
+    // Passes the N-component dot product of a and b to setAll().
+    template <int N>
+    HK_FORCE_INLINE void setDot(hkVector4fParameter a, hkVector4fParameter b);
+
+    // N-component dot product of this vector with itself.
+    template <int N>
+    HK_FORCE_INLINE hkSimdFloat32 lengthSquared() const;
+
     // ========== Misc
 
     // ========== Component access
 
diff --git a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl
index e4544779..640b5f47 100644
--- a/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl
+++ b/lib/hkStubs/Havok/Common/Base/Math/Vector/hkVector4f.inl
@@ -278,6 +278,54 @@ inline void hkVector4f::_setTransformedPos(const hkTransformf& a, hkVector4fPara
     setAdd(t, a.getTranslation());
 }
 
+#ifdef HK_VECTOR4F_AARCH64_NEON
+// NEON path: dot product of the first N components of this vector and a.
+template <int N>
+HK_FORCE_INLINE hkSimdFloat32 hkVector4f::dot(hkVector4fParameter a) const {
+    if constexpr (N == 2) {
+        float32x4_t x2 = v * a.v;
+        float32x2_t low = vget_low_f32(x2);
+        // Pairwise add leaves the sum of the two low products in lane 0.
+        float32x2_t xy = vpadd_f32(low, low);
+        return xy[0];
+    } else if constexpr (N == 3 || N == 4) {
+        float32x4_t x2 = v * a.v;
+        float32x2_t low = vget_low_f32(x2);
+        float32x2_t high = vget_high_f32(x2);
+        // For N == 3, zero lane 1 of the high half (the fourth product) so it adds nothing.
+        if constexpr (N == 3)
+            high = vset_lane_f32(0, high, 1);
+        float32x2_t xy_zw = vpadd_f32(low, high);
+        float32x2_t xyzw = vpadd_f32(xy_zw, xy_zw);
+        return xyzw[0];
+    } else {
+        static_assert(2 <= N && N <= 4, "invalid N");
+    }
+}
+#else
+// Portable path: dot product of the first N components of this vector and a.
+template <int N>
+HK_FORCE_INLINE hkSimdFloat32 hkVector4f::dot(hkVector4fParameter a) const {
+    static_assert(2 <= N && N <= 4, "invalid N");
+    float sum = 0.0f;
+    for (int i = 0; i < N; ++i)
+        sum += v[i] * a.v[i];
+    return sum;
+}
+#endif
+
+// Passes the N-component dot product of a and b to setAll().
+template <int N>
+inline void hkVector4f::setDot(hkVector4fParameter a, hkVector4fParameter b) {
+    setAll(a.dot<N>(b));
+}
+
+// Returns the N-component dot product of this vector with itself.
+template <int N>
+inline hkSimdFloat32 hkVector4f::lengthSquared() const {
+    return dot<N>(*this);
+}
+
 template <int N>
 inline void hkVector4f::store(hkFloat32* out) const {
     static_assert(1 <= N && N <= 4, "invalid N");