From cabc7039696f73e8216b629b79b3363f2e91865c Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 2 Dec 2025 18:38:12 -0500 Subject: [PATCH] J3DShapeDraw and J3DSkinDeform OK (#2908) --- configure.py | 4 +- .../J3DGraphAnimator/J3DSkinDeform.cpp | 6 +-- src/JSystem/J3DGraphBase/J3DShapeDraw.cpp | 42 ++++++++++--------- src/JSystem/JMath/JMATrigonometric.cpp | 6 +-- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/configure.py b/configure.py index 6595502427e..c916413955b 100755 --- a/configure.py +++ b/configure.py @@ -1235,7 +1235,7 @@ config.libs = [ Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DTexture.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DPacket.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShapeMtx.cpp"), - Object(NonMatching, "JSystem/J3DGraphBase/J3DShapeDraw.cpp"), + Object(MatchingFor(ALL_GCN, "ShieldD"), "JSystem/J3DGraphBase/J3DShapeDraw.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShape.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DMaterial.cpp"), Object(Equivalent, "JSystem/J3DGraphBase/J3DMatBlock.cpp"), # virtual function order @@ -1254,7 +1254,7 @@ config.libs = [ Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DModel.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DAnimation.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAnm.cpp"), - Object(NonMatching, "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"), + Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DCluster.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DJoint.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAttach.cpp"), diff --git a/src/JSystem/J3DGraphAnimator/J3DSkinDeform.cpp b/src/JSystem/J3DGraphAnimator/J3DSkinDeform.cpp index 79234158a90..17112fb93c5 100644 --- a/src/JSystem/J3DGraphAnimator/J3DSkinDeform.cpp +++ b/src/JSystem/J3DGraphAnimator/J3DSkinDeform.cpp @@ -347,10 +347,6 @@ int J3DSkinDeform::initMtxIndexArray(J3DModelData* pModelData) { return kJ3DError_Success; } -// NONMATCHING - instruction ordering/optimization issue, matches debug -// the compiler needs to delay adding +3 to dl until the end of the while loop for the function to match -// but instead it puts the +3 at the start of the for loop and reworks the other instructions -// can get a 99.93% match on retail by moving where dl is incremented, but it seems fake as it breaks debug, and introduces an operand swap on src void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) { J3D_ASSERT_NULLPTR(740, pModelData != NULL); for (u16 i = 0; i < pModelData->getShapeNum(); i++) { @@ -389,7 +385,7 @@ void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) { memcpy(dst, src + 1, (int)(vtxSize - 1)); // The -1 is to remove GX_VA_PNMTXIDX dst += (int)(vtxSize - 1); } - dl += vtxSize * vtxCount; + dl = (u8*)dl + vtxSize * vtxCount; } int dlistSize = ((int)dst - (int)displayListStart + 0x1f) & ~0x1f; diff --git a/src/JSystem/J3DGraphBase/J3DShapeDraw.cpp b/src/JSystem/J3DGraphBase/J3DShapeDraw.cpp index fcfec6e2a4e..dc00a46f9ad 100644 --- a/src/JSystem/J3DGraphBase/J3DShapeDraw.cpp +++ b/src/JSystem/J3DGraphBase/J3DShapeDraw.cpp @@ -8,55 +8,57 @@ u32 J3DShapeDraw::countVertex(u32 stride) { u32 count = 0; - uintptr_t dlStart = (uintptr_t)getDisplayList(); + u8* dlStart = (u8*)getDisplayList(); - for (u8* dl = (u8*)dlStart; ((uintptr_t)dl - dlStart) < getDisplayListSize();) { - if (*dl != GX_TRIANGLEFAN && *dl != GX_TRIANGLESTRIP) + for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) { + u8 cmd = *(u8*)dl; + dl++; + if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP) break; - u16 vtxNum = *((u16*)(dl + 1)); + int vtxNum = *((u16*)(dl)); + dl += 2; count += vtxNum; - dl += stride * vtxNum; - dl += 3; + dl = (u8*)dl + stride * vtxNum; } return count; } -// NONMATCHING regalloc void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { u32 byteNum = countVertex(stride); - u32 newSize = ALIGN_NEXT(mDisplayListSize + byteNum, 0x20); + u32 oldSize = mDisplayListSize; + u32 newSize = ALIGN_NEXT(oldSize + byteNum, 0x20); u8* newDLStart = new (0x20) u8[newSize]; - u8* oldDLStart = getDisplayList(); + u8* oldDLStart = (u8*)mDisplayList; u8* oldDL = oldDLStart; u8* newDL = newDLStart; for (; (oldDL - oldDLStart) < mDisplayListSize;) { // Copy command - u8 h = *oldDL; - *newDL++ = h; + u8 cmd = *(u8*)oldDL; + oldDL++; + *newDL++ = cmd; - if (h != GX_TRIANGLEFAN && h != GX_TRIANGLESTRIP) + if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP) break; // Copy count - // regalloc (I suspect there's a way to shove this in a u16 temp without an mr) - s32 vtxNum = *((u16*)(oldDL + 1)); - *((u16*)newDL) = vtxNum; + int vtxNum = *(u16*)oldDL; + oldDL += 2; + *(u16*)newDL = vtxNum; newDL += 2; - for (s32 i = 0; i < vtxNum; i++) { - u8* oldDLVtx = &oldDL[stride * i + 3]; + for (int i = 0; i < vtxNum; i++) { + u8* oldDLVtx = &oldDL[stride * i]; u8 pnmtxidx = *oldDLVtx; - memcpy(newDL, oldDLVtx, attrOffs); + memcpy(newDL, oldDLVtx, (int)attrOffs); newDL += attrOffs; *newDL++ = valueBase + pnmtxidx; memcpy(newDL, oldDLVtx + attrOffs, stride - attrOffs); newDL += (stride - attrOffs); } - oldDL += stride * vtxNum; - oldDL += 3; + oldDL = (u8*)oldDL + stride * vtxNum; } u32 realSize = ALIGN_NEXT((uintptr_t)newDL - (uintptr_t)newDLStart, 0x20); diff --git a/src/JSystem/JMath/JMATrigonometric.cpp b/src/JSystem/JMath/JMATrigonometric.cpp index b6649594da4..e0374009947 100644 --- a/src/JSystem/JMath/JMATrigonometric.cpp +++ b/src/JSystem/JMath/JMATrigonometric.cpp @@ -70,10 +70,10 @@ struct TAsinAcosTable { namespace JMath { -TSinCosTable<13, f32> sincosTable_; +TSinCosTable<13, f32> sincosTable_ ATTRIBUTE_ALIGN(32); -TAtanTable atanTable_; +TAtanTable atanTable_ ATTRIBUTE_ALIGN(32); -TAsinAcosTable asinAcosTable_; +TAsinAcosTable asinAcosTable_ ATTRIBUTE_ALIGN(32); } // namespace JMath