Make profiler report RSP, RDP and thread timing accurately

This commit is contained in:
Ryan Dwyer 2023-05-01 18:48:04 +10:00
parent 502f88dddb
commit b18d1259fe
7 changed files with 152 additions and 47 deletions

View File

@ -78,6 +78,8 @@ typedef struct OSThread_s {
OSId id; /* id for debugging */
int fp; /* thread has used fp unit */
__OSThreadContext context; /* register/interrupt mask */
u32 cycles_saved;
u32 cycles_at_dispatch;
} OSThread;

View File

@ -859,8 +859,60 @@ Gfx *lvPrintRateGraph(Gfx *gdl)
return gdl;
}
u32 g_LvBefore = 0;
u32 g_LvAfter = 0;
Gfx *lvPrintCounter(Gfx *gdl, s32 *y, char *label, u32 cycles, u32 cycles_per_second)
{
char buffer[32];
s32 x = 10;
u32 colour;
u32 percentage;
u32 microseconds;
s32 textwidth;
s32 textheight;
u32 target;
// 100% means 30 FPS
// 50% means 60 FPS
target = cycles_per_second / 30;
percentage = 100 * cycles / target;
microseconds = (f32) cycles / cycles_per_second * 1000000;
// Colour green if this counter can achieve 60 FPS
// Colour yellow if this counter can achieve 30 FPS
// Colour red if under 30 FPS
if (percentage <= 50) {
colour = 0x00ff00a0; // green
} else if (percentage <= 100) {
colour = 0xffff00a0; // yellow
} else {
colour = 0xff0000a0; // red
}
// Label
x = 10;
gdl = textRender(gdl, &x, y, label, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0);
// Microseconds
sprintf(buffer, "%d", microseconds);
textMeasure(&textheight, &textwidth, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0);
x = 80 - textwidth;
gdl = textRender(gdl, &x, y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0);
// Percentage
sprintf(buffer, "(%d%%)\n", percentage);
textMeasure(&textheight, &textwidth, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0);
x = 120 - textwidth;
gdl = textRender(gdl, &x, y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0);
// Line
gdl = textSetPrimColour(gdl, colour);
gDPFillRectangleScaled(gdl++, 10, *y, 11 + microseconds * 110 / 33333, *y + 1);
*y += 2;
return gdl;
}
Gfx *lvPrintRateText(Gfx *gdl)
{
@ -907,8 +959,23 @@ Gfx *lvPrintRateText(Gfx *gdl)
gdl = textRender(gdl, &x, &y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0x00ff00a0, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0);
}
#ifdef PROFILING
{
u32 counters[5];
profileGetCounters(counters);
gdl = lvPrintCounter(gdl, &y, "RSP", counters[0], OS_CLOCK_RATE);
gdl = lvPrintCounter(gdl, &y, "RDP", counters[1], OS_CLOCK_RATE);
gdl = lvPrintCounter(gdl, &y, "CPU", counters[2] + counters[3] + counters[4], OS_CPU_COUNTER);
gdl = lvPrintCounter(gdl, &y, " audio", counters[2], OS_CPU_COUNTER);
gdl = lvPrintCounter(gdl, &y, " main", counters[3], OS_CPU_COUNTER);
gdl = lvPrintCounter(gdl, &y, " sched", counters[4], OS_CPU_COUNTER);
}
#endif
x = 10;
sprintf(buffer, "mema free %d KB\n", memaGetLongestFree() / 1024);
sprintf(buffer, "\nmema free %d KB\n", memaGetLongestFree() / 1024);
gdl = textRender(gdl, &x, &y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0x00ff00a0, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0);
sprintf(buffer, "memp free %d KB\n", mempGetStageFree() / 1024);

View File

@ -1847,7 +1847,7 @@ void playerTickCutscene(bool arg0)
g_CutsceneCurTotalFrame60f += g_Vars.lvupdate60freal;
}
if (g_CutsceneCurTotalFrame60f > 30 && (buttons & 0xffff)) {
if (g_CutsceneCurTotalFrame60f > 30 && (buttons & ~L_TRIG)) {
g_CutsceneSkipRequested = true;
if (g_Vars.autocutplaying) {

View File

@ -13,6 +13,7 @@ void profile00009a98(void);
void profileStart(s32 arg0);
void profileEnd(s32 arg0);
void profileHandleRspEvent(s32 event);
void profileGetCounters(u32 counters[5]);
#endif
Gfx *profileRender(Gfx *gdl);

View File

@ -13,10 +13,11 @@
// [x][x][1] is the start time
u32 g_ProfileMarkers[NUM_SAMPLES][NUM_PROFILEMARKERS][2];
s32 g_ProfileIndex = 0;
u32 g_ProfileAudStart = 0;
u32 g_ProfileAudTicks[NUM_SAMPLES];
u32 g_ProfileGfxTicks[NUM_SAMPLES];
u32 g_ProfileGfxTally;
u32 g_ProfileAudStart;
u32 g_ProfileAudCycles;
u32 g_ProfileGfxCycles;
u32 g_ProfileGfxHistory[30];
u32 g_ProfileGfxIndex = 0;
struct profileslot *g_ProfileCurrentSlot;
struct profileslot {
@ -135,6 +136,43 @@ u32 profileReadCounters(void)
return max;
}
extern OSThread g_SchedThread;
void profileGetCounters(u32 counters[5])
{
s32 i;
u32 max = 0;
OSPri prevpri = osGetThreadPri(0);
osSetThreadPri(0, THREADPRI_SCHED + 1);
// RSP
counters[0] = g_ProfileAudCycles;
// RDP
for (i = 0; i < ARRAYCOUNT(g_ProfileGfxHistory); i++) {
if (g_ProfileGfxHistory[i] > max) {
max = g_ProfileGfxHistory[i];
}
}
counters[1] = g_ProfileGfxCycles;
// Audio thread
counters[2] = g_AudioManager.thread.cycles_saved;
g_AudioManager.thread.cycles_saved = 0;
// Main thread
counters[3] = g_MainThread.cycles_saved + (osGetCount() - g_MainThread.cycles_at_dispatch);
g_MainThread.cycles_saved = 0;
// Scheduler thread
counters[4] = g_SchedThread.cycles_saved;
g_SchedThread.cycles_saved = 0;
osSetThreadPri(0, prevpri);
}
void profileHandleRspEvent(s32 event)
{
switch (event) {
@ -142,19 +180,15 @@ void profileHandleRspEvent(s32 event)
g_ProfileAudStart = osGetCount();
break;
case RSPEVENT_AUD_FINISH:
g_ProfileMarkers[g_ProfileIndex][PROFILEMARKER_AUD][0] += osGetCount() - g_ProfileAudStart;
g_ProfileAudCycles = osGetCount() - g_ProfileAudStart;
break;
case RSPEVENT_GFX_START:
osDpSetStatus(DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR);
break;
case RSPEVENT_GFX_PAUSE:
g_ProfileGfxTally += profileReadCounters();
break;
case RSPEVENT_GFX_RESUME:
osDpSetStatus(DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR);
break;
case RSPEVENT_GFX_FINISH:
g_ProfileGfxTicks[g_ProfileIndex] = g_ProfileGfxTally + profileReadCounters();
g_ProfileGfxCycles = profileReadCounters();
g_ProfileGfxHistory[g_ProfileGfxIndex] = g_ProfileGfxCycles;
g_ProfileGfxIndex = (g_ProfileGfxIndex + 1) % ARRAYCOUNT(g_ProfileGfxHistory);
break;
}
}
@ -260,8 +294,6 @@ Gfx *profileRender(Gfx *gdl)
s32 y = 10;
gdl = text0f153628(gdl);
gdl = profileRenderCpuLine(gdl, x, &y, "AUD", PROFILEMARKER_AUD);
gdl = profileRenderRdpLine(gdl, x, &y, "GFX", g_ProfileGfxTicks);
gdl = profileRenderCpuLine(gdl, x, &y, "CPU", PROFILEMARKER_CPU);
gdl = profileRenderCpuLine(gdl, x, &y, " audio", PROFILEMARKER_AUDIO);
gdl = profileRenderCpuLine(gdl, x, &y, " lvTick", PROFILEMARKER_LVTICK);
@ -322,8 +354,6 @@ Gfx *profileRender(Gfx *gdl)
gdl = text0f153780(gdl);
}
g_ProfileGfxTally = 0;
return gdl;
}

View File

@ -164,7 +164,7 @@ void __scMain(void *arg)
switch ((int) msg) {
case VIDEO_MSG:
if (osViGetCurrentFramebuffer() == osViGetNextFramebuffer()) {
osDpSetStatus(DPC_STATUS_FLUSH);
osDpSetStatus(DPC_CLR_FREEZE);
}
__scHandleRetrace(sc);
@ -174,7 +174,7 @@ void __scMain(void *arg)
__scHandleRSP(sc);
break;
case RDP_DONE_MSG:
osDpSetStatus(DPC_STATUS_START_GCLK);
osDpSetStatus(DPC_SET_FREEZE);
__scHandleRDP(sc);
__scHandleTasks(sc);
break;
@ -309,8 +309,6 @@ void __scHandleRSP(OSSched *sc)
t->state |= OS_SC_YIELDED;
if ((t->flags & OS_SC_TYPE_MASK) == OS_SC_XBUS) {
profileHandleRspEvent(RSPEVENT_GFX_PAUSE);
// Push the task back on the list
t->next = sc->gfxListHead;
sc->gfxListHead = t;
@ -489,13 +487,13 @@ OSScTask *__scTaskReady(OSScTask *t)
*/
s32 __scTaskComplete(OSSched *sc, OSScTask *t)
{
if (t->list.t.type == M_AUDTASK) {
profileHandleRspEvent(RSPEVENT_AUD_FINISH);
} else {
profileHandleRspEvent(RSPEVENT_GFX_FINISH);
}
if ((t->state & OS_SC_RCP_MASK) == 0) {
if (t->list.t.type == M_AUDTASK) {
profileHandleRspEvent(RSPEVENT_AUD_FINISH);
} else {
profileHandleRspEvent(RSPEVENT_GFX_FINISH);
}
if (t->list.t.type == 1
&& (t->flags & OS_SC_SWAPBUFFER)
&& (t->flags & OS_SC_LAST_TASK)) {
@ -582,24 +580,13 @@ void __scAppendList(OSSched *sc, OSScTask *t)
void __scExec(OSSched *sc, OSScTask *sp, OSScTask *dp, s32 resuming)
{
if (sp) {
if (sp->list.t.type == M_AUDTASK) {
osWritebackDCacheAll();
}
if (sp->list.t.type != M_AUDTASK && (sp->state & OS_SC_YIELD) == 0) {
osDpSetStatus(DPC_STATUS_CMD_BUSY | DPC_STATUS_CBUF_READY | DPC_STATUS_DMA_BUSY | DPC_STATUS_END_VALID);
}
if (resuming) {
if (sp->list.t.type != M_AUDTASK) {
profileHandleRspEvent(RSPEVENT_GFX_RESUME);
}
} else {
if (sp->list.t.type == M_AUDTASK) {
profileHandleRspEvent(RSPEVENT_AUD_START);
} else {
if (sp->list.t.type == M_GFXTASK) {
if ((sp->state & OS_SC_YIELD) == 0) {
profileHandleRspEvent(RSPEVENT_GFX_START);
}
} else {
osWritebackDCacheAll();
profileHandleRspEvent(RSPEVENT_AUD_START);
}
sp->state &= ~(OS_SC_YIELD | OS_SC_YIELDED);

View File

@ -645,7 +645,25 @@ glabel __osDispatchThread
jal __osPopThread
addiu $a0, $a0, %lo(__osRunQueue)
lui $at, %hi(__osRunningThread)
sw $v0, %lo(__osRunningThread)($at)
lw $v1, %lo(__osRunningThread)($at) # v1 = yielding thread
sw $v0, %lo(__osRunningThread)($at) # v0 = dispatching thread
# Read the current CPU cycle counter
mfc0 $t0, C0_COUNT
# In the dispatching thread, set cycles_at_dispatch = COUNT
sw $t0, 0x234($v0)
# In the yielding thread, set cycles_saved += (COUNT - cycles_at_dispatch)
beqz $v1, .Ldispatch
nop
lw $t1, 0x234($v1) # t1 = cycles_at_dispatch
subu $t0, $t0, $t1 # t0 = COUNT - cycles_at_dispatch
lw $t1, 0x230($v1) # t1 = cycles_saved
addu $t1, $t1, $t0 # cycles_saved += COUNT - cycles_at_dispatch
sw $t0, 0x230($v1)
.Ldispatch:
addiu $t0, $zero, 0x4
sh $t0, 0x10($v0)
move $k0, $v0