diff --git a/include/PR/os_thread.h b/include/PR/os_thread.h index 707608abd..25e64b14c 100644 --- a/include/PR/os_thread.h +++ b/include/PR/os_thread.h @@ -78,6 +78,8 @@ typedef struct OSThread_s { OSId id; /* id for debugging */ int fp; /* thread has used fp unit */ __OSThreadContext context; /* register/interrupt mask */ + u32 cycles_saved; + u32 cycles_at_dispatch; } OSThread; diff --git a/src/game/lv.c b/src/game/lv.c index 508486bef..266bfe6ff 100644 --- a/src/game/lv.c +++ b/src/game/lv.c @@ -859,8 +859,60 @@ Gfx *lvPrintRateGraph(Gfx *gdl) return gdl; } -u32 g_LvBefore = 0; -u32 g_LvAfter = 0; +Gfx *lvPrintCounter(Gfx *gdl, s32 *y, char *label, u32 cycles, u32 cycles_per_second) +{ + char buffer[32]; + s32 x = 10; + u32 colour; + u32 percentage; + u32 microseconds; + s32 textwidth; + s32 textheight; + u32 target; + + // 100% means 30 FPS + // 50% means 60 FPS + target = cycles_per_second / 30; + percentage = 100 * cycles / target; + + microseconds = (f32) cycles / cycles_per_second * 1000000; + + // Colour green if this counter can achieve 60 FPS + // Colour yellow if this counter can achieve 30 FPS + // Colour red if under 30 FPS + if (percentage <= 50) { + colour = 0x00ff00a0; // green + } else if (percentage <= 100) { + colour = 0xffff00a0; // yellow + } else { + colour = 0xff0000a0; // red + } + + // Label + x = 10; + gdl = textRender(gdl, &x, y, label, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0); + + // Microseconds + sprintf(buffer, "%d", microseconds); + textMeasure(&textheight, &textwidth, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0); + x = 80 - textwidth; + gdl = textRender(gdl, &x, y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0); + + // Percentage + sprintf(buffer, "(%d%%)\n", percentage); + textMeasure(&textheight, &textwidth, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0); + x = 120 - textwidth; + gdl = textRender(gdl, &x, y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, colour, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0); + + // Line + gdl = textSetPrimColour(gdl, colour); + + gDPFillRectangleScaled(gdl++, 10, *y, 11 + microseconds * 110 / 33333, *y + 1); + + *y += 2; + + return gdl; +} Gfx *lvPrintRateText(Gfx *gdl) { @@ -907,8 +959,23 @@ Gfx *lvPrintRateText(Gfx *gdl) gdl = textRender(gdl, &x, &y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0x00ff00a0, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0); } +#ifdef PROFILING + { + u32 counters[5]; + + profileGetCounters(counters); + + gdl = lvPrintCounter(gdl, &y, "RSP", counters[0], OS_CLOCK_RATE); + gdl = lvPrintCounter(gdl, &y, "RDP", counters[1], OS_CLOCK_RATE); + gdl = lvPrintCounter(gdl, &y, "CPU", counters[2] + counters[3] + counters[4], OS_CPU_COUNTER); + gdl = lvPrintCounter(gdl, &y, " audio", counters[2], OS_CPU_COUNTER); + gdl = lvPrintCounter(gdl, &y, " main", counters[3], OS_CPU_COUNTER); + gdl = lvPrintCounter(gdl, &y, " sched", counters[4], OS_CPU_COUNTER); + } +#endif + x = 10; - sprintf(buffer, "mema free %d KB\n", memaGetLongestFree() / 1024); + sprintf(buffer, "\nmema free %d KB\n", memaGetLongestFree() / 1024); gdl = textRender(gdl, &x, &y, buffer, g_CharsHandelGothicXs, g_FontHandelGothicXs, 0x00ff00a0, 0x000000a0, viGetWidth(), viGetHeight(), 0, 0); sprintf(buffer, "memp free %d KB\n", mempGetStageFree() / 1024); diff --git a/src/game/player.c b/src/game/player.c index 609627cc2..368e33e28 100644 --- a/src/game/player.c +++ b/src/game/player.c @@ -1847,7 +1847,7 @@ void playerTickCutscene(bool arg0) g_CutsceneCurTotalFrame60f += g_Vars.lvupdate60freal; } - if (g_CutsceneCurTotalFrame60f > 30 && (buttons & 0xffff)) { + if (g_CutsceneCurTotalFrame60f > 30 && (buttons & ~L_TRIG)) { g_CutsceneSkipRequested = true; if (g_Vars.autocutplaying) { diff --git a/src/include/lib/profile.h b/src/include/lib/profile.h index ec6ac10b5..eae990265 100644 --- a/src/include/lib/profile.h +++ b/src/include/lib/profile.h @@ -13,6 +13,7 @@ void profile00009a98(void); void profileStart(s32 arg0); void profileEnd(s32 arg0); void profileHandleRspEvent(s32 event); +void profileGetCounters(u32 counters[5]); #endif Gfx *profileRender(Gfx *gdl); diff --git a/src/lib/profile.c b/src/lib/profile.c index 500e7e258..2d40ae70a 100644 --- a/src/lib/profile.c +++ b/src/lib/profile.c @@ -13,10 +13,11 @@ // [x][x][1] is the start time u32 g_ProfileMarkers[NUM_SAMPLES][NUM_PROFILEMARKERS][2]; s32 g_ProfileIndex = 0; -u32 g_ProfileAudStart = 0; -u32 g_ProfileAudTicks[NUM_SAMPLES]; -u32 g_ProfileGfxTicks[NUM_SAMPLES]; -u32 g_ProfileGfxTally; +u32 g_ProfileAudStart; +u32 g_ProfileAudCycles; +u32 g_ProfileGfxCycles; +u32 g_ProfileGfxHistory[30]; +u32 g_ProfileGfxIndex = 0; struct profileslot *g_ProfileCurrentSlot; struct profileslot { @@ -135,6 +136,43 @@ u32 profileReadCounters(void) return max; } +extern OSThread g_SchedThread; + +void profileGetCounters(u32 counters[5]) +{ + s32 i; + u32 max = 0; + + OSPri prevpri = osGetThreadPri(0); + osSetThreadPri(0, THREADPRI_SCHED + 1); + + // RSP + counters[0] = g_ProfileAudCycles; + + // RDP + for (i = 0; i < ARRAYCOUNT(g_ProfileGfxHistory); i++) { + if (g_ProfileGfxHistory[i] > max) { + max = g_ProfileGfxHistory[i]; + } + } + + counters[1] = g_ProfileGfxCycles; + + // Audio thread + counters[2] = g_AudioManager.thread.cycles_saved; + g_AudioManager.thread.cycles_saved = 0; + + // Main thread + counters[3] = g_MainThread.cycles_saved + (osGetCount() - g_MainThread.cycles_at_dispatch); + g_MainThread.cycles_saved = 0; + + // Scheduler thread + counters[4] = g_SchedThread.cycles_saved; + g_SchedThread.cycles_saved = 0; + + osSetThreadPri(0, prevpri); +} + void profileHandleRspEvent(s32 event) { switch (event) { @@ -142,19 +180,15 @@ void profileHandleRspEvent(s32 event) g_ProfileAudStart = osGetCount(); break; case RSPEVENT_AUD_FINISH: - g_ProfileMarkers[g_ProfileIndex][PROFILEMARKER_AUD][0] += osGetCount() - g_ProfileAudStart; + g_ProfileAudCycles = osGetCount() - g_ProfileAudStart; break; case RSPEVENT_GFX_START: osDpSetStatus(DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR); break; - case RSPEVENT_GFX_PAUSE: - g_ProfileGfxTally += profileReadCounters(); - break; - case RSPEVENT_GFX_RESUME: - osDpSetStatus(DPC_CLR_CMD_CTR | DPC_CLR_PIPE_CTR | DPC_CLR_TMEM_CTR); - break; case RSPEVENT_GFX_FINISH: - g_ProfileGfxTicks[g_ProfileIndex] = g_ProfileGfxTally + profileReadCounters(); + g_ProfileGfxCycles = profileReadCounters(); + g_ProfileGfxHistory[g_ProfileGfxIndex] = g_ProfileGfxCycles; + g_ProfileGfxIndex = (g_ProfileGfxIndex + 1) % ARRAYCOUNT(g_ProfileGfxHistory); break; } } @@ -260,8 +294,6 @@ Gfx *profileRender(Gfx *gdl) s32 y = 10; gdl = text0f153628(gdl); - gdl = profileRenderCpuLine(gdl, x, &y, "AUD", PROFILEMARKER_AUD); - gdl = profileRenderRdpLine(gdl, x, &y, "GFX", g_ProfileGfxTicks); gdl = profileRenderCpuLine(gdl, x, &y, "CPU", PROFILEMARKER_CPU); gdl = profileRenderCpuLine(gdl, x, &y, " audio", PROFILEMARKER_AUDIO); gdl = profileRenderCpuLine(gdl, x, &y, " lvTick", PROFILEMARKER_LVTICK); @@ -322,8 +354,6 @@ Gfx *profileRender(Gfx *gdl) gdl = text0f153780(gdl); } - g_ProfileGfxTally = 0; - return gdl; } diff --git a/src/lib/sched.c b/src/lib/sched.c index 725b1b14c..6205982d6 100644 --- a/src/lib/sched.c +++ b/src/lib/sched.c @@ -164,7 +164,7 @@ void __scMain(void *arg) switch ((int) msg) { case VIDEO_MSG: if (osViGetCurrentFramebuffer() == osViGetNextFramebuffer()) { - osDpSetStatus(DPC_STATUS_FLUSH); + osDpSetStatus(DPC_CLR_FREEZE); } __scHandleRetrace(sc); @@ -174,7 +174,7 @@ void __scMain(void *arg) __scHandleRSP(sc); break; case RDP_DONE_MSG: - osDpSetStatus(DPC_STATUS_START_GCLK); + osDpSetStatus(DPC_SET_FREEZE); __scHandleRDP(sc); __scHandleTasks(sc); break; @@ -309,8 +309,6 @@ void __scHandleRSP(OSSched *sc) t->state |= OS_SC_YIELDED; if ((t->flags & OS_SC_TYPE_MASK) == OS_SC_XBUS) { - profileHandleRspEvent(RSPEVENT_GFX_PAUSE); - // Push the task back on the list t->next = sc->gfxListHead; sc->gfxListHead = t; @@ -489,13 +487,13 @@ OSScTask *__scTaskReady(OSScTask *t) */ s32 __scTaskComplete(OSSched *sc, OSScTask *t) { - if (t->list.t.type == M_AUDTASK) { - profileHandleRspEvent(RSPEVENT_AUD_FINISH); - } else { - profileHandleRspEvent(RSPEVENT_GFX_FINISH); - } - if ((t->state & OS_SC_RCP_MASK) == 0) { + if (t->list.t.type == M_AUDTASK) { + profileHandleRspEvent(RSPEVENT_AUD_FINISH); + } else { + profileHandleRspEvent(RSPEVENT_GFX_FINISH); + } + if (t->list.t.type == 1 && (t->flags & OS_SC_SWAPBUFFER) && (t->flags & OS_SC_LAST_TASK)) { @@ -582,24 +580,13 @@ void __scAppendList(OSSched *sc, OSScTask *t) void __scExec(OSSched *sc, OSScTask *sp, OSScTask *dp, s32 resuming) { if (sp) { - if (sp->list.t.type == M_AUDTASK) { - osWritebackDCacheAll(); - } - - if (sp->list.t.type != M_AUDTASK && (sp->state & OS_SC_YIELD) == 0) { - osDpSetStatus(DPC_STATUS_CMD_BUSY | DPC_STATUS_CBUF_READY | DPC_STATUS_DMA_BUSY | DPC_STATUS_END_VALID); - } - - if (resuming) { - if (sp->list.t.type != M_AUDTASK) { - profileHandleRspEvent(RSPEVENT_GFX_RESUME); - } - } else { - if (sp->list.t.type == M_AUDTASK) { - profileHandleRspEvent(RSPEVENT_AUD_START); - } else { + if (sp->list.t.type == M_GFXTASK) { + if ((sp->state & OS_SC_YIELD) == 0) { profileHandleRspEvent(RSPEVENT_GFX_START); } + } else { + osWritebackDCacheAll(); + profileHandleRspEvent(RSPEVENT_AUD_START); } sp->state &= ~(OS_SC_YIELD | OS_SC_YIELDED); diff --git a/src/lib/ultra/os/exceptasm.s b/src/lib/ultra/os/exceptasm.s index 011f47993..2e9450fc3 100644 --- a/src/lib/ultra/os/exceptasm.s +++ b/src/lib/ultra/os/exceptasm.s @@ -645,7 +645,25 @@ glabel __osDispatchThread jal __osPopThread addiu $a0, $a0, %lo(__osRunQueue) lui $at, %hi(__osRunningThread) - sw $v0, %lo(__osRunningThread)($at) + lw $v1, %lo(__osRunningThread)($at) # v1 = yielding thread + sw $v0, %lo(__osRunningThread)($at) # v0 = dispatching thread + + # Read the current CPU cycle counter + mfc0 $t0, C0_COUNT + + # In the dispatching thread, set cycles_at_dispatch = COUNT + sw $t0, 0x234($v0) + + # In the yielding thread, set cycles_saved += (COUNT - cycles_at_dispatch) + beqz $v1, .Ldispatch + nop + lw $t1, 0x234($v1) # t1 = cycles_at_dispatch + subu $t0, $t0, $t1 # t0 = COUNT - cycles_at_dispatch + lw $t1, 0x230($v1) # t1 = cycles_saved + addu $t1, $t1, $t0 # cycles_saved += COUNT - cycles_at_dispatch + sw $t0, 0x230($v1) + +.Ldispatch: addiu $t0, $zero, 0x4 sh $t0, 0x10($v0) move $k0, $v0