From f2ff7b97e5aaaaf2223e5af6b7862fdd764f7429 Mon Sep 17 00:00:00 2001 From: Sean Maas Date: Fri, 5 Feb 2021 21:28:40 -0500 Subject: [PATCH] Initial DS(i) port, take 2 Rebuilt from the ground up; now featuring a custom GBI implementation designed specifically for DS hardware. --- Makefile | 83 ++- sound/sequences/00_sound_player.s | 26 +- src/audio/external.c | 4 + src/audio/heap.c | 2 + src/audio/port_eu.c | 4 + src/game/memory.c | 7 + src/nds/nds_controller.c | 56 ++ src/nds/nds_include.h | 26 + src/nds/nds_main.c | 48 ++ src/nds/nds_renderer.c | 991 ++++++++++++++++++++++++++++++ src/nds/nds_renderer.h | 7 + src/nds/ultra_reimplementation.c | 187 ++++++ 12 files changed, 1423 insertions(+), 18 deletions(-) create mode 100644 src/nds/nds_controller.c create mode 100644 src/nds/nds_include.h create mode 100644 src/nds/nds_main.c create mode 100644 src/nds/nds_renderer.c create mode 100644 src/nds/nds_renderer.h create mode 100644 src/nds/ultra_reimplementation.c diff --git a/Makefile b/Makefile index ae68f648..2e676e20 100644 --- a/Makefile +++ b/Makefile @@ -16,8 +16,9 @@ DEFINES := # 'make clean' may be required first. 
# Build for the N64 (turn this off for ports) -TARGET_N64 ?= 1 - +TARGET_N64 ?= 0 +# Build for Nintendo DS +TARGET_NDS ?= 1 # COMPILER - selects the C compiler to use # ido - uses the SGI IRIS Development Option compiler, which is used to build @@ -57,6 +58,11 @@ else ifeq ($(VERSION),sh) VERSION_JP_US ?= false endif +ifeq ($(TARGET_NDS),1) + OPT_FLAGS := -O2 + GRUCODE := f3dex2 +endif + TARGET := sm64.$(VERSION) @@ -202,8 +208,13 @@ endif BUILD_DIR_BASE := build # BUILD_DIR is the location where all build artifacts are placed +ifeq ($(TARGET_NDS),1) +BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)_nds +ROM := $(BUILD_DIR)/$(TARGET).nds +else BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION) ROM := $(BUILD_DIR)/$(TARGET).z64 +endif ELF := $(BUILD_DIR)/$(TARGET).elf LIBULTRA := $(BUILD_DIR)/libultra.a LD_SCRIPT := sm64.ld @@ -214,12 +225,19 @@ ACTOR_DIR := actors LEVEL_DIRS := $(patsubst levels/%,%,$(dir $(wildcard levels/*/header.h))) # Directories containing source files -SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin data assets asm lib sound +SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin data assets lib sound BIN_DIRS := bin bin/$(VERSION) -ULTRA_SRC_DIRS := lib/src lib/src/math lib/asm lib/data +ULTRA_SRC_DIRS := lib/src lib/src/math lib/data ULTRA_BIN_DIRS := lib/bin +ifeq ($(TARGET_NDS),1) + SRC_DIRS += src/nds +else + SRC_DIRS += asm + ULTRA_SRC_DIRS += lib/asm +endif + GODDARD_SRC_DIRS := src/goddard src/goddard/dynlists # File dependencies and variables for specific files @@ -232,7 +250,23 @@ S_FILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.s)) ULTRA_C_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.c)) GODDARD_C_FILES := $(foreach dir,$(GODDARD_SRC_DIRS),$(wildcard $(dir)/*.c)) ULTRA_S_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.s)) -GENERATED_C_FILES := $(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c +GENERATED_C_FILES := 
$(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c \ + $(addprefix $(BUILD_DIR)/bin/,$(addsuffix _skybox.c,$(notdir $(basename $(wildcard textures/skyboxes/*.png))))) + +ifeq ($(TARGET_NDS),1) + C_FILES := $(filter-out src/game/main.c,$(C_FILES)) + ULTRA_C_FILES := \ + alBnkfNew.c \ + guLookAtRef.c \ + guMtxF2L.c \ + guNormalize.c \ + guOrthoF.c \ + guPerspectiveF.c \ + guRotateF.c \ + guScaleF.c \ + guTranslateF.c + ULTRA_C_FILES := $(addprefix lib/src/,$(ULTRA_C_FILES)) +endif # Sound files SOUND_BANK_FILES := $(wildcard sound/sound_banks/*.json) @@ -277,6 +311,16 @@ endif # Compiler Options # #==============================================================================# +ifeq ($(TARGET_NDS),1) +AS := $(DEVKITARM)/bin/arm-none-eabi-as +CC := $(DEVKITARM)/bin/arm-none-eabi-gcc +CPP := $(DEVKITARM)/bin/arm-none-eabi-cpp -P +CXX := $(DEVKITARM)/bin/arm-none-eabi-g++ +LD := $(CXX) +OBJDUMP := $(DEVKITARM)/bin/arm-none-eabi-objdump +OBJCOPY := $(DEVKITARM)/bin/arm-none-eabi-objcopy +else + # detect prefix for MIPS toolchain ifneq ($(call find-command,mips-linux-gnu-ld),) CROSS := mips-linux-gnu- @@ -315,6 +359,8 @@ AR := $(CROSS)ar OBJDUMP := $(CROSS)objdump OBJCOPY := $(CROSS)objcopy +endif + ifeq ($(TARGET_N64),1) TARGET_CFLAGS := -nostdinc -DTARGET_N64 -D_LANGUAGE_C CC_CFLAGS := -fno-builtin @@ -328,6 +374,21 @@ endif C_DEFINES := $(foreach d,$(DEFINES),-D$(d)) DEF_INC_CFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(C_DEFINES) +ifeq ($(TARGET_NDS),1) + +LIBDIRS := $(DEVKITPRO)/libnds +TARGET_CFLAGS := -march=armv5te -mtune=arm946e-s -fomit-frame-pointer -ffast-math $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM9 -D_LANGUAGE_C -DNO_SEGMENTED_MEMORY -DLIBFAT +TARGET_LDFLAGS := -lfat -lnds9 -specs=dsi_arm9.specs -g -mthumb -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +CC_CHECK := $(CC) +CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(TARGET_CFLAGS) -Wall -Wextra -Wno-format-security -DNON_MATCHING 
-DAVOID_UB $(DEF_INC_CFLAGS) + +ASFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(foreach d,$(DEFINES),--defsym $(d)) +CFLAGS := -fno-strict-aliasing -fwrapv $(OPT_FLAGS) $(TARGET_CFLAGS) $(DEF_INC_CFLAGS) +LDFLAGS := $(TARGET_LDFLAGS) + +else + # Check code syntax with host compiler CC_CHECK := gcc CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(TARGET_CFLAGS) -std=gnu90 -Wall -Wextra -Wno-format-security -Wno-main -DNON_MATCHING -DAVOID_UB $(DEF_INC_CFLAGS) @@ -357,6 +418,8 @@ endif # Prevent a crash with -sopt export LANG := C +endif + #==============================================================================# # Miscellaneous Tools # #==============================================================================# @@ -509,6 +572,7 @@ $(BUILD_DIR)/%.ci4: %.ci4.png # Compressed Segment Generation # #==============================================================================# +ifeq ($(TARGET_N64),1) # Link segment file to resolve external labels # TODO: ideally this would be `-Trodata-segment=0x07000000` but that doesn't set the address $(BUILD_DIR)/%.elf: $(BUILD_DIR)/%.o @@ -537,6 +601,7 @@ $(BUILD_DIR)/%.mio0: $(BUILD_DIR)/%.bin $(BUILD_DIR)/%.mio0.o: $(BUILD_DIR)/%.mio0 $(call print,Converting MIO0 to ELF:,$<,$@) $(V)printf ".section .data\n\n.incbin \"$<\"\n" | $(AS) $(ASFLAGS) -o $@ +endif #==============================================================================# @@ -727,6 +792,13 @@ $(BUILD_DIR)/rsp/%.bin $(BUILD_DIR)/rsp/%_data.bin: rsp/%.s $(call print,Assembling:,$<,$@) $(V)$(RSPASM) -sym $@.sym $(RSPASMFLAGS) -strequ CODE_FILE $(BUILD_DIR)/rsp/$*.bin -strequ DATA_FILE $(BUILD_DIR)/rsp/$*_data.bin $< +# Build NDS ROM +ifeq ($(TARGET_NDS),1) +$(ROM): $(O_FILES) $(MIO0_FILES:.mio0=.o) $(ULTRA_O_FILES) $(GODDARD_O_FILES) + $(LD) -L $(BUILD_DIR) -o $@.elf $(O_FILES) $(ULTRA_O_FILES) $(GODDARD_O_FILES) $(LDFLAGS) + ndstool -c $@ -9 $@.elf +else + # Run linker script through the C preprocessor $(BUILD_DIR)/$(LD_SCRIPT): $(LD_SCRIPT) 
$(call print,Preprocessing linker script:,$<,$@) @@ -757,6 +829,7 @@ $(ROM): $(ELF) $(BUILD_DIR)/$(TARGET).objdump: $(ELF) $(OBJDUMP) -D $< > $@ +endif .PHONY: all clean distclean default diff test load libultra diff --git a/sound/sequences/00_sound_player.s b/sound/sequences/00_sound_player.s index 68502f9c..a5f6b91c 100644 --- a/sound/sequences/00_sound_player.s +++ b/sound/sequences/00_sound_player.s @@ -95,13 +95,13 @@ chan_dyncall .poll_023589: chan_delay1 chan_ioreadval 0 -chan_bltz .skip_023589 # if we have a signal: - chan_beqz .force_stop_023589 # told to stop - chan_jump .start_playing_023589 # told to play something else +chan_bltz .skip_023589 /* if we have a signal: */ + chan_beqz .force_stop_023589 /* told to stop */ + chan_jump .start_playing_023589 /* told to play something else */ .skip_023589: chan_testlayerfinished 0 -chan_beqz .poll_023589 # if layer 0 hasn't finished, keep polling -chan_jump .main_loop_023589 # otherwise go back to the main loop +chan_beqz .poll_023589 /* if layer 0 hasn't finished, keep polling */ +chan_jump .main_loop_023589 /* otherwise go back to the main loop */ .force_stop_023589: chan_freelayer 0 chan_freelayer 1 @@ -202,15 +202,15 @@ chan_dyncall .poll_7: chan_delay1 chan_ioreadval 0 -chan_bltz .skip_7 # if we have a signal: - chan_beqz .force_stop_7 # told to stop +chan_bltz .skip_7 /* if we have a signal: */ + chan_beqz .force_stop_7 /* told to stop */ chan_unreservenotes - chan_jump .start_playing_7 # told to play something else + chan_jump .start_playing_7 /* told to play something else */ .skip_7: chan_testlayerfinished 0 -chan_beqz .poll_7 # if layer 0 hasn't finished, keep polling +chan_beqz .poll_7 /* if layer 0 hasn't finished, keep polling */ chan_unreservenotes -chan_jump .main_loop_7 # otherwise go back to the main loop +chan_jump .main_loop_7 /* otherwise go back to the main loop */ .force_stop_7: chan_freelayer 0 chan_freelayer 1 @@ -233,10 +233,10 @@ chan_end chan_setpanmix 127 chan_setvolscale 127 
chan_setvibratoextent 0 -chan_ioreadval 1 # IO slots 0-3 are reset to -1 when read; restore the value +chan_ioreadval 1 /* IO slots 0-3 are reset to -1 when read; restore the value */ chan_iowriteval 0 -chan_break # break out of the loop -chan_break # force the caller to return immediately +chan_break /* break out of the loop */ +chan_break /* force the caller to return immediately */ chan_end # Set reverb in way that takes area echo level and volume into account. This diff --git a/src/audio/external.c b/src/audio/external.c index eb5c3d7e..1b3e1016 100644 --- a/src/audio/external.c +++ b/src/audio/external.c @@ -712,6 +712,7 @@ void func_eu_802e9bec(s32 player, s32 channel, s32 arg2) { * Called from threads: thread4_sound */ struct SPTask *create_next_audio_frame_task(void) { +#ifdef TARGET_N64 u32 samplesRemainingInAI; s32 writtenCmds; s32 index; @@ -813,6 +814,9 @@ struct SPTask *create_next_audio_frame_task(void) { decrease_sample_dma_ttls(); return gAudioTask; +#else + return NULL; +#endif } #endif diff --git a/src/audio/heap.c b/src/audio/heap.c index 357855f3..b830f154 100644 --- a/src/audio/heap.c +++ b/src/audio/heap.c @@ -1088,10 +1088,12 @@ s32 audio_shut_down_and_reset_step(void) { */ void wait_for_audio_frames(s32 frames) { gAudioFrameCount = 0; +#ifdef TARGET_N64 // Sound thread will update gAudioFrameCount while (gAudioFrameCount < frames) { // spin } +#endif } #endif diff --git a/src/audio/port_eu.c b/src/audio/port_eu.c index 636d0143..e9a2a6a3 100644 --- a/src/audio/port_eu.c +++ b/src/audio/port_eu.c @@ -35,6 +35,7 @@ s32 audio_shut_down_and_reset_step(void); void func_802ad7ec(u32); struct SPTask *create_next_audio_frame_task(void) { +#ifdef TARGET_N64 u32 samplesRemainingInAI; s32 writtenCmds; s32 index; @@ -129,6 +130,9 @@ struct SPTask *create_next_audio_frame_task(void) { task->yield_data_ptr = NULL; task->yield_data_size = 0; return gAudioTask; +#else + return NULL; +#endif } void eu_process_audio_cmd(struct EuAudioCmd *cmd) { diff --git 
a/src/game/memory.c b/src/game/memory.c index 5f3b329d..868224e6 100644 --- a/src/game/memory.c +++ b/src/game/memory.c @@ -1,4 +1,7 @@ #include <PR/ultratypes.h> +#ifndef TARGET_N64 +#include <string.h> +#endif #include "sm64.h" @@ -244,6 +247,7 @@ u32 main_pool_pop_state(void) { * function blocks until completion. */ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { +#ifdef TARGET_N64 u32 size = ALIGN16(srcEnd - srcStart); osInvalDCache(dest, size); @@ -258,6 +262,9 @@ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { srcStart += copySize; size -= copySize; } +#else + memcpy(dest, srcStart, srcEnd - srcStart); +#endif } /** diff --git a/src/nds/nds_controller.c b/src/nds/nds_controller.c new file mode 100644 index 00000000..dc24753b --- /dev/null +++ b/src/nds/nds_controller.c @@ -0,0 +1,56 @@ +#include "nds_include.h" + +#include "lib/src/osContInternal.h" + +s32 osContInit(UNUSED OSMesgQueue *mq, u8 *controllerBits, UNUSED OSContStatus *status) { + *controllerBits = 1; + return 0; +} + +s32 osContStartReadData(UNUSED OSMesgQueue *mesg) { + return 0; +} + +void osContGetReadData(OSContPad *pad) { + pad->button = 0; + pad->stick_x = 0; + pad->stick_y = 0; + + scanKeys(); + const u32 keys = keysHeld(); + + if (keys & KEY_A) { + pad->button |= A_BUTTON; + } + if (keys & KEY_B) { + pad->button |= B_BUTTON; + } + if (keys & KEY_X) { + pad->button |= U_CBUTTONS; + } + if (keys & KEY_Y) { + pad->button |= D_CBUTTONS; + } + if (keys & KEY_START) { + pad->button |= START_BUTTON; + } + if (keys & KEY_L) { + pad->button |= R_TRIG; + } + if (keys & KEY_R) { + pad->button |= Z_TRIG; + } + + if (keys & KEY_UP) { + pad->stick_y = 80; + } + if (keys & KEY_DOWN) { + pad->stick_y = -80; + } + if (keys & KEY_LEFT) { + pad->stick_x = -80; + } + if (keys & KEY_RIGHT) { + pad->stick_x = 80; + } +} diff --git a/src/nds/nds_include.h b/src/nds/nds_include.h new file mode 100644 index 00000000..a8ec2998 --- /dev/null +++ b/src/nds/nds_include.h @@ -0,0 +1,27 @@ +#ifndef NDS_INCLUDE_H +#define
NDS_INCLUDE_H + +// Workaround for libnds type redefinitions: rename the libnds integer typedefs while its header is parsed, then release every one of those names again +#define u64 _u64 +#define s64 _s64 +#define u32 _u32 +#define vu32 _vu32 +#define vs32 _vs32 +#define s32 _s32 +#define u16 _u16 +#define s16 _s16 +#define u8 _u8 +#define s8 _s8 +#include <nds.h> +#undef u64 +#undef s64 +#undef u32 +#undef vu32 +#undef vs32 +#undef s32 +#undef u16 +#undef s16 +#undef u8 +#undef s8 + +#endif // NDS_INCLUDE_H diff --git a/src/nds/nds_main.c b/src/nds/nds_main.c new file mode 100644 index 00000000..9e553283 --- /dev/null +++ b/src/nds/nds_main.c @@ -0,0 +1,48 @@ +#include <stdio.h> + +#include "nds_include.h" +#include <fat.h> + +#include "audio/external.h" +#include "game/game_init.h" +#include "nds_renderer.h" + +OSMesg D_80339BEC; +OSMesgQueue gSIEventMesgQueue; + +s8 gResetTimer; +s8 D_8032C648; +s8 gDebugLevelSelect; +s8 gShowProfiler; +s8 gShowDebugText; + +void set_vblank_handler(UNUSED s32 index, UNUSED struct VblankHandler *handler, UNUSED OSMesgQueue *queue, UNUSED OSMesg *msg) { +} + +void dispatch_audio_sptask(UNUSED struct SPTask *spTask) { +} + +void send_display_list(struct SPTask *spTask) { + draw_frame((Gfx*)spTask->task.t.data_ptr); +} + +int main(void) { + static u64 pool[0x165000 / sizeof(u64)]; + main_pool_init(pool, pool + sizeof(pool) / sizeof(pool[0])); + gEffectsMemoryPool = mem_pool_init(0x4000, MEMORY_POOL_LEFT); + + renderer_init(); + +#ifdef LIBFAT + if (!fatInitDefault()) { + printf("Failed to initialize libfat!\n"); + } +#endif + + audio_init(); + sound_init(); + + thread5_game_loop(NULL); + + return 0; +} diff --git a/src/nds/nds_renderer.c b/src/nds/nds_renderer.c new file mode 100644 index 00000000..9c90f2ba --- /dev/null +++ b/src/nds/nds_renderer.c @@ -0,0 +1,991 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "nds_include.h" +#include <PR/gbi.h> + +#include "nds_renderer.h" + +struct Color { + uint8_t r, g, b, a; +}; + +struct Vertex { + int16_t x, y, z; + int16_t s, t; + struct Color color; +}; + +struct Texture { + uint8_t *original; + uint8_t *converted; + int name; + uint8_t type; + uint8_t
size_x; + uint8_t size_y; +}; + +struct Light { + int16_t nx, ny, nz; + int8_t x, y, z; + struct Color color; +}; + +static struct Color env_color; +static struct Color fill_color; + +static struct Vertex vertex_buffer[16]; +static struct Texture texture_map[2048]; +static struct Light lights[5]; + +uint16_t texture_fifo[2048]; +uint16_t texture_fifo_start; +uint16_t texture_fifo_end; + +static uint8_t *texture_address; +static uint8_t texture_format; +static uint8_t texture_bit_width; +static uint16_t texture_row_size; +static uint16_t texture_size; +static uint16_t texture_scale_s; +static uint16_t texture_scale_t; + +static uint32_t geometry_mode; +static uint32_t rdphalf_1; +static uint32_t other_mode_l; +static uint32_t other_mode_h; +static Gwords texrect; + +static uint8_t *z_buffer; +static uint8_t *c_buffer; + +static bool texture_dirty; +static bool lights_dirty; +static int num_lights; + +static int polygon_id; +static int poly_fmt; +static int tex_params; + +static bool use_color; +static bool use_texture; +static bool use_env_color; +static bool use_env_alpha; + +static bool shrunk; +static bool background; +static int32_t z_depth; + +static int no_texture; +static int frame_count; + +static void load_texture() { + // Look up the current texture using a simple hash calculated from its address + uint32_t index = ((uint32_t)texture_address >> 5) & 0x7FF; + while (texture_map[index].original != texture_address && texture_map[index].original != NULL) { + index = (index + 1) & 0x7FF; + } + + struct Texture *cur = &texture_map[index]; + + // Load the texture if it was found + if (cur->original != NULL) { + if (cur->name) { + glBindTexture(GL_TEXTURE_2D, cur->name); + return; + } + + // Copy the texture back into VRAM if it was pushed out, pushing out other textures if necessary + glGenTextures(1, &cur->name); + glBindTexture(GL_TEXTURE_2D, cur->name); + while (!glTexImage2D(GL_TEXTURE_2D, 0, cur->type, cur->size_x, cur->size_y, 0, TEXGEN_TEXCOORD, 
cur->converted)) { + glDeleteTextures(1, &texture_map[texture_fifo[texture_fifo_end]].name); + texture_map[texture_fifo[texture_fifo_end]].name = 0; + texture_fifo_end = (texture_fifo_end + 1) & 0x7FF; + } + texture_fifo[texture_fifo_start] = index; + texture_fifo_start = (texture_fifo_start + 1) & 0x7FF; + return; + } + + cur->original = texture_address; + + // Determine the width of the new texture + const int width = texture_row_size << (4 - texture_bit_width); + switch (width) { + case 8: cur->size_x = TEXTURE_SIZE_8; break; + case 16: cur->size_x = TEXTURE_SIZE_16; break; + case 32: cur->size_x = TEXTURE_SIZE_32; break; + case 64: cur->size_x = TEXTURE_SIZE_64; break; + case 128: cur->size_x = TEXTURE_SIZE_128; break; + + default: + //printf("Unsupported texture width: %d\n", width); + glBindTexture(GL_TEXTURE_2D, cur->name = no_texture); + return; + }; + + // Determine the height of the new texture + const int height = ((texture_size << 1) >> texture_bit_width) / width; + switch (height) { + case 8: cur->size_y = TEXTURE_SIZE_8; break; + case 16: cur->size_y = TEXTURE_SIZE_16; break; + case 32: cur->size_y = TEXTURE_SIZE_32; break; + case 64: cur->size_y = TEXTURE_SIZE_64; break; + case 128: cur->size_y = TEXTURE_SIZE_128; break; + + default: + //printf("Unsupported texture height: %d\n", height); + glBindTexture(GL_TEXTURE_2D, cur->name = no_texture); + return; + }; + + // Convert the texture to a format the DS understands + switch (texture_format) { + case G_IM_FMT_RGBA: + switch (texture_bit_width) { + case G_IM_SIZ_16b: + cur->converted = (uint8_t*)malloc(texture_size); + for (uint32_t x = 0; x < texture_size / 2; x++) { + const uint16_t color = (texture_address[x * 2] << 8) | texture_address[x * 2 + 1]; + const uint8_t r = ((color >> 11) & 0x1F); + const uint8_t g = ((color >> 6) & 0x1F); + const uint8_t b = ((color >> 1) & 0x1F); + const uint8_t a = ((color >> 0) & 0x01); + ((uint16_t*)cur->converted)[x] = (a << 15) | (b << 10) | (g << 5) | r; + } + 
DC_FlushRange(cur->converted, texture_size); + cur->type = GL_RGBA; + break; + + default: + //printf("Unsupported RGBA texture bit width: %d\n", texture_bit_width); + glBindTexture(GL_TEXTURE_2D, cur->name = no_texture); + return; + } + break; + + case G_IM_FMT_IA: + switch (texture_bit_width) { + case G_IM_SIZ_4b: + cur->converted = (uint8_t*)malloc(texture_size * 2); + for (uint32_t x = 0; x < texture_size * 2; x++) { + const uint8_t color = (texture_address[x / 2] >> ((x & 1) ? 0 : 4)) & 0x0F; + const uint8_t i = ((color >> 1) & 0x07); + const uint8_t a = ((color >> 0) & 0x01) ? 31 : 0; + cur->converted[x] = (a << 3) | i; + } + DC_FlushRange(cur->converted, texture_size * 2); + cur->type = GL_RGB8_A5; + break; + + case G_IM_SIZ_8b: + cur->converted = (uint8_t*)malloc(texture_size); + for (uint32_t x = 0; x < texture_size; x++) { + const uint8_t color = texture_address[x]; + const uint8_t i = ((color >> 4) & 0x0F) * 7 / 15; + const uint8_t a = ((color >> 0) & 0x0F) * 31 / 15; + cur->converted[x] = (a << 3) | i; + } + DC_FlushRange(cur->converted, texture_size); + cur->type = GL_RGB8_A5; + break; + + case G_IM_SIZ_16b: + cur->converted = (uint8_t*)malloc(texture_size / 2); + for (uint32_t x = 0; x < texture_size / 2; x++) { + const uint8_t i = texture_address[x * 2 + 0] * 7 / 255; + const uint8_t a = texture_address[x * 2 + 1] * 31 / 255; + cur->converted[x] = (a << 3) | i; + } + DC_FlushRange(cur->converted, texture_size / 2); + cur->type = GL_RGB8_A5; + break; + + default: + //printf("Unsupported IA texture bit width: %d\n", texture_bit_width); + glBindTexture(GL_TEXTURE_2D, cur->name = no_texture); + return; + } + break; + + default: + //printf("Unsupported texture format: %d\n", texture_format); + glBindTexture(GL_TEXTURE_2D, cur->name = no_texture); + return; + } + + // Copy the texture into VRAM, pushing out other textures if necessary + glGenTextures(1, &cur->name); + glBindTexture(GL_TEXTURE_2D, cur->name); + while (!glTexImage2D(GL_TEXTURE_2D, 0, 
cur->type, cur->size_x, cur->size_y, 0, TEXGEN_TEXCOORD, cur->converted)) { + glDeleteTextures(1, &texture_map[texture_fifo[texture_fifo_end]].name); + texture_map[texture_fifo[texture_fifo_end]].name = 0; + texture_fifo_end = (texture_fifo_end + 1) & 0x7FF; + } + texture_fifo[texture_fifo_start] = index; + texture_fifo_start = (texture_fifo_start + 1) & 0x7FF; +} + +static void draw_vertices(const struct Vertex **v, int count) { + // Get the alpha value and return early if it's 0 (alpha 0 is wireframe on the DS) + // Since the DS only supports one alpha value per polygon, just use the one from the first vertex + const int alpha = ((other_mode_l & (G_BL_A_MEM << 18)) ? 31 : (v[0]->color.a >> 3)); + if (alpha == 0) return; + + // Clear the vertex color if it shouldn't be used + if (!use_color) { + glColor3b(0xFF, 0xFF, 0xFF); + } + + // Clear the texture if it shouldn't be used, or load it if it's dirty + if (!use_texture) { + glBindTexture(GL_TEXTURE_2D, no_texture); + texture_dirty = true; + } else if (texture_dirty) { + load_texture(); + glTexParameter(GL_TEXTURE_2D, tex_params); + texture_dirty = false; + } + + // Apply the polygon attributes + glPolyFmt(poly_fmt | POLY_ALPHA(alpha) | POLY_ID(polygon_id)); + glBegin(GL_TRIANGLE); + + if (geometry_mode & G_ZBUFFER) { + // Incoming vertices expect W to be 1, not 1 << 12 like the DS sets + // This is a hack to scale W values; it's reverted during matrix multiplication to prevent breakage + if (!shrunk) { + const m4x4 shrink = {{ + 1 << 12, 0, 0, 0, + 0, 1 << 12, 0, 0, + 0, 0, 1 << 12, 0, + 0, 0, 0, 1 << 0 + }}; + glMatrixMode(GL_MODELVIEW); + glMultMatrix4x4(&shrink); + shrunk = true; + } + + // Send the vertices to the 3D engine + for (int i = 0; i < count; i++) { + if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b); + if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t); + glVertex3v16(v[i]->x, v[i]->y, v[i]->z); + } + + // As part of the depth hack, move the hijacked Z value to the front once normal
polygons start being sent + // This relies on the assumption that background 2D elements are sent first, and foreground last + if (background) { + z_depth = (128 - 0x1000) * 6; // Room for 128 foreground quads + background = false; + } + } else { + // Since depth test is disabled, 2D elements are likely being drawn and these expect proper multiplication by 1 + // So instead of scaling the W value down, scale the other components up to have proper 12-bit fractionals + const m4x4 enlarge = {{ + 1 << 24, 0, 0, 0, + 0, 1 << 24, 0, 0, + 0, 0, 1 << 24, 0, + 0, 0, 0, 1 << (shrunk ? 24 : 12) + }}; + glMatrixMode(GL_MODELVIEW); + glPushMatrix(); + glMultMatrix4x4(&enlarge); + + for (int i = 0; i < count; i++) { + // Send the vertex attributes to the 3D engine + if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b); + if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t); + + // Use position test to project the vertex so the result can be hijacked before sending it for real + PosTest(v[i]->x, v[i]->y, v[i]->z); + + // Push the current matrices to the stack, and load an identity matrix so the outgoing vertex won't be affected + glPushMatrix(); + glLoadIdentity(); + glMatrixMode(GL_PROJECTION); + glPushMatrix(); + + // Depth test can't be disabled on the DS; this is a problem, since 2D elements are usually drawn this way + // This hack sets decreasing Z values so that these polygons will be properly rendered on top of each other + // Since the W value can't be set directly, use a scaling matrix with a vertex of 1s to send the coordinates + const m4x4 vertex = {{ + PosTestXresult(), 0, 0, 0, + 0, PosTestYresult(), 0, 0, + 0, 0, (--z_depth) / 6, 0, + 0, 0, 0, PosTestWresult() + }}; + glLoadMatrix4x4(&vertex); + glVertex3v16(1 << 12, 1 << 12, 1 << 12); + + // Restore the original matrices + glPopMatrix(1); + glMatrixMode(GL_MODELVIEW); + glPopMatrix(1); + } + + glPopMatrix(1); + } +} + +static uint32_t sqrt_fixed(uint32_t x) { + // Calculate the square root of a 
16-bit fractional fixed point number + uint32_t r = x; + uint32_t b = 0x40000000; + uint32_t q = 0; + while (b > 0x40) { + uint32_t t = q + b; + if (r >= t) { + r -= t; + q = t + b; + } + r <<= 1; + b >>= 1; + } + return q >> 8; +} + +static void g_vtx(Gwords *words) { + const uint8_t count = ((words->w0 >> 12) & 0xFF); + const uint8_t index = ((words->w0 >> 0) & 0xFF) >> 1; + const Vtx *vertices = (const Vtx*)words->w1; + + // Store vertices in the vertex buffer: slot i receives source entry (i - first slot), since the source array starts at entry 0 + for (uint8_t i = index - count; i < index; i++) { + const Vtx_t *v = &vertices[i - (index - count)].v; + const Vtx_tn *n = &vertices[i - (index - count)].n; + + // Set the vertex coordinates + vertex_buffer[i].x = v->ob[0]; + vertex_buffer[i].y = v->ob[1]; + vertex_buffer[i].z = v->ob[2]; + + // Scale the texture coordinates, and shift out an additional bit to get 4-bit fractionals for the DS + vertex_buffer[i].s = (v->tc[0] * texture_scale_s) >> 17; + vertex_buffer[i].t = (v->tc[1] * texture_scale_t) >> 17; + + // Calculate vertex colors for lighting in software + // The DS can *almost* do this in hardware, but the vectors need to be normalized after being transformed + if (geometry_mode & G_LIGHTING) { + // Use the last light as ambient light (or emission, in DS terms) + uint32_t r = lights[num_lights].color.r; + uint32_t g = lights[num_lights].color.g; + uint32_t b = lights[num_lights].color.b; + + // Recalculate transformed light vectors if the lights or modelview matrix changed + if (lights_dirty) { + // Read the current modelview matrix from hardware + int m[12]; + glGetFixed(GL_GET_MATRIX_VECTOR, m); + + for (int i = 0; i < num_lights; i++) { + // Multiply the light vector with the modelview matrix + lights[i].nx = (lights[i].x * m[0] + lights[i].y * m[1] + lights[i].z * m[2]) >> 7; + lights[i].ny = (lights[i].x * m[3] + lights[i].y * m[4] + lights[i].z * m[5]) >> 7; + lights[i].nz = (lights[i].x * m[6] + lights[i].y * m[7] + lights[i].z * m[8]) >> 7; + + // Normalize the result + int s = (lights[i].nx * lights[i].nx + lights[i].ny *
lights[i].ny + lights[i].nz * lights[i].nz) >> 8; + if (s > 0) { + s = sqrt_fixed(s); + lights[i].nx = (lights[i].nx << 16) / s; + lights[i].ny = (lights[i].ny << 16) / s; + lights[i].nz = (lights[i].nz << 16) / s; + } + } + + lights_dirty = false; + } + + // Multiply the light vectors with the vertex's normal to calculate light intensity + for (int i = 2; i < num_lights; i++) { + int intensity = (lights[i].nx * n->n[0] + lights[i].ny * n->n[1] + lights[i].nz * n->n[2]) >> 7; + if (intensity > 0) { + r += (intensity * lights[i].color.r) >> 12; + g += (intensity * lights[i].color.g) >> 12; + b += (intensity * lights[i].color.b) >> 12; + } + } + + // Set the calculated vertex color + vertex_buffer[i].color.r = (r > 0xFF) ? 0xFF : r; + vertex_buffer[i].color.g = (g > 0xFF) ? 0xFF : g; + vertex_buffer[i].color.b = (b > 0xFF) ? 0xFF : b; + + // Generate spherical texture coordinates by multiplying the vertex's normal with the lookat vectors + if (geometry_mode & G_TEXTURE_GEN) { + const int dot_y = (lights[0].nx * n->n[0] + lights[0].ny * n->n[1] + lights[0].nz * n->n[2]) >> 7; + const int dot_x = (lights[1].nx * n->n[0] + lights[1].ny * n->n[1] + lights[1].nz * n->n[2]) >> 7; + vertex_buffer[i].s = ((dot_x + (1 << 12)) * texture_scale_s) >> 15; + vertex_buffer[i].t = ((dot_y + (1 << 12)) * texture_scale_t) >> 15; + } + } else if (use_env_color) { + // Use the environment color as the vertex color if enabled + vertex_buffer[i].color.r = env_color.r; + vertex_buffer[i].color.g = env_color.g; + vertex_buffer[i].color.b = env_color.b; + } else { + // Set the vertex color normally + vertex_buffer[i].color.r = v->cn[0]; + vertex_buffer[i].color.g = v->cn[1]; + vertex_buffer[i].color.b = v->cn[2]; + } + + // Set the vertex alpha, using the environment alpha if enabled + vertex_buffer[i].color.a = (use_env_alpha ?
env_color.a : v->cn[3]); + + // Round texture coordinates (by adding 0.5) if linear filtering is enabled + // The DS can't actually do linear filtering, but this still keeps textures from being slightly misplaced + if ((other_mode_h & (3 << G_MDSFT_TEXTFILT)) != G_TF_POINT) { + vertex_buffer[i].s += 1 << 4; + vertex_buffer[i].t += 1 << 4; + } + } +} + +static void g_tri1(Gwords *words) { + // Draw a triangle + const struct Vertex *v[] = { + &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1], + &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1], + &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1] + }; + draw_vertices(v, 3); +} + +static void g_tri2(Gwords *words) { + // Draw two triangles at once + const struct Vertex *v[] = { + &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1], + &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1], + &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1], + &vertex_buffer[((words->w1 >> 16) & 0xFF) >> 1], + &vertex_buffer[((words->w1 >> 8) & 0xFF) >> 1], + &vertex_buffer[((words->w1 >> 0) & 0xFF) >> 1] + }; + draw_vertices(v, 6); +} + +static void g_texture(Gwords *words) { + // Set the texture scaling factors + texture_scale_s = (words->w1 >> 16) & 0xFFFF; + texture_scale_t = (words->w1 >> 0) & 0xFFFF; +} + +static void g_popmtx(Gwords *words) { + // Pop matrices from the modelview stack + glMatrixMode(GL_MODELVIEW); + glPopMatrix(words->w1 / 64); +} + +static void g_geometrymode(Gwords *words) { + // Clear and set the geometry mode bits + geometry_mode = (geometry_mode & words->w0) | words->w1; + + // Update the polygon culling settings + poly_fmt |= POLY_CULL_NONE; + if (geometry_mode & (1 << 9)) { + poly_fmt &= ~POLY_CULL_BACK; + } + if (geometry_mode & (1 << 10)) { + poly_fmt &= ~POLY_CULL_FRONT; + } +} + +static void g_mtx(Gwords *words) { + // Load a matrix, shifting the elements so they have 12-bit fractionals for the DS + m4x4 matrix; + for (int i = 0; i < 16; i += 2) { + const uint32_t *data = &((uint32_t*)words->w1)[i / 2]; + matrix.m[i + 0] =
((int32_t)((data[0] & 0xFFFF0000) | (data[8] >> 16)) + 8) >> 4; + matrix.m[i + 1] = ((int32_t)((data[0] << 16) | (data[8] & 0x0000FFFF)) + 8) >> 4; + } + + // Perform a matrix operation + const uint8_t params = words->w0 ^ G_MTX_PUSH; + if (params & G_MTX_PROJECTION) { + glMatrixMode(GL_PROJECTION); + + // Load or multiply the projection matrix + if (params & G_MTX_LOAD) { + glLoadMatrix4x4(&matrix); + } else { + glMultMatrix4x4(&matrix); + } + } else { + glMatrixMode(GL_MODELVIEW); + + // Push the current modelview matrix to the stack if requested + if (params & G_MTX_PUSH) { + glPushMatrix(); + } + + // Load or multiply the modelview matrix + if (params & G_MTX_LOAD) { + glLoadMatrix4x4(&matrix); + } else { + // Revert the W value scaling hack so matrix multiplication works properly + if (shrunk) { + const m4x4 enlarge = {{ + 1 << 12, 0, 0, 0, + 0, 1 << 12, 0, 0, + 0, 0, 1 << 12, 0, + 0, 0, 0, 1 << 24 + }}; + glMultMatrix4x4(&enlarge); + } + + glMultMatrix4x4(&matrix); + } + + shrunk = false; + lights_dirty = true; + } +} + +static void g_moveword(Gwords *words) { + // Set values that are normally at specific locations in DMEM + const uint8_t index = (words->w0 >> 16) & 0xFF; + switch (index) { + case G_MW_NUMLIGHT: + // Set the current number of lights, including the lookat vectors + num_lights = (words->w1 / 24) + 2; + break; + + // Unimplemented writes + case G_MW_CLIP: break; + case G_MW_FOG: break; + case G_MW_PERSPNORM: break; + + default: + //printf("Unsupported G_MOVEWORD index: 0x%.2X\n", index); + break; + } +} + +static void g_movemem(Gwords *words) { + // Set a block of values that are normally at specific locations in DMEM + const uint8_t index = (words->w0 >> 0) & 0xFF; + switch (index) { + case G_MV_VIEWPORT: { + // Calculate and set the specified viewport + const Vp_t *vp = (Vp_t*)words->w1; + const uint8_t x2 = ((vp->vscale[0] >> 1) * 255 / 320); + const uint8_t x1 = ((vp->vtrans[0] >> 1) * 255 / 320 - x2) >> 1; + const uint8_t y2 =
((vp->vscale[1] >> 1) * 191 / 240); + const uint8_t y1 = ((vp->vtrans[1] >> 1) * 191 / 240 - y2) >> 1; + glViewport(x1, y1, x2, y2); + break; + } + + case G_MV_LIGHT: { + // Set light parameters + const int index = ((words->w0 >> 8) & 0xFF) / 3; + const Light_t *light = (Light_t*)words->w1; + if (index >= 2) { // Not lookat vectors + lights[index].color.r = light->col[0]; + lights[index].color.g = light->col[1]; + lights[index].color.b = light->col[2]; + } + if (index < num_lights) { // Not ambient light + lights[index].x = light->dir[0]; + lights[index].y = light->dir[1]; + lights[index].z = light->dir[2]; + lights_dirty = true; + } + break; + } + + default: + //printf("Unsupported G_MOVEMEM index: 0x%.2X\n", index); + break; + } +} + +static void g_rdphalf_1(Gwords *words) { + // Set the higher half of the RDP word (holds upper-left texture coordinates for G_TEXRECT) + rdphalf_1 = words->w1; +} + +static void g_setothermode_l(Gwords *words) { + // Set the specified bits in the lower half of the other mode word + const uint8_t bits = ((words->w0 >> 0) & 0xFF) + 1; + const uint8_t shift = 32 - ((words->w0 >> 8) & 0xFF) - bits; + const uint32_t mask = ((1 << bits) - 1) << shift; + other_mode_l = (other_mode_l & ~mask) | (words->w1 & mask); +} + +static void g_setothermode_h(Gwords *words) { + // Set the specified bits in the higher half of the other mode word + const uint8_t bits = ((words->w0 >> 0) & 0xFF) + 1; + const uint8_t shift = 32 - ((words->w0 >> 8) & 0xFF) - bits; + const uint32_t mask = ((1 << bits) - 1) << shift; + other_mode_h = (other_mode_h & ~mask) | (words->w1 & mask); +} + +static void g_texrect(Gwords *words) { + // Store the G_TEXRECT parameters so they can be used after the texture coordinates are set + texrect = *words; +} + +static void g_rdphalf_2(Gwords *words) { + // G_TEXRECT is actually performed here; the texture coordinates must be set in the RDP word before it can begin + + // Get the alpha value and return early if it's 0 (alpha 0 is 
wireframe on the DS) + const int alpha = (use_env_alpha ? (env_color.a >> 3) : 31); + if (alpha == 0) return; + + // Push the current matrices to the stack, and load identity matrices so the outgoing vertices won't be affected + glMatrixMode(GL_MODELVIEW); + glPushMatrix(); + glLoadIdentity(); + glMatrixMode(GL_PROJECTION); + glPushMatrix(); + glLoadIdentity(); + + // Load the texture if it's dirty + if (texture_dirty) { + load_texture(); + glTexParameter(GL_TEXTURE_2D, tex_params); + texture_dirty = false; + } + + // Apply the polygon attributes, using the environment alpha if enabled + glPolyFmt(POLY_CULL_NONE | POLY_ALPHA(alpha)); + glBegin(GL_TRIANGLE); + + // Check if copy mode is enabled; certian rules change if this is the case + // The rectangle dimensions are a pixel bigger, and the S-coordinate change has 2 extra fractional bits(?) + const bool copy = ((other_mode_h & (3 << G_MDSFT_CYCLETYPE)) == G_CYC_COPY); + + // Use the environment color if enabled, or clear the vertex color + if (use_env_color && !copy) { + glColor3b(env_color.r, env_color.g, env_color.b); + } else { + glColor3b(0xFF, 0xFF, 0xFF); + } + + // Get the rectangle dimensions + int16_t x1 = ((texrect.w1 >> 12) & 0xFFF); + int16_t y1 = ((texrect.w1 >> 0) & 0xFFF); + int16_t x2 = ((texrect.w0 >> 12) & 0xFFF) + (copy ? (1 << 2) : 0); + int16_t y2 = ((texrect.w0 >> 0) & 0xFFF) + (copy ? (1 << 2) : 0); + + // Calculate the texture coordinates + const int16_t s1 = (((rdphalf_1 >> 16) & 0xFFFF) >> 1); + const int16_t t1 = (((rdphalf_1 >> 0) & 0xFFFF) >> 1); + const int16_t s2 = s1 + ((((words->w1 >> 16) & 0xFFFF) * (x2 - x1)) >> (copy ? 
10 : 8)); + const int16_t t2 = t1 + ((((words->w1 >> 0) & 0xFFFF) * (y2 - y1)) >> 8); + + // Scale the dimensions to be between -1 and 1 with 12 fractional bits + x1 = (x1 * (2 << 12) / (320 << 2) - (1 << 12)); + y1 = -(y1 * (2 << 12) / (240 << 2) - (1 << 12)); + x2 = (x2 * (2 << 12) / (320 << 2) - (1 << 12)); + y2 = -(y2 * (2 << 12) / (240 << 2) - (1 << 12)); + + // Draw one half of the rectangle, using depth hijacking + glTexCoord2t16(s1, t1); + glVertex3v16(x1, y1, (--z_depth) / 6); + glTexCoord2t16(s1, t2); + glVertex3v16(x1, y2, (--z_depth) / 6); + glTexCoord2t16(s2, t1); + glVertex3v16(x2, y1, (--z_depth) / 6); + + // Draw the other half of the rectangle, using depth hijacking + glTexCoord2t16(s2, t1); + glVertex3v16(x2, y1, (--z_depth) / 6); + glTexCoord2t16(s1, t2); + glVertex3v16(x1, y2, (--z_depth) / 6); + glTexCoord2t16(s2, t2); + glVertex3v16(x2, y2, (--z_depth) / 6); + + // Restore the original matrices + glPopMatrix(1); + glMatrixMode(GL_MODELVIEW); + glPopMatrix(1); +} + +static void g_loadblock(Gwords *words) { + const int tile = (words->w1 >> 24) & 0x07; + if (tile != G_TX_LOADTILE) return; + + // Set the size of the current texture in memory, in bytes + texture_size = (((words->w1 >> 12) & 0xFFF) + 1); + switch (texture_bit_width) { + case G_IM_SIZ_4b: texture_size >>= 1; break; + case G_IM_SIZ_16b: texture_size <<= 1; break; + } +} + +static void g_settile(Gwords *words) { + const int tile = (words->w1 >> 24) & 0x07; + if (tile != G_TX_RENDERTILE) return; + + // Set the texture properties + texture_format = (words->w0 >> 21) & 0x007; + texture_bit_width = (words->w0 >> 19) & 0x003; + texture_row_size = (words->w0 >> 9) & 0x1FF; + const uint8_t cms = (words->w1 >> 8) & 0x003; + const uint8_t cmt = (words->w1 >> 18) & 0x003; + + // Update the texture parameters + tex_params = 0; + if (!(cms & G_TX_CLAMP)) { + tex_params |= GL_TEXTURE_WRAP_S; + if (cms & G_TX_MIRROR) { + tex_params |= GL_TEXTURE_FLIP_S; + } + } + if (!(cmt & G_TX_CLAMP)) { + 
tex_params |= GL_TEXTURE_WRAP_T; + if (cmt & G_TX_MIRROR) { + tex_params |= GL_TEXTURE_FLIP_T; + } + } +} + +static void g_fillrect(Gwords *words) { + // If the color buffer is set to the depth buffer, the game is probably trying to clear it; this can be ignored + if (c_buffer == z_buffer) return; + + // Get the alpha value and return early if it's 0 (alpha 0 is wireframe on the DS) + const int alpha = fill_color.a >> 3; + if (alpha == 0) return; + + // Push the current matrices to the stack, and load identity matrices so the outgoing vertices won't be affected + glMatrixMode(GL_MODELVIEW); + glPushMatrix(); + glLoadIdentity(); + glMatrixMode(GL_PROJECTION); + glPushMatrix(); + glLoadIdentity(); + + // Clear the texture + glBindTexture(GL_TEXTURE_2D, no_texture); + texture_dirty = true; + + // Apply the polygon attributes and the fill color + glPolyFmt(POLY_CULL_NONE | POLY_ALPHA(alpha)); + glBegin(GL_TRIANGLE); + glColor3b(fill_color.r, fill_color.g, fill_color.b); + + // Get the rectangle dimensions, scaled to be between -1 and 1 with 12 fractional bits + const int16_t x1 = ((((words->w1 >> 12) & 0xFFF) + (0 << 2)) * (2 << 12) / (320 << 2) - (1 << 12)); + const int16_t y1 = -((((words->w1 >> 0) & 0xFFF) + (0 << 2)) * (2 << 12) / (240 << 2) - (1 << 12)); + const int16_t x2 = ((((words->w0 >> 12) & 0xFFF) + (1 << 2)) * (2 << 12) / (320 << 2) - (1 << 12)); + const int16_t y2 = -((((words->w0 >> 0) & 0xFFF) + (1 << 2)) * (2 << 12) / (240 << 2) - (1 << 12)); + + // Draw one half of the rectangle, using depth hijacking + glVertex3v16(x1, y1, (--z_depth) / 6); + glVertex3v16(x1, y2, (--z_depth) / 6); + glVertex3v16(x2, y1, (--z_depth) / 6); + + // Draw the other half of the rectangle, using depth hijacking + glVertex3v16(x2, y1, (--z_depth) / 6); + glVertex3v16(x1, y2, (--z_depth) / 6); + glVertex3v16(x2, y2, (--z_depth) / 6); + + // Restore the original matrices + glMatrixMode(GL_PROJECTION); + glPopMatrix(1); + glMatrixMode(GL_MODELVIEW); + glPopMatrix(1); +} + 
+static void g_setfillcolor(Gwords *words) {
+    // Set the fill color (w1 holds R, G, B, A from the highest byte to the lowest)
+    fill_color.r = (words->w1 >> 24) & 0xFF;
+    fill_color.g = (words->w1 >> 16) & 0xFF;
+    fill_color.b = (words->w1 >>  8) & 0xFF;
+    fill_color.a = (words->w1 >>  0) & 0xFF;
+}
+
+static void g_setenvcolor(Gwords *words) {
+    // Set the environment color (w1 holds R, G, B, A from the highest byte to the lowest)
+    env_color.r = (words->w1 >> 24) & 0xFF;
+    env_color.g = (words->w1 >> 16) & 0xFF;
+    env_color.b = (words->w1 >>  8) & 0xFF;
+    env_color.a = (words->w1 >>  0) & 0xFF;
+}
+
+static void g_setcombine(Gwords *words) {
+    const uint8_t a_color = (words->w0 >> 20) & 0x0F;
+    const uint8_t b_color = (words->w1 >> 28) & 0x0F;
+    const uint8_t c_color = (words->w0 >> 15) & 0x1F;
+    const uint8_t d_color = (words->w1 >> 15) & 0x07;
+    //const uint8_t a_alpha = (words->w0 >> 12) & 0x07;
+    //const uint8_t b_alpha = (words->w1 >> 12) & 0x07;
+    const uint8_t c_alpha = (words->w0 >>  9) & 0x07;
+    const uint8_t d_alpha = (words->w1 >>  9) & 0x07;
+
+    // The N64 color combiner works by using the formula (A - B) * C + D, with color and alpha handled separately
+    // The DS is much more limited when it comes to blending; this is just an approximation that seems to work well for SM64
+
+    use_env_color = (c_color == G_CCMUX_ENVIRONMENT || d_color == G_CCMUX_ENVIRONMENT);
+    use_env_alpha = (c_alpha == G_CCMUX_ENVIRONMENT || d_alpha == G_CCMUX_ENVIRONMENT);
+    use_color = use_env_color || (a_color == G_CCMUX_SHADE || b_color == G_CCMUX_SHADE || c_color == G_CCMUX_SHADE || d_color == G_CCMUX_SHADE);
+    use_texture = (a_color == G_CCMUX_TEXEL0 || b_color == G_CCMUX_TEXEL0 || c_color == G_CCMUX_TEXEL0 || d_color == G_CCMUX_TEXEL0);
+
+    if (b_color == d_color) {
+        poly_fmt |= POLY_DECAL;
+
+        // Hack to hide goddard's texture since it can't be properly blended
+        if (a_color == G_CCMUX_PRIMITIVE) {
+            use_texture = false;
+        }
+    } else {
+        poly_fmt &= ~POLY_DECAL;
+    }
+
+    // The DS doesn't draw transparent pixels over other transparent pixels with the same polygon ID
+    // This prevents overlapping artifacts on polygons from the same object, but also breaks blending of separate objects
+    // As a guess of when objects start and end, change the polygon ID every time the color combine settings change
+    polygon_id = (polygon_id + 1) & 0x3F;
+}
+
+static void g_settimg(Gwords *words) {
+    // Set the address of the current texture in memory, and mark it dirty so it is reloaded before use
+    texture_address = (uint8_t*)words->w1;
+    texture_format = (words->w0 >> 21) & 0x07;
+    texture_bit_width = (words->w0 >> 19) & 0x03;
+    texture_dirty = true;
+}
+
+static void g_setzimg(Gwords *words) {
+    // Set the address of the depth buffer
+    // This doesn't matter much on the DS, but it's used to detect attempts to draw to the depth buffer
+    z_buffer = (uint8_t*)words->w1;
+}
+
+static void g_setcimg(Gwords *words) {
+    // Set the address of the color buffer
+    // This doesn't matter much on the DS, but it's used to detect attempts to draw to the depth buffer
+    c_buffer = (uint8_t*)words->w1;
+}
+
+static void execute(Gfx* cmd) {
+    // Interpret a list of Fast3DEX2 commands using the DS hardware, until G_ENDDL is reached
+    while (true) {
+        const uint8_t opcode = cmd->words.w0 >> 24;
+
+        switch (opcode) {
+            case G_VTX: g_vtx(&cmd->words); break;
+            case G_TRI1: g_tri1(&cmd->words); break;
+            case G_TRI2: g_tri2(&cmd->words); break;
+            case G_TEXTURE: g_texture(&cmd->words); break;
+            case G_POPMTX: g_popmtx(&cmd->words); break;
+            case G_GEOMETRYMODE: g_geometrymode(&cmd->words); break;
+            case G_MTX: g_mtx(&cmd->words); break;
+            case G_MOVEWORD: g_moveword(&cmd->words); break;
+            case G_MOVEMEM: g_movemem(&cmd->words); break;
+            case G_RDPHALF_1: g_rdphalf_1(&cmd->words); break;
+            case G_SETOTHERMODE_L: g_setothermode_l(&cmd->words); break;
+            case G_SETOTHERMODE_H: g_setothermode_h(&cmd->words); break;
+            case G_TEXRECT: g_texrect(&cmd->words); break;
+            case G_RDPHALF_2: g_rdphalf_2(&cmd->words); break;
+            case G_LOADBLOCK: g_loadblock(&cmd->words); break;
+            case G_SETTILE: g_settile(&cmd->words); break;
+            case G_FILLRECT: g_fillrect(&cmd->words); break;
+            case G_SETFILLCOLOR: g_setfillcolor(&cmd->words); break;
+            case G_SETENVCOLOR: g_setenvcolor(&cmd->words); break;
+            case G_SETCOMBINE: g_setcombine(&cmd->words); break;
+            case G_SETTIMG: g_settimg(&cmd->words); break;
+            case G_SETZIMG: g_setzimg(&cmd->words); break;
+            case G_SETCIMG: g_setcimg(&cmd->words); break;
+
+            // Opcodes that don't need to do anything
+            case G_RDPLOADSYNC: break;
+            case G_RDPPIPESYNC: break;
+            case G_RDPTILESYNC: break;
+            case G_RDPFULLSYNC: break;
+
+            // Unimplemented opcodes
+            case G_SETSCISSOR: break;
+            case G_SETTILESIZE: break;
+            case G_SETFOGCOLOR: break;
+            case G_SETBLENDCOLOR: break;
+            case G_SETPRIMCOLOR: break;
+
+            case G_DL:
+                // Branch to another display list
+                if (cmd->words.w0 & (1 << 16)) { // Without return
+                    cmd = (Gfx*)cmd->words.w1;
+                    continue;
+                } else { // With return
+                    execute((Gfx*)cmd->words.w1);
+                    break;
+                }
+
+            case G_ENDDL:
+                // Return from the current display list
+                return;
+
+            default:
+                //printf("Unsupported GBI command: 0x%.2X\n", opcode);
+                break;
+        }
+
+        cmd++;
+    }
+}
+
+static void count_frames(void) {
+    // Count a frame (triggered at V-blank)
+    frame_count++;
+}
+
+void renderer_init(void) {
+    // Set up the screens
+    videoSetMode(MODE_0_3D);
+    consoleDemoInit();
+
+    // Initialize the 3D renderer
+    glInit();
+    glClearColor(0, 0, 0, 31);
+    glClearDepth(GL_MAX_DEPTH);
+    glEnable(GL_ANTIALIAS);
+    glEnable(GL_TEXTURE_2D);
+    glEnable(GL_BLEND);
+
+    // Set up texture VRAM (bank C is used by the console)
+    vramSetBankA(VRAM_A_TEXTURE);
+    vramSetBankB(VRAM_B_TEXTURE);
+    vramSetBankD(VRAM_D_TEXTURE);
+    vramSetBankE(VRAM_E_TEX_PALETTE);
+
+    // Generate an empty texture for when no texture should be used
+    glGenTextures(1, &no_texture);
+    glBindTexture(GL_TEXTURE_2D, no_texture);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_NOTEXTURE, 0, 0, 0, TEXGEN_TEXCOORD, NULL);
+
+    // Set up an intensity palette for IA textures (8 gray levels, same 5-bit value in all three channels)
+    uint16_t palette[8];
+    for (int x = 0; x < 8; x++) {
+        const int i = x * 31 / 7;
+        palette[x] = (i << 10) | (i << 5) | i;
+    }
+    glColorTableEXT(GL_TEXTURE_2D, 0, 8, 0, 0, palette);
+
+    // Set up the frame counter to trigger on V-blank
+    irqSet(IRQ_VBLANK, count_frames);
+    irqEnable(IRQ_VBLANK);
+}
+
+void draw_frame(Gfx *display_list) {
+    // Reset the depth hack parameters
+    background = true;
+    z_depth = 0x1000 * 6;
+
+    // Process and draw a frame
+    execute(display_list);
+    glFlush(0);
+
+    // Limit to 30FPS by waiting for up to 2 frames, depending on how long it took the current frame to render
+    for (int i = frame_count; i < 2; i++) {
+        swiWaitForVBlank();
+    }
+
+    // Reset the frame counter
+    frame_count = 0;
+}
diff --git a/src/nds/nds_renderer.h b/src/nds/nds_renderer.h
new file mode 100644
index 00000000..5beea002
--- /dev/null
+++ b/src/nds/nds_renderer.h
@@ -0,0 +1,7 @@
+#ifndef NDS_RENDERER_H
+#define NDS_RENDERER_H
+
+extern void renderer_init(void);
+extern void draw_frame(Gfx *display_list);
+
+#endif // NDS_RENDERER_H
diff --git a/src/nds/ultra_reimplementation.c b/src/nds/ultra_reimplementation.c
new file mode 100644
index 00000000..2b28ea84
--- /dev/null
+++ b/src/nds/ultra_reimplementation.c
@@ -0,0 +1,204 @@
+#include <stdio.h>
+#include <string.h>
+#include "lib/src/libultra_internal.h"
+#include "macros.h"
+
+#ifdef TARGET_WEB
+#include <emscripten.h>
+#endif
+
+extern OSMgrArgs piMgrArgs;
+
+u64 osClockRate = 62500000;
+
+// Cartridge DMA is replaced by a direct memcpy; devAddr is used as an ordinary pointer
+s32 osPiStartDma(UNUSED OSIoMesg *mb, UNUSED s32 priority, UNUSED s32 direction,
+                 uintptr_t devAddr, void *vAddr, size_t nbytes,
+                 UNUSED OSMesgQueue *mq) {
+    memcpy(vAddr, (const void *) devAddr, nbytes);
+    return 0;
+}
+
+// Initialize an empty message queue that stores its messages in msgBuf
+void osCreateMesgQueue(OSMesgQueue *mq, OSMesg *msgBuf, s32 count) {
+    mq->validCount = 0;
+    mq->first = 0;
+    mq->msgCount = count;
+    mq->msg = msgBuf;
+    return;
+}
+
+// Event registration and jammed messages are no-ops in this reimplementation
+void osSetEventMesg(UNUSED OSEvent e, UNUSED OSMesgQueue *mq, UNUSED OSMesg msg) {
+}
+s32 osJamMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
+    return 0;
+}
+// Append msg to the queue (EU builds only): -1 if the queue is full, otherwise 0
+s32 osSendMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
+#ifdef VERSION_EU
+    s32 index;
+    if (mq->validCount >= mq->msgCount) {
+        return -1;
+    }
+    index = (mq->first + mq->validCount) % mq->msgCount;
+    mq->msg[index] = msg;
+    mq->validCount++;
+#endif
+    return 0;
+}
+// Take the oldest message from the queue (EU builds only): -1 if it is empty, otherwise 0
+s32 osRecvMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg *msg, UNUSED s32 flag) {
+#ifdef VERSION_EU
+    if (mq->validCount == 0) {
+        return -1;
+    }
+    if (msg != NULL) {
+        *msg = *(mq->first + mq->msg);
+    }
+    mq->first = (mq->first + 1) % mq->msgCount;
+    mq->validCount--;
+#endif
+    return 0;
+}
+
+uintptr_t osVirtualToPhysical(void *addr) {
+    return (uintptr_t) addr;
+}
+
+// The video interface calls below are no-ops in this reimplementation
+void osCreateViManager(UNUSED OSPri pri) {
+}
+void osViSetMode(UNUSED OSViMode *mode) {
+}
+void osViSetEvent(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED u32 retraceCount) {
+}
+void osViBlack(UNUSED u8 active) {
+}
+void osViSetSpecialFeatures(UNUSED u32 func) {
+}
+void osViSwapBuffer(UNUSED void *vaddr) {
+}
+
+OSTime osGetTime(void) {
+    return 0;
+}
+
+// The data cache maintenance calls below are no-ops in this reimplementation
+void osWritebackDCacheAll(void) {
+}
+
+void osWritebackDCache(UNUSED void *a, UNUSED size_t b) {
+}
+
+void osInvalDCache(UNUSED void *a, UNUSED size_t b) {
+}
+
+// Returns a counter that advances by one per call rather than a hardware cycle count
+u32 osGetCount(void) {
+    static u32 counter;
+    return counter++;
+}
+
+// Returns the frequency obtained from the rounded clock divider, or -1 if that divider is below 0x84
+s32 osAiSetFrequency(u32 freq) {
+    u32 a1;
+    s32 a2;
+    u32 D_8033491C;
+
+#ifdef VERSION_EU
+    D_8033491C = 0x02E6025C;
+#else
+    D_8033491C = 0x02E6D354;
+#endif
+
+    a1 = D_8033491C / (float) freq + .5f;
+
+    if (a1 < 0x84) {
+        return -1;
+    }
+
+    a2 = (a1 / 66) & 0xff;
+    if (a2 > 16) {
+        a2 = 16;
+    }
+
+    return D_8033491C / (s32) a1;
+}
+
+s32 osEepromProbe(UNUSED OSMesgQueue *mq) {
+    return 1;
+}
+
+// Copy nbytes, starting at 8-byte block `address`, out of the 512-byte save image; 0 on success, -1 on failure
+s32 osEepromLongRead(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
+    u8 content[512];
+    s32 ret = -1;
+    if (nbytes < 0 || address * 8 + nbytes > (int) sizeof(content)) { // the range must stay inside the image
+        return -1;
+    }
+
+#ifdef TARGET_WEB
+    if (EM_ASM_INT({
+        var s = localStorage.sm64_save_file;
+        if (s && s.length === 684) {
+            try {
+                var binary = atob(s);
+                if (binary.length === 512) {
+                    for (var i = 0; i < 512; i++) {
+                        HEAPU8[$0 + i] = binary.charCodeAt(i);
+                    }
+                    return 1;
+                }
+            } catch (e) {
+            }
+        }
+        return 0;
+    }, content)) {
+        memcpy(buffer, content + address * 8, nbytes);
+        ret = 0;
+    }
+#else
+    FILE *fp = fopen("sm64_save_file.bin", "rb");
+    if (fp == NULL) {
+        return -1;
+    }
+    if (fread(content, 1, 512, fp) == 512) {
+        memcpy(buffer, content + address * 8, nbytes);
+        ret = 0;
+    }
+    fclose(fp);
+#endif
+    return ret;
+}
+
+// Store nbytes at 8-byte block `address`, rewriting the whole 512-byte save image; 0 on success, -1 on failure
+s32 osEepromLongWrite(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
+    u8 content[512] = {0};
+    if (nbytes < 0 || address * 8 + nbytes > (int) sizeof(content)) { // the range must stay inside the image
+        return -1;
+    }
+    if (address != 0 || nbytes != 512) {
+        osEepromLongRead(mq, 0, content, 512); // best effort: content stays zeroed if no save exists yet
+    }
+    memcpy(content + address * 8, buffer, nbytes);
+
+#ifdef TARGET_WEB
+    EM_ASM({
+        var str = "";
+        for (var i = 0; i < 512; i++) {
+            str += String.fromCharCode(HEAPU8[$0 + i]);
+        }
+        localStorage.sm64_save_file = btoa(str);
+    }, content);
+    s32 ret = 0;
+#else
+    FILE* fp = fopen("sm64_save_file.bin", "wb");
+    if (fp == NULL) {
+        return -1;
+    }
+    s32 ret = fwrite(content, 1, 512, fp) == 512 ? 0 : -1;
+    if (fclose(fp) != 0) { ret = -1; } // fclose flushes buffered data, so a failed close means the save may be incomplete
+#endif
+    return ret;
+}