Initial DS(i) port, take 2

Rebuilt from the ground up; now featuring a custom GBI implementation
designed specifically for DS hardware.
This commit is contained in:
Sean Maas 2021-02-05 21:28:40 -05:00
parent ecd3d152fb
commit f2ff7b97e5
12 changed files with 1423 additions and 18 deletions

View File

@ -16,8 +16,9 @@ DEFINES :=
# 'make clean' may be required first.
# Build for the N64 (turn this off for ports)
TARGET_N64 ?= 1
TARGET_N64 ?= 0
# Build for Nintendo DS
TARGET_NDS ?= 1
# COMPILER - selects the C compiler to use
# ido - uses the SGI IRIS Development Option compiler, which is used to build
@ -57,6 +58,11 @@ else ifeq ($(VERSION),sh)
VERSION_JP_US ?= false
endif
ifeq ($(TARGET_NDS),1)
OPT_FLAGS := -O2
GRUCODE := f3dex2
endif
TARGET := sm64.$(VERSION)
@ -202,8 +208,13 @@ endif
BUILD_DIR_BASE := build
# BUILD_DIR is the location where all build artifacts are placed
ifeq ($(TARGET_NDS),1)
BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)_nds
ROM := $(BUILD_DIR)/$(TARGET).nds
else
BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)
ROM := $(BUILD_DIR)/$(TARGET).z64
endif
ELF := $(BUILD_DIR)/$(TARGET).elf
LIBULTRA := $(BUILD_DIR)/libultra.a
LD_SCRIPT := sm64.ld
@ -214,12 +225,19 @@ ACTOR_DIR := actors
LEVEL_DIRS := $(patsubst levels/%,%,$(dir $(wildcard levels/*/header.h)))
# Directories containing source files
SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin data assets asm lib sound
SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin data assets lib sound
BIN_DIRS := bin bin/$(VERSION)
ULTRA_SRC_DIRS := lib/src lib/src/math lib/asm lib/data
ULTRA_SRC_DIRS := lib/src lib/src/math lib/data
ULTRA_BIN_DIRS := lib/bin
ifeq ($(TARGET_NDS),1)
SRC_DIRS += src/nds
else
SRC_DIRS += asm
ULTRA_SRC_DIRS += lib/asm
endif
GODDARD_SRC_DIRS := src/goddard src/goddard/dynlists
# File dependencies and variables for specific files
@ -232,7 +250,23 @@ S_FILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.s))
ULTRA_C_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.c))
GODDARD_C_FILES := $(foreach dir,$(GODDARD_SRC_DIRS),$(wildcard $(dir)/*.c))
ULTRA_S_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.s))
GENERATED_C_FILES := $(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c
GENERATED_C_FILES := $(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c \
$(addprefix $(BUILD_DIR)/bin/,$(addsuffix _skybox.c,$(notdir $(basename $(wildcard textures/skyboxes/*.png)))))
ifeq ($(TARGET_NDS),1)
C_FILES := $(filter-out src/game/main.c,$(C_FILES))
ULTRA_C_FILES := \
alBnkfNew.c \
guLookAtRef.c \
guMtxF2L.c \
guNormalize.c \
guOrthoF.c \
guPerspectiveF.c \
guRotateF.c \
guScaleF.c \
guTranslateF.c
ULTRA_C_FILES := $(addprefix lib/src/,$(ULTRA_C_FILES))
endif
# Sound files
SOUND_BANK_FILES := $(wildcard sound/sound_banks/*.json)
@ -277,6 +311,16 @@ endif
# Compiler Options #
#==============================================================================#
ifeq ($(TARGET_NDS),1)
AS := $(DEVKITARM)/bin/arm-none-eabi-as
CC := $(DEVKITARM)/bin/arm-none-eabi-gcc
CPP := $(DEVKITARM)/bin/arm-none-eabi-cpp -P
CXX := $(DEVKITARM)/bin/arm-none-eabi-g++
LD := $(CXX)
OBJDUMP := $(DEVKITARM)/bin/arm-none-eabi-objdump
OBJCOPY := $(DEVKITARM)/bin/arm-none-eabi-objcopy
else
# detect prefix for MIPS toolchain
ifneq ($(call find-command,mips-linux-gnu-ld),)
CROSS := mips-linux-gnu-
@ -315,6 +359,8 @@ AR := $(CROSS)ar
OBJDUMP := $(CROSS)objdump
OBJCOPY := $(CROSS)objcopy
endif
ifeq ($(TARGET_N64),1)
TARGET_CFLAGS := -nostdinc -DTARGET_N64 -D_LANGUAGE_C
CC_CFLAGS := -fno-builtin
@ -328,6 +374,21 @@ endif
C_DEFINES := $(foreach d,$(DEFINES),-D$(d))
DEF_INC_CFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(C_DEFINES)
ifeq ($(TARGET_NDS),1)
LIBDIRS := $(DEVKITPRO)/libnds
TARGET_CFLAGS := -march=armv5te -mtune=arm946e-s -fomit-frame-pointer -ffast-math $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM9 -D_LANGUAGE_C -DNO_SEGMENTED_MEMORY -DLIBFAT
TARGET_LDFLAGS := -lfat -lnds9 -specs=dsi_arm9.specs -g -mthumb -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
CC_CHECK := $(CC)
CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(TARGET_CFLAGS) -Wall -Wextra -Wno-format-security -DNON_MATCHING -DAVOID_UB $(DEF_INC_CFLAGS)
ASFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(foreach d,$(DEFINES),--defsym $(d))
CFLAGS := -fno-strict-aliasing -fwrapv $(OPT_FLAGS) $(TARGET_CFLAGS) $(DEF_INC_CFLAGS)
LDFLAGS := $(TARGET_LDFLAGS)
else
# Check code syntax with host compiler
CC_CHECK := gcc
CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(TARGET_CFLAGS) -std=gnu90 -Wall -Wextra -Wno-format-security -Wno-main -DNON_MATCHING -DAVOID_UB $(DEF_INC_CFLAGS)
@ -357,6 +418,8 @@ endif
# Prevent a crash with -sopt
export LANG := C
endif
#==============================================================================#
# Miscellaneous Tools #
#==============================================================================#
@ -509,6 +572,7 @@ $(BUILD_DIR)/%.ci4: %.ci4.png
# Compressed Segment Generation #
#==============================================================================#
ifeq ($(TARGET_N64),1)
# Link segment file to resolve external labels
# TODO: ideally this would be `-Trodata-segment=0x07000000` but that doesn't set the address
$(BUILD_DIR)/%.elf: $(BUILD_DIR)/%.o
@ -537,6 +601,7 @@ $(BUILD_DIR)/%.mio0: $(BUILD_DIR)/%.bin
$(BUILD_DIR)/%.mio0.o: $(BUILD_DIR)/%.mio0
$(call print,Converting MIO0 to ELF:,$<,$@)
$(V)printf ".section .data\n\n.incbin \"$<\"\n" | $(AS) $(ASFLAGS) -o $@
endif
#==============================================================================#
@ -727,6 +792,13 @@ $(BUILD_DIR)/rsp/%.bin $(BUILD_DIR)/rsp/%_data.bin: rsp/%.s
$(call print,Assembling:,$<,$@)
$(V)$(RSPASM) -sym $@.sym $(RSPASMFLAGS) -strequ CODE_FILE $(BUILD_DIR)/rsp/$*.bin -strequ DATA_FILE $(BUILD_DIR)/rsp/$*_data.bin $<
# Build NDS ROM
ifeq ($(TARGET_NDS),1)
# Link the game, libultra and goddard objects into an ELF named $@.elf, then
# hand that ELF to ndstool (-9) to create (-c) the ROM file $@.
# NOTE(review): $(MIO0_FILES:.mio0=.o) is a prerequisite but is not passed to
# the link command below - confirm whether that is intentional.
$(ROM): $(O_FILES) $(MIO0_FILES:.mio0=.o) $(ULTRA_O_FILES) $(GODDARD_O_FILES)
$(LD) -L $(BUILD_DIR) -o $@.elf $(O_FILES) $(ULTRA_O_FILES) $(GODDARD_O_FILES) $(LDFLAGS)
ndstool -c $@ -9 $@.elf
else
# Run linker script through the C preprocessor
$(BUILD_DIR)/$(LD_SCRIPT): $(LD_SCRIPT)
$(call print,Preprocessing linker script:,$<,$@)
@ -757,6 +829,7 @@ $(ROM): $(ELF)
$(BUILD_DIR)/$(TARGET).objdump: $(ELF)
$(OBJDUMP) -D $< > $@
endif
.PHONY: all clean distclean default diff test load libultra

View File

@ -95,13 +95,13 @@ chan_dyncall
.poll_023589:
chan_delay1
chan_ioreadval 0
chan_bltz .skip_023589 # if we have a signal:
chan_beqz .force_stop_023589 # told to stop
chan_jump .start_playing_023589 # told to play something else
chan_bltz .skip_023589 /* if we have a signal: */
chan_beqz .force_stop_023589 /* told to stop */
chan_jump .start_playing_023589 /* told to play something else */
.skip_023589:
chan_testlayerfinished 0
chan_beqz .poll_023589 # if layer 0 hasn't finished, keep polling
chan_jump .main_loop_023589 # otherwise go back to the main loop
chan_beqz .poll_023589 /* if layer 0 hasn't finished, keep polling */
chan_jump .main_loop_023589 /* otherwise go back to the main loop */
.force_stop_023589:
chan_freelayer 0
chan_freelayer 1
@ -202,15 +202,15 @@ chan_dyncall
.poll_7:
chan_delay1
chan_ioreadval 0
chan_bltz .skip_7 # if we have a signal:
chan_beqz .force_stop_7 # told to stop
chan_bltz .skip_7 /* if we have a signal: */
chan_beqz .force_stop_7 /* told to stop */
chan_unreservenotes
chan_jump .start_playing_7 # told to play something else
chan_jump .start_playing_7 /* told to play something else */
.skip_7:
chan_testlayerfinished 0
chan_beqz .poll_7 # if layer 0 hasn't finished, keep polling
chan_beqz .poll_7 /* if layer 0 hasn't finished, keep polling */
chan_unreservenotes
chan_jump .main_loop_7 # otherwise go back to the main loop
chan_jump .main_loop_7 /* otherwise go back to the main loop */
.force_stop_7:
chan_freelayer 0
chan_freelayer 1
@ -233,10 +233,10 @@ chan_end
chan_setpanmix 127
chan_setvolscale 127
chan_setvibratoextent 0
chan_ioreadval 1 # IO slots 0-3 are reset to -1 when read; restore the value
chan_ioreadval 1 /* IO slots 0-3 are reset to -1 when read; restore the value */
chan_iowriteval 0
chan_break # break out of the loop
chan_break # force the caller to return immediately
chan_break /* break out of the loop */
chan_break /* force the caller to return immediately */
chan_end
# Set reverb in way that takes area echo level and volume into account. This

View File

@ -712,6 +712,7 @@ void func_eu_802e9bec(s32 player, s32 channel, s32 arg2) {
* Called from threads: thread4_sound
*/
struct SPTask *create_next_audio_frame_task(void) {
#ifdef TARGET_N64
u32 samplesRemainingInAI;
s32 writtenCmds;
s32 index;
@ -813,6 +814,9 @@ struct SPTask *create_next_audio_frame_task(void) {
decrease_sample_dma_ttls();
return gAudioTask;
#else
return NULL;
#endif
}
#endif

View File

@ -1088,10 +1088,12 @@ s32 audio_shut_down_and_reset_step(void) {
*/
void wait_for_audio_frames(s32 frames) {
    gAudioFrameCount = 0;
#ifdef TARGET_N64
    // Sound thread will update gAudioFrameCount
    while (gAudioFrameCount < frames) {
        // spin
    }
#else
    // Non-N64 builds do not wait and return right after resetting the counter;
    // reference the parameter so it does not trigger an unused-parameter warning
    (void) frames;
#endif
}
#endif

View File

@ -35,6 +35,7 @@ s32 audio_shut_down_and_reset_step(void);
void func_802ad7ec(u32);
struct SPTask *create_next_audio_frame_task(void) {
#ifdef TARGET_N64
u32 samplesRemainingInAI;
s32 writtenCmds;
s32 index;
@ -129,6 +130,9 @@ struct SPTask *create_next_audio_frame_task(void) {
task->yield_data_ptr = NULL;
task->yield_data_size = 0;
return gAudioTask;
#else
return NULL;
#endif
}
void eu_process_audio_cmd(struct EuAudioCmd *cmd) {

View File

@ -1,4 +1,7 @@
#include <PR/ultratypes.h>
#ifndef TARGET_N64
#include <string.h>
#endif
#include "sm64.h"
@ -244,6 +247,7 @@ u32 main_pool_pop_state(void) {
* function blocks until completion.
*/
static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
#ifdef TARGET_N64
u32 size = ALIGN16(srcEnd - srcStart);
osInvalDCache(dest, size);
@ -258,6 +262,9 @@ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
srcStart += copySize;
size -= copySize;
}
#else
memcpy(dest, srcStart, srcEnd - srcStart);
#endif
}
/**

56
src/nds/nds_controller.c Normal file
View File

@ -0,0 +1,56 @@
#include "nds_include.h"
#include "lib/src/osContInternal.h"
// Controller init stub: writes 1 to the controller bit mask (only bit 0 set) and returns 0.
// The message queue and status arguments are ignored.
s32 osContInit(UNUSED OSMesgQueue *mq, u8 *controllerBits, UNUSED OSContStatus *status) {
    const u8 connected_mask = 1;

    *controllerBits = connected_mask;
    return 0;
}
// Stub: nothing is started here and the message queue is ignored; always returns 0.
// The DS keys are polled directly in osContGetReadData.
s32 osContStartReadData(UNUSED OSMesgQueue *mesg) {
return 0;
}
// Polls the DS keys and fills in the button mask and stick position of `pad`.
// Each held face/shoulder key ORs in one N64 button; the D-pad drives the stick at +/-80.
void osContGetReadData(OSContPad *pad) {
    // DS key -> N64 button pairs
    static const struct {
        u32 key;
        u16 button;
    } button_map[] = {
        { KEY_A,     A_BUTTON     },
        { KEY_B,     B_BUTTON     },
        { KEY_X,     U_CBUTTONS   },
        { KEY_Y,     D_CBUTTONS   },
        { KEY_START, START_BUTTON },
        { KEY_L,     R_TRIG       },
        { KEY_R,     Z_TRIG       },
    };

    scanKeys();
    const u32 held = keysHeld();

    // Collect the button bits for every held key
    u16 buttons = 0;
    for (unsigned int i = 0; i < sizeof(button_map) / sizeof(button_map[0]); i++) {
        if (held & button_map[i].key) {
            buttons |= button_map[i].button;
        }
    }
    pad->button = buttons;

    // When opposite directions are held together, right wins over left and down wins over up
    pad->stick_x = (held & KEY_RIGHT) ? 80 : ((held & KEY_LEFT) ? -80 : 0);
    pad->stick_y = (held & KEY_DOWN) ? -80 : ((held & KEY_UP) ? 80 : 0);
}

26
src/nds/nds_include.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef NDS_INCLUDE_H
#define NDS_INCLUDE_H

// Workaround for libnds type redefinitions:
// while <nds.h> is being included, each short integer type name is remapped to an
// underscore-prefixed name so that the declarations made by libnds do not clash with the
// identically named types used by the rest of the code. Every remapping is removed again
// right after the include so the names keep their usual meaning for code that follows.
#define u64 _u64
#define s64 _s64
#define u32 _u32
#define vu32 _vu32
#define vs32 _vs32
#define s32 _s32
#define u16 _u16
#define s16 _s16
#define u8 _u8
#define s8 _s8
#include <nds.h>
#undef u64
#undef s64
#undef u32
#undef vu32
#undef vs32
#undef s32
#undef u16
#undef s16
#undef u8
// s8 must be released as well, otherwise the remapping leaks into every file including this header
#undef s8

#endif // NDS_INCLUDE_H

48
src/nds/nds_main.c Normal file
View File

@ -0,0 +1,48 @@
#include <stdio.h>
#include "nds_include.h"
#include <fat.h>
#include "audio/external.h"
#include "game/game_init.h"
#include "nds_renderer.h"
// Globals defined here for the NDS build.
// NOTE(review): presumably these stand in for definitions from src/game/main.c, which the
// Makefile filters out of C_FILES when TARGET_NDS is set - confirm against that file.
OSMesg D_80339BEC;
OSMesgQueue gSIEventMesgQueue;
s8 gResetTimer;
s8 D_8032C648;
s8 gDebugLevelSelect;
s8 gShowProfiler;
s8 gShowDebugText;
// Empty stub: all arguments are ignored and no handler is registered.
void set_vblank_handler(UNUSED s32 index, UNUSED struct VblankHandler *handler, UNUSED OSMesgQueue *queue, UNUSED OSMesg *msg) {
}
// Empty stub: the audio task is ignored.
void dispatch_audio_sptask(UNUSED struct SPTask *spTask) {
}
void send_display_list(struct SPTask *spTask) {
draw_frame((Gfx*)spTask->task.t.data_ptr);
}
int main(void) {
static u64 pool[0x165000 / sizeof(u64)];
main_pool_init(pool, pool + sizeof(pool) / sizeof(pool[0]));
gEffectsMemoryPool = mem_pool_init(0x4000, MEMORY_POOL_LEFT);
renderer_init();
#ifdef LIBFAT
if (!fatInitDefault()) {
printf("Failed to initialize libfat!\n");
}
#endif
audio_init();
sound_init();
thread5_game_loop(NULL);
return 0;
}

991
src/nds/nds_renderer.c Normal file
View File

@ -0,0 +1,991 @@
#include <stdio.h>
#include <PR/gbi.h>
#include "nds_include.h"
#include <nds/arm9/postest.h>
#include "nds_renderer.h"
// 8-bit-per-channel color
struct Color {
    uint8_t r;
    uint8_t g;
    uint8_t b;
    uint8_t a;
};

// One entry of the vertex buffer
struct Vertex {
    // Position
    int16_t x;
    int16_t y;
    int16_t z;

    // Texture coordinates
    int16_t s;
    int16_t t;

    struct Color color;
};

// One entry of the texture lookup table
struct Texture {
    uint8_t *original;  // Source address the texture was loaded from (lookup key)
    uint8_t *converted; // Copy of the texture data converted for the DS
    int name;           // Texture name used with glBindTexture
    uint8_t type;       // Texture format passed to glTexImage2D
    uint8_t size_x;     // TEXTURE_SIZE_* value for the width
    uint8_t size_y;     // TEXTURE_SIZE_* value for the height
};

// One light (or lookat vector) slot
struct Light {
    // Direction after being transformed and normalized
    int16_t nx;
    int16_t ny;
    int16_t nz;

    // Direction as supplied
    int8_t x;
    int8_t y;
    int8_t z;

    struct Color color;
};
// Colors set by display list commands; env_color can replace vertex colors/alpha (see g_vtx)
static struct Color env_color;
static struct Color fill_color;
// Vertices loaded by g_vtx and referenced by index from g_tri1/g_tri2
static struct Vertex vertex_buffer[16];
// Hash table of known textures; load_texture indexes it with an 11-bit hash (2048 slots)
static struct Texture texture_map[2048];
// Slots 0 and 1 hold the lookat vectors, the following slots hold lights,
// and lights[num_lights] is used as the ambient light (see g_vtx and g_movemem)
static struct Light lights[5];
// Ring buffer of texture_map indices in upload order; load_texture evicts from the end
// NOTE(review): these three are not static, unlike the rest - confirm whether another file uses them
uint16_t texture_fifo[2048];
uint16_t texture_fifo_start;
uint16_t texture_fifo_end;
// Parameters of the currently selected texture, consumed by load_texture
static uint8_t *texture_address;
static uint8_t texture_format;
static uint8_t texture_bit_width;
static uint16_t texture_row_size;
static uint16_t texture_size;
// Texture coordinate scaling factors, set by g_texture
static uint16_t texture_scale_s;
static uint16_t texture_scale_t;
// Raw mode words maintained by the matching command handlers
static uint32_t geometry_mode;
static uint32_t rdphalf_1;
static uint32_t other_mode_l;
static uint32_t other_mode_h;
// Last G_TEXRECT command, stored by g_texrect until it can be executed
static Gwords texrect;
static uint8_t *z_buffer;
static uint8_t *c_buffer;
// Set when the bound texture / transformed light vectors need to be refreshed
static bool texture_dirty;
static bool lights_dirty;
static int num_lights;
// Polygon attributes applied with glPolyFmt / glTexParameter when drawing
static int polygon_id;
static int poly_fmt;
static int tex_params;
static bool use_color;
static bool use_texture;
static bool use_env_color;
static bool use_env_alpha;
// True while the W-scaling matrix from draw_vertices is applied to the modelview matrix
static bool shrunk;
// State of the depth hack used for polygons drawn without depth testing (see draw_vertices)
static bool background;
static int32_t z_depth;
// Texture name bound when no texture should be (or can be) used
static int no_texture;
static int frame_count;
// Binds the texture located at texture_address, converting and uploading it first if needed.
// texture_map is used as a hash table with linear probing, keyed by the source address.
// texture_fifo records uploads in order so that the oldest ones can be deleted from VRAM
// whenever glTexImage2D reports failure.
// NOTE(review): malloc results are not checked, and the probing loop assumes the table
// always has a free slot - confirm both are acceptable.
static void load_texture() {
// Look up the current texture using a simple hash calculated from its address
uint32_t index = ((uint32_t)texture_address >> 5) & 0x7FF;
while (texture_map[index].original != texture_address && texture_map[index].original != NULL) {
index = (index + 1) & 0x7FF;
}
struct Texture *cur = &texture_map[index];
// Load the texture if it was found
if (cur->original != NULL) {
// A nonzero name means the entry can be bound as-is
if (cur->name) {
glBindTexture(GL_TEXTURE_2D, cur->name);
return;
}
// Copy the texture back into VRAM if it was pushed out, pushing out other textures if necessary
glGenTextures(1, &cur->name);
glBindTexture(GL_TEXTURE_2D, cur->name);
while (!glTexImage2D(GL_TEXTURE_2D, 0, cur->type, cur->size_x, cur->size_y, 0, TEXGEN_TEXCOORD, cur->converted)) {
// Upload failed: delete the oldest uploaded texture and mark it as pushed out (name 0)
glDeleteTextures(1, &texture_map[texture_fifo[texture_fifo_end]].name);
texture_map[texture_fifo[texture_fifo_end]].name = 0;
texture_fifo_end = (texture_fifo_end + 1) & 0x7FF;
}
// Record this upload at the head of the FIFO
texture_fifo[texture_fifo_start] = index;
texture_fifo_start = (texture_fifo_start + 1) & 0x7FF;
return;
}
// Not found: claim the free slot for this address
cur->original = texture_address;
// Determine the width of the new texture
const int width = texture_row_size << (4 - texture_bit_width);
switch (width) {
case 8: cur->size_x = TEXTURE_SIZE_8; break;
case 16: cur->size_x = TEXTURE_SIZE_16; break;
case 32: cur->size_x = TEXTURE_SIZE_32; break;
case 64: cur->size_x = TEXTURE_SIZE_64; break;
case 128: cur->size_x = TEXTURE_SIZE_128; break;
default:
// Unsupported: the entry keeps its address but is permanently mapped to no_texture
//printf("Unsupported texture width: %d\n", width);
glBindTexture(GL_TEXTURE_2D, cur->name = no_texture);
return;
};
// Determine the height of the new texture
const int height = ((texture_size << 1) >> texture_bit_width) / width;
switch (height) {
case 8: cur->size_y = TEXTURE_SIZE_8; break;
case 16: cur->size_y = TEXTURE_SIZE_16; break;
case 32: cur->size_y = TEXTURE_SIZE_32; break;
case 64: cur->size_y = TEXTURE_SIZE_64; break;
case 128: cur->size_y = TEXTURE_SIZE_128; break;
default:
//printf("Unsupported texture height: %d\n", height);
glBindTexture(GL_TEXTURE_2D, cur->name = no_texture);
return;
};
// Convert the texture to a format the DS understands
switch (texture_format) {
case G_IM_FMT_RGBA:
switch (texture_bit_width) {
case G_IM_SIZ_16b:
// Source pixels are big-endian with R in bits 15-11, G in 10-6, B in 5-1 and A in bit 0;
// the output has A in bit 15, B in 14-10, G in 9-5 and R in 4-0
cur->converted = (uint8_t*)malloc(texture_size);
for (uint32_t x = 0; x < texture_size / 2; x++) {
const uint16_t color = (texture_address[x * 2] << 8) | texture_address[x * 2 + 1];
const uint8_t r = ((color >> 11) & 0x1F);
const uint8_t g = ((color >> 6) & 0x1F);
const uint8_t b = ((color >> 1) & 0x1F);
const uint8_t a = ((color >> 0) & 0x01);
((uint16_t*)cur->converted)[x] = (a << 15) | (b << 10) | (g << 5) | r;
}
DC_FlushRange(cur->converted, texture_size);
cur->type = GL_RGBA;
break;
default:
//printf("Unsupported RGBA texture bit width: %d\n", texture_bit_width);
glBindTexture(GL_TEXTURE_2D, cur->name = no_texture);
return;
}
break;
case G_IM_FMT_IA:
// Every IA variant is converted to one byte per pixel: 5-bit alpha above a 3-bit intensity
switch (texture_bit_width) {
case G_IM_SIZ_4b:
// Two source pixels per byte (high nibble first): 3 intensity bits and 1 alpha bit each
cur->converted = (uint8_t*)malloc(texture_size * 2);
for (uint32_t x = 0; x < texture_size * 2; x++) {
const uint8_t color = (texture_address[x / 2] >> ((x & 1) ? 0 : 4)) & 0x0F;
const uint8_t i = ((color >> 1) & 0x07);
const uint8_t a = ((color >> 0) & 0x01) ? 31 : 0;
cur->converted[x] = (a << 3) | i;
}
DC_FlushRange(cur->converted, texture_size * 2);
cur->type = GL_RGB8_A5;
break;
case G_IM_SIZ_8b:
// One source pixel per byte: 4 intensity bits and 4 alpha bits, rescaled to 3 and 5 bits
cur->converted = (uint8_t*)malloc(texture_size);
for (uint32_t x = 0; x < texture_size; x++) {
const uint8_t color = texture_address[x];
const uint8_t i = ((color >> 4) & 0x0F) * 7 / 15;
const uint8_t a = ((color >> 0) & 0x0F) * 31 / 15;
cur->converted[x] = (a << 3) | i;
}
DC_FlushRange(cur->converted, texture_size);
cur->type = GL_RGB8_A5;
break;
case G_IM_SIZ_16b:
// Two source bytes per pixel: 8 intensity bits then 8 alpha bits, rescaled to 3 and 5 bits
cur->converted = (uint8_t*)malloc(texture_size / 2);
for (uint32_t x = 0; x < texture_size / 2; x++) {
const uint8_t i = texture_address[x * 2 + 0] * 7 / 255;
const uint8_t a = texture_address[x * 2 + 1] * 31 / 255;
cur->converted[x] = (a << 3) | i;
}
DC_FlushRange(cur->converted, texture_size / 2);
cur->type = GL_RGB8_A5;
break;
default:
//printf("Unsupported IA texture bit width: %d\n", texture_bit_width);
glBindTexture(GL_TEXTURE_2D, cur->name = no_texture);
return;
}
break;
default:
//printf("Unsupported texture format: %d\n", texture_format);
glBindTexture(GL_TEXTURE_2D, cur->name = no_texture);
return;
}
// Copy the texture into VRAM, pushing out other textures if necessary
glGenTextures(1, &cur->name);
glBindTexture(GL_TEXTURE_2D, cur->name);
while (!glTexImage2D(GL_TEXTURE_2D, 0, cur->type, cur->size_x, cur->size_y, 0, TEXGEN_TEXCOORD, cur->converted)) {
glDeleteTextures(1, &texture_map[texture_fifo[texture_fifo_end]].name);
texture_map[texture_fifo[texture_fifo_end]].name = 0;
texture_fifo_end = (texture_fifo_end + 1) & 0x7FF;
}
texture_fifo[texture_fifo_start] = index;
texture_fifo_start = (texture_fifo_start + 1) & 0x7FF;
}
// Sends `count` vertices to the 3D engine as triangles, applying the current color,
// texture and polygon settings first.
//   v:     array of pointers to the vertices to draw (g_tri1 passes 3, g_tri2 passes 6)
//   count: number of entries in v
// With G_ZBUFFER set the vertices are sent directly; otherwise each vertex is projected with
// a position test and re-sent with a hijacked, steadily decreasing depth value.
static void draw_vertices(const struct Vertex **v, int count) {
// Get the alpha value and return early if it's 0 (alpha 0 is wireframe on the DS)
// Since the DS only supports one alpha value per polygon, just use the one from first vertex
const int alpha = ((other_mode_l & (G_BL_A_MEM << 18)) ? 31 : (v[0]->color.a >> 3));
if (alpha == 0) return;
// Clear the vertex color if it shouldn't be used
if (!use_color) {
glColor3b(0xFF, 0xFF, 0xFF);
}
// Clear the texture if it shouldn't be used, or load it if it's dirty
if (!use_texture) {
glBindTexture(GL_TEXTURE_2D, no_texture);
// The real texture is no longer bound, so force a reload the next time one is needed
texture_dirty = true;
} else if (texture_dirty) {
load_texture();
glTexParameter(GL_TEXTURE_2D, tex_params);
texture_dirty = false;
}
// Apply the polygon attributes
glPolyFmt(poly_fmt | POLY_ALPHA(alpha) | POLY_ID(polygon_id));
glBegin(GL_TRIANGLE);
if (geometry_mode & G_ZBUFFER) {
// Incoming vertices expect W to be 1, not 1 << 12 like the DS sets
// This is a hack to scale W values; it's reverted during matrix multiplication to prevent breakage
if (!shrunk) {
const m4x4 shrink = {{
1 << 12, 0, 0, 0,
0, 1 << 12, 0, 0,
0, 0, 1 << 12, 0,
0, 0, 0, 1 << 0
}};
glMatrixMode(GL_MODELVIEW);
glMultMatrix4x4(&shrink);
shrunk = true;
}
// Send the vertices to the 3D engine
for (int i = 0; i < count; i++) {
if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b);
if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t);
glVertex3v16(v[i]->x, v[i]->y, v[i]->z);
}
// As part of the depth hack, move the hijacked Z value to the front once normal polygons start being sent
// This relies on the assumption that background 2D elements are sent first, and foreground last
if (background) {
z_depth = (128 - 0x1000) * 6; // Room for 128 foreground quads
background = false;
}
} else {
// Since depth test is disabled, 2D elements are likely being drawn and these expect proper multiplication by 1
// So instead of scaling the W value down, scale the other components up to have proper 12-bit fractionals
const m4x4 enlarge = {{
1 << 24, 0, 0, 0,
0, 1 << 24, 0, 0,
0, 0, 1 << 24, 0,
0, 0, 0, 1 << (shrunk ? 24 : 12)
}};
glMatrixMode(GL_MODELVIEW);
// Saved here and restored by the final glPopMatrix(1) below, so the scaling is temporary
glPushMatrix();
glMultMatrix4x4(&enlarge);
for (int i = 0; i < count; i++) {
// Send the vertex attributes to the 3D engine
if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b);
if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t);
// Use position test to project the vertex so the result can be hijacked before sending it for real
PosTest(v[i]->x, v[i]->y, v[i]->z);
// Push the current matrices to the stack, and load an identity matrix so the outgoing vertex won't be affected
glPushMatrix();
glLoadIdentity();
glMatrixMode(GL_PROJECTION);
glPushMatrix();
// Depth test can't be disabled on the DS; this is a problem, since 2D elements are usually drawn this way
// This hack sets decreasing Z values so that these polygons will be properly rendered on top of each other
// Since the W value can't be set directly, use a scaling matrix with a vertex of 1s to send the coordinates
const m4x4 vertex = {{
PosTestXresult(), 0, 0, 0,
0, PosTestYresult(), 0, 0,
0, 0, (--z_depth) / 6, 0,
0, 0, 0, PosTestWresult()
}};
glLoadMatrix4x4(&vertex);
glVertex3v16(1 << 12, 1 << 12, 1 << 12);
// Restore the original matrices
glPopMatrix(1);
glMatrixMode(GL_MODELVIEW);
glPopMatrix(1);
}
glPopMatrix(1);
}
}
// Square root of a fixed point number with 16 fractional bits, computed one result bit
// at a time; the result also has 16 fractional bits.
static uint32_t sqrt_fixed(uint32_t x) {
    uint32_t remainder = x;
    uint32_t root = 0;

    // Try each result bit from the highest down, keeping it whenever it still fits
    for (uint32_t bit = 0x40000000; bit > 0x40; bit >>= 1) {
        const uint32_t trial = root + bit;
        if (remainder >= trial) {
            remainder -= trial;
            root = trial + bit;
        }
        remainder <<= 1;
    }

    return root >> 8;
}
// Handles a vertex load: copies vertices from memory into vertex_buffer, computing their
// texture coordinates and colors (with software lighting when G_LIGHTING is set).
//   words->w0: vertex count in bits 12-19; bits 0-7 hold (end slot << 1), where the end slot
//              is one past the last buffer slot written
//   words->w1: address of the first source vertex
static void g_vtx(Gwords *words) {
    const uint8_t count = ((words->w0 >> 12) & 0xFF);
    const uint8_t index = ((words->w0 >>  0) & 0xFF) >> 1;
    const Vtx *vertices = (const Vtx*)words->w1;

    // First destination slot; the source array always starts at its own element 0,
    // so source elements are addressed relative to this slot
    const uint8_t first = index - count;

    // Store vertices in the vertex buffer
    for (uint8_t i = first; i < index; i++) {
        const Vtx_t *v = &vertices[i - first].v;
        const Vtx_tn *n = &vertices[i - first].n;

        // Set the vertex coordinates
        vertex_buffer[i].x = v->ob[0];
        vertex_buffer[i].y = v->ob[1];
        vertex_buffer[i].z = v->ob[2];

        // Scale the texture coordinates, and shift out an additional bit to get 4-bit fractionals for the DS
        vertex_buffer[i].s = (v->tc[0] * texture_scale_s) >> 17;
        vertex_buffer[i].t = (v->tc[1] * texture_scale_t) >> 17;

        // Calculate vertex colors for lighting in software
        // The DS can *almost* do this in hardware, but the vectors need to be normalized after being transformed
        if (geometry_mode & G_LIGHTING) {
            // Use the last light as ambient light (or emission, in DS terms)
            uint32_t r = lights[num_lights].color.r;
            uint32_t g = lights[num_lights].color.g;
            uint32_t b = lights[num_lights].color.b;

            // Recalculate transformed light vectors if the lights or modelview matrix changed
            if (lights_dirty) {
                // Read the current modelview matrix from hardware
                int m[12];
                glGetFixed(GL_GET_MATRIX_VECTOR, m);

                for (int l = 0; l < num_lights; l++) {
                    // Multiply the light vector with the modelview matrix
                    lights[l].nx = (lights[l].x * m[0] + lights[l].y * m[1] + lights[l].z * m[2]) >> 7;
                    lights[l].ny = (lights[l].x * m[3] + lights[l].y * m[4] + lights[l].z * m[5]) >> 7;
                    lights[l].nz = (lights[l].x * m[6] + lights[l].y * m[7] + lights[l].z * m[8]) >> 7;

                    // Normalize the result
                    int s = (lights[l].nx * lights[l].nx + lights[l].ny * lights[l].ny + lights[l].nz * lights[l].nz) >> 8;
                    if (s > 0) {
                        s = sqrt_fixed(s);
                        lights[l].nx = (lights[l].nx << 16) / s;
                        lights[l].ny = (lights[l].ny << 16) / s;
                        lights[l].nz = (lights[l].nz << 16) / s;
                    }
                }

                lights_dirty = false;
            }

            // Multiply the light vectors with the vertex's normal to calculate light intensity
            // Slots 0 and 1 are the lookat vectors, so the actual lights start at slot 2
            for (int l = 2; l < num_lights; l++) {
                int intensity = (lights[l].nx * n->n[0] + lights[l].ny * n->n[1] + lights[l].nz * n->n[2]) >> 7;
                if (intensity > 0) {
                    r += (intensity * lights[l].color.r) >> 12;
                    g += (intensity * lights[l].color.g) >> 12;
                    b += (intensity * lights[l].color.b) >> 12;
                }
            }

            // Set the calculated vertex color, clamped to 8 bits per channel
            vertex_buffer[i].color.r = (r > 0xFF) ? 0xFF : r;
            vertex_buffer[i].color.g = (g > 0xFF) ? 0xFF : g;
            vertex_buffer[i].color.b = (b > 0xFF) ? 0xFF : b;

            // Generate spherical texture coordinates by multiplying the vertex's normal with the lookat vectors
            if (geometry_mode & G_TEXTURE_GEN) {
                const int dot_y = (lights[0].nx * n->n[0] + lights[0].ny * n->n[1] + lights[0].nz * n->n[2]) >> 7;
                const int dot_x = (lights[1].nx * n->n[0] + lights[1].ny * n->n[1] + lights[1].nz * n->n[2]) >> 7;
                vertex_buffer[i].s = ((dot_x + (1 << 12)) * texture_scale_s) >> 15;
                vertex_buffer[i].t = ((dot_y + (1 << 12)) * texture_scale_t) >> 15;
            }
        } else if (use_env_color) {
            // Use the environment color as the vertex color if enabled
            vertex_buffer[i].color.r = env_color.r;
            vertex_buffer[i].color.g = env_color.g;
            vertex_buffer[i].color.b = env_color.b;
        } else {
            // Set the vertex color normally
            vertex_buffer[i].color.r = v->cn[0];
            vertex_buffer[i].color.g = v->cn[1];
            vertex_buffer[i].color.b = v->cn[2];
        }

        // Set the vertex alpha, using the environment alpha if enabled
        vertex_buffer[i].color.a = (use_env_alpha ? env_color.a : v->cn[3]);

        // Round texture coordinates (by adding 0.5) if linear filtering is enabled
        // The DS can't actually do linear filtering, but this still keeps textures from being slightly misplaced
        // NOTE(review): with 4 fractional bits, 1 << 4 is a whole unit rather than 0.5 - confirm the intended offset
        if ((other_mode_h & (3 << G_MDSFT_TEXTFILT)) != G_TF_POINT) {
            vertex_buffer[i].s += 1 << 4;
            vertex_buffer[i].t += 1 << 4;
        }
    }
}
// Draws one triangle. The three vertex buffer indices are stored doubled in bytes 2, 1 and 0
// of words->w0, in that order.
static void g_tri1(Gwords *words) {
    const struct Vertex *corners[3];

    for (int c = 0; c < 3; c++) {
        const int shift = 16 - 8 * c;
        corners[c] = &vertex_buffer[((words->w0 >> shift) & 0xFF) >> 1];
    }

    draw_vertices(corners, 3);
}
// Draws two triangles with a single call. The first triangle's vertex buffer indices are stored
// doubled in bytes 2, 1 and 0 of words->w0, the second triangle's in the same bytes of words->w1.
static void g_tri2(Gwords *words) {
    const uint32_t packed[2] = { words->w0, words->w1 };
    const struct Vertex *corners[6];

    for (int tri = 0; tri < 2; tri++) {
        for (int c = 0; c < 3; c++) {
            const int shift = 16 - 8 * c;
            corners[tri * 3 + c] = &vertex_buffer[((packed[tri] >> shift) & 0xFF) >> 1];
        }
    }

    draw_vertices(corners, 6);
}
// Stores the texture scaling factors: S comes from the upper 16 bits of words->w1
// and T from the lower 16 bits.
static void g_texture(Gwords *words) {
    const uint32_t scales = words->w1;

    texture_scale_s = (uint16_t)(scales >> 16);
    texture_scale_t = (uint16_t)(scales & 0xFFFF);
}
// Pops matrices from the modelview stack; words->w1 divided by 64 gives the number to pop.
// NOTE(review): the `shrunk` flag is not touched here, so it is not restored together with
// the popped matrix - confirm the drawing order keeps the two consistent.
static void g_popmtx(Gwords *words) {
    const int pop_count = words->w1 / 64;

    glMatrixMode(GL_MODELVIEW);
    glPopMatrix(pop_count);
}
// Updates the geometry mode (words->w0 masks the bits to keep, words->w1 holds the bits to set)
// and mirrors its two culling bits into the polygon format.
static void g_geometrymode(Gwords *words) {
    geometry_mode = (geometry_mode & words->w0) | words->w1;

    // Start from "cull nothing", then remove one flag for each culling bit that is set
    int fmt = poly_fmt | POLY_CULL_NONE;
    if (geometry_mode & (1 << 9)) {
        fmt &= ~POLY_CULL_BACK;
    }
    if (geometry_mode & (1 << 10)) {
        fmt &= ~POLY_CULL_FRONT;
    }
    poly_fmt = fmt;
}
// Loads a matrix from words->w1 and applies it to the projection or modelview matrix,
// as selected by the flag bits in words->w0.
static void g_mtx(Gwords *words) {
    // The source stores the upper halves of all 16 elements in the first 8 words and the lower
    // halves in the next 8; rejoin them and round to 12-bit fractionals for the DS
    const uint32_t *src = (uint32_t*)words->w1;
    m4x4 matrix;
    for (int pair = 0; pair < 8; pair++) {
        const uint32_t upper = src[pair];
        const uint32_t lower = src[pair + 8];
        matrix.m[pair * 2 + 0] = ((int32_t)((upper & 0xFFFF0000) | (lower >> 16)) + 8) >> 4;
        matrix.m[pair * 2 + 1] = ((int32_t)((upper << 16) | (lower & 0x0000FFFF)) + 8) >> 4;
    }

    // Flip the push bit before testing the flags
    const uint8_t params = words->w0 ^ G_MTX_PUSH;
    const bool load = (params & G_MTX_LOAD) != 0;

    if (params & G_MTX_PROJECTION) {
        // Load or multiply the projection matrix
        glMatrixMode(GL_PROJECTION);
        if (load) {
            glLoadMatrix4x4(&matrix);
        } else {
            glMultMatrix4x4(&matrix);
        }
        return;
    }

    glMatrixMode(GL_MODELVIEW);

    // Push the current modelview matrix to the stack if requested
    if (params & G_MTX_PUSH) {
        glPushMatrix();
    }

    if (load) {
        glLoadMatrix4x4(&matrix);
    } else {
        // Revert the W value scaling hack so matrix multiplication works properly
        if (shrunk) {
            const m4x4 enlarge = {{
                1 << 12, 0, 0, 0,
                0, 1 << 12, 0, 0,
                0, 0, 1 << 12, 0,
                0, 0, 0, 1 << 24
            }};
            glMultMatrix4x4(&enlarge);
        }
        glMultMatrix4x4(&matrix);
    }

    // The modelview matrix changed: it is unscaled again and the light vectors are stale
    shrunk = false;
    lights_dirty = true;
}
// Sets values that are normally at specific locations in DMEM.
// Only the light count is acted upon; G_MW_CLIP, G_MW_FOG, G_MW_PERSPNORM and any other
// index are ignored.
static void g_moveword(Gwords *words) {
    const uint8_t index = (words->w0 >> 16) & 0xFF;

    if (index == G_MW_NUMLIGHT) {
        // Set the current number of lights, including the lookat vectors
        num_lights = (words->w1 / 24) + 2;
    }
    //else printf("Unsupported G_MOVEWORD index: 0x%.2X\n", index);
}
// Sets a block of values that are normally at specific locations in DMEM.
// Handles the viewport and the light slots; any other index is ignored.
static void g_movemem(Gwords *words) {
    const uint8_t index = (words->w0 >>  0) & 0xFF;

    if (index == G_MV_VIEWPORT) {
        // Calculate and set the specified viewport
        const Vp_t *vp = (Vp_t*)words->w1;
        const uint8_t right  = ((vp->vscale[0] >> 1) * 255 / 320);
        const uint8_t left   = ((vp->vtrans[0] >> 1) * 255 / 320 - right) >> 1;
        const uint8_t top    = ((vp->vscale[1] >> 1) * 191 / 240);
        const uint8_t bottom = ((vp->vtrans[1] >> 1) * 191 / 240 - top) >> 1;
        glViewport(left, bottom, right, top);
    } else if (index == G_MV_LIGHT) {
        // Set light parameters
        const int slot = ((words->w0 >> 8) & 0xFF) / 3;
        const Light_t *light = (Light_t*)words->w1;

        // Slots 0 and 1 are the lookat vectors, which have no color
        if (slot >= 2) {
            lights[slot].color.r = light->col[0];
            lights[slot].color.g = light->col[1];
            lights[slot].color.b = light->col[2];
        }

        // The ambient light sits at slot num_lights and has no direction
        if (slot < num_lights) {
            lights[slot].x = light->dir[0];
            lights[slot].y = light->dir[1];
            lights[slot].z = light->dir[2];
            lights_dirty = true;
        }
    }
    //else printf("Unsupported G_MOVEMEM index: 0x%.2X\n", index);
}
// Stores words->w1 in rdphalf_1 for later use; nothing is drawn here.
static void g_rdphalf_1(Gwords *words) {
// Set the higher half of the RDP word (holds upper-left texture coordinates for G_TEXRECT)
rdphalf_1 = words->w1;
}
// Overwrites one bit field in the lower half of the other mode word.
//   words->w0: (field length - 1) in bits 0-7; (32 - field shift - field length) in bits 8-15
//   words->w1: the new field value, already shifted into position
static void g_setothermode_l(Gwords *words) {
    const uint8_t bits = ((words->w0 >> 0) & 0xFF) + 1;
    const uint8_t shift = 32 - ((words->w0 >> 8) & 0xFF) - bits;

    // Build the mask with unsigned arithmetic: shifting a signed 1 into or past the sign bit
    // (e.g. a 29-bit field shifted left by 3) is undefined, as is shifting by 32 or more
    const uint32_t field = (bits >= 32) ? 0xFFFFFFFF : (((uint32_t)1 << bits) - 1);
    const uint32_t mask = (shift >= 32) ? 0 : (field << shift);

    // Set the specified bits in the lower half of the other mode word
    other_mode_l = (other_mode_l & ~mask) | (words->w1 & mask);
}
// Overwrites one bit field in the higher half of the other mode word.
//   words->w0: (field length - 1) in bits 0-7; (32 - field shift - field length) in bits 8-15
//   words->w1: the new field value, already shifted into position
static void g_setothermode_h(Gwords *words) {
    const uint8_t bits = ((words->w0 >> 0) & 0xFF) + 1;
    const uint8_t shift = 32 - ((words->w0 >> 8) & 0xFF) - bits;

    // Build the mask with unsigned arithmetic: shifting a signed 1 into or past the sign bit
    // is undefined, as is shifting by 32 or more
    const uint32_t field = (bits >= 32) ? 0xFFFFFFFF : (((uint32_t)1 << bits) - 1);
    const uint32_t mask = (shift >= 32) ? 0 : (field << shift);

    // Set the specified bits in the higher half of the other mode word
    other_mode_h = (other_mode_h & ~mask) | (words->w1 & mask);
}
// Saves a copy of the whole command in texrect; the rectangle itself is drawn by g_rdphalf_2.
static void g_texrect(Gwords *words) {
// Store the G_TEXRECT parameters so they can be used after the texture coordinates are set
texrect = *words;
}
static void g_rdphalf_2(Gwords *words) {
    // G_TEXRECT is actually performed here; the texture coordinates must be set in the RDP word before it can begin
    // Inputs: the rectangle saved by g_texrect, the start coordinates saved by g_rdphalf_1,
    // and the per-pixel coordinate steps carried in this command's w1

    // Get the alpha value and return early if it's 0 (alpha 0 is wireframe on the DS)
    const int alpha = (use_env_alpha ? (env_color.a >> 3) : 31);
    if (alpha == 0) return;

    // Push the current matrices to the stack, and load identity matrices so the outgoing vertices won't be affected
    glMatrixMode(GL_MODELVIEW);
    glPushMatrix();
    glLoadIdentity();
    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();

    // Load the texture if it's dirty
    if (texture_dirty) {
        load_texture();
        glTexParameter(GL_TEXTURE_2D, tex_params);
        texture_dirty = false;
    }

    // Apply the polygon attributes, using the environment alpha if enabled
    glPolyFmt(POLY_CULL_NONE | POLY_ALPHA(alpha));
    glBegin(GL_TRIANGLE);

    // Check if copy mode is enabled; certain rules change if this is the case
    // The rectangle dimensions are a pixel bigger, and the S-coordinate change has 2 extra fractional bits(?)
    const bool copy = ((other_mode_h & (3 << G_MDSFT_CYCLETYPE)) == G_CYC_COPY);

    // Use the environment color if enabled, or clear the vertex color
    if (use_env_color && !copy) {
        glColor3b(env_color.r, env_color.g, env_color.b);
    } else {
        glColor3b(0xFF, 0xFF, 0xFF);
    }

    // Get the rectangle dimensions (12-bit fields; the scaling below treats them as having 2 fractional bits)
    int16_t x1 = ((texrect.w1 >> 12) & 0xFFF);
    int16_t y1 = ((texrect.w1 >> 0) & 0xFFF);
    int16_t x2 = ((texrect.w0 >> 12) & 0xFFF) + (copy ? (1 << 2) : 0);
    int16_t y2 = ((texrect.w0 >> 0) & 0xFFF) + (copy ? (1 << 2) : 0);

    // Calculate the texture coordinates
    // NOTE(review): the start coordinates and the per-pixel steps are masked and shifted as unsigned
    // values, so negative inputs are not sign-extended -- confirm no display list relies on them
    const int16_t s1 = (((rdphalf_1 >> 16) & 0xFFFF) >> 1);
    const int16_t t1 = (((rdphalf_1 >> 0) & 0xFFFF) >> 1);
    const int16_t s2 = s1 + ((((words->w1 >> 16) & 0xFFFF) * (x2 - x1)) >> (copy ? 10 : 8));
    const int16_t t2 = t1 + ((((words->w1 >> 0) & 0xFFFF) * (y2 - y1)) >> 8);

    // Scale the dimensions to be between -1 and 1 with 12 fractional bits
    x1 = (x1 * (2 << 12) / (320 << 2) - (1 << 12));
    y1 = -(y1 * (2 << 12) / (240 << 2) - (1 << 12));
    x2 = (x2 * (2 << 12) / (320 << 2) - (1 << 12));
    y2 = -(y2 * (2 << 12) / (240 << 2) - (1 << 12));

    // Draw one half of the rectangle, using depth hijacking
    glTexCoord2t16(s1, t1);
    glVertex3v16(x1, y1, (--z_depth) / 6);
    glTexCoord2t16(s1, t2);
    glVertex3v16(x1, y2, (--z_depth) / 6);
    glTexCoord2t16(s2, t1);
    glVertex3v16(x2, y1, (--z_depth) / 6);

    // Draw the other half of the rectangle, using depth hijacking
    glTexCoord2t16(s2, t1);
    glVertex3v16(x2, y1, (--z_depth) / 6);
    glTexCoord2t16(s1, t2);
    glVertex3v16(x1, y2, (--z_depth) / 6);
    glTexCoord2t16(s2, t2);
    glVertex3v16(x2, y2, (--z_depth) / 6);

    // Restore the original matrices
    // The projection stack is selected explicitly, as g_fillrect does, so the pop cannot hit the
    // wrong stack if anything called in between (such as load_texture) changed the matrix mode
    glMatrixMode(GL_PROJECTION);
    glPopMatrix(1);
    glMatrixMode(GL_MODELVIEW);
    glPopMatrix(1);
}
static void g_loadblock(Gwords *words) {
    // Loads that target any tile other than the load tile are not tracked
    if (((words->w1 >> 24) & 0x07) != G_TX_LOADTILE) {
        return;
    }

    // Bits 12-23 of w1 plus one give the size of the current texture in memory;
    // that count is in bytes as-is unless the bit width says otherwise
    texture_size = (((words->w1 >> 12) & 0xFFF) + 1);

    if (texture_bit_width == G_IM_SIZ_4b) {
        // Half a byte per unit
        texture_size >>= 1;
    } else if (texture_bit_width == G_IM_SIZ_16b) {
        // Two bytes per unit
        texture_size <<= 1;
    }
}
static void g_settile(Gwords *words) {
    // Settings for any tile other than the render tile are ignored
    if (((words->w1 >> 24) & 0x07) != G_TX_RENDERTILE) {
        return;
    }

    // Record the texture properties carried in the first command word
    texture_format = (words->w0 >> 21) & 0x007;
    texture_bit_width = (words->w0 >> 19) & 0x003;
    texture_row_size = (words->w0 >> 9) & 0x1FF;

    // Clamp/mirror flags for each axis come from the second command word
    const uint8_t wrap_s = (words->w1 >> 8) & 0x003;
    const uint8_t wrap_t = (words->w1 >> 18) & 0x003;

    // Rebuild the texture parameters from scratch: an axis wraps unless it is clamped,
    // and a wrapping axis is additionally flipped when mirroring is requested
    tex_params = 0;
    if ((wrap_s & G_TX_CLAMP) == 0) {
        tex_params |= (wrap_s & G_TX_MIRROR) ? (GL_TEXTURE_WRAP_S | GL_TEXTURE_FLIP_S) : GL_TEXTURE_WRAP_S;
    }
    if ((wrap_t & G_TX_CLAMP) == 0) {
        tex_params |= (wrap_t & G_TX_MIRROR) ? (GL_TEXTURE_WRAP_T | GL_TEXTURE_FLIP_T) : GL_TEXTURE_WRAP_T;
    }
}
static void g_fillrect(Gwords *words) {
    // A fill aimed at the depth buffer is probably the game clearing it, which can be ignored
    if (c_buffer == z_buffer) {
        return;
    }

    // Alpha 0 is wireframe on the DS, so a fully transparent fill is skipped instead of drawn
    const int alpha = fill_color.a >> 3;
    if (alpha == 0) {
        return;
    }

    // Rectangle edges, scaled to be between -1 and 1 with 12 fractional bits
    // The far edges (taken from w0) are extended by one pixel (1 << 2)
    const int16_t left = (((words->w1 >> 12) & 0xFFF) * (2 << 12) / (320 << 2) - (1 << 12));
    const int16_t top = -(((words->w1 >> 0) & 0xFFF) * (2 << 12) / (240 << 2) - (1 << 12));
    const int16_t right = ((((words->w0 >> 12) & 0xFFF) + (1 << 2)) * (2 << 12) / (320 << 2) - (1 << 12));
    const int16_t bottom = -((((words->w0 >> 0) & 0xFFF) + (1 << 2)) * (2 << 12) / (240 << 2) - (1 << 12));

    // The rectangle is drawn as two triangles, three corners each
    const int16_t corners[6][2] = {
        { left, top }, { left, bottom }, { right, top },
        { right, top }, { left, bottom }, { right, bottom },
    };

    // Save both matrices and replace them with identity so the vertices go out unmodified
    glMatrixMode(GL_MODELVIEW);
    glPushMatrix();
    glLoadIdentity();
    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();

    // Draw untextured; the real texture has to be reloaded afterwards
    glBindTexture(GL_TEXTURE_2D, no_texture);
    texture_dirty = true;

    // Apply the polygon attributes and the fill color
    glPolyFmt(POLY_CULL_NONE | POLY_ALPHA(alpha));
    glBegin(GL_TRIANGLE);
    glColor3b(fill_color.r, fill_color.g, fill_color.b);

    // Emit the six vertices, using depth hijacking (every vertex steps the shared depth counter)
    for (int i = 0; i < 6; i++) {
        glVertex3v16(corners[i][0], corners[i][1], (--z_depth) / 6);
    }

    // Put the saved matrices back
    glMatrixMode(GL_PROJECTION);
    glPopMatrix(1);
    glMatrixMode(GL_MODELVIEW);
    glPopMatrix(1);
}
static void g_setfillcolor(Gwords *words) {
    // The second command word packs the fill color as four bytes: red in the top byte, alpha in the bottom
    const uint32_t packed = words->w1;
    fill_color.a = (packed >> 0) & 0xFF;
    fill_color.b = (packed >> 8) & 0xFF;
    fill_color.g = (packed >> 16) & 0xFF;
    fill_color.r = (packed >> 24) & 0xFF;
}
static void g_setenvcolor(Gwords *words) {
    // The second command word packs the environment color as four bytes: red in the top byte, alpha in the bottom
    const uint32_t packed = words->w1;
    env_color.a = (packed >> 0) & 0xFF;
    env_color.b = (packed >> 8) & 0xFF;
    env_color.g = (packed >> 16) & 0xFF;
    env_color.r = (packed >> 24) & 0xFF;
}
static void g_setcombine(Gwords *words) {
    // Decode the combiner inputs that are consulted below (the A and B alpha inputs are not used)
    const uint8_t color_a = (words->w0 >> 20) & 0x0F;
    const uint8_t color_b = (words->w1 >> 28) & 0x0F;
    const uint8_t color_c = (words->w0 >> 15) & 0x1F;
    const uint8_t color_d = (words->w1 >> 15) & 0x07;
    const uint8_t alpha_c = (words->w0 >> 9) & 0x07;
    const uint8_t alpha_d = (words->w1 >> 9) & 0x07;

    // The N64 color combiner evaluates (A - B) * C + D, with color and alpha handled separately
    // The DS is much more limited when it comes to blending, so the settings are only reduced to a
    // handful of flags; this is just an approximation that seems to work well for SM64
    const bool any_shade = (color_a == G_CCMUX_SHADE || color_b == G_CCMUX_SHADE
                            || color_c == G_CCMUX_SHADE || color_d == G_CCMUX_SHADE);
    const bool any_texel = (color_a == G_CCMUX_TEXEL0 || color_b == G_CCMUX_TEXEL0
                            || color_c == G_CCMUX_TEXEL0 || color_d == G_CCMUX_TEXEL0);

    use_env_color = (c_color_is_env(color_c, color_d));
    use_env_alpha = (alpha_c == G_CCMUX_ENVIRONMENT || alpha_d == G_CCMUX_ENVIRONMENT);
    use_color = use_env_color || any_shade;
    use_texture = any_texel;

    if (color_b != color_d) {
        poly_fmt &= ~POLY_DECAL;
    } else {
        poly_fmt |= POLY_DECAL;
        // Hack to hide goddard's texture since it can't be properly blended
        if (color_a == G_CCMUX_PRIMITIVE) {
            use_texture = false;
        }
    }

    // The DS doesn't draw transparent pixels over other transparent pixels with the same polygon ID
    // That avoids overlap artifacts within one object, but also breaks blending between separate objects
    // As a guess of where objects start and end, move to the next polygon ID (wrapping at 64)
    // every time the color combine settings change
    polygon_id = (polygon_id + 1) & 0x3F;
}
static void g_settimg(Gwords *words) {
    // A new source image means the texture has to be loaded again before the next textured draw
    texture_dirty = true;

    // Format and bit width come from the first command word
    texture_bit_width = (words->w0 >> 19) & 0x03;
    texture_format = (words->w0 >> 21) & 0x07;

    // The second command word is used directly as the address of the texture in memory
    texture_address = (uint8_t*)words->w1;
}
// Handler for G_SETZIMG: remembers where the game placed its depth buffer
static void g_setzimg(Gwords *words) {
// Set the address of the depth buffer
// This doesn't matter much on the DS, but it's used to detect attempts to draw to the depth buffer
// (g_fillrect compares this against the color buffer address and skips the fill when they match)
z_buffer = (uint8_t*)words->w1;
}
// Handler for G_SETCIMG: remembers which buffer the game is currently drawing into
static void g_setcimg(Gwords *words) {
// Set the address of the color buffer
// This doesn't matter much on the DS, but it's used to detect attempts to draw to the depth buffer
// (g_fillrect skips the fill when this address equals the depth buffer address)
c_buffer = (uint8_t*)words->w1;
}
static void execute(Gfx* cmd) {
    // Walk a list of Fast3DEX2 commands and carry each one out using the DS hardware
    // The walk ends when a G_ENDDL command is reached
    for (;;) {
        Gwords *const words = &cmd->words;
        // The opcode is everything above bit 24 of the first word, truncated to a byte
        const uint8_t op = words->w0 >> 24;

        switch (op) {
            // Commands with a dedicated handler
            case G_VTX:            g_vtx(words);            break;
            case G_TRI1:           g_tri1(words);           break;
            case G_TRI2:           g_tri2(words);           break;
            case G_TEXTURE:        g_texture(words);        break;
            case G_POPMTX:         g_popmtx(words);         break;
            case G_GEOMETRYMODE:   g_geometrymode(words);   break;
            case G_MTX:            g_mtx(words);            break;
            case G_MOVEWORD:       g_moveword(words);       break;
            case G_MOVEMEM:        g_movemem(words);        break;
            case G_RDPHALF_1:      g_rdphalf_1(words);      break;
            case G_SETOTHERMODE_L: g_setothermode_l(words); break;
            case G_SETOTHERMODE_H: g_setothermode_h(words); break;
            case G_TEXRECT:        g_texrect(words);        break;
            case G_RDPHALF_2:      g_rdphalf_2(words);      break;
            case G_LOADBLOCK:      g_loadblock(words);      break;
            case G_SETTILE:        g_settile(words);        break;
            case G_FILLRECT:       g_fillrect(words);       break;
            case G_SETFILLCOLOR:   g_setfillcolor(words);   break;
            case G_SETENVCOLOR:    g_setenvcolor(words);    break;
            case G_SETCOMBINE:     g_setcombine(words);     break;
            case G_SETTIMG:        g_settimg(words);        break;
            case G_SETZIMG:        g_setzimg(words);        break;
            case G_SETCIMG:        g_setcimg(words);        break;

            case G_DL:
                if ((words->w0 & (1 << 16)) == 0) {
                    // With return: run the other list to its end, then carry on after this command
                    execute((Gfx*)words->w1);
                    break;
                }
                // Without return: keep walking from the start of the other list
                cmd = (Gfx*)words->w1;
                continue;

            case G_ENDDL:
                // Return from the current display list
                return;

            // Opcodes that don't need to do anything
            case G_RDPLOADSYNC:
            case G_RDPPIPESYNC:
            case G_RDPTILESYNC:
            case G_RDPFULLSYNC:
            // Unimplemented opcodes
            case G_SETSCISSOR:
            case G_SETTILESIZE:
            case G_SETFOGCOLOR:
            case G_SETBLENDCOLOR:
            case G_SETPRIMCOLOR:
            // Anything else is silently skipped as well
            default:
                //printf("Unsupported GBI command: 0x%.2X\n", op);
                break;
        }

        // Advance to the next command in the list
        cmd++;
    }
}
static void count_frames() {
    // Registered as the V-blank interrupt handler by renderer_init; each call counts one frame
    frame_count += 1;
}
// One-time renderer setup: configures the video mode and console, the 3D GL state, texture VRAM,
// a placeholder "no texture" object, a grayscale palette, and the V-blank frame counter
void renderer_init() {
// Set up the screens
videoSetMode(MODE_0_3D);
consoleDemoInit();
// Initialize the 3D renderer
glInit();
// Clear color is black with the maximum alpha value (31), and depth clears to the far limit
glClearColor(0, 0, 0, 31);
glClearDepth(GL_MAX_DEPTH);
glEnable(GL_ANTIALIAS);
glEnable(GL_TEXTURE_2D);
glEnable(GL_BLEND);
// Set up texture VRAM (bank C is used by the console)
vramSetBankA(VRAM_A_TEXTURE);
vramSetBankB(VRAM_B_TEXTURE);
vramSetBankD(VRAM_D_TEXTURE);
vramSetBankE(VRAM_E_TEX_PALETTE);
// Generate an empty texture for when no texture should be used
// (g_fillrect binds no_texture to draw untextured rectangles)
glGenTextures(1, &no_texture);
glBindTexture(GL_TEXTURE_2D, no_texture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_NOTEXTURE, 0, 0, 0, TEXGEN_TEXCOORD, NULL);
// Set up an intensity palette for IA textures
// Eight gray levels spread from 0 to 31; each entry repeats the same 5-bit intensity
// in three fields (bits 0-4, 5-9 and 10-14)
uint16_t palette[8];
for (int x = 0; x < 8; x++) {
const int i = x * 31 / 7;
palette[x] = (i << 10) | (i << 5) | i;
}
// NOTE(review): no_texture is still the bound texture at this point, so the palette is presumably
// attached to it -- confirm how load_texture picks the palette up
glColorTableEXT(GL_TEXTURE_2D, 0, 8, 0, 0, palette);
// Set up the frame counter to trigger on V-blank
// draw_frame reads and resets the counter to pace rendering
irqSet(IRQ_VBLANK, count_frames);
irqEnable(IRQ_VBLANK);
}
void draw_frame(Gfx *display_list) {
    // Every frame starts with fresh depth hack state
    z_depth = 0x1000 * 6;
    background = true;

    // Interpret the display list, then hand the finished frame to the hardware
    execute(display_list);
    glFlush(0);

    // Limit to 30FPS: a frame should span two V-blanks, so wait out whatever part of that
    // budget rendering did not already use up (nothing to wait for if it took 2 or more)
    int elapsed = frame_count;
    while (elapsed < 2) {
        swiWaitForVBlank();
        elapsed++;
    }

    // Start counting afresh for the next frame
    frame_count = 0;
}

7
src/nds/nds_renderer.h Normal file
View File

@ -0,0 +1,7 @@
#ifndef NDS_RENDERER_H
#define NDS_RENDERER_H
// Public interface of the DS renderer
// NOTE(review): Gfx is not declared by this header, so includers presumably have to pull in the
// GBI definitions first -- confirm, or include them here
// Sets up the screens, the 3D hardware state and the V-blank frame counter
// (presumably meant to be called once before the first draw_frame -- confirm against the caller)
extern void renderer_init();
// Interprets the given display list, submits the frame, and waits as needed to hold 30FPS
extern void draw_frame(Gfx *display_list);
#endif // NDS_RENDERER_H

View File

@ -0,0 +1,187 @@
#include <stdio.h>
#include <string.h>
#include "lib/src/libultra_internal.h"
#include "macros.h"
#ifdef TARGET_WEB
#include <emscripten.h>
#endif
// Defined elsewhere; not referenced by the functions visible in this part of the file
extern OSMgrArgs piMgrArgs;
// Fixed clock rate value exposed to the game (62,500,000; presumably in Hz -- confirm against users of osClockRate)
u64 osClockRate = 62500000;
s32 osPiStartDma(UNUSED OSIoMesg *mb, UNUSED s32 priority, UNUSED s32 direction,
                 uintptr_t devAddr, void *vAddr, size_t nbytes,
                 UNUSED OSMesgQueue *mq) {
    // The device address is used as an ordinary pointer, so the "DMA" is a plain copy
    // that has already finished by the time this returns; the message arguments are ignored
    const void *source = (const void *) devAddr;
    memcpy(vAddr, source, nbytes);
    // Always reports success
    return 0;
}
void osCreateMesgQueue(OSMesgQueue *mq, OSMesg *msgBuf, s32 count) {
    // Attach the caller's storage and record how many messages it can hold
    mq->msg = msgBuf;
    mq->msgCount = count;
    // The queue starts out empty, with the read position at the first slot
    mq->first = 0;
    mq->validCount = 0;
}
// Stub: does nothing; all arguments are ignored
void osSetEventMesg(UNUSED OSEvent e, UNUSED OSMesgQueue *mq, UNUSED OSMesg msg) {
}
// Stub: the message is discarded and 0 is always returned
s32 osJamMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
return 0;
}
s32 osSendMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
    // Only VERSION_EU builds actually queue the message; other builds ignore every argument
    // Returns -1 when the queue is full (EU only), 0 otherwise; flag is never consulted
#ifdef VERSION_EU
    if (mq->msgCount <= mq->validCount) {
        return -1;
    }
    // The next free slot sits right after the last valid message, wrapping around the buffer
    const s32 slot = (mq->first + mq->validCount) % mq->msgCount;
    mq->msg[slot] = msg;
    mq->validCount += 1;
#endif
    return 0;
}
s32 osRecvMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg *msg, UNUSED s32 flag) {
    // Only VERSION_EU builds actually dequeue a message; other builds ignore every argument
    // Returns -1 when the queue is empty (EU only), 0 otherwise; flag is never consulted
    // The guard is #ifdef, matching osSendMesg and osAiSetFrequency, so the two halves of the
    // queue are always compiled in together (a bare #if would also fail to compile if the
    // macro were defined without a value)
#ifdef VERSION_EU
    if (mq->validCount == 0) {
        return -1;
    }
    // The caller may pass NULL to drop the oldest message without reading it
    if (msg != NULL) {
        *msg = mq->msg[mq->first];
    }
    // Advance the read position, wrapping around the buffer
    mq->first = (mq->first + 1) % mq->msgCount;
    mq->validCount--;
#endif
    return 0;
}
// No address translation is performed here: the pointer value itself is returned as an integer
uintptr_t osVirtualToPhysical(void *addr) {
return (uintptr_t) addr;
}
// The video interface calls below are all empty stubs: they accept the libultra arguments
// and do nothing with them
// Stub: no VI manager is created
void osCreateViManager(UNUSED OSPri pri) {
}
// Stub: the requested video mode is ignored
void osViSetMode(UNUSED OSViMode *mode) {
}
// Stub: no retrace event is registered
void osViSetEvent(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED u32 retraceCount) {
}
// Stub: screen blanking requests are ignored
void osViBlack(UNUSED u8 active) {
}
// Stub: special feature flags are ignored
void osViSetSpecialFeatures(UNUSED u32 func) {
}
// Stub: the framebuffer address is ignored
void osViSwapBuffer(UNUSED void *vaddr) {
}
// Stub: no clock is read; the reported time is always 0
OSTime osGetTime(void) {
return 0;
}
// The data cache maintenance calls below are empty stubs
// Stub: nothing is written back
void osWritebackDCacheAll(void) {
}
// Stub: the address range is ignored
void osWritebackDCache(UNUSED void *a, UNUSED size_t b) {
}
// Stub: the address range is ignored
void osInvalDCache(UNUSED void *a, UNUSED size_t b) {
}
u32 osGetCount(void) {
    // Not tied to any real timer: the value simply goes up by one per call, starting from 0
    static u32 counter;
    const u32 current = counter;
    counter += 1;
    return current;
}
s32 osAiSetFrequency(u32 freq) {
    // Computes the playback frequency that results from the requested one
    // Returns -1 if the request cannot be satisfied, otherwise the resulting frequency
    // Nothing is programmed into hardware here; only the return value matters
    u32 divider;
    u32 baseClock;

    // The base clock differs for VERSION_EU builds
    // (presumably the PAL vs NTSC video clock -- confirm)
#ifdef VERSION_EU
    baseClock = 0x02E6025C;
#else
    baseClock = 0x02E6D354;
#endif

    // A frequency of 0 would divide by zero and then convert an infinite float to an
    // integer, which is undefined behavior; treat it like any other unsatisfiable request
    if (freq == 0) {
        return -1;
    }

    // Divider for the requested frequency, rounded to the nearest integer
    divider = baseClock / (float) freq + .5f;
    // Dividers below 0x84 are rejected
    if (divider < 0x84) {
        return -1;
    }

    // Report the frequency the rounded divider really produces
    return baseClock / (s32) divider;
}
// Always returns 1 without probing anything
// (presumably the value that tells the caller an EEPROM is present -- confirm against the save code)
s32 osEepromProbe(UNUSED OSMesgQueue *mq) {
return 1;
}
s32 osEepromLongRead(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
    // Reads nbytes from the 512-byte save image into buffer, starting at 8-byte block `address`
    // The image comes from browser local storage on TARGET_WEB, and from the file
    // "sm64_save_file.bin" otherwise
    // Returns 0 on success; -1 if the range is invalid or a complete image is not available
    u8 content[512];
    s32 ret = -1;

    // Reject ranges that fall outside the image; copying them would read past the end of content
    if (nbytes < 0 || (size_t) address * 8 + (size_t) nbytes > sizeof(content)) {
        return -1;
    }

#ifdef TARGET_WEB
    // The stored string is only accepted if it is 684 characters long and decodes to exactly 512 bytes
    if (EM_ASM_INT({
        var s = localStorage.sm64_save_file;
        if (s && s.length === 684) {
            try {
                var binary = atob(s);
                if (binary.length === 512) {
                    for (var i = 0; i < 512; i++) {
                        HEAPU8[$0 + i] = binary.charCodeAt(i);
                    }
                    return 1;
                }
            } catch (e) {
            }
        }
        return 0;
    }, content)) {
        memcpy(buffer, content + address * 8, nbytes);
        ret = 0;
    }
#else
    FILE *fp = fopen("sm64_save_file.bin", "rb");
    if (fp == NULL) {
        return -1;
    }
    // Only a complete 512-byte image is accepted; a short read leaves buffer untouched
    if (fread(content, 1, 512, fp) == 512) {
        memcpy(buffer, content + address * 8, nbytes);
        ret = 0;
    }
    fclose(fp);
#endif
    return ret;
}
s32 osEepromLongWrite(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
    // Writes nbytes from buffer into the 512-byte save image, starting at 8-byte block `address`,
    // then stores the whole image (browser local storage on TARGET_WEB, otherwise the file
    // "sm64_save_file.bin")
    // Returns 0 on success; -1 if the range is invalid or the image could not be written out
    u8 content[512] = {0};
    s32 ret = 0;

    // Reject ranges that fall outside the image; copying them would overflow content on the stack
    if (nbytes < 0 || (size_t) address * 8 + (size_t) nbytes > sizeof(content)) {
        return -1;
    }

    // A partial write has to be merged into the existing image
    // The read result is deliberately ignored: when no image can be read, the untouched bytes
    // stay at whatever content already holds (zeros if nothing was read at all)
    if (address != 0 || nbytes != 512) {
        osEepromLongRead(mq, 0, content, 512);
    }
    memcpy(content + address * 8, buffer, nbytes);

#ifdef TARGET_WEB
    EM_ASM({
        var str = "";
        for (var i = 0; i < 512; i++) {
            str += String.fromCharCode(HEAPU8[$0 + i]);
        }
        localStorage.sm64_save_file = btoa(str);
    }, content);
#else
    FILE* fp = fopen("sm64_save_file.bin", "wb");
    if (fp == NULL) {
        return -1;
    }
    if (fwrite(content, 1, 512, fp) != 512) {
        ret = -1;
    }
    // Buffered data may only reach the file on close, so a failed close is a failed write too
    if (fclose(fp) != 0) {
        ret = -1;
    }
#endif
    return ret;
}