diff --git a/Makefile b/Makefile index 84e2ba97..1103ef3a 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ else ifeq ($(VERSION),sh) endif ifeq ($(TARGET_NDS),1) - OPT_FLAGS := -O2 + OPT_FLAGS := -O2 -flto -ffast-math GRUCODE := f3dex2 COMPILER := gcc DEVKITPRO ?= /opt/devkitpro @@ -110,8 +110,10 @@ ifeq ($(COMPILER),ido) else ifeq ($(COMPILER),gcc) NON_MATCHING := 1 MIPSISET := -mips3 +ifeq ($(TARGET_NDS),0) OPT_FLAGS := -O2 endif +endif # NON_MATCHING - whether to build a matching, identical copy of the ROM @@ -392,8 +394,8 @@ DEF_INC_CFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(C_DEFINES) ifeq ($(TARGET_NDS),1) LIBDIRS := $(DEVKITPRO)/libnds -TARGET_CFLAGS := -march=armv5te -mtune=arm946e-s -fomit-frame-pointer -ffast-math $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM9 -D_LANGUAGE_C -DNO_SEGMENTED_MEMORY -DLIBFAT -ARM7_TARGET_CFLAGS := -mcpu=arm7tdmi -mtune=arm7tdmi -fomit-frame-pointer -ffast-math $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM7 +TARGET_CFLAGS := -march=armv5te -mtune=arm946e-s $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM9 -D_LANGUAGE_C -DNO_SEGMENTED_MEMORY -DLIBFAT +ARM7_TARGET_CFLAGS := -mcpu=arm7tdmi -mtune=arm7tdmi $(foreach dir,$(LIBDIRS),-I$(dir)/include) -DTARGET_NDS -DARM7 CC_CHECK := $(CC) CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(TARGET_CFLAGS) -Wall -Wextra -Wno-format-security -DNON_MATCHING -DAVOID_UB $(DEF_INC_CFLAGS) @@ -401,10 +403,10 @@ ARM7_CC_CHECK_CFLAGS := -fsyntax-only -fsigned-char $(CC_CFLAGS) $(ARM7_TARGET_C ASFLAGS := $(foreach i,$(INCLUDE_DIRS),-I$(i)) $(foreach d,$(DEFINES),--defsym $(d)) CFLAGS := -fno-strict-aliasing -fwrapv $(OPT_FLAGS) $(TARGET_CFLAGS) $(DEF_INC_CFLAGS) -LDFLAGS := -lfat -lnds9 -specs=dsi_arm9.specs -g -mthumb -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib) +LDFLAGS := -lfat -lnds9 -specs=dsi_arm9.specs -g -mthumb -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib) $(TARGET_CFLAGS) ARM7_CFLAGS := -fno-strict-aliasing -fwrapv $(OPT_FLAGS) $(ARM7_TARGET_CFLAGS) $(DEF_INC_CFLAGS) -ARM7_LDFLAGS := -lnds7 -specs=ds_arm7.specs -g -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib) +ARM7_LDFLAGS := -lnds7 -specs=ds_arm7.specs -g -mthumb-interwork $(foreach dir,$(LIBDIRS),-L$(dir)/lib) $(ARM7_TARGET_CFLAGS) else diff --git a/src/nds/nds_renderer.c b/src/nds/nds_renderer.c index 05327c25..c83a3473 100644 --- a/src/nds/nds_renderer.c +++ b/src/nds/nds_renderer.c @@ -10,12 +10,6 @@ struct Color { uint8_t r, g, b, a; }; -struct Vertex { - int16_t x, y, z; - int16_t s, t; - struct Color color; -}; - struct Texture { uint8_t *address; int name; @@ -27,13 +21,13 @@ struct Texture { struct Light { int16_t nx, ny, nz; int8_t x, y, z; - struct Color color; + uint8_t r, g, b; }; static struct Color env_color; static struct Color fill_color; -static struct Vertex vertex_buffer[16]; +static Vtx vertex_buffer[16]; static struct Texture texture_map[2048]; static struct Light lights[5]; @@ -138,14 +132,20 @@ static void load_texture() { texture_fifo_start = (texture_fifo_start + 1) & 0x7FF; } -static void draw_vertices(const struct Vertex **v, int count) { +static void draw_vertices(const Vtx_t **v, int count) { // Get the alpha value and return early if it's 0 (alpha 0 is wireframe on the DS) // Since the DS only supports one alpha value per polygon, just use the one from first vertex - const int alpha = ((other_mode_l & (G_BL_A_MEM << 18)) ? 31 : (v[0]->color.a >> 3)); + const int alpha = ((other_mode_l & (G_BL_A_MEM << 18)) ? 31 : ((use_env_alpha ? env_color.a : v[0]->cn[3]) >> 3)); if (alpha == 0) return; - // Clear the vertex color if it shoudn't be used - if (!use_color) { + // Round texture coodinates (by adding 0.5) if linear filtering is enabled + // The DS can't actually do linear filtering, but this still keeps textures from being slightly misplaced + const uint8_t tex_ofs = ((other_mode_h & (3 << G_MDSFT_TEXTFILT)) == G_TF_POINT) ? 0 : (1 << 4); + + // Handle special vertex color settings + if (use_env_color) { + glColor3b(env_color.r, env_color.g, env_color.b); + } else if (!use_color) { glColor3b(0xFF, 0xFF, 0xFF); } @@ -182,11 +182,11 @@ static void draw_vertices(const struct Vertex **v, int count) { if ((other_mode_l & ZMODE_DEC) == ZMODE_DEC) { for (int i = 0; i < count; i++) { // Send the vertex attributes to the 3D engine - if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b); - if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t); + if (use_color) glColor3b(v[i]->cn[0], v[i]->cn[1], v[i]->cn[2]); + if (use_texture) glTexCoord2t16(((v[i]->tc[0] * texture_scale_s) >> 17) + tex_ofs, ((v[i]->tc[1] * texture_scale_t) >> 17) + tex_ofs); // Use position test to project the vertex so the result can be hijacked before sending it for real - PosTest(v[i]->x, v[i]->y, v[i]->z); + PosTest(v[i]->ob[0], v[i]->ob[1], v[i]->ob[2]); // Push the current matrices to the stack, and load an identity matrix so the outgoing vertex won't be affected glMatrixMode(GL_MODELVIEW); @@ -214,9 +214,9 @@ static void draw_vertices(const struct Vertex **v, int count) { } else { // Send the vertices normally for (int i = 0; i < count; i++) { - if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b); - if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t); - glVertex3v16(v[i]->x, v[i]->y, v[i]->z); + if (use_color) glColor3b(v[i]->cn[0], v[i]->cn[1], v[i]->cn[2]); + if (use_texture) glTexCoord2t16(((v[i]->tc[0] * texture_scale_s) >> 17) + tex_ofs, ((v[i]->tc[1] * texture_scale_t) >> 17) + tex_ofs); + glVertex3v16(v[i]->ob[0], v[i]->ob[1], v[i]->ob[2]); } } @@ -241,11 +241,11 @@ static void draw_vertices(const struct Vertex **v, int count) { for (int i = 0; i < count; i++) { // Send the vertex attributes to the 3D engine - if (use_color) glColor3b(v[i]->color.r, v[i]->color.g, v[i]->color.b); - if (use_texture) glTexCoord2t16(v[i]->s, v[i]->t); + if (use_color) glColor3b(v[i]->cn[0], v[i]->cn[1], v[i]->cn[2]); + if (use_texture) glTexCoord2t16(((v[i]->tc[0] * texture_scale_s) >> 17) + tex_ofs, ((v[i]->tc[1] * texture_scale_t) >> 17) + tex_ofs); // Use position test to project the vertex so the result can be hijacked before sending it for real - PosTest(v[i]->x, v[i]->y, v[i]->z); + PosTest(v[i]->ob[0], v[i]->ob[1], v[i]->ob[2]); // Push the current matrices to the stack, and load an identity matrix so the outgoing vertex won't be affected glPushMatrix(); @@ -298,117 +298,87 @@ static void g_vtx(Gwords *words) { const Vtx *vertices = (const Vtx*)words->w1; // Store vertices in the vertex buffer - for (uint8_t i = index - count; i < index; i++) { - const Vtx_t *v = &vertices[i].v; - const Vtx_tn *n = &vertices[i].n; + memcpy(&vertex_buffer[index - count], vertices, count * sizeof(Vtx)); - // Set the vertex coordinates - vertex_buffer[i].x = v->ob[0]; - vertex_buffer[i].y = v->ob[1]; - vertex_buffer[i].z = v->ob[2]; + if (geometry_mode & G_LIGHTING) { + // Recalculate transformed light vectors if the lights or modelview matrix changed + if (lights_dirty) { + // Read the current modelview matrix from hardware + int m[12]; + glGetFixed(GL_GET_MATRIX_VECTOR, m); - // Scale the texture coordinates, and shift out an additional bit to get 4-bit fractionals for the DS - vertex_buffer[i].s = (v->tc[0] * texture_scale_s) >> 17; - vertex_buffer[i].t = (v->tc[1] * texture_scale_t) >> 17; + for (int i = 0; i < num_lights; i++) { + // Multiply the light vector with the modelview matrix + lights[i].nx = (lights[i].x * m[0] + lights[i].y * m[1] + lights[i].z * m[2]) >> 7; + lights[i].ny = (lights[i].x * m[3] + lights[i].y * m[4] + lights[i].z * m[5]) >> 7; + lights[i].nz = (lights[i].x * m[6] + lights[i].y * m[7] + lights[i].z * m[8]) >> 7; - // Calulate vertex colors for lighting in software - // The DS can *almost* do this in hardware, but the vectors need to be normalized after being transformed - if (geometry_mode & G_LIGHTING) { - // Use the last light as ambient light (or emission, in DS terms) - uint32_t r = lights[num_lights].color.r; - uint32_t g = lights[num_lights].color.g; - uint32_t b = lights[num_lights].color.b; - - // Recalculate transformed light vectors if the lights or modelview matrix changed - if (lights_dirty) { - // Read the current modelview matrix from hardware - int m[12]; - glGetFixed(GL_GET_MATRIX_VECTOR, m); - - for (int i = 0; i < num_lights; i++) { - // Multiply the light vector with the modelview matrix - lights[i].nx = (lights[i].x * m[0] + lights[i].y * m[1] + lights[i].z * m[2]) >> 7; - lights[i].ny = (lights[i].x * m[3] + lights[i].y * m[4] + lights[i].z * m[5]) >> 7; - lights[i].nz = (lights[i].x * m[6] + lights[i].y * m[7] + lights[i].z * m[8]) >> 7; - - // Normalize the result - int s = (lights[i].nx * lights[i].nx + lights[i].ny * lights[i].ny + lights[i].nz * lights[i].nz) >> 8; - if (s > 0) { - s = sqrt_fixed(s); - lights[i].nx = (lights[i].nx << 16) / s; - lights[i].ny = (lights[i].ny << 16) / s; - lights[i].nz = (lights[i].nz << 16) / s; - } + // Normalize the result + int s = (lights[i].nx * lights[i].nx + lights[i].ny * lights[i].ny + lights[i].nz * lights[i].nz) >> 8; + if (s > 0) { + s = sqrt_fixed(s); + lights[i].nx = (lights[i].nx << 16) / s; + lights[i].ny = (lights[i].ny << 16) / s; + lights[i].nz = (lights[i].nz << 16) / s; } - - lights_dirty = false; } + lights_dirty = false; + } + + // Calulate vertex colors for lighting in software, since hardware doesn't normalize the light vectors + for (int i = index - count; i < index; i++) { + Vtx_t *v = &vertex_buffer[i].v; + Vtx_tn *n = &vertex_buffer[i].n; + + // Use the last light as ambient light (or emission, in DS terms) + uint32_t r = lights[num_lights].r; + uint32_t g = lights[num_lights].g; + uint32_t b = lights[num_lights].b; + // Multiply the light vertices with the vertex's normal to calculate light intensity for (int i = 2; i < num_lights; i++) { int intensity = (lights[i].nx * n->n[0] + lights[i].ny * n->n[1] + lights[i].nz * n->n[2]) >> 7; if (intensity > 0) { - r += (intensity * lights[i].color.r) >> 12; - g += (intensity * lights[i].color.g) >> 12; - b += (intensity * lights[i].color.b) >> 12; + r += (intensity * lights[i].r) >> 12; + g += (intensity * lights[i].g) >> 12; + b += (intensity * lights[i].b) >> 12; } } - // Set the calulated vertex color - vertex_buffer[i].color.r = (r > 0xFF) ? 0xFF : r; - vertex_buffer[i].color.g = (g > 0xFF) ? 0xFF : g; - vertex_buffer[i].color.b = (b > 0xFF) ? 0xFF : b; - - // Generate spherical texture coordinates by multiplying the vertex's normal with the lookat vectors + // Generate spherical texture coordinates by multiplying the lookat vectors with the vertex's normal if (geometry_mode & G_TEXTURE_GEN) { - const int dot_y = (lights[0].nx * n->n[0] + lights[0].ny * n->n[1] + lights[0].nz * n->n[2]) >> 7; - const int dot_x = (lights[1].nx * n->n[0] + lights[1].ny * n->n[1] + lights[1].nz * n->n[2]) >> 7; - vertex_buffer[i].s = ((dot_x + (1 << 12)) * texture_scale_s) >> 15; - vertex_buffer[i].t = ((dot_y + (1 << 12)) * texture_scale_t) >> 15; + v->tc[0] = ((lights[1].nx * n->n[0] + lights[1].ny * n->n[1] + lights[1].nz * n->n[2]) >> 5) + (1 << 14); + v->tc[1] = ((lights[0].nx * n->n[0] + lights[0].ny * n->n[1] + lights[0].nz * n->n[2]) >> 5) + (1 << 14); } - } else if (use_env_color) { - // Use the environment color as the vertex color if enabled - vertex_buffer[i].color.r = env_color.r; - vertex_buffer[i].color.g = env_color.g; - vertex_buffer[i].color.b = env_color.b; - } else { - // Set the vertex color normally - vertex_buffer[i].color.r = v->cn[0]; - vertex_buffer[i].color.g = v->cn[1]; - vertex_buffer[i].color.b = v->cn[2]; - } - // Set the vertex alpha, using the environment alpha if enabled - vertex_buffer[i].color.a = (use_env_alpha ? env_color.a : v->cn[3]); - - // Round texture coodinates (by adding 0.5) if linear filtering is enabled - // The DS can't actually do linear filtering, but this still keeps textures from being slightly misplaced - if ((other_mode_h & (3 << G_MDSFT_TEXTFILT)) != G_TF_POINT) { - vertex_buffer[i].s += 1 << 4; - vertex_buffer[i].t += 1 << 4; + // Set the calulated vertex color + v->cn[0] = (r > 0xFF) ? 0xFF : r; + v->cn[1] = (g > 0xFF) ? 0xFF : g; + v->cn[2] = (b > 0xFF) ? 0xFF : b; } } } static void g_tri1(Gwords *words) { // Draw a triangle - const struct Vertex *v[] = { - &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1], - &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1], - &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1] + const Vtx_t *v[] = { + &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1].v, + &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1].v, + &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1].v }; draw_vertices(v, 3); } static void g_tri2(Gwords *words) { // Draw two triangles at once - const struct Vertex *v[] = { - &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1], - &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1], - &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1], - &vertex_buffer[((words->w1 >> 16) & 0xFF) >> 1], - &vertex_buffer[((words->w1 >> 8) & 0xFF) >> 1], - &vertex_buffer[((words->w1 >> 0) & 0xFF) >> 1] + const Vtx_t *v[] = { + &vertex_buffer[((words->w0 >> 16) & 0xFF) >> 1].v, + &vertex_buffer[((words->w0 >> 8) & 0xFF) >> 1].v, + &vertex_buffer[((words->w0 >> 0) & 0xFF) >> 1].v, + &vertex_buffer[((words->w1 >> 16) & 0xFF) >> 1].v, + &vertex_buffer[((words->w1 >> 8) & 0xFF) >> 1].v, + &vertex_buffer[((words->w1 >> 0) & 0xFF) >> 1].v }; draw_vertices(v, 6); } @@ -542,17 +512,20 @@ static void g_movemem(Gwords *words) { case G_MV_LIGHT: { // Set light parameters - const int index = ((words->w0 >> 8) & 0xFF) / 3; - const Light_t *light = (Light_t*)words->w1; + const uint8_t index = ((words->w0 >> 8) & 0xFF) / 3; + const Light_t *src = (Light_t*)words->w1; + struct Light *dst = &lights[index]; if (index >= 2) { // Not lookat vectors - lights[index].color.r = light->col[0]; - lights[index].color.g = light->col[1]; - lights[index].color.b = light->col[2]; + dst->r = src->col[0]; + dst->g = src->col[1]; + dst->b = src->col[2]; } - if (index < num_lights) { // Not ambient light - lights[index].x = light->dir[0]; - lights[index].y = light->dir[1]; - lights[index].z = light->dir[2]; + if (index < num_lights && // Not ambient light + // The game likes to rewrite the same light vectors, so avoid making the lights dirty if possible + (dst->x != src->dir[0] || dst->y != src->dir[1] || dst->z != src->dir[2])) { + dst->x = src->dir[0]; + dst->y = src->dir[1]; + dst->z = src->dir[2]; lights_dirty = true; } break; @@ -785,7 +758,7 @@ static void g_setcombine(Gwords *words) { use_env_color = (c_color == G_CCMUX_ENVIRONMENT || d_color == G_CCMUX_ENVIRONMENT); use_env_alpha = (c_alpha == G_CCMUX_ENVIRONMENT || d_alpha == G_CCMUX_ENVIRONMENT); - use_color = use_env_color || (a_color == G_CCMUX_SHADE || b_color == G_CCMUX_SHADE || c_color == G_CCMUX_SHADE || d_color == G_CCMUX_SHADE); + use_color = !use_env_color && (a_color == G_CCMUX_SHADE || b_color == G_CCMUX_SHADE || c_color == G_CCMUX_SHADE || d_color == G_CCMUX_SHADE); use_texture = (a_color == G_CCMUX_TEXEL0 || b_color == G_CCMUX_TEXEL0 || c_color == G_CCMUX_TEXEL0 || d_color == G_CCMUX_TEXEL0); if (b_color == d_color) {