#define NOMINMAX #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _LANGUAGE_C #define _LANGUAGE_C #endif #include #include "platform.h" #include "gfx_pc.h" #include "gfx_cc.h" #include "gfx_window_manager_api.h" #include "gfx_rendering_api.h" #include "gfx_screen_config.h" uintptr_t gfxFramebuffer; std::stack currentDir; using namespace std; #define ALIGN(x, a) (((x) + (a - 1)) & ~(a - 1)) #define SUPPORT_CHECK(x) if(!(x)) __builtin_trap() // assert(x) // SCALE_M_N: upscale/downscale M-bit integer to N-bit #define SCALE_5_8(VAL_) (((VAL_)*0xFF) / 0x1F) #define SCALE_8_5(VAL_) ((((VAL_) + 4) * 0x1F) / 0xFF) #define SCALE_4_8(VAL_) ((VAL_)*0x11) #define SCALE_8_4(VAL_) ((VAL_) / 0x11) #define SCALE_3_8(VAL_) ((VAL_)*0x24) #define SCALE_8_3(VAL_) ((VAL_) / 0x24) // SCREEN_WIDTH and SCREEN_HEIGHT are defined in the headerfile #define HALF_SCREEN_WIDTH (SCREEN_WIDTH / 2) #define HALF_SCREEN_HEIGHT (SCREEN_HEIGHT / 2) #define RATIO_X (gfx_current_dimensions.width / (2.0f * HALF_SCREEN_WIDTH)) #define RATIO_Y (gfx_current_dimensions.height / (2.0f * HALF_SCREEN_HEIGHT)) #define MAX_BUFFERED 256 // #define MAX_LIGHTS 2 #define MAX_LIGHTS 32 #define MAX_VERTICES 240 #define MAX_VERTEX_COLORS 64 #define TEXTURE_CACHE_MAX_SIZE 1024 #define C0(pos, width) ((cmd->words.w0 >> (pos)) & ((1U << width) - 1)) #define C1(pos, width) ((cmd->words.w1 >> (pos)) & ((1U << width) - 1)) struct RGBA { uint8_t r, g, b, a; }; struct NormalColor { union { struct { uint8_t r, g, b, a; }; struct { uint8_t x, y, z, w; }; }; }; struct LoadedVertex { float x, y, z, w; float u, v; struct RGBA color; uint8_t fog; uint8_t clip_rej; }; static struct { TextureCacheMap map; list lru; vector free_texture_ids; } gfx_texture_cache; struct ColorCombiner { uint64_t shader_id0; uint32_t shader_id1; bool used_textures[2]; struct ShaderProgram* prg[16]; uint8_t shader_input_mapping[2][7]; }; static map color_combiner_pool; static map::iterator prev_combiner = color_combiner_pool.end(); static uint8_t* tex_upload_buffer = nullptr; static struct RSP { float modelview_matrix_stack[11][4][4]; uint8_t modelview_matrix_stack_size; float MP_matrix[4][4]; float P_matrix[4][4]; Light_t lookat[2]; Light_t current_lights[MAX_LIGHTS + 1]; float current_lights_coeffs[MAX_LIGHTS][3]; float current_lookat_coeffs[2][3]; // lookat_x, lookat_y uint8_t current_num_lights; // includes ambient light bool lights_changed; uint32_t geometry_mode; int16_t fog_mul, fog_offset; uint32_t extra_geometry_mode; float depth_zfar; struct { // U0.16 uint16_t s, t; } texture_scaling_factor; struct LoadedVertex loaded_vertices[MAX_VERTICES + 4]; const struct NormalColor *vertex_colors; //[MAX_VERTEX_COLORS]; } rsp; struct RawTexMetadata { uint16_t width, height; float h_byte_scale = 1, v_pixel_scale = 1; }; static struct RDP { uint16_t palette[256]; const uint8_t* palette_addrs[2]; uint32_t palette_fmt; struct { const uint8_t* addr; uint8_t siz; uint32_t width; uint32_t tex_flags; struct RawTexMetadata raw_tex_metadata; } texture_to_load; struct { const uint8_t* addr; uint32_t orig_size_bytes; uint32_t full_size_bytes; // full_image_line_size_bytes * height uint32_t size_bytes; // line_size_bytes * height uint32_t full_image_line_size_bytes; uint32_t line_size_bytes; uint32_t tex_flags; struct RawTexMetadata raw_tex_metadata; bool masked; bool blended; } loaded_texture[8]; struct { uint8_t fmt; uint8_t siz; uint8_t cms, cmt; uint8_t shifts, shiftt; uint16_t uls, ult, lrs, lrt; // U10.2 uint16_t width, height; // in texels uint16_t tmem; // 0-511, in 64-bit word units uint32_t line_size_bytes; uint8_t palette; uint8_t tmem_index; // 0 or 1 for offset 0 kB or offset 2 kB, respectively } texture_tile[8]; bool textures_changed[2]; uint8_t first_tile_index; uint32_t other_mode_l, other_mode_h; uint64_t combine_mode; bool grayscale; bool tex_lod; uint8_t prim_lod_fraction; struct RGBA env_color, prim_color, fog_color, fill_color, grayscale_color; struct XYWidthHeight viewport, scissor; bool viewport_or_scissor_changed; void* z_buf_address; void* color_image_address; } rdp; static struct RenderingState { uint8_t depth_test_and_mask; // 1: depth test, 2: depth mask float depth_zfar; bool decal_mode; bool alpha_blend; struct XYWidthHeight viewport, scissor; struct ShaderProgram* shader_program; TextureCacheNode* textures[SHADER_MAX_TEXTURES]; } rendering_state; struct GfxDimensions gfx_current_window_dimensions; int32_t gfx_current_window_position_x; int32_t gfx_current_window_position_y; struct GfxDimensions gfx_current_dimensions; static struct GfxDimensions gfx_prev_dimensions; struct XYWidthHeight gfx_current_game_window_viewport; struct XYWidthHeight gfx_current_native_viewport; static bool game_renders_to_framebuffer; static int game_framebuffer; static int game_framebuffer_msaa_resolved; uint32_t gfx_msaa_level = 1; static bool has_drawn_imgui_menu; static bool dropped_frame; static float buf_vbo[MAX_BUFFERED * (32 * 3)]; // 3 vertices in a triangle and 32 floats per vtx static size_t buf_vbo_len; static size_t buf_vbo_num_tris; static struct GfxWindowManagerAPI* gfx_wapi; static struct GfxRenderingAPI* gfx_rapi; static int markerOn; static uintptr_t segmentPointers[16]; struct FBInfo { uint32_t orig_width, orig_height; uint32_t applied_width, applied_height; }; static bool fbActive = 0; static map::iterator active_fb; static map framebuffers; static constexpr float clampf(const float x, const float min, const float max) { return (x < min) ? min : (x > max) ? max : x; } static void gfx_flush(void) { if (buf_vbo_len > 0) { gfx_rapi->draw_triangles(buf_vbo, buf_vbo_len, buf_vbo_num_tris); buf_vbo_len = 0; buf_vbo_num_tris = 0; } } static struct ShaderProgram* gfx_lookup_or_create_shader_program(uint64_t shader_id0, uint32_t shader_id1) { struct ShaderProgram* prg = gfx_rapi->lookup_shader(shader_id0, shader_id1); if (prg == NULL) { gfx_rapi->unload_shader(rendering_state.shader_program); prg = gfx_rapi->create_and_load_new_shader(shader_id0, shader_id1); rendering_state.shader_program = prg; } return prg; } static const char* ccmux_to_string(uint32_t ccmux) { static const char* const tbl[] = { "G_CCMUX_COMBINED", "G_CCMUX_TEXEL0", "G_CCMUX_TEXEL1", "G_CCMUX_PRIMITIVE", "G_CCMUX_SHADE", "G_CCMUX_ENVIRONMENT", "G_CCMUX_1", "G_CCMUX_COMBINED_ALPHA", "G_CCMUX_TEXEL0_ALPHA", "G_CCMUX_TEXEL1_ALPHA", "G_CCMUX_PRIMITIVE_ALPHA", "G_CCMUX_SHADE_ALPHA", "G_CCMUX_ENV_ALPHA", "G_CCMUX_LOD_FRACTION", "G_CCMUX_PRIM_LOD_FRAC", "G_CCMUX_K5", }; if (ccmux > 15) { return "G_CCMUX_0"; } else { return tbl[ccmux]; } } static const char* acmux_to_string(uint32_t acmux) { static const char* const tbl[] = { "G_ACMUX_COMBINED or G_ACMUX_LOD_FRACTION", "G_ACMUX_TEXEL0", "G_ACMUX_TEXEL1", "G_ACMUX_PRIMITIVE", "G_ACMUX_SHADE", "G_ACMUX_ENVIRONMENT", "G_ACMUX_1 or G_ACMUX_PRIM_LOD_FRAC", "G_ACMUX_0", }; return tbl[acmux]; } static void gfx_generate_cc(struct ColorCombiner* comb, const ColorCombinerKey& key) { bool is_2cyc = (key.options & (uint64_t)SHADER_OPT_2CYC) != 0; uint8_t c[2][2][4] = { { { 0 } } }; uint64_t shader_id0 = 0; uint32_t shader_id1 = key.options; uint8_t shader_input_mapping[2][7] = { { 0 } }; bool used_textures[2] = { false, false }; for (int i = 0; i < 2 && (i == 0 || is_2cyc); i++) { uint32_t rgb_a = (key.combine_mode >> (i * 28)) & 0xf; uint32_t rgb_b = (key.combine_mode >> (i * 28 + 4)) & 0xf; uint32_t rgb_c = (key.combine_mode >> (i * 28 + 8)) & 0x1f; uint32_t rgb_d = (key.combine_mode >> (i * 28 + 13)) & 7; uint32_t alpha_a = (key.combine_mode >> (i * 28 + 16)) & 7; uint32_t alpha_b = (key.combine_mode >> (i * 28 + 16 + 3)) & 7; uint32_t alpha_c = (key.combine_mode >> (i * 28 + 16 + 6)) & 7; uint32_t alpha_d = (key.combine_mode >> (i * 28 + 16 + 9)) & 7; if (rgb_a >= 8) { rgb_a = G_CCMUX_0; } if (rgb_b >= 8) { rgb_b = G_CCMUX_0; } if (rgb_c >= 16) { rgb_c = G_CCMUX_0; } if (rgb_d == 7) { rgb_d = G_CCMUX_0; } if (rgb_a == rgb_b || rgb_c == G_CCMUX_0) { // Normalize rgb_a = G_CCMUX_0; rgb_b = G_CCMUX_0; rgb_c = G_CCMUX_0; } if (alpha_a == alpha_b || alpha_c == G_ACMUX_0) { // Normalize alpha_a = G_ACMUX_0; alpha_b = G_ACMUX_0; alpha_c = G_ACMUX_0; } if (i == 1) { if (rgb_a != G_CCMUX_COMBINED && rgb_b != G_CCMUX_COMBINED && rgb_c != G_CCMUX_COMBINED && rgb_d != G_CCMUX_COMBINED) { // First cycle RGB not used, so clear it away c[0][0][0] = c[0][0][1] = c[0][0][2] = c[0][0][3] = G_CCMUX_0; } if (rgb_c != G_CCMUX_COMBINED_ALPHA && alpha_a != G_ACMUX_COMBINED && alpha_b != G_ACMUX_COMBINED && alpha_d != G_ACMUX_COMBINED) { // First cycle ALPHA not used, so clear it away c[0][1][0] = c[0][1][1] = c[0][1][2] = c[0][1][3] = G_ACMUX_0; } } c[i][0][0] = rgb_a; c[i][0][1] = rgb_b; c[i][0][2] = rgb_c; c[i][0][3] = rgb_d; c[i][1][0] = alpha_a; c[i][1][1] = alpha_b; c[i][1][2] = alpha_c; c[i][1][3] = alpha_d; } if (!is_2cyc) { for (int i = 0; i < 2; i++) { for (int k = 0; k < 4; k++) { c[1][i][k] = i == 0 ? G_CCMUX_0 : G_ACMUX_0; } } } { uint8_t input_number[32] = { 0 }; int next_input_number = SHADER_INPUT_1; for (int i = 0; i < 2 && (i == 0 || is_2cyc); i++) { for (int j = 0; j < 4; j++) { uint32_t val = 0; switch (c[i][0][j]) { case G_CCMUX_0: val = SHADER_0; break; case G_CCMUX_1: val = SHADER_1; break; case G_CCMUX_TEXEL0: val = SHADER_TEXEL0; used_textures[0] = true; break; case G_CCMUX_TEXEL1: val = SHADER_TEXEL1; used_textures[1] = true; break; case G_CCMUX_TEXEL0_ALPHA: val = SHADER_TEXEL0A; used_textures[0] = true; break; case G_CCMUX_TEXEL1_ALPHA: val = SHADER_TEXEL1A; used_textures[1] = true; break; case G_CCMUX_NOISE: val = SHADER_NOISE; break; case G_CCMUX_PRIMITIVE: case G_CCMUX_PRIMITIVE_ALPHA: case G_CCMUX_PRIM_LOD_FRAC: case G_CCMUX_SHADE: case G_CCMUX_SHADE_ALPHA: case G_CCMUX_ENVIRONMENT: case G_CCMUX_ENV_ALPHA: case G_CCMUX_LOD_FRACTION: if (input_number[c[i][0][j]] == 0) { shader_input_mapping[0][next_input_number - 1] = c[i][0][j]; input_number[c[i][0][j]] = next_input_number++; } val = input_number[c[i][0][j]]; break; case G_CCMUX_COMBINED: val = SHADER_COMBINED; break; default: fprintf(stderr, "Unsupported ccmux: %d\n", c[i][0][j]); break; } shader_id0 |= (uint64_t)val << (i * 32 + j * 4); } } } { uint8_t input_number[16] = { 0 }; int next_input_number = SHADER_INPUT_1; for (int i = 0; i < 2; i++) { for (int j = 0; j < 4; j++) { uint32_t val = 0; switch (c[i][1][j]) { case G_ACMUX_0: val = SHADER_0; break; case G_ACMUX_TEXEL0: val = SHADER_TEXEL0; used_textures[0] = true; break; case G_ACMUX_TEXEL1: val = SHADER_TEXEL1; used_textures[1] = true; break; case G_ACMUX_LOD_FRACTION: // case G_ACMUX_COMBINED: same numerical value if (j != 2) { val = SHADER_COMBINED; break; } c[i][1][j] = G_CCMUX_LOD_FRACTION; [[fallthrough]]; // for G_ACMUX_LOD_FRACTION case G_ACMUX_1: // case G_ACMUX_PRIM_LOD_FRAC: same numerical value if (j != 2) { val = SHADER_1; break; } [[fallthrough]]; // for G_ACMUX_PRIM_LOD_FRAC case G_ACMUX_PRIMITIVE: case G_ACMUX_SHADE: case G_ACMUX_ENVIRONMENT: if (input_number[c[i][1][j]] == 0) { shader_input_mapping[1][next_input_number - 1] = c[i][1][j]; input_number[c[i][1][j]] = next_input_number++; } val = input_number[c[i][1][j]]; break; } shader_id0 |= (uint64_t)val << (i * 32 + 16 + j * 4); } } } comb->shader_id0 = shader_id0; comb->shader_id1 = shader_id1; comb->used_textures[0] = used_textures[0]; comb->used_textures[1] = used_textures[1]; // comb->prg = gfx_lookup_or_create_shader_program(shader_id0, shader_id1); memcpy(comb->shader_input_mapping, shader_input_mapping, sizeof(shader_input_mapping)); } static struct ColorCombiner* gfx_lookup_or_create_color_combiner(const ColorCombinerKey& key) { if (prev_combiner != color_combiner_pool.end() && prev_combiner->first == key) { return &prev_combiner->second; } prev_combiner = color_combiner_pool.find(key); if (prev_combiner != color_combiner_pool.end()) { return &prev_combiner->second; } gfx_flush(); prev_combiner = color_combiner_pool.insert(make_pair(key, ColorCombiner())).first; gfx_generate_cc(&prev_combiner->second, key); return &prev_combiner->second; } void gfx_texture_cache_clear() { for (const auto& entry : gfx_texture_cache.map) { gfx_texture_cache.free_texture_ids.push_back(entry.second.texture_id); } gfx_texture_cache.map.clear(); gfx_texture_cache.lru.clear(); } static bool gfx_texture_cache_lookup(int i, const TextureCacheKey& key) { TextureCacheMap::iterator it = gfx_texture_cache.map.find(key); TextureCacheNode** n = &rendering_state.textures[i]; if (it != gfx_texture_cache.map.end()) { gfx_rapi->select_texture(i, it->second.texture_id); *n = &*it; gfx_texture_cache.lru.splice(gfx_texture_cache.lru.end(), gfx_texture_cache.lru, it->second.lru_location); // move to back return true; } if (gfx_texture_cache.map.size() >= TEXTURE_CACHE_MAX_SIZE) { // Remove the texture that was least recently used it = gfx_texture_cache.lru.front().it; gfx_texture_cache.free_texture_ids.push_back(it->second.texture_id); gfx_texture_cache.map.erase(it); gfx_texture_cache.lru.pop_front(); } uint32_t texture_id; if (!gfx_texture_cache.free_texture_ids.empty()) { texture_id = gfx_texture_cache.free_texture_ids.back(); gfx_texture_cache.free_texture_ids.pop_back(); } else { texture_id = gfx_rapi->new_texture(); } it = gfx_texture_cache.map.insert(make_pair(key, TextureCacheValue())).first; TextureCacheNode* node = &*it; node->second.texture_id = texture_id; node->second.lru_location = gfx_texture_cache.lru.insert(gfx_texture_cache.lru.end(), { it }); gfx_rapi->select_texture(i, texture_id); gfx_rapi->set_sampler_parameters(i, false, 0, 0); *n = node; return false; } static void gfx_texture_cache_delete(const uint8_t* orig_addr) { while (gfx_texture_cache.map.bucket_count() > 0) { TextureCacheKey key = { orig_addr, { 0 }, 0, 0 }; // bucket index only depends on the address size_t bucket = gfx_texture_cache.map.bucket(key); bool again = false; for (auto it = gfx_texture_cache.map.begin(bucket); it != gfx_texture_cache.map.end(bucket); ++it) { if (it->first.texture_addr == orig_addr) { gfx_texture_cache.lru.erase(it->second.lru_location); gfx_texture_cache.free_texture_ids.push_back(it->second.texture_id); gfx_texture_cache.map.erase(it->first); again = true; break; } } if (!again) { break; } } } static void import_texture_rgba16(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes / 2; i++, dest += 4) { const uint16_t col16 = (addr[2 * i] << 8) | addr[2 * i + 1]; const uint8_t a = col16 & 1; const uint8_t r = col16 >> 11; const uint8_t g = (col16 >> 6) & 0x1f; const uint8_t b = (col16 >> 1) & 0x1f; dest[0] = SCALE_5_8(r); dest[1] = SCALE_5_8(g); dest[2] = SCALE_5_8(b); dest[3] = a ? 255 : 0; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes / 2; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture_rgba32(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint32_t *dest = (uint32_t *)tex_upload_buffer; const uint32_t *src = (const uint32_t *)addr; for (uint32_t i = 0; i < size_bytes; i += 4, ++dest, ++src) { *dest = PD_BE32(*src); } const uint32_t width = rdp.texture_tile[tile].line_size_bytes / 2; const uint32_t height = (size_bytes / 2) / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, addr, width, height); } static void import_texture_ia4(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes * 2; i++, dest += 4) { const uint8_t byte = addr[i / 2]; const uint8_t part = (byte >> (4 - (i % 2) * 4)) & 0xf; const uint8_t intensity = part >> 1; const uint8_t alpha = part & 1; const uint8_t c = SCALE_3_8(intensity); dest[0] = c; dest[1] = c; dest[2] = c; dest[3] = alpha ? 255 : 0; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes * 2; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture_ia8(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes; i++, dest += 4) { const uint8_t intensity = SCALE_4_8(addr[i] >> 4); const uint8_t alpha = SCALE_4_8(addr[i] & 0xf); dest[0] = intensity; dest[1] = intensity; dest[2] = intensity; dest[3] = alpha; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture_ia16(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes / 2; i++, dest += 4) { const uint8_t intensity = addr[2 * i]; const uint8_t alpha = addr[2 * i + 1]; dest[0] = intensity; dest[1] = intensity; dest[2] = intensity; dest[3] = alpha; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes / 2; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture_i4(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes * 2; i++, dest += 4) { const uint8_t byte = addr[i / 2]; const uint8_t part = (byte >> (4 - (i % 2) * 4)) & 0xf; const uint8_t intensity = SCALE_4_8(part); dest[0] = intensity; dest[1] = intensity; dest[2] = intensity; dest[3] = intensity; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes * 2; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture_i8(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); uint8_t *dest = tex_upload_buffer; for (uint32_t i = 0; i < size_bytes; i++, dest += 4) { const uint8_t intensity = addr[i]; dest[0] = intensity; dest[1] = intensity; dest[2] = intensity; dest[3] = intensity; } const uint32_t width = rdp.texture_tile[tile].line_size_bytes; const uint32_t height = size_bytes / rdp.texture_tile[tile].line_size_bytes; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static inline void palette_to_rgba32(const uint16_t palentry, uint8_t *rgba32_buf) { if (rdp.palette_fmt == G_TT_IA16) { const uint8_t intensity = (palentry & 0xff); const uint8_t alpha = palentry >> 8; rgba32_buf[0] = intensity; rgba32_buf[1] = intensity; rgba32_buf[2] = intensity; rgba32_buf[3] = alpha; } else { // assume G_TT_RGBA16 const uint8_t a = palentry & 1; const uint8_t r = palentry >> 11; const uint8_t g = (palentry >> 6) & 0x1f; const uint8_t b = (palentry >> 1) & 0x1f; rgba32_buf[0] = SCALE_5_8(r); rgba32_buf[1] = SCALE_5_8(g); rgba32_buf[2] = SCALE_5_8(b); rgba32_buf[3] = a ? 255 : 0; } } static void import_texture_ci4(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; const uint32_t pal_idx = rdp.texture_tile[tile].palette; // 0-15 const uint16_t* palette = (const uint16_t *)(rdp.palette + pal_idx * 16); // 16 pixel entries, 16 bits each SUPPORT_CHECK(full_image_line_size_bytes == line_size_bytes); for (uint32_t i = 0; i < size_bytes * 2; i++) { const uint8_t byte = addr[i / 2]; const uint8_t idx = (byte >> (4 - (i % 2) * 4)) & 0xf; palette_to_rgba32(palette[idx], tex_upload_buffer +4 * i); } uint32_t result_line_size = rdp.texture_tile[tile].line_size_bytes; if (metadata->h_byte_scale != 1) { result_line_size *= metadata->h_byte_scale; } const uint32_t width = result_line_size * 2; const uint32_t height = size_bytes / result_line_size; gfx_rapi->upload_texture(tex_upload_buffer, width, height); } static void import_texture_ci8(int tile, bool importReplacement) { const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* addr = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr; const uint32_t size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes; const uint32_t full_image_line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes; const uint32_t line_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes; for (uint32_t i = 0, j = 0; i < size_bytes; j += full_image_line_size_bytes - line_size_bytes) { for (uint32_t k = 0; k < line_size_bytes; i++, k++, j++) { const uint8_t idx = addr[j]; palette_to_rgba32(rdp.palette[idx], tex_upload_buffer + 4 * i); } } uint32_t result_line_size = rdp.texture_tile[tile].line_size_bytes; if (metadata->h_byte_scale != 1) { result_line_size *= metadata->h_byte_scale; } const uint32_t width = result_line_size; const uint32_t height = size_bytes / result_line_size; gfx_rapi->upload_texture(tex_upload_buffer, width, height); // DumpTexture(rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].otr_path, rgba32_buf, width, height); } static void import_texture(int i, int tile, bool importReplacement) { const uint8_t fmt = rdp.texture_tile[tile].fmt; const uint8_t siz = rdp.texture_tile[tile].siz; const uint32_t texFlags = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].tex_flags; const uint32_t tmem_index = rdp.texture_tile[tile].tmem_index; const uint8_t palette_index = rdp.texture_tile[tile].palette; if (rdp.tex_lod && tile == rdp.first_tile_index) { // set up miplevel 0 rdp.loaded_texture[tmem_index].line_size_bytes = rdp.texture_tile[tile].line_size_bytes; rdp.loaded_texture[tmem_index].full_image_line_size_bytes = rdp.texture_tile[tile].line_size_bytes; rdp.loaded_texture[tmem_index].full_size_bytes = rdp.loaded_texture[tmem_index].full_image_line_size_bytes * rdp.texture_tile[tile].height; rdp.loaded_texture[tmem_index].size_bytes = rdp.loaded_texture[tmem_index].line_size_bytes * rdp.texture_tile[tile].height; rdp.loaded_texture[tmem_index].orig_size_bytes = rdp.loaded_texture[tmem_index].size_bytes; rdp.loaded_texture[tmem_index].addr = rdp.texture_to_load.addr; } const RawTexMetadata* metadata = &rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata; const uint8_t* orig_addr = rdp.loaded_texture[tmem_index].addr; SUPPORT_CHECK(orig_addr); TextureCacheKey key; if (fmt == G_IM_FMT_CI) { key = { orig_addr, { rdp.palette_addrs[0], rdp.palette_addrs[1] }, fmt, siz, palette_index }; } else { key = { orig_addr, {}, fmt, siz, palette_index }; } if (gfx_texture_cache_lookup(i, key)) { return; } if (fmt == G_IM_FMT_RGBA) { if (siz == G_IM_SIZ_16b) { import_texture_rgba16(tile, importReplacement); } else if (siz == G_IM_SIZ_32b) { import_texture_rgba32(tile, importReplacement); } else { // abort(); // OTRTODO: Sometimes, seemingly randomly, we end up here. Could be a bad dlist, could be // something F3D does not have supported. Further investigation is needed. } } else if (fmt == G_IM_FMT_IA) { if (siz == G_IM_SIZ_4b) { import_texture_ia4(tile, importReplacement); } else if (siz == G_IM_SIZ_8b) { import_texture_ia8(tile, importReplacement); } else if (siz == G_IM_SIZ_16b) { import_texture_ia16(tile, importReplacement); } else { abort(); } } else if (fmt == G_IM_FMT_CI) { if (siz == G_IM_SIZ_4b) { import_texture_ci4(tile, importReplacement); } else if (siz == G_IM_SIZ_8b) { import_texture_ci8(tile, importReplacement); } else { abort(); } } else if (fmt == G_IM_FMT_I) { if (siz == G_IM_SIZ_4b) { import_texture_i4(tile, importReplacement); } else if (siz == G_IM_SIZ_8b) { import_texture_i8(tile, importReplacement); } else { abort(); } } else { abort(); } } static void gfx_normalize_vector(float v[3]) { float s = sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); v[0] /= s; v[1] /= s; v[2] /= s; } static void gfx_transposed_matrix_mul(float res[3], const float a[3], const float b[4][4]) { res[0] = a[0] * b[0][0] + a[1] * b[0][1] + a[2] * b[0][2]; res[1] = a[0] * b[1][0] + a[1] * b[1][1] + a[2] * b[1][2]; res[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2] * b[2][2]; } static void calculate_normal_dir(const Light_t* light, float coeffs[3]) { float light_dir[3] = { light->dir[0] / 127.0f, light->dir[1] / 127.0f, light->dir[2] / 127.0f }; gfx_transposed_matrix_mul(coeffs, light_dir, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1]); gfx_normalize_vector(coeffs); } static void gfx_matrix_mul(float res[4][4], const float a[4][4], const float b[4][4]) { float tmp[4][4]; for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { tmp[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j] + a[i][3] * b[3][j]; } } memcpy(res, tmp, sizeof(tmp)); } static void gfx_sp_matrix(uint8_t parameters, const int32_t* addr) { float matrix[4][4]; #ifndef GBI_FLOATS // Original GBI where fixed point matrices are used for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j += 2) { int32_t int_part = addr[i * 2 + j / 2]; uint32_t frac_part = addr[8 + i * 2 + j / 2]; matrix[i][j] = (int32_t)((int_part & 0xffff0000) | (frac_part >> 16)) / 65536.0f; matrix[i][j + 1] = (int32_t)((int_part << 16) | (frac_part & 0xffff)) / 65536.0f; } } #else // For a modified GBI where fixed point values are replaced with floats memcpy(matrix, addr, sizeof(matrix)); #endif if (parameters & G_MTX_PROJECTION) { if (parameters & G_MTX_LOAD) { memcpy(rsp.P_matrix, matrix, sizeof(matrix)); } else { gfx_matrix_mul(rsp.P_matrix, matrix, rsp.P_matrix); } } else { // G_MTX_MODELVIEW if ((parameters & G_MTX_PUSH) && rsp.modelview_matrix_stack_size < 11) { ++rsp.modelview_matrix_stack_size; memcpy(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 2], sizeof(matrix)); } if (parameters & G_MTX_LOAD) { memcpy(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], matrix, sizeof(matrix)); } else { gfx_matrix_mul(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1]); } rsp.lights_changed = 1; } gfx_matrix_mul(rsp.MP_matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.P_matrix); } static void gfx_sp_pop_matrix(uint32_t count) { while (count--) { if (rsp.modelview_matrix_stack_size > 0) { --rsp.modelview_matrix_stack_size; if (rsp.modelview_matrix_stack_size > 0) { gfx_matrix_mul(rsp.MP_matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.P_matrix); } } } } static float gfx_adjust_x_for_aspect_ratio(float x) { if (fbActive) { return x; } else { return x * (4.0f / 3.0f) / ((float)gfx_current_dimensions.width / (float)gfx_current_dimensions.height); } } static void gfx_adjust_width_height_for_scale(uint32_t& width, uint32_t& height) { width = std::round(width * RATIO_Y); height = std::round(height * RATIO_Y); if (width == 0) { width = 1; } if (height == 0) { height = 1; } } static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx* vertices) { SUPPORT_CHECK(n_vertices <= MAX_VERTICES); // seems like PD likes to use fog color/alpha without enabling G_FOG const bool use_fog = (rsp.geometry_mode & G_FOG) || (rdp.other_mode_l >> 30) == G_BL_CLR_FOG || (rdp.other_mode_l >> 26) == G_BL_A_FOG; for (size_t i = 0; i < n_vertices; i++, dest_index++) { const Vtx* v = &vertices[i]; struct LoadedVertex* d = &rsp.loaded_vertices[dest_index]; float x = v->v[0] * rsp.MP_matrix[0][0] + v->v[1] * rsp.MP_matrix[1][0] + v->v[2] * rsp.MP_matrix[2][0] + rsp.MP_matrix[3][0]; float y = v->v[0] * rsp.MP_matrix[0][1] + v->v[1] * rsp.MP_matrix[1][1] + v->v[2] * rsp.MP_matrix[2][1] + rsp.MP_matrix[3][1]; float z = v->v[0] * rsp.MP_matrix[0][2] + v->v[1] * rsp.MP_matrix[1][2] + v->v[2] * rsp.MP_matrix[2][2] + rsp.MP_matrix[3][2]; float w = v->v[0] * rsp.MP_matrix[0][3] + v->v[1] * rsp.MP_matrix[1][3] + v->v[2] * rsp.MP_matrix[2][3] + rsp.MP_matrix[3][3]; x = gfx_adjust_x_for_aspect_ratio(x); short U = v->s * rsp.texture_scaling_factor.s >> 16; short V = v->t * rsp.texture_scaling_factor.t >> 16; const struct NormalColor *vcn = &rsp.vertex_colors[v->colour >> 2]; if (rsp.geometry_mode & G_LIGHTING) { if (rsp.lights_changed) { for (int i = 0; i < rsp.current_num_lights - 1; i++) { calculate_normal_dir(&rsp.current_lights[i], rsp.current_lights_coeffs[i]); } /*static const Light_t lookat_x = {{0, 0, 0}, 0, {0, 0, 0}, 0, {127, 0, 0}, 0}; static const Light_t lookat_y = {{0, 0, 0}, 0, {0, 0, 0}, 0, {0, 127, 0}, 0};*/ calculate_normal_dir(&rsp.lookat[0], rsp.current_lookat_coeffs[0]); calculate_normal_dir(&rsp.lookat[1], rsp.current_lookat_coeffs[1]); rsp.lights_changed = false; } int r = rsp.current_lights[rsp.current_num_lights - 1].col[0]; int g = rsp.current_lights[rsp.current_num_lights - 1].col[1]; int b = rsp.current_lights[rsp.current_num_lights - 1].col[2]; for (int i = 0; i < rsp.current_num_lights - 1; i++) { float intensity = 0; intensity += vcn->x * rsp.current_lights_coeffs[i][0]; intensity += vcn->y * rsp.current_lights_coeffs[i][1]; intensity += vcn->z * rsp.current_lights_coeffs[i][2]; intensity /= 127.0f; if (intensity > 0.0f) { r += intensity * rsp.current_lights[i].col[0]; g += intensity * rsp.current_lights[i].col[1]; b += intensity * rsp.current_lights[i].col[2]; } } d->color.r = r > 255 ? 255 : r; d->color.g = g > 255 ? 255 : g; d->color.b = b > 255 ? 255 : b; if (rsp.geometry_mode & G_TEXTURE_GEN) { float dotx = 0, doty = 0; dotx += vcn->x * rsp.current_lookat_coeffs[0][0]; dotx += vcn->y * rsp.current_lookat_coeffs[0][1]; dotx += vcn->z * rsp.current_lookat_coeffs[0][2]; doty += vcn->x * rsp.current_lookat_coeffs[1][0]; doty += vcn->y * rsp.current_lookat_coeffs[1][1]; doty += vcn->z * rsp.current_lookat_coeffs[1][2]; dotx /= 127.0f; doty /= 127.0f; dotx = clampf(dotx, -1.0f, 1.0f); doty = clampf(doty, -1.0f, 1.0f); if (rsp.geometry_mode & G_TEXTURE_GEN_LINEAR) { // Not sure exactly what formula we should use to get accurate values /*dotx = (2.906921f * dotx * dotx + 1.36114f) * dotx; doty = (2.906921f * doty * doty + 1.36114f) * doty; dotx = (dotx + 1.0f) / 4.0f; doty = (doty + 1.0f) / 4.0f;*/ dotx = acosf(-dotx) /* M_PI */ / 4.0f; doty = acosf(-doty) /* M_PI */ / 4.0f; } else { dotx = (dotx + 1.0f) / 4.0f; doty = (doty + 1.0f) / 4.0f; } U = (int32_t)(dotx * rsp.texture_scaling_factor.s); V = (int32_t)(doty * rsp.texture_scaling_factor.t); } } else { d->color.r = vcn->r; d->color.g = vcn->g; d->color.b = vcn->b; } d->u = U; d->v = V; // trivial clip rejection d->clip_rej = 0; if (x < -w) { d->clip_rej |= 1; // CLIP_LEFT } if (x > w) { d->clip_rej |= 2; // CLIP_RIGHT } if (y < -w) { d->clip_rej |= 4; // CLIP_BOTTOM } if (y > w) { d->clip_rej |= 8; // CLIP_TOP } // if (z < -w) d->clip_rej |= 16; // CLIP_NEAR if (z > w) { d->clip_rej |= 32; // CLIP_FAR } d->x = x; d->y = y; d->z = z; d->w = w; if (use_fog) { if (fabsf(w) < 0.001f) { // To avoid division by zero w = 0.001f; } float winv = 1.0f / w; if (winv < 0.0f) { winv = std::numeric_limits::max(); } float fog_z = z * winv * rsp.fog_mul + rsp.fog_offset; d->fog = clampf(fog_z, 0.f, 255.f); } else { d->fog = 0; } d->color.a = vcn->a; // can be required for SHADE_ALPHA even if fog is enabled } } static void gfx_sp_modify_vertex(uint16_t vtx_idx, uint8_t where, uint32_t val) { SUPPORT_CHECK(where == G_MWO_POINT_ST); int16_t s = (int16_t)(val >> 16); int16_t t = (int16_t)val; struct LoadedVertex* v = &rsp.loaded_vertices[vtx_idx]; v->u = s; v->v = t; } static void gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t vtx3_idx, bool is_rect) { struct LoadedVertex* v1 = &rsp.loaded_vertices[vtx1_idx]; struct LoadedVertex* v2 = &rsp.loaded_vertices[vtx2_idx]; struct LoadedVertex* v3 = &rsp.loaded_vertices[vtx3_idx]; struct LoadedVertex* v_arr[3] = { v1, v2, v3 }; // if (rand()%2) return; if (v1->clip_rej & v2->clip_rej & v3->clip_rej) { // The whole triangle lies outside the visible area return; } if ((rsp.geometry_mode & G_CULL_BOTH) != 0) { float dx1 = v1->x / (v1->w) - v2->x / (v2->w); float dy1 = v1->y / (v1->w) - v2->y / (v2->w); float dx2 = v3->x / (v3->w) - v2->x / (v2->w); float dy2 = v3->y / (v3->w) - v2->y / (v2->w); float cross = dx1 * dy2 - dy1 * dx2; if ((v1->w < 0) ^ (v2->w < 0) ^ (v3->w < 0)) { // If one vertex lies behind the eye, negating cross will give the correct result. // If all vertices lie behind the eye, the triangle will be rejected anyway. cross = -cross; } // If inverted culling is requested, negate the cross // if ((rsp.extra_geometry_mode & G_EX_INVERT_CULLING) == 1) { // cross = -cross; // } switch (rsp.geometry_mode & G_CULL_BOTH) { case G_CULL_FRONT: if (cross <= 0) { return; } break; case G_CULL_BACK: if (cross >= 0) { return; } break; case G_CULL_BOTH: // Why is this even an option? return; } } bool depth_test = (rsp.geometry_mode & G_ZBUFFER) == G_ZBUFFER; bool depth_mask = (rdp.other_mode_l & Z_UPD) == Z_UPD; uint8_t depth_test_and_mask = (depth_test ? 1 : 0) | (depth_mask ? 2 : 0); if (depth_test_and_mask != rendering_state.depth_test_and_mask) { gfx_flush(); gfx_rapi->set_depth_test_and_mask(depth_test, depth_mask); rendering_state.depth_test_and_mask = depth_test_and_mask; } if (rsp.depth_zfar != rendering_state.depth_zfar) { gfx_flush(); gfx_rapi->set_depth_range(0.0f, rsp.depth_zfar); rendering_state.depth_zfar = rsp.depth_zfar; } bool zmode_decal = (rdp.other_mode_l & ZMODE_DEC) == ZMODE_DEC; if (zmode_decal != rendering_state.decal_mode) { gfx_flush(); gfx_rapi->set_zmode_decal(zmode_decal); rendering_state.decal_mode = zmode_decal; } if (rdp.viewport_or_scissor_changed) { if (memcmp(&rdp.viewport, &rendering_state.viewport, sizeof(rdp.viewport)) != 0) { gfx_flush(); gfx_rapi->set_viewport(rdp.viewport.x, rdp.viewport.y, rdp.viewport.width, rdp.viewport.height); rendering_state.viewport = rdp.viewport; } if (memcmp(&rdp.scissor, &rendering_state.scissor, sizeof(rdp.scissor)) != 0) { gfx_flush(); gfx_rapi->set_scissor(rdp.scissor.x, rdp.scissor.y, rdp.scissor.width, rdp.scissor.height); rendering_state.scissor = rdp.scissor; } rdp.viewport_or_scissor_changed = false; } uint64_t cc_id = rdp.combine_mode; uint64_t cc_options = 0; bool use_alpha = (rdp.other_mode_l & (3 << 20)) == (G_BL_CLR_MEM << 20) && (rdp.other_mode_l & (3 << 16)) == (G_BL_1MA << 16); bool use_fog = ((rdp.other_mode_l >> 30) == G_BL_CLR_FOG) || ((rdp.other_mode_l >> 26) == G_BL_A_FOG); bool texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA; bool use_noise = (rdp.other_mode_l & (3U << G_MDSFT_ALPHACOMPARE)) == G_AC_DITHER; bool use_2cyc = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_2CYCLE; bool alpha_threshold = (rdp.other_mode_l & (3U << G_MDSFT_ALPHACOMPARE)) == G_AC_THRESHOLD; bool invisible = (rdp.other_mode_l & (3 << 24)) == (G_BL_0 << 24) && (rdp.other_mode_l & (3 << 20)) == (G_BL_CLR_MEM << 20); bool use_grayscale = rdp.grayscale; if (texture_edge) { use_alpha = true; } if (use_alpha) { cc_options |= (uint64_t)SHADER_OPT_ALPHA; } if (use_fog) { cc_options |= (uint64_t)SHADER_OPT_FOG; } if (texture_edge) { cc_options |= (uint64_t)SHADER_OPT_TEXTURE_EDGE; } if (use_noise) { cc_options |= (uint64_t)SHADER_OPT_NOISE; } if (use_2cyc) { cc_options |= (uint64_t)SHADER_OPT_2CYC; } if (alpha_threshold) { cc_options |= (uint64_t)SHADER_OPT_ALPHA_THRESHOLD; } if (invisible) { cc_options |= (uint64_t)SHADER_OPT_INVISIBLE; } if (use_grayscale) { cc_options |= (uint64_t)SHADER_OPT_GRAYSCALE; } if (rdp.loaded_texture[0].masked) { cc_options |= (uint64_t)SHADER_OPT_TEXEL0_MASK; } if (rdp.loaded_texture[1].masked) { cc_options |= (uint64_t)SHADER_OPT_TEXEL1_MASK; } if (rdp.loaded_texture[0].blended) { cc_options |= (uint64_t)SHADER_OPT_TEXEL0_BLEND; } if (rdp.loaded_texture[1].blended) { cc_options |= (uint64_t)SHADER_OPT_TEXEL1_BLEND; } // If we are not using alpha, clear the alpha components of the combiner as they have no effect if (!use_alpha) { cc_options &= ~((0xfff << 16) | ((uint64_t)0xfff << 44)); } ColorCombinerKey key; key.combine_mode = rdp.combine_mode; key.options = cc_options; ColorCombiner* comb = gfx_lookup_or_create_color_combiner(key); uint32_t tm = 0; uint32_t tex_width[2], tex_height[2], tex_width2[2], tex_height2[2]; for (int i = 0; i < 2; i++) { // TODO: fix this; for now just ignore smaller mips const uint32_t tile = rdp.first_tile_index + (rdp.tex_lod ? 0 : i); if (comb->used_textures[i]) { if (rdp.textures_changed[i]) { gfx_flush(); import_texture(i, tile, false); rdp.textures_changed[i] = false; } uint8_t cms = rdp.texture_tile[tile].cms; uint8_t cmt = rdp.texture_tile[tile].cmt; uint32_t tex_size_bytes = rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].orig_size_bytes; uint32_t line_size = rdp.texture_tile[tile].line_size_bytes; if (line_size == 0) { line_size = 1; } tex_height[i] = tex_size_bytes / line_size; switch (rdp.texture_tile[tile].siz) { case G_IM_SIZ_4b: line_size <<= 1; break; case G_IM_SIZ_8b: break; case G_IM_SIZ_16b: line_size /= G_IM_SIZ_16b_LINE_BYTES; break; case G_IM_SIZ_32b: line_size /= G_IM_SIZ_32b_LINE_BYTES; // this is 2! tex_height[i] /= 2; break; } tex_width[i] = line_size; tex_width2[i] = (rdp.texture_tile[tile].lrs - rdp.texture_tile[tile].uls + 4) / 4; tex_height2[i] = (rdp.texture_tile[tile].lrt - rdp.texture_tile[tile].ult + 4) / 4; uint32_t tex_width1 = tex_width[i] << (cms & G_TX_MIRROR); uint32_t tex_height1 = tex_height[i] << (cmt & G_TX_MIRROR); if ((cms & G_TX_CLAMP) && ((cms & G_TX_MIRROR) || tex_width1 != tex_width2[i])) { tm |= 1 << 2 * i; cms &= ~G_TX_CLAMP; } if ((cmt & G_TX_CLAMP) && ((cmt & G_TX_MIRROR) || tex_height1 != tex_height2[i])) { tm |= 1 << 2 * i + 1; cmt &= ~G_TX_CLAMP; } bool linear_filter = (rdp.other_mode_h & (3U << G_MDSFT_TEXTFILT)) != G_TF_POINT; if (linear_filter != rendering_state.textures[i]->second.linear_filter || cms != rendering_state.textures[i]->second.cms || cmt != rendering_state.textures[i]->second.cmt) { gfx_flush(); gfx_rapi->set_sampler_parameters(i, linear_filter, cms, cmt); rendering_state.textures[i]->second.linear_filter = linear_filter; rendering_state.textures[i]->second.cms = cms; rendering_state.textures[i]->second.cmt = cmt; } } } struct ShaderProgram* prg = comb->prg[tm]; if (prg == NULL) { comb->prg[tm] = prg = gfx_lookup_or_create_shader_program(comb->shader_id0, comb->shader_id1 | (tm * SHADER_OPT_TEXEL0_CLAMP_S)); } if (prg != rendering_state.shader_program) { gfx_flush(); gfx_rapi->unload_shader(rendering_state.shader_program); gfx_rapi->load_shader(prg); rendering_state.shader_program = prg; } if (use_alpha != rendering_state.alpha_blend) { gfx_flush(); gfx_rapi->set_use_alpha(use_alpha); rendering_state.alpha_blend = use_alpha; } uint8_t num_inputs; bool used_textures[2]; gfx_rapi->shader_get_info(prg, &num_inputs, used_textures); struct GfxClipParameters clip_parameters = gfx_rapi->get_clip_parameters(); for (int i = 0; i < 3; i++) { float z = v_arr[i]->z, w = v_arr[i]->w; if (clip_parameters.z_is_from_0_to_1) { z = (z + w) / 2.0f; } buf_vbo[buf_vbo_len++] = v_arr[i]->x; buf_vbo[buf_vbo_len++] = clip_parameters.invert_y ? -v_arr[i]->y : v_arr[i]->y; buf_vbo[buf_vbo_len++] = z; buf_vbo[buf_vbo_len++] = w; for (int t = 0; t < 2; t++) { if (!used_textures[t]) { continue; } // TODO: fix this; for now just ignore smaller mips const uint32_t tile = (rdp.tex_lod ? 0 : t); float u = v_arr[i]->u / 32.0f; float v = v_arr[i]->v / 32.0f; int shifts = rdp.texture_tile[rdp.first_tile_index + tile].shifts; int shiftt = rdp.texture_tile[rdp.first_tile_index + tile].shiftt; if (shifts != 0) { if (shifts <= 10) { u /= 1 << shifts; } else { u *= 1 << (16 - shifts); } } if (shiftt != 0) { if (shiftt <= 10) { v /= 1 << shiftt; } else { v *= 1 << (16 - shiftt); } } u -= rdp.texture_tile[rdp.first_tile_index + tile].uls / 4.0f; v -= rdp.texture_tile[rdp.first_tile_index + tile].ult / 4.0f; if ((rdp.other_mode_h & (3U << G_MDSFT_TEXTFILT)) != G_TF_POINT) { // Linear filter adds 0.5f to the coordinates if (!is_rect) { u += 0.5f; v += 0.5f; } } buf_vbo[buf_vbo_len++] = u / tex_width[t]; buf_vbo[buf_vbo_len++] = v / tex_height[t]; bool clampS = tm & (1 << 2 * t); bool clampT = tm & (1 << 2 * t + 1); if (clampS) { buf_vbo[buf_vbo_len++] = (tex_width2[t] - 0.5f) / tex_width[t]; } #ifdef __WIIU__ else { buf_vbo[buf_vbo_len++] = 0.0f; } #endif if (clampT) { buf_vbo[buf_vbo_len++] = (tex_height2[t] - 0.5f) / tex_height[t]; } #ifdef __WIIU__ else { buf_vbo[buf_vbo_len++] = 0.0f; } #endif } if (use_fog) { buf_vbo[buf_vbo_len++] = rdp.fog_color.r / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.g / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.b / 255.0f; buf_vbo[buf_vbo_len++] = v_arr[i]->fog / 255.0f; // fog factor } if (use_grayscale) { buf_vbo[buf_vbo_len++] = rdp.grayscale_color.r / 255.0f; buf_vbo[buf_vbo_len++] = rdp.grayscale_color.g / 255.0f; buf_vbo[buf_vbo_len++] = rdp.grayscale_color.b / 255.0f; buf_vbo[buf_vbo_len++] = rdp.grayscale_color.a / 255.0f; // lerp interpolation factor (not alpha) } for (int j = 0; j < num_inputs; j++) { struct RGBA* color = 0; struct RGBA tmp = { 0 }; for (int k = 0; k < 1 + (use_alpha ? 1 : 0); k++) { switch (comb->shader_input_mapping[k][j]) { // Note: CCMUX constants and ACMUX constants used here have same value, which is why this works // (except LOD fraction). case G_CCMUX_PRIMITIVE: color = &rdp.prim_color; break; case G_CCMUX_SHADE: color = &v_arr[i]->color; break; case G_CCMUX_SHADE_ALPHA: tmp.r = tmp.g = tmp.b = v_arr[i]->color.a; color = &tmp; break; case G_CCMUX_ENVIRONMENT: color = &rdp.env_color; break; case G_CCMUX_PRIMITIVE_ALPHA: { tmp.r = tmp.g = tmp.b = rdp.prim_color.a; color = &tmp; break; } case G_CCMUX_ENV_ALPHA: { tmp.r = tmp.g = tmp.b = rdp.env_color.a; color = &tmp; break; } case G_CCMUX_PRIM_LOD_FRAC: { tmp.r = tmp.g = tmp.b = rdp.prim_lod_fraction; color = &tmp; break; } case G_CCMUX_LOD_FRACTION: { if (rdp.other_mode_h & G_TL_LOD) { // "Hack" that works for Bowser - Peach painting float distance_frac = (v1->w - 3000.0f) / 3000.0f; if (distance_frac < 0.0f) { distance_frac = 0.0f; } if (distance_frac > 1.0f) { distance_frac = 1.0f; } tmp.r = tmp.g = tmp.b = tmp.a = distance_frac * 255.0f; } else { tmp.r = tmp.g = tmp.b = tmp.a = 255; } color = &tmp; break; } case G_ACMUX_PRIM_LOD_FRAC: tmp.a = rdp.prim_lod_fraction; color = &tmp; break; default: memset(&tmp, 0, sizeof(tmp)); color = &tmp; break; } if (k == 0) { buf_vbo[buf_vbo_len++] = color->r / 255.0f; buf_vbo[buf_vbo_len++] = color->g / 255.0f; buf_vbo[buf_vbo_len++] = color->b / 255.0f; #ifdef __WIIU__ // padding if (!use_alpha) { buf_vbo[buf_vbo_len++] = 1.0f; } #endif } else { if (use_fog && color == &v_arr[i]->color) { // Shade alpha is 100% for fog buf_vbo[buf_vbo_len++] = 1.0f; } else { buf_vbo[buf_vbo_len++] = color->a / 255.0f; } } } } // struct RGBA *color = &v_arr[i]->color; // buf_vbo[buf_vbo_len++] = color->r / 255.0f; // buf_vbo[buf_vbo_len++] = color->g / 255.0f; // buf_vbo[buf_vbo_len++] = color->b / 255.0f; // buf_vbo[buf_vbo_len++] = color->a / 255.0f; } if (++buf_vbo_num_tris == MAX_BUFFERED) { // if (++buf_vbo_num_tris == 1) { gfx_flush(); } } static inline void gfx_sp_tri4(Gfx *cmd) { // the game issues gSPTri2 for quads, which uses G_TRI4 with 2 empty triangles uint8_t x = C1(0, 4); uint8_t y = C1(4, 4); uint8_t z = C0(0, 4); if(x || y || z) { gfx_sp_tri1(x, y, z, false); } x = C1(8, 4); y = C1(12, 4); z = C0(4, 4); if (x || y || z) { gfx_sp_tri1(x, y, z, false); } x = C1(16, 4); y = C1(20, 4); z = C0(8, 4); if (x || y || z) { gfx_sp_tri1(x, y, z, false); } x = C1(24, 4); y = C1(28, 4); z = C0(12, 4); if (x || y || z) { gfx_sp_tri1(x, y, z, false); } } static void gfx_sp_geometry_mode(uint32_t clear, uint32_t set) { rsp.geometry_mode &= ~clear; rsp.geometry_mode |= set; } static void gfx_sp_extra_geometry_mode(uint32_t clear, uint32_t set) { rsp.extra_geometry_mode &= ~clear; rsp.extra_geometry_mode |= set; } static void gfx_adjust_viewport_or_scissor(XYWidthHeight* area) { if (!fbActive) { area->width *= RATIO_X; area->height *= RATIO_Y; area->x *= RATIO_X; area->y = SCREEN_HEIGHT - area->y; area->y *= RATIO_Y; if (!game_renders_to_framebuffer || (gfx_msaa_level > 1 && gfx_current_dimensions.width == gfx_current_game_window_viewport.width && gfx_current_dimensions.height == gfx_current_game_window_viewport.height)) { area->x += gfx_current_game_window_viewport.x; area->y += gfx_current_window_dimensions.height - (gfx_current_game_window_viewport.y + gfx_current_game_window_viewport.height); } } else { area->width *= RATIO_Y; area->height *= RATIO_Y; area->x *= RATIO_Y; area->y = active_fb->second.orig_height - area->y; area->y *= RATIO_Y; } } static void gfx_calc_and_set_viewport(const Vp_t* viewport) { // 2 bits fraction float width = 2.0f * viewport->vscale[0] / 4.0f; float height = 2.0f * viewport->vscale[1] / 4.0f; float x = (viewport->vtrans[0] / 4.0f) - width / 2.0f; float y = ((viewport->vtrans[1] / 4.0f) + height / 2.0f); rdp.viewport.x = x; rdp.viewport.y = y; rdp.viewport.width = width; rdp.viewport.height = height; gfx_adjust_viewport_or_scissor(&rdp.viewport); rdp.viewport_or_scissor_changed = true; } static void gfx_sp_movemem(uint8_t index, uint8_t offset, const void* data) { switch (index) { case G_MV_VIEWPORT: gfx_calc_and_set_viewport((const Vp_t*)data); break; case G_MV_LOOKATY: case G_MV_LOOKATX: // I think this is only really used for guLookAtReflect index = (index - G_MV_LOOKATY) / 2; rsp.lookat[!index] = ((const Light *)data)->l; rsp.lights_changed = true; break; #ifdef F3DEX_GBI_2 case G_MV_LIGHT: { int lightidx = offset / 24 - 2; if (lightidx >= 0 && lightidx <= MAX_LIGHTS) { // skip lookat // NOTE: reads out of bounds if it is an ambient light memcpy(rsp.current_lights + lightidx, data, sizeof(Light_t)); } else if (lightidx < 0) { memcpy(rsp.lookat + offset / 24, data, sizeof(Light_t)); } break; } #else case G_MV_L0: case G_MV_L1: case G_MV_L2: // NOTE: reads out of bounds if it is an ambient light memcpy(rsp.current_lights + (index - G_MV_L0) / 2, data, sizeof(Light_t)); break; #endif } } static void gfx_sp_moveword(uint8_t index, uint16_t offset, uintptr_t data) { switch (index) { case G_MW_NUMLIGHT: #ifdef F3DEX_GBI_2 rsp.current_num_lights = data / 24 + 1; // add ambient light #else // Ambient light is included // The 31th bit is a flag that lights should be recalculated rsp.current_num_lights = (data - 0x80000000U) / 32; #endif rsp.lights_changed = 1; break; case G_MW_FOG: rsp.fog_mul = (int16_t)(data >> 16); rsp.fog_offset = (int16_t)data; break; case G_MW_SEGMENT: segmentPointers[(offset >> 2) & 0xff] = data; break; case G_MW_PERSPNORM: // the default z range is around [100, 10000] // data is 2 / (znear + zfar) represented as a 0.16 fixed point // => (znear + zfar) = (2 / (data / 65536)) = 131072 / data constexpr float full_range_mul = 1.f / 11000.f; // that's around the biggest value I got when testing if (data == 0) { rsp.depth_zfar = 1.f; } else { // sometimes this will overshoot 1 but GL can handle that rsp.depth_zfar =((131072.f * full_range_mul) / (float)data); } break; } } static void gfx_sp_texture(uint16_t sc, uint16_t tc, uint8_t level, uint8_t tile, uint8_t on) { rsp.texture_scaling_factor.s = sc; rsp.texture_scaling_factor.t = tc; if (rdp.first_tile_index != tile) { rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } rdp.first_tile_index = tile; } static void gfx_dp_set_scissor(uint32_t mode, uint32_t ulx, uint32_t uly, uint32_t lrx, uint32_t lry) { float x = ulx / 4.0f; float y = lry / 4.0f; float width = (lrx - ulx) / 4.0f; float height = (lry - uly) / 4.0f; rdp.scissor.x = x; rdp.scissor.y = y; rdp.scissor.width = width; rdp.scissor.height = height; gfx_adjust_viewport_or_scissor(&rdp.scissor); rdp.viewport_or_scissor_changed = true; } static void gfx_dp_set_texture_image(uint32_t format, uint32_t size, uint32_t width, const char* texPath, uint32_t texFlags, RawTexMetadata rawTexMetdata, const void* addr) { rdp.texture_to_load.addr = (const uint8_t*)addr; rdp.texture_to_load.siz = size; rdp.texture_to_load.width = width; rdp.texture_to_load.tex_flags = texFlags; rdp.texture_to_load.raw_tex_metadata = rawTexMetdata; } static void gfx_dp_set_tile(uint8_t fmt, uint32_t siz, uint32_t line, uint32_t tmem, uint8_t tile, uint32_t palette, uint32_t cmt, uint32_t maskt, uint32_t shiftt, uint32_t cms, uint32_t masks, uint32_t shifts) { // OTRTODO: // SUPPORT_CHECK(tmem == 0 || tmem == 256); static uint32_t max_tmem = 0; if (cms == G_TX_WRAP && masks == G_TX_NOMASK) { cms = G_TX_CLAMP; } if (cmt == G_TX_WRAP && maskt == G_TX_NOMASK) { cmt = G_TX_CLAMP; } rdp.texture_tile[tile].palette = palette; // palette should set upper 4 bits of color index in 4b mode rdp.texture_tile[tile].fmt = fmt; rdp.texture_tile[tile].siz = siz; rdp.texture_tile[tile].cms = cms; rdp.texture_tile[tile].cmt = cmt; rdp.texture_tile[tile].shifts = shifts; rdp.texture_tile[tile].shiftt = shiftt; rdp.texture_tile[tile].line_size_bytes = line * 8; if (rdp.texture_tile[tile].line_size_bytes > 15000) { int bp = 0; } // assume one texture is loaded at address 0 and another texture at any other address rdp.texture_tile[tile].tmem = tmem; rdp.texture_tile[tile].tmem_index = (rdp.tex_lod && tile < 6) ? tile : 6 + (tmem > 0); rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } static void gfx_dp_set_tile_size(uint8_t tile, uint16_t uls, uint16_t ult, uint16_t lrs, uint16_t lrt) { rdp.texture_tile[tile].uls = uls; rdp.texture_tile[tile].ult = ult; rdp.texture_tile[tile].lrs = lrs; rdp.texture_tile[tile].lrt = lrt; rdp.texture_tile[tile].width = (lrs - uls + 4) / 4; rdp.texture_tile[tile].height = (lrt - ult + 4) / 4; rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } static void gfx_dp_load_tlut(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t lrs, uint32_t lrt) { // SUPPORT_CHECK(tile == G_TX_LOADTILE); SUPPORT_CHECK(rdp.texture_to_load.siz == G_IM_SIZ_16b); SUPPORT_CHECK(rdp.texture_tile[tile].tmem >= 256); rdp.texture_tile[tile].uls = uls; rdp.texture_tile[tile].ult = ult; rdp.texture_tile[tile].lrs = lrs; rdp.texture_tile[tile].lrt = lrt; const uint32_t width = (lrs - uls + 1); const uint32_t height = (lrt - ult + 1); const uint32_t pitch = rdp.texture_to_load.width + 1; const uint32_t count = width * height; const uint16_t *base = (const uint16_t *)rdp.texture_to_load.addr + pitch * ult + uls; if (rdp.texture_tile[tile].tmem == 256) { rdp.palette_addrs[0] = (const uint8_t *)base; if (count >= 256) { rdp.palette_addrs[1] = (const uint8_t *)(base + 128); } } else { rdp.palette_addrs[1] = (const uint8_t *)base; } const uint32_t palofs = rdp.texture_tile[tile].tmem - 256; SUPPORT_CHECK(palofs + count <= 256); const uint16_t *src = base; uint16_t *dst = rdp.palette + palofs; for (uint32_t i = 0; i < count; ++i) { *dst++ = PD_BE16(*src++); } } static void gfx_dp_load_block(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t lrs, uint32_t dxt) { // SUPPORT_CHECK(tile == G_TX_LOADTILE); SUPPORT_CHECK(uls == 0); SUPPORT_CHECK(ult == 0); // The lrs field rather seems to be number of pixels to load uint32_t orig_size_bytes = (lrs + 1) << rdp.texture_to_load.siz >> 1; uint32_t size_bytes = orig_size_bytes; if (rdp.texture_to_load.raw_tex_metadata.h_byte_scale != 1 || rdp.texture_to_load.raw_tex_metadata.v_pixel_scale != 1) { size_bytes *= rdp.texture_to_load.raw_tex_metadata.h_byte_scale; size_bytes *= rdp.texture_to_load.raw_tex_metadata.v_pixel_scale; } rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].orig_size_bytes = orig_size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes = size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_size_bytes = size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes = size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes = size_bytes; // assert(size_bytes <= 4096 && "bug: too big texture"); rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].tex_flags = rdp.texture_to_load.tex_flags; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata = rdp.texture_to_load.raw_tex_metadata; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr = rdp.texture_to_load.addr; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].masked = false; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].blended = false; if (rdp.tex_lod) { rdp.textures_changed[0] = rdp.textures_changed[1] = true; } else { rdp.textures_changed[rdp.texture_tile[tile].tmem_index > 6] = true; } } static void gfx_dp_load_tile(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t lrs, uint32_t lrt) { SUPPORT_CHECK(tile == G_TX_LOADTILE); uint32_t offset_x = uls >> G_TEXTURE_IMAGE_FRAC; uint32_t offset_y = ult >> G_TEXTURE_IMAGE_FRAC; uint32_t tile_width = ((lrs - uls) >> G_TEXTURE_IMAGE_FRAC) + 1; uint32_t tile_height = ((lrt - ult) >> G_TEXTURE_IMAGE_FRAC) + 1; uint32_t full_image_width = rdp.texture_to_load.width + 1; uint32_t offset_x_in_bytes = offset_x << rdp.texture_to_load.siz >> 1; uint32_t tile_line_size_bytes = tile_width << rdp.texture_to_load.siz >> 1; uint32_t full_image_line_size_bytes = full_image_width << rdp.texture_to_load.siz >> 1; uint32_t orig_size_bytes = tile_line_size_bytes * tile_height; uint32_t size_bytes = orig_size_bytes; uint32_t start_offset_bytes = full_image_line_size_bytes * offset_y + offset_x_in_bytes; float h_byte_scale = rdp.texture_to_load.raw_tex_metadata.h_byte_scale; float v_pixel_scale = rdp.texture_to_load.raw_tex_metadata.v_pixel_scale; if (h_byte_scale != 1 || v_pixel_scale != 1) { start_offset_bytes = h_byte_scale * (v_pixel_scale * offset_y * full_image_line_size_bytes + offset_x_in_bytes); size_bytes *= h_byte_scale * v_pixel_scale; full_image_line_size_bytes *= h_byte_scale; tile_line_size_bytes *= h_byte_scale; } rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].orig_size_bytes = orig_size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].size_bytes = size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_size_bytes = full_image_line_size_bytes * tile_height; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].full_image_line_size_bytes = full_image_line_size_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].line_size_bytes = tile_line_size_bytes; // assert(size_bytes <= 4096 && "bug: too big texture"); rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].tex_flags = rdp.texture_to_load.tex_flags; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].raw_tex_metadata = rdp.texture_to_load.raw_tex_metadata; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].addr = rdp.texture_to_load.addr + start_offset_bytes; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].masked = false; rdp.loaded_texture[rdp.texture_tile[tile].tmem_index].blended = false; rdp.texture_tile[tile].uls = uls; rdp.texture_tile[tile].ult = ult; rdp.texture_tile[tile].lrs = lrs; rdp.texture_tile[tile].lrt = lrt; rdp.texture_tile[tile].width = ((lrs - uls) >> G_TEXTURE_IMAGE_FRAC) + 1; rdp.texture_tile[tile].height = ((lrt - ult) >> G_TEXTURE_IMAGE_FRAC) + 1; if (rdp.tex_lod) { rdp.textures_changed[0] = rdp.textures_changed[1] = true; } else { rdp.textures_changed[rdp.texture_tile[tile].tmem_index > 6] = true; } } /*static uint8_t color_comb_component(uint32_t v) { switch (v) { case G_CCMUX_TEXEL0: return CC_TEXEL0; case G_CCMUX_TEXEL1: return CC_TEXEL1; case G_CCMUX_PRIMITIVE: return CC_PRIM; case G_CCMUX_SHADE: return CC_SHADE; case G_CCMUX_ENVIRONMENT: return CC_ENV; case G_CCMUX_TEXEL0_ALPHA: return CC_TEXEL0A; case G_CCMUX_LOD_FRACTION: return CC_LOD; default: return CC_0; } } static inline uint32_t color_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { return color_comb_component(a) | (color_comb_component(b) << 3) | (color_comb_component(c) << 6) | (color_comb_component(d) << 9); } static void gfx_dp_set_combine_mode(uint32_t rgb, uint32_t alpha) { rdp.combine_mode = rgb | (alpha << 12); }*/ static void gfx_dp_set_combine_mode(uint32_t rgb, uint32_t alpha, uint32_t rgb_cyc2, uint32_t alpha_cyc2) { rdp.combine_mode = rgb | (alpha << 16) | ((uint64_t)rgb_cyc2 << 28) | ((uint64_t)alpha_cyc2 << 44); } static inline uint32_t color_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { return (a & 0xf) | ((b & 0xf) << 4) | ((c & 0x1f) << 8) | ((d & 7) << 13); } static inline uint32_t alpha_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { return (a & 7) | ((b & 7) << 3) | ((c & 7) << 6) | ((d & 7) << 9); } static void gfx_dp_set_grayscale_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.grayscale_color.r = r; rdp.grayscale_color.g = g; rdp.grayscale_color.b = b; rdp.grayscale_color.a = a; } static void gfx_dp_set_env_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.env_color.r = r; rdp.env_color.g = g; rdp.env_color.b = b; rdp.env_color.a = a; } static void gfx_dp_set_prim_color(uint8_t m, uint8_t l, uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.prim_lod_fraction = l; rdp.prim_color.r = r; rdp.prim_color.g = g; rdp.prim_color.b = b; rdp.prim_color.a = a; rdp.fill_color.r = r; rdp.fill_color.g = g; rdp.fill_color.b = b; rdp.fill_color.a = a; } static void gfx_dp_set_fog_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.fog_color.r = r; rdp.fog_color.g = g; rdp.fog_color.b = b; rdp.fog_color.a = a; } static void gfx_dp_set_fill_color(uint32_t packed_color) { uint16_t col16 = (uint16_t)packed_color; uint32_t r = col16 >> 11; uint32_t g = (col16 >> 6) & 0x1f; uint32_t b = (col16 >> 1) & 0x1f; uint32_t a = col16 & 1; rdp.fill_color.r = SCALE_5_8(r); rdp.fill_color.g = SCALE_5_8(g); rdp.fill_color.b = SCALE_5_8(b); rdp.fill_color.a = a * 255; } static void gfx_draw_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) { uint32_t saved_other_mode_h = rdp.other_mode_h; uint32_t cycle_type = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)); if (cycle_type == G_CYC_COPY) { rdp.other_mode_h = (rdp.other_mode_h & ~(3U << G_MDSFT_TEXTFILT)) | G_TF_POINT; } // U10.2 coordinates float ulxf = ulx; float ulyf = uly; float lrxf = lrx; float lryf = lry; ulxf = ulxf / (4.0f * HALF_SCREEN_WIDTH) - 1.0f; ulyf = -(ulyf / (4.0f * HALF_SCREEN_HEIGHT)) + 1.0f; lrxf = lrxf / (4.0f * HALF_SCREEN_WIDTH) - 1.0f; lryf = -(lryf / (4.0f * HALF_SCREEN_HEIGHT)) + 1.0f; ulxf = gfx_adjust_x_for_aspect_ratio(ulxf); lrxf = gfx_adjust_x_for_aspect_ratio(lrxf); struct LoadedVertex* ul = &rsp.loaded_vertices[MAX_VERTICES + 0]; struct LoadedVertex* ll = &rsp.loaded_vertices[MAX_VERTICES + 1]; struct LoadedVertex* lr = &rsp.loaded_vertices[MAX_VERTICES + 2]; struct LoadedVertex* ur = &rsp.loaded_vertices[MAX_VERTICES + 3]; ul->x = ulxf; ul->y = ulyf; ul->z = -1.0f; ul->w = 1.0f; ll->x = ulxf; ll->y = lryf; ll->z = -1.0f; ll->w = 1.0f; lr->x = lrxf; lr->y = lryf; lr->z = -1.0f; lr->w = 1.0f; ur->x = lrxf; ur->y = ulyf; ur->z = -1.0f; ur->w = 1.0f; // The coordinates for texture rectangle shall bypass the viewport setting struct XYWidthHeight default_viewport = { 0, SCREEN_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT }; struct XYWidthHeight viewport_saved = rdp.viewport; uint32_t geometry_mode_saved = rsp.geometry_mode; gfx_adjust_viewport_or_scissor(&default_viewport); rdp.viewport = default_viewport; rdp.viewport_or_scissor_changed = true; rsp.geometry_mode = 0; gfx_sp_tri1(MAX_VERTICES + 0, MAX_VERTICES + 1, MAX_VERTICES + 3, true); gfx_sp_tri1(MAX_VERTICES + 1, MAX_VERTICES + 2, MAX_VERTICES + 3, true); rsp.geometry_mode = geometry_mode_saved; rdp.viewport = viewport_saved; rdp.viewport_or_scissor_changed = true; if (cycle_type == G_CYC_COPY) { rdp.other_mode_h = saved_other_mode_h; } } static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry, uint8_t tile, int16_t uls, int16_t ult, int16_t dsdx, int16_t dtdy, bool flip) { // printf("render %d at %d\n", tile, lrx); uint64_t saved_combine_mode = rdp.combine_mode; if ((rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_COPY) { // Per RDP Command Summary Set Tile's shift s and this dsdx should be set to 4 texels // Divide by 4 to get 1 instead dsdx >>= 2; // Color combiner is turned off in copy mode gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_TEXEL0), alpha_comb(0, 0, 0, G_ACMUX_TEXEL0), 0, 0); // Per documentation one extra pixel is added in this modes to each edge lrx += 1 << 2; lry += 1 << 2; } // uls and ult are S10.5 // dsdx and dtdy are S5.10 // lrx, lry, ulx, uly are U10.2 // lrs, lrt are S10.5 if (flip) { dsdx = -dsdx; dtdy = -dtdy; } int16_t width = !flip ? lrx - ulx : lry - uly; int16_t height = !flip ? lry - uly : lrx - ulx; float lrs = ((uls << 7) + dsdx * width) >> 7; float lrt = ((ult << 7) + dtdy * height) >> 7; struct LoadedVertex* ul = &rsp.loaded_vertices[MAX_VERTICES + 0]; struct LoadedVertex* ll = &rsp.loaded_vertices[MAX_VERTICES + 1]; struct LoadedVertex* lr = &rsp.loaded_vertices[MAX_VERTICES + 2]; struct LoadedVertex* ur = &rsp.loaded_vertices[MAX_VERTICES + 3]; ul->u = uls; ul->v = ult; lr->u = lrs; lr->v = lrt; if (!flip) { ll->u = uls; ll->v = lrt; ur->u = lrs; ur->v = ult; } else { ll->u = lrs; ll->v = ult; ur->u = uls; ur->v = lrt; } uint8_t saved_tile = rdp.first_tile_index; if (saved_tile != tile) { rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } rdp.first_tile_index = tile; gfx_draw_rectangle(ulx, uly, lrx, lry); if (saved_tile != tile) { rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } rdp.first_tile_index = saved_tile; rdp.combine_mode = saved_combine_mode; } static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) { if (rdp.color_image_address == rdp.z_buf_address) { // Don't clear Z buffer here since we already did it with glClear return; } uint32_t mode = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)); // OTRTODO: This is a bit of a hack for widescreen screen fades, but it'll work for now... if (ulx == 0 && uly == 0 && lrx == (319 * 4) && lry == (239 * 4)) { ulx = -1024; uly = -1024; lrx = 2048; lry = 2048; } if (mode == G_CYC_COPY || mode == G_CYC_FILL) { // Per documentation one extra pixel is added in this modes to each edge lrx += 1 << 2; lry += 1 << 2; } for (int i = MAX_VERTICES; i < MAX_VERTICES + 4; i++) { struct LoadedVertex* v = &rsp.loaded_vertices[i]; v->color = rdp.fill_color; } uint64_t saved_combine_mode = rdp.combine_mode; if (mode == G_CYC_FILL) { gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_SHADE), alpha_comb(0, 0, 0, G_ACMUX_SHADE), 0, 0); } gfx_draw_rectangle(ulx, uly, lrx, lry); rdp.combine_mode = saved_combine_mode; } static void gfx_dp_set_z_image(void* z_buf_address) { rdp.z_buf_address = z_buf_address; } static void gfx_dp_set_color_image(uint32_t format, uint32_t size, uint32_t width, void* address) { rdp.color_image_address = address; } static void gfx_sp_set_other_mode(uint32_t shift, uint32_t num_bits, uint64_t mode) { uint64_t mask = (((uint64_t)1 << num_bits) - 1) << shift; uint64_t om = rdp.other_mode_l | ((uint64_t)rdp.other_mode_h << 32); om = (om & ~mask) | mode; rdp.other_mode_l = (uint32_t)om; rdp.other_mode_h = (uint32_t)(om >> 32); rdp.palette_fmt = rdp.other_mode_h & (3U << G_MDSFT_TEXTLUT); rdp.tex_lod = (rdp.other_mode_h & G_TL_LOD) != 0; } static void gfx_sp_set_vertex_colors(uint32_t count, const struct NormalColor *vcn) { // common sense dictates that we should copy the colors as the command is supposed to do, // but it actually doesn't seem to matter // SUPPORT_CHECK(count <= sizeof(rsp.vertex_colors) / sizeof(rsp.vertex_colors[0])); // for (uint32_t i = 0; i < count; ++i) { // rsp.vertex_colors[i] = vcn[i]; // } rsp.vertex_colors = vcn; } static void gfx_dp_set_other_mode(uint32_t h, uint32_t l) { rdp.other_mode_h = h; rdp.other_mode_l = l; } static inline void *seg_addr(uintptr_t w1) { // all segmented addresses have the least significant bit set if (w1 & 1) { // seg 0 is reserved and doesn't count here const uintptr_t seg = (w1 & 0x0f000000) >> 24; if (seg && segmentPointers[seg]) { const uintptr_t addr = (w1 & 0x00fffffe); return (void *)(segmentPointers[seg] + addr); } } return (void *)w1; } uintptr_t clearMtx; static void gfx_run_dl(Gfx* cmd) { // puts("dl"); int dummy = 0; char dlName[128]; const char* fileName; Gfx* dListStart = cmd; uint64_t ourHash = -1; for (;;) { uint32_t opcode = cmd->words.w0 >> 24; // gfx_print_cmd(cmd); switch (opcode) { // RSP commands: case G_NOOP: break; case G_MTX: { #ifdef F3DEX_GBI_2 gfx_sp_matrix(C0(0, 8) ^ G_MTX_PUSH, (const int32_t*)seg_addr(cmd->words.w1)); #else gfx_sp_matrix(C0(16, 8), (const int32_t*)seg_addr(cmd->words.w1)); #endif break; } case (uint8_t)G_POPMTX: #ifdef F3DEX_GBI_2 gfx_sp_pop_matrix(cmd->words.w1 / 64); #else gfx_sp_pop_matrix(1); #endif break; case G_MOVEMEM: #ifdef F3DEX_GBI_2 gfx_sp_movemem(C0(0, 8), C0(8, 8) * 8, seg_addr(cmd->words.w1)); #else gfx_sp_movemem(C0(16, 8), 0, seg_addr(cmd->words.w1)); #endif break; case (uint8_t)G_MOVEWORD: #ifdef F3DEX_GBI_2 gfx_sp_moveword(C0(16, 8), C0(0, 16), cmd->words.w1); #else gfx_sp_moveword(C0(0, 8), C0(8, 16), cmd->words.w1); #endif break; case (uint8_t)G_TEXTURE: #ifdef F3DEX_GBI_2 gfx_sp_texture(C1(16, 16), C1(0, 16), C0(11, 3), C0(8, 3), C0(1, 7)); #else gfx_sp_texture(C1(16, 16), C1(0, 16), C0(11, 3), C0(8, 3), C0(0, 8)); #endif break; case G_VTX: #ifdef F3DEX_GBI_2 gfx_sp_vertex(C0(12, 8), C0(1, 7) - C0(12, 8), (const Vtx*)seg_addr(cmd->words.w1)); #elif defined(F3DEX_GBI) || defined(F3DLP_GBI) gfx_sp_vertex(C0(10, 6), C0(16, 8) / 2, (const Vtx*)seg_addr(cmd->words.w1)); #else gfx_sp_vertex(C0(0, 16), C0(16, 4), (const Vtx*)seg_addr(cmd->words.w1)); #endif break; case G_DL: if (C0(16, 1) == 0) { // Push return address Gfx* subGFX = (Gfx*)seg_addr(cmd->words.w1); if (subGFX != nullptr) { gfx_run_dl(subGFX); } } else { cmd = (Gfx*)seg_addr(cmd->words.w1); --cmd; // increase after break } break; case (uint8_t)G_ENDDL: // if (markerOn) // printf("END DL ON MARKER\n"); markerOn = false; return; #ifdef F3DEX_GBI_2 case G_GEOMETRYMODE: gfx_sp_geometry_mode(~C0(0, 24), cmd->words.w1); break; #else case (uint8_t)G_SETGEOMETRYMODE: gfx_sp_geometry_mode(0, cmd->words.w1); break; case (uint8_t)G_CLEARGEOMETRYMODE: gfx_sp_geometry_mode(cmd->words.w1, 0); break; #endif case (uint8_t)G_TRI1: #ifdef F3DEX_GBI_2 gfx_sp_tri1(C0(16, 8) / 2, C0(8, 8) / 2, C0(0, 8) / 2, false); #elif defined(F3DEX_GBI) || defined(F3DLP_GBI) gfx_sp_tri1(C1(16, 8) / 2, C1(8, 8) / 2, C1(0, 8) / 2, false); #else gfx_sp_tri1(C1(16, 8) / 10, C1(8, 8) / 10, C1(0, 8) / 10, false); #endif break; #ifdef F3DEX_GBI_2 case G_QUAD: { [[fallthrough]]; } #endif #if defined(F3DEX_GBI) || defined(F3DLP_GBI) case (uint8_t)G_TRI2: gfx_sp_tri1(C0(16, 8) / 2, C0(8, 8) / 2, C0(0, 8) / 2, false); gfx_sp_tri1(C1(16, 8) / 2, C1(8, 8) / 2, C1(0, 8) / 2, false); break; #endif case (uint8_t)G_TRI4: gfx_sp_tri4(cmd); break; case (uint8_t)G_SETOTHERMODE_L: #ifdef F3DEX_GBI_2 gfx_sp_set_other_mode(31 - C0(8, 8) - C0(0, 8), C0(0, 8) + 1, cmd->words.w1); #else gfx_sp_set_other_mode(C0(8, 8), C0(0, 8), cmd->words.w1); #endif break; case (uint8_t)G_SETOTHERMODE_H: #ifdef F3DEX_GBI_2 gfx_sp_set_other_mode(63 - C0(8, 8) - C0(0, 8), C0(0, 8) + 1, (uint64_t)cmd->words.w1 << 32); #else gfx_sp_set_other_mode(C0(8, 8) + 32, C0(0, 8), (uint64_t)cmd->words.w1 << 32); #endif break; case G_COL: gfx_sp_set_vertex_colors(C0(0, 16) / 4, (struct NormalColor *)seg_addr(cmd->words.w1)); break; // RDP Commands: case G_SETTIMG: { uintptr_t i = (uintptr_t)seg_addr(cmd->words.w1); char* imgData = (char*)i; uint32_t texFlags = 0; RawTexMetadata rawTexMetdata = {}; gfx_dp_set_texture_image(C0(21, 3), C0(19, 2), C0(0, 10), imgData, texFlags, rawTexMetdata, (void*)i); break; } case G_LOADBLOCK: gfx_dp_load_block(C1(24, 3), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_LOADTILE: gfx_dp_load_tile(C1(24, 3), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_SETTILE: gfx_dp_set_tile(C0(21, 3), C0(19, 2), C0(9, 9), C0(0, 9), C1(24, 3), C1(20, 4), C1(18, 2), C1(14, 4), C1(10, 4), C1(8, 2), C1(4, 4), C1(0, 4)); break; case G_SETTILESIZE: gfx_dp_set_tile_size(C1(24, 3), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_LOADTLUT: gfx_dp_load_tlut(C1(24, 3), C0(14, 10), C0(2, 10), C1(14, 10), C1(2, 10)); break; case G_SETENVCOLOR: gfx_dp_set_env_color(C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETPRIMCOLOR: gfx_dp_set_prim_color(C0(8, 8), C0(0, 8), C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETFOGCOLOR: gfx_dp_set_fog_color(C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETFILLCOLOR: gfx_dp_set_fill_color(cmd->words.w1); break; case G_SETCOMBINE: gfx_dp_set_combine_mode(color_comb(C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3)), alpha_comb(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3)), color_comb(C0(5, 4), C1(24, 4), C0(0, 5), C1(6, 3)), alpha_comb(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3))); break; // G_SETPRIMCOLOR, G_CCMUX_PRIMITIVE, G_ACMUX_PRIMITIVE, is used by Goddard // G_CCMUX_TEXEL1, LOD_FRACTION is used in Bowser room 1 case G_TEXRECT: case G_TEXRECTFLIP: { int32_t lrx, lry, tile, ulx, uly; uint32_t uls, ult, dsdx, dtdy; #ifdef F3DEX_GBI_2E lrx = (int32_t)(C0(0, 24) << 8) >> 8; lry = (int32_t)(C1(0, 24) << 8) >> 8; tile = C1(24, 3); ++cmd; ulx = (int32_t)(C0(0, 24) << 8) >> 8; uly = (int32_t)(C1(0, 24) << 8) >> 8; ++cmd; uls = C0(16, 16); ult = C0(0, 16); dsdx = C1(16, 16); dtdy = C1(0, 16); #else lrx = C0(12, 12); lry = C0(0, 12); tile = C1(24, 3); ulx = C1(12, 12); uly = C1(0, 12); ++cmd; uls = C1(16, 16); ult = C1(0, 16); ++cmd; dsdx = C1(16, 16); dtdy = C1(0, 16); #endif gfx_dp_texture_rectangle(ulx, uly, lrx, lry, tile, uls, ult, dsdx, dtdy, opcode == G_TEXRECTFLIP); break; } case G_FILLRECT: #ifdef F3DEX_GBI_2E { int32_t lrx, lry, ulx, uly; lrx = (int32_t)(C0(0, 24) << 8) >> 8; lry = (int32_t)(C1(0, 24) << 8) >> 8; ++cmd; ulx = (int32_t)(C0(0, 24) << 8) >> 8; uly = (int32_t)(C1(0, 24) << 8) >> 8; gfx_dp_fill_rectangle(ulx, uly, lrx, lry); break; } #else gfx_dp_fill_rectangle(C1(12, 12), C1(0, 12), C0(12, 12), C0(0, 12)); break; #endif case G_SETSCISSOR: gfx_dp_set_scissor(C1(24, 2), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_SETZIMG: gfx_dp_set_z_image(seg_addr(cmd->words.w1)); break; case G_SETCIMG: gfx_dp_set_color_image(C0(21, 3), C0(19, 2), C0(0, 11), seg_addr(cmd->words.w1)); break; case G_RDPSETOTHERMODE: gfx_dp_set_other_mode(C0(0, 24), cmd->words.w1); break; case G_RDPPIPESYNC: case G_RDPFULLSYNC: case G_RDPLOADSYNC: case G_RDPTILESYNC: break; default: fprintf(stderr, "gfx: unknown opcode %d, words %08x %08x\n", (s8)opcode, cmd->words.w0, cmd->words.w1); __builtin_trap(); break; } ++cmd; } } static void gfx_sp_reset() { rsp.modelview_matrix_stack_size = 1; rsp.current_num_lights = 2; rsp.lights_changed = true; } extern "C" void gfx_get_dimensions(uint32_t* width, uint32_t* height, int32_t* posX, int32_t* posY) { gfx_wapi->get_dimensions(width, height, posX, posY); } extern "C" void gfx_init(struct GfxWindowManagerAPI* wapi, struct GfxRenderingAPI* rapi, const char* game_name, bool start_in_fullscreen, uint32_t width, uint32_t height, uint32_t posX, uint32_t posY) { gfx_wapi = wapi; gfx_rapi = rapi; gfx_wapi->init(game_name, rapi->get_name(), start_in_fullscreen, width, height, posX, posY); gfx_rapi->init(); gfx_rapi->update_framebuffer_parameters(0, width, height, 1, false, true, true, true); gfx_current_dimensions.internal_mul = 1; gfx_msaa_level = 1; gfx_current_dimensions.width = width; gfx_current_dimensions.height = height; game_framebuffer = gfx_rapi->create_framebuffer(); game_framebuffer_msaa_resolved = gfx_rapi->create_framebuffer(); for (int i = 0; i < 16; i++) { segmentPointers[i] = 0; } if (tex_upload_buffer == nullptr) { // We cap texture max to 8k, because why would you need more? int max_tex_size = min(8192, gfx_rapi->get_max_texture_size()); tex_upload_buffer = (uint8_t*)malloc(max_tex_size * max_tex_size * 4); } rsp.depth_zfar = 1.0f; } extern "C" void gfx_destroy(void) { // TODO: should also destroy rapi and wapi, and any other resources acquired in fast3d // Texture cache and loaded textures store references to Resources which need to be unreferenced. gfx_texture_cache_clear(); } extern "C" struct GfxRenderingAPI* gfx_get_current_rendering_api(void) { return gfx_rapi; } extern "C" void gfx_start_frame(void) { gfx_wapi->handle_events(); gfx_wapi->get_dimensions(&gfx_current_window_dimensions.width, &gfx_current_window_dimensions.height, &gfx_current_window_position_x, &gfx_current_window_position_y); if (gfx_current_window_dimensions.height == 0) { // Avoid division by zero gfx_current_window_dimensions.height = 1; } // for now ensure that the game renders in a centered 4:3 window // proper 16:9 support requires some fixes, namely the sky, fullscreen fades and room culling gfx_current_dimensions.aspect_ratio = 4.0f / 3.0f; gfx_current_dimensions.height = gfx_current_window_dimensions.height; gfx_current_dimensions.width = gfx_current_dimensions.height * gfx_current_dimensions.aspect_ratio; gfx_current_game_window_viewport.width = gfx_current_dimensions.width; gfx_current_game_window_viewport.height = gfx_current_dimensions.height; gfx_current_game_window_viewport.x = (gfx_current_window_dimensions.width - gfx_current_dimensions.width) / 2; if (gfx_current_dimensions.height != gfx_prev_dimensions.height) { for (auto& fb : framebuffers) { uint32_t width = fb.second.orig_width, height = fb.second.orig_height; gfx_adjust_width_height_for_scale(width, height); if (width != fb.second.applied_width || height != fb.second.applied_height) { gfx_rapi->update_framebuffer_parameters(fb.first, width, height, 1, true, true, true, true); fb.second.applied_width = width; fb.second.applied_height = height; } } } gfx_prev_dimensions = gfx_current_dimensions; bool different_size = gfx_current_dimensions.width != gfx_current_game_window_viewport.width || gfx_current_dimensions.height != gfx_current_game_window_viewport.height; if (different_size || gfx_msaa_level > 1) { game_renders_to_framebuffer = true; if (different_size) { gfx_rapi->update_framebuffer_parameters(game_framebuffer, gfx_current_dimensions.width, gfx_current_dimensions.height, gfx_msaa_level, true, true, true, true); } else { // MSAA framebuffer needs to be resolved to an equally sized target when complete, which must therefore // match the window size gfx_rapi->update_framebuffer_parameters(game_framebuffer, gfx_current_window_dimensions.width, gfx_current_window_dimensions.height, gfx_msaa_level, false, true, true, true); } if (gfx_msaa_level > 1 && different_size) { gfx_rapi->update_framebuffer_parameters(game_framebuffer_msaa_resolved, gfx_current_dimensions.width, gfx_current_dimensions.height, 1, false, false, false, false); } } else { game_renders_to_framebuffer = false; } fbActive = 0; } uint32_t num_dls = 0; extern "C" void gfx_run(Gfx* commands) { ++num_dls; gfx_sp_reset(); // puts("New frame"); if (!gfx_wapi->start_frame()) { dropped_frame = true; return; } dropped_frame = false; gfx_rapi->update_framebuffer_parameters(0, gfx_current_window_dimensions.width, gfx_current_window_dimensions.height, 1, false, true, true, !game_renders_to_framebuffer); gfx_rapi->start_frame(); gfx_rapi->start_draw_to_framebuffer(game_renders_to_framebuffer ? game_framebuffer : 0, (float)gfx_current_dimensions.height / SCREEN_HEIGHT); gfx_rapi->clear_framebuffer(); rdp.viewport_or_scissor_changed = true; rendering_state.viewport = {}; rendering_state.scissor = {}; gfx_run_dl(commands); gfx_flush(); gfxFramebuffer = 0; currentDir = std::stack(); if (game_renders_to_framebuffer) { gfx_rapi->start_draw_to_framebuffer(0, 1); gfx_rapi->clear_framebuffer(); if (gfx_msaa_level > 1) { bool different_size = gfx_current_dimensions.width != gfx_current_game_window_viewport.width || gfx_current_dimensions.height != gfx_current_game_window_viewport.height; if (different_size) { gfx_rapi->resolve_msaa_color_buffer(game_framebuffer_msaa_resolved, game_framebuffer); gfxFramebuffer = (uintptr_t)gfx_rapi->get_framebuffer_texture_id(game_framebuffer_msaa_resolved); } else { gfx_rapi->resolve_msaa_color_buffer(0, game_framebuffer); } } else { gfxFramebuffer = (uintptr_t)gfx_rapi->get_framebuffer_texture_id(game_framebuffer); } } gfx_rapi->end_frame(); gfx_wapi->swap_buffers_begin(); has_drawn_imgui_menu = false; } extern "C" void gfx_end_frame(void) { if (!dropped_frame) { gfx_rapi->finish_render(); gfx_wapi->swap_buffers_end(); } } extern "C" void gfx_set_target_fps(int fps) { gfx_wapi->set_target_fps(fps); } extern "C" int gfx_create_framebuffer(uint32_t width, uint32_t height) { uint32_t orig_width = width, orig_height = height; gfx_adjust_width_height_for_scale(width, height); int fb = gfx_rapi->create_framebuffer(); gfx_rapi->update_framebuffer_parameters(fb, width, height, 1, true, true, true, true); framebuffers[fb] = { orig_width, orig_height, width, height }; return fb; } extern "C" void gfx_set_framebuffer(int fb, float noise_scale) { gfx_rapi->start_draw_to_framebuffer(fb, noise_scale); gfx_rapi->clear_framebuffer(); } extern "C" void gfx_reset_framebuffer() { gfx_rapi->start_draw_to_framebuffer(0, (float)gfx_current_dimensions.height / SCREEN_HEIGHT); }