Remove asm-processor in favor of preprocess.sh (#1760)

* Remove asm-processor in favor of preprocess.sh * Remove duplicate CMD_F * preprocess_pragmas gitignore * Mac fixes * Macos + clang as cc check fixes * Need quotes for paths with spaces * Fix bss
2025-02-21 18:02:19 -08:00 · 2025-02-21 18:02:19 -08:00 · 61961fb938
parent a96c93d936
commit 61961fb938
46 changed files with 1760 additions and 280 deletions
--- a/32
+++ b/32
@ -14,17 +14,21 @@ ifeq ($(OS),Windows_NT)
  DETECTED_OS = windows
  MAKE = make
  VENV_BIN_DIR = Scripts
+  ICONV = iconv
 else
  UNAME_S := $(shell uname -s)
  ifeq ($(UNAME_S),Linux)
    DETECTED_OS = linux
    MAKE = make
    VENV_BIN_DIR = bin
+    ICONV = iconv
  endif
  ifeq ($(UNAME_S),Darwin)
    DETECTED_OS = macos
    MAKE = gmake
    VENV_BIN_DIR = bin
+    # The default iconv on macOS has some differences from GNU iconv, so we use the Homebrew version instead
+    ICONV = $(shell brew --prefix)/opt/libiconv/bin/iconv
  endif
 endif

@ -144,8 +148,7 @@ CC_CHECK_WARNINGS := -Wall -Wextra -Wno-unknown-pragmas -Wno-unused-parameter -W
 CC_CHECK_WARNINGS += -Werror=implicit-int -Werror=implicit-function-declaration -Werror=int-conversion -Werror=incompatible-pointer-types
 # Have CC_CHECK pretend to be a MIPS compiler
 MIPS_BUILTIN_DEFS := -DMIPSEB -D_MIPS_FPSET=16 -D_MIPS_ISA=2 -D_ABIO32=1 -D_MIPS_SIM=_ABIO32 -D_MIPS_SZINT=32 -D_MIPS_SZPTR=32
-# The -MMD flags additionaly creates a .d file with the same name as the .o file.
-CC_CHECK_FLAGS    := -MMD -MP -fno-builtin -fsyntax-only -funsigned-char -fdiagnostics-color -std=gnu89 -m32 -DNON_MATCHING -DAVOID_UB -DCC_CHECK=1
+CC_CHECK_FLAGS    := -fno-builtin -fsyntax-only -funsigned-char -fdiagnostics-color -std=gnu89 -m32 -DNON_MATCHING -DAVOID_UB -DCC_CHECK=1

 ifneq ($(WERROR), 0)
  CC_CHECK_WARNINGS += -Werror
@ -429,7 +432,6 @@ $(BUILD_DIR)/src/boot/libc64/%.o: OPTFLAGS := -O2
 $(BUILD_DIR)/src/audio/%.o: OPTFLAGS := -O2

 $(BUILD_DIR)/assets/%.o: OPTFLAGS := -O1
-$(BUILD_DIR)/assets/%.o: ASM_PROC_FLAGS := 

 # file flags
 $(BUILD_DIR)/src/libultra/libc/ll.o: OPTFLAGS := -O1
@ -452,14 +454,14 @@ $(BUILD_DIR)/src/code/osFlash.o: MIPS_VERSION := -mips1
 # cc & asm-processor
 $(BUILD_DIR)/src/libultra/%.o: CC := $(CC_OLD)

-$(BUILD_DIR)/src/boot/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
+# For using asm_processor on some files:
+#$(BUILD_DIR)/.../%.o: CC := $(PYTHON) $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --

-$(BUILD_DIR)/src/code/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
-$(BUILD_DIR)/src/audio/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
+ifeq ($(PERMUTER),)  # permuter + preprocess.sh misbehaves; permuter doesn't care about rodata diffs or bss ordering, so preprocess.sh is skipped when PERMUTER is set
+# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas. ICONV is quoted because it can be an absolute path (see the macOS ICONV definition)
+$(BUILD_DIR)/src/%.o: CC := ./tools/buildtools/preprocess.sh -v $(VERSION) -i "$(ICONV)" -- $(CC)
+endif

-$(BUILD_DIR)/src/overlays/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
-
-$(BUILD_DIR)/assets/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
 else
 # Note that if adding additional assets directories for modding reasons these flags must also be used there
 $(BUILD_DIR)/assets/%.o: CFLAGS += -fno-zero-initialized-in-bss -fno-toplevel-reorder
@ -644,7 +646,7 @@ $(BUILD_DIR)/src/overlays/%_reloc.o: $(BUILD_DIR)/spec

 # Incremental link z_game_over data into rodata
 $(BUILD_DIR)/src/code/z_game_over.o: src/code/z_game_over.c
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $(@:.o=.tmp) $<
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $<
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $(@:.o=.tmp) $<
 	$(LD) -r -T linker_scripts/data_with_rodata.ld $(@:.o=.tmp) -o $@
 	@$(RM) $(@:.o=.tmp)
@ -653,7 +655,7 @@ $(BUILD_DIR)/src/code/z_game_over.o: src/code/z_game_over.c

 # Incremental link audio/session_init data into rodata
 $(BUILD_DIR)/src/audio/session_init.o: src/audio/session_init.c $(BUILD_DIR)/assets/audio/soundfont_sizes.h $(BUILD_DIR)/assets/audio/sequence_sizes.h
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $(@:.o=.tmp) $<
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $<
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $(@:.o=.tmp) $<
 	$(LD) -r -T linker_scripts/data_with_rodata.ld $(@:.o=.tmp) -o $@
 	@$(RM) $(@:.o=.tmp)
@ -662,27 +664,27 @@ $(BUILD_DIR)/src/audio/session_init.o: src/audio/session_init.c $(BUILD_DIR)/ass

 $(SHIFTJIS_O_FILES): $(BUILD_DIR)/src/%.o: src/%.c
 	$(SHIFTJIS_CONV) -o $(@:.o=.enc.c) $<
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $@ $(@:.o=.enc.c)
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $(@:.o=.enc.c)
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $@ $(@:.o=.enc.c)
 	$(OBJDUMP_CMD)
 	$(RM_MDEBUG)

 $(BUILD_DIR)/src/libultra/libc/ll.o: src/libultra/libc/ll.c
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $@ $<
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $<
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $@ $<
 	$(PYTHON) tools/set_o32abi_bit.py $@
 	$(OBJDUMP_CMD)
 	$(RM_MDEBUG)

 $(BUILD_DIR)/src/libultra/libc/llcvt.o: src/libultra/libc/llcvt.c
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $@ $<
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $<
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $@ $<
 	$(PYTHON) tools/set_o32abi_bit.py $@
 	$(OBJDUMP_CMD)
 	$(RM_MDEBUG)

 $(BUILD_DIR)/%.o: %.c
-	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) -o $@ $<
+	$(CC_CHECK_COMP) $(CC_CHECK_FLAGS) $(IINC) $(CC_CHECK_WARNINGS) $(C_DEFINES) $(MIPS_BUILTIN_DEFS) $<
 	$(CC) -c $(CFLAGS) $(IINC) $(WARNINGS) $(C_DEFINES) $(MIPS_VERSION) $(ENDIAN) $(OPTFLAGS) -o $@ $<
 	$(OBJDUMP_CMD)
 	$(RM_MDEBUG)
--- a/docs/BUILDING_MACOS.md
+++ b/docs/BUILDING_MACOS.md
@ -12,12 +12,13 @@ For macOS, use Homebrew to install the following dependencies:
 * libpng
 * bash
 * libxml2
+* libiconv

 You can install them with the following commands:

 ```bash
 brew update
-brew install coreutils make python3 libpng bash libxml2
+brew install coreutils make python3 libpng bash libxml2 libiconv
 ```

 (The repository expects Homebrew-installed programs to be either linked correctly in `$PATH` etc. or in their default locations.)
@ -72,10 +73,10 @@ make -j
 sudo make install
 ```

-Edit your `~/.bash_profile`/`~/.zsh_profile` (or whichever shell you use) to add the new binutils binaries to the system PATH
+Edit your `~/.bash_profile`/`~/.zprofile` (or the equivalent startup file for whichever shell you use) to add the new binutils binaries to the system PATH:

 ```bash
-echo "export PATH=$PATH:/opt/cross/bin" >> ~/.bash_profile
+echo 'export PATH="$PATH:/opt/cross/bin"' >> ~/.bash_profile
 ```

 Reload ~/.bash_profile (or just launch a new terminal tab)
--- a/include/prevent_bss_reordering.h
+++ b/include/prevent_bss_reordering.h
@ -1,83 +0,0 @@
-#ifndef PREVENT_BSS_REORDERING_H
-#define PREVENT_BSS_REORDERING_H
-
-/**
- * To determine variable order for .bss, the compiler sorts variables by their
- * "name index" mod 256, where name index is something that, with -g, gets
- * incremented by struct and variable declarations, typedefs, and file markers,
- * among else. (Without -g, only variable declarations affects the index.)
- * This file contains enough dummy declarations to bump the index by 128.
- * Including it, or removing the include, should fix bss reordering problems
- * for a file, assuming the name index distance between its first and last bss
- * variable is at most 128.
- * Note that if a variable is declared "extern" within a header file, the name
- * index is taken at that point of the extern declaration. Thus, this include
- * must come before any such header.
- */
-
-struct Dummy0 { int x; };
-struct Dummy1 { int x; };
-struct Dummy2 { int x; };
-struct Dummy3 { int x; };
-struct Dummy4 { int x; };
-struct Dummy5 { int x; };
-struct Dummy6 { int x; };
-struct Dummy7 { int x; };
-struct Dummy8 { int x; };
-struct Dummy9 { int x; };
-struct Dummy10 { int x; };
-struct Dummy11 { int x; };
-struct Dummy12 { int x; };
-struct Dummy13 { int x; };
-struct Dummy14 { int x; };
-struct Dummy15 { int x; };
-struct Dummy16 { int x; };
-struct Dummy17 { int x; };
-struct Dummy18 { int x; };
-struct Dummy19 { int x; };
-struct Dummy20 { int x; };
-struct Dummy21 { int x; };
-struct Dummy22 { int x; };
-struct Dummy23 { int x; };
-struct Dummy24 { int x; };
-struct Dummy25 { int x; };
-struct Dummy26 { int x; };
-struct Dummy27 { int x; };
-struct Dummy28 { int x; };
-struct Dummy29 { int x; };
-struct Dummy30 { int x; };
-struct Dummy31 { int x; };
-struct Dummy32 { int x; };
-struct Dummy33 { int x; };
-struct Dummy34 { int x; };
-struct Dummy35 { int x; };
-struct Dummy36 { int x; };
-struct Dummy37 { int x; };
-struct Dummy38 { int x; };
-struct Dummy39 { int x; };
-struct Dummy40 { int x; };
-struct Dummy41 { int x; };
-struct Dummy42 { int x; };
-struct Dummy43 { int x; };
-struct Dummy44 { int x; };
-struct Dummy45 { int x; };
-struct Dummy46 { int x; };
-struct Dummy47 { int x; };
-struct Dummy48 { int x; };
-struct Dummy49 { int x; };
-struct Dummy50 { int x; };
-struct Dummy51 { int x; };
-struct Dummy52 { int x; };
-struct Dummy53 { int x; };
-struct Dummy54 { int x; };
-struct Dummy55 { int x; };
-struct Dummy56 { int x; };
-struct Dummy57 { int x; };
-struct Dummy58 { int x; };
-struct Dummy59 { int x; };
-struct Dummy60 { int x; };
-struct Dummy61 { int x; };
-struct Dummy62 { int x; };
-typedef int Dummy63;
-
-#endif
--- a/include/prevent_bss_reordering2.h
+++ b/include/prevent_bss_reordering2.h
@ -1,44 +0,0 @@
-#ifndef PREVENT_BSS_REORDERING2_H
-#define PREVENT_BSS_REORDERING2_H
-
-/**
- * See the explanation at prevent_bss_reordering.h
- *
- * Instead of producing 64 dummy declarations, this header only produces 32
- * dummy declarations
- */
-
-struct Dummy100 { int x; };
-struct Dummy101 { int x; };
-struct Dummy102 { int x; };
-struct Dummy103 { int x; };
-struct Dummy104 { int x; };
-struct Dummy105 { int x; };
-struct Dummy106 { int x; };
-struct Dummy107 { int x; };
-struct Dummy108 { int x; };
-struct Dummy109 { int x; };
-struct Dummy110 { int x; };
-struct Dummy111 { int x; };
-struct Dummy112 { int x; };
-struct Dummy113 { int x; };
-struct Dummy114 { int x; };
-struct Dummy115 { int x; };
-struct Dummy116 { int x; };
-struct Dummy117 { int x; };
-struct Dummy118 { int x; };
-struct Dummy119 { int x; };
-struct Dummy120 { int x; };
-struct Dummy121 { int x; };
-struct Dummy122 { int x; };
-struct Dummy123 { int x; };
-struct Dummy124 { int x; };
-struct Dummy125 { int x; };
-struct Dummy126 { int x; };
-struct Dummy127 { int x; };
-struct Dummy128 { int x; };
-struct Dummy129 { int x; };
-struct Dummy130 { int x; };
-struct Dummy131 { int x; };
-
-#endif
--- a/include/z64cutscene_commands.h
+++ b/include/z64cutscene_commands.h
@ -6,8 +6,34 @@

 /**
 * Cutscene scripts are arrays of `CutsceneData` words, including bit-packed integers and floats.
+ *
+ * Most command macros have unused arguments. This is to account for the vanilla assets setting specific values
+ * that don't end up being used by any code. They can safely be set to anything, as they aren't used in the
+ * implementation.
+ *
+ * It is believed the original tool used for cutscenes handled most commands the same way, using similar
+ * fields, and the code would have accessed them using common structs. Given this, the unused values observed in vanilla
+ * assets may appear to map to a variable that makes sense, even if it doesn't end up being used in the code. It
+ * probably isn't garbage data.
+ *
+ * This codebase goes with specialized structs and macros to make it easier to follow the code.
+ * Note this common struct design is still partially reflected in all commands having a `startFrame` and `endFrame`,
+ * when sometimes only the `startFrame` matters (as documented).
 */

+/**
+ * CMD_F expects an IEEE 754 encoded float (colloquially "in hex", such as `0x42280000`) rather than a C float
+ * literal (such as `42.0f`). Float literals cannot be used because cutscenes are arrays of the union type
+ * CutsceneData, which may contain integers and floats: even though CutsceneData has a float member, initializing
+ * with a float will cast the float to s32. Designated initializers (added in C99) would solve this problem but are
+ * not supported by IDO (C89 and some extensions). CS_FLOAT takes both forms and selects one based on `__GNUC__`.
+ */
+#ifdef __GNUC__
+#define CS_FLOAT(ieee754bin, f) (f)
+#else
+#define CS_FLOAT(ieee754bin, f) (ieee754bin)
+#endif
+
 /**
 * Marks the beginning of a cutscene script.
 * 
--- a/src/audio/lib/effects.c
+++ b/src/audio/lib/effects.c
@ -271,8 +271,8 @@ f32 AudioEffects_UpdateAdsr(AdsrState* adsr) {
        case ADSR_STATUS_START_LOOP:
            adsr->envelopeIndex = 0;
            adsr->action.s.status = ADSR_STATUS_LOOP;
+        retry:;
            FALLTHROUGH;
-        retry:
        case ADSR_STATUS_LOOP:
            adsr->delay = adsr->envelope[adsr->envelopeIndex].delay;
            switch (adsr->delay) {
--- a/src/boot/CIC6105.c
+++ b/src/boot/CIC6105.c
@ -1,9 +1,10 @@
-#include "prevent_bss_reordering.h"
 #include "CIC6105.h"

 #include "build.h"
 #include "fault.h"

+#pragma increment_block_number "n64-us:200"
+
 s32 gCICAddr1Val;
 s32 gCICAddr2Val;
 FaultClient sRomInfoFaultClient;
--- a/src/boot/idle.c
+++ b/src/boot/idle.c
@ -1,5 +1,3 @@
-#include "prevent_bss_reordering.h"
-
 #include "stdbool.h"

 #include "buffers.h"
@ -11,6 +9,8 @@
 #include "z64dma.h"
 #include "z64thread.h"

+#pragma increment_block_number "n64-us:128"
+
 // Variables are put before most headers as a hacky way to bypass bss reordering
 struct IrqMgr gIrqMgr;
 STACK(sIrqMgrStack, 0x500);
--- a/src/boot/z_std_dma.c
+++ b/src/boot/z_std_dma.c
@ -18,7 +18,6 @@
 * There are some additional provisions to ensure that audio DMA is particularly high-speed, the audio data is assumed
 * to be uncompressed and the request queue and address translation is skipped.
 */
-#include "prevent_bss_reordering.h"
 #include "z64dma.h"

 #include "carthandle.h"
--- a/src/buffers/gfxbuffers.c
+++ b/src/buffers/gfxbuffers.c
@ -1,4 +1,3 @@
-#include "prevent_bss_reordering.h"
 #include "buffers.h"

 u64 gGfxSPTaskYieldBuffer[OS_YIELD_DATA_SIZE / sizeof(u64)] ALIGNED(16);
--- a/src/code/main.c
+++ b/src/code/main.c
@ -1,7 +1,8 @@
-#include "prevent_bss_reordering.h"
 #include "ultra64.h"
 #include "stack.h"

+#pragma increment_block_number "n64-us:128"
+
 // Variables are put before most headers as a hacky way to bypass bss reordering
 OSMesgQueue sSerialEventQueue;
 OSMesg sSerialMsgBuf[1];
--- a/src/code/sys_math3d.c
+++ b/src/code/sys_math3d.c
@ -1,48 +1,15 @@
-#include "prevent_bss_reordering2.h" // bumps the bss index by 65
-// clang-format off
-// Partial structs taken from "prevent_bss_reordering.h", bumps the bss index by 59
-struct Dummy200 { int x; };
-struct Dummy201 { int x; };
-struct Dummy202 { int x; };
-struct Dummy203 { int x; };
-struct Dummy204 { int x; };
-struct Dummy205 { int x; };
-struct Dummy206 { int x; };
-struct Dummy207 { int x; };
-struct Dummy208 { int x; };
-struct Dummy209 { int x; };
-struct Dummy210 { int x; };
-struct Dummy211 { int x; };
-struct Dummy212 { int x; };
-struct Dummy213 { int x; };
-struct Dummy214 { int x; };
-struct Dummy215 { int x; };
-struct Dummy216 { int x; };
-struct Dummy217 { int x; };
-struct Dummy218 { int x; };
-struct Dummy219 { int x; };
-struct Dummy220 { int x; };
-struct Dummy221 { int x; };
-struct Dummy222 { int x; };
-struct Dummy223 { int x; };
-struct Dummy224 { int x; };
-struct Dummy225 { int x; };
-struct Dummy226 { int x; };
-struct Dummy227 { int x; };
-struct Dummy228 { int x; };
-typedef int Dummy229;
-
-// clang-format on
-
 // Headers are currently valued at 60 mod 256 (./tools/calc_bss.sh <headers>)
 #include "z64math.h"

 #include "stdbool.h"
 #include "PR/gu.h"
-// The bss index at this point should be 184

 #include "macros.h"

+#pragma increment_block_number "n64-us:124"
+
+// The bss index at this point should be 184
+
 Vec3f gZeroVec3f = { 0.0f, 0.0f, 0.0f };
 Vec3s gZeroVec3s = { 0, 0, 0 };

--- a/src/code/z_bgcheck.c
+++ b/src/code/z_bgcheck.c
@ -1,4 +1,3 @@
-#include "prevent_bss_reordering.h"
 #include "z64bgcheck.h"

 #include "libc64/fixed_point.h"
@ -9,6 +8,8 @@
 #include "vt.h"
 #include "z64actor.h"

+#pragma increment_block_number "n64-us:128"
+
 #define DYNA_RAYCAST_FLOORS 1
 #define DYNA_RAYCAST_WALLS 2
 #define DYNA_RAYCAST_CEILINGS 4
--- a/src/code/z_collision_check.c
+++ b/src/code/z_collision_check.c
@ -18,66 +18,7 @@ typedef void (*ColChkApplyFunc)(struct PlayState*, CollisionCheckContext*, Colli
 typedef void (*ColChkVsFunc)(struct PlayState*, CollisionCheckContext*, Collider*, Collider*);
 typedef s32 (*ColChkLineFunc)(struct PlayState*, CollisionCheckContext*, Collider*, Vec3f*, Vec3f*);

-#include "prevent_bss_reordering2.h"
-// clang-format off
-// Partial structs taken from "prevent_bss_reordering.h"
-struct Dummy200 { int x; };
-struct Dummy201 { int x; };
-struct Dummy202 { int x; };
-struct Dummy203 { int x; };
-struct Dummy204 { int x; };
-struct Dummy205 { int x; };
-struct Dummy206 { int x; };
-struct Dummy207 { int x; };
-struct Dummy208 { int x; };
-struct Dummy209 { int x; };
-struct Dummy210 { int x; };
-struct Dummy211 { int x; };
-struct Dummy212 { int x; };
-struct Dummy213 { int x; };
-struct Dummy214 { int x; };
-struct Dummy215 { int x; };
-struct Dummy216 { int x; };
-struct Dummy217 { int x; };
-struct Dummy218 { int x; };
-struct Dummy219 { int x; };
-struct Dummy220 { int x; };
-struct Dummy221 { int x; };
-struct Dummy222 { int x; };
-struct Dummy223 { int x; };
-struct Dummy224 { int x; };
-struct Dummy225 { int x; };
-struct Dummy226 { int x; };
-struct Dummy227 { int x; };
-struct Dummy228 { int x; };
-struct Dummy229 { int x; };
-struct Dummy230 { int x; };
-struct Dummy231 { int x; };
-struct Dummy232 { int x; };
-struct Dummy233 { int x; };
-struct Dummy234 { int x; };
-struct Dummy235 { int x; };
-struct Dummy236 { int x; };
-struct Dummy237 { int x; };
-struct Dummy238 { int x; };
-struct Dummy239 { int x; };
-struct Dummy240 { int x; };
-struct Dummy241 { int x; };
-struct Dummy242 { int x; };
-struct Dummy243 { int x; };
-struct Dummy244 { int x; };
-struct Dummy245 { int x; };
-struct Dummy246 { int x; };
-struct Dummy247 { int x; };
-struct Dummy248 { int x; };
-struct Dummy249 { int x; };
-struct Dummy250 { int x; };
-struct Dummy251 { int x; };
-struct Dummy252 { int x; };
-struct Dummy253 { int x; };
-struct Dummy254 { int x; };
-struct Dummy255 { int x; };
-// clang-format on
+#pragma increment_block_number "n64-us:192"

 Vec3f D_801EDE00;
 Vec3f D_801EDE10;
@ -103,7 +44,7 @@ TriNorm D_801EE0E8[2];
 TriNorm D_801EE150;
 TriNorm D_801EE188;

-#include "prevent_bss_reordering.h"
+#pragma increment_block_number "n64-us:128"

 /**
 * Gets the damage and effect that should be applied for the collision between
--- a/src/code/z_demo.c
+++ b/src/code/z_demo.c
@ -1,8 +1,7 @@
-#include "prevent_bss_reordering.h"
-#include "prevent_bss_reordering2.h"
-
 #include "PR/ultratypes.h"

+#pragma increment_block_number "n64-us:128"
+
 // Variables are put before most headers as a hacky way to bypass bss reordering
 struct CutsceneCamera;

--- a/src/code/z_kaleido_scope_call.c
+++ b/src/code/z_kaleido_scope_call.c
@ -1,4 +1,3 @@
-#include "prevent_bss_reordering.h"
 #include "kaleido_manager.h"

 #include "z64.h"
--- a/src/code/z_map_disp.c
+++ b/src/code/z_map_disp.c
@ -1,4 +1,3 @@
-#include "prevent_bss_reordering.h"
 #include "global.h"
 #include "gfx.h"
 #include "sys_cmpdma.h"
--- a/src/code/z_message.c
+++ b/src/code/z_message.c
@ -1,4 +1,3 @@
-#include "prevent_bss_reordering.h"
 #include "z64message.h"
 #include "global.h"

--- a/src/overlays/actors/ovl_Bg_F40_Switch/z_bg_f40_switch.c
+++ b/src/overlays/actors/ovl_Bg_F40_Switch/z_bg_f40_switch.c
@ -3,7 +3,6 @@
 * Overlay: ovl_Bg_F40_Switch
 * Description: Stone Tower FloorSwitch
 */
-#include "prevent_bss_reordering.h"
 #include "z_bg_f40_switch.h"
 #include "z64rumble.h"
 #include "assets/objects/object_f40_switch/object_f40_switch.h"
--- a/src/overlays/actors/ovl_Bg_Hakugin_Switch/z_bg_hakugin_switch.c
+++ b/src/overlays/actors/ovl_Bg_Hakugin_Switch/z_bg_hakugin_switch.c
@ -4,7 +4,6 @@
 * Description: Goron Link Switch
 */

-#include "prevent_bss_reordering.h"
 #include "z_bg_hakugin_switch.h"
 #include "z64rumble.h"
 #include "assets/objects/object_goronswitch/object_goronswitch.h"
--- a/src/overlays/actors/ovl_Boss_02/z_boss_02.c
+++ b/src/overlays/actors/ovl_Boss_02/z_boss_02.c
@ -4,7 +4,6 @@
 * Description: Twinmold
 */

-#include "prevent_bss_reordering.h"
 #include "z_boss_02.h"
 #include "z64rumble.h"
 #include "z64shrink_window.h"
@ -14,6 +13,8 @@
 #include "overlays/actors/ovl_Item_B_Heart/z_item_b_heart.h"
 #include "assets/objects/gameplay_keep/gameplay_keep.h"

+#pragma increment_block_number "n64-us:128"
+
 #define FLAGS                                                                                 \
    (ACTOR_FLAG_ATTENTION_ENABLED | ACTOR_FLAG_HOSTILE | ACTOR_FLAG_UPDATE_CULLING_DISABLED | \
     ACTOR_FLAG_DRAW_CULLING_DISABLED)
--- a/src/overlays/actors/ovl_Boss_03/z_boss_03.c
+++ b/src/overlays/actors/ovl_Boss_03/z_boss_03.c
@ -49,7 +49,6 @@
 * - Seaweed
 */

-#include "prevent_bss_reordering.h"
 #include "z_boss_03.h"
 #include "overlays/actors/ovl_Door_Warp1/z_door_warp1.h"
 #include "overlays/actors/ovl_En_Water_Effect/z_en_water_effect.h"
--- a/src/overlays/actors/ovl_Boss_06/z_boss_06.c
+++ b/src/overlays/actors/ovl_Boss_06/z_boss_06.c
@ -4,7 +4,6 @@
 * Description: Igos du Ikana window - curtains and ray effects
 */

-#include "prevent_bss_reordering.h"
 #include "z_boss_06.h"
 #include "z64shrink_window.h"
 #include "attributes.h"
--- a/src/overlays/actors/ovl_Boss_07/z_boss_07.c
+++ b/src/overlays/actors/ovl_Boss_07/z_boss_07.c
@ -17,7 +17,6 @@
 * - An invisible "battle initializer" that spawns Majora's Mask and the "battle handler", resets the effects, etc.
 */

-#include "prevent_bss_reordering.h"
 #include "z_boss_07.h"
 #include "z64shrink_window.h"
 #include "attributes.h"
--- a/src/overlays/actors/ovl_Eff_Kamejima_Wave/z_eff_kamejima_wave.c
+++ b/src/overlays/actors/ovl_Eff_Kamejima_Wave/z_eff_kamejima_wave.c
@ -3,7 +3,6 @@
 * Overlay: ovl_Eff_Kamejima_Wave
 * Description: Wave Created by Turtle Awakening
 */
-#include "prevent_bss_reordering.h"
 #include "z_eff_kamejima_wave.h"
 #include "assets/objects/object_kamejima/object_kamejima.h"

--- a/src/overlays/actors/ovl_En_Az/z_en_az.c
+++ b/src/overlays/actors/ovl_En_Az/z_en_az.c
@ -4,7 +4,6 @@
 * Description: Beaver Bros
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_az.h"
 #include "overlays/actors/ovl_En_Twig/z_en_twig.h"
 #include "overlays/actors/ovl_En_Fish/z_en_fish.h"
--- a/src/overlays/actors/ovl_En_Kakasi/z_en_kakasi.c
+++ b/src/overlays/actors/ovl_En_Kakasi/z_en_kakasi.c
@ -4,7 +4,6 @@
 * Description: Pierre the Scarecorw
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_kakasi.h"

 #include "z64olib.h"
--- a/src/overlays/actors/ovl_En_Kanban/z_en_kanban.c
+++ b/src/overlays/actors/ovl_En_Kanban/z_en_kanban.c
@ -672,8 +672,8 @@ void EnKanban_Update(Actor* thisx, PlayState* play) {
                this->actionState = ENKANBAN_GROUND;
            }
        }
+        nextCase:;
            FALLTHROUGH;
-        nextCase:
        case ENKANBAN_GROUND:
        case ENKANBAN_WATER:
            signpost = (EnKanban*)this->actor.parent;
--- a/src/overlays/actors/ovl_En_Knight/z_en_knight.c
+++ b/src/overlays/actors/ovl_En_Knight/z_en_knight.c
@ -4,7 +4,6 @@
 * Description: Igos du Ikana and his lackeys
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_knight.h"
 #include "z64shrink_window.h"
 #include "attributes.h"
--- a/src/overlays/actors/ovl_En_Poh/z_en_poh.c
+++ b/src/overlays/actors/ovl_En_Poh/z_en_poh.c
@ -4,7 +4,6 @@
 * Description: Poe
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_poh.h"
 #include "overlays/actors/ovl_En_Clear_Tag/z_en_clear_tag.h"

--- a/src/overlays/actors/ovl_En_Rg/z_en_rg.c
+++ b/src/overlays/actors/ovl_En_Rg/z_en_rg.c
@ -3,7 +3,6 @@
 * Overlay: ovl_En_Rg
 * Description: Racing Goron
 */
-#include "prevent_bss_reordering.h"
 #include "z_en_rg.h"
 #include "assets/objects/gameplay_keep/gameplay_keep.h"

--- a/src/overlays/actors/ovl_En_Test3/z_en_test3.c
+++ b/src/overlays/actors/ovl_En_Test3/z_en_test3.c
@ -4,7 +4,6 @@
 * Description: Kafei
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_test3.h"

 #include "zelda_arena.h"
@ -14,6 +13,8 @@
 #include "assets/objects/gameplay_keep/gameplay_keep.h"
 #include "assets/objects/object_mask_ki_tan/object_mask_ki_tan.h"

+#pragma increment_block_number "n64-us:128"
+
 #define FLAGS (ACTOR_FLAG_UPDATE_CULLING_DISABLED | ACTOR_FLAG_DRAW_CULLING_DISABLED | ACTOR_FLAG_CAN_PRESS_SWITCHES)

 typedef struct {
--- a/src/overlays/actors/ovl_En_Test7/z_en_test7.c
+++ b/src/overlays/actors/ovl_En_Test7/z_en_test7.c
@ -4,7 +4,6 @@
 * Description: Soaring effects (wings, sphere, etc)
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_test7.h"
 #include "assets/objects/gameplay_keep/gameplay_keep.h"

--- a/src/overlays/actors/ovl_En_Thiefbird/z_en_thiefbird.c
+++ b/src/overlays/actors/ovl_En_Thiefbird/z_en_thiefbird.c
@ -3,7 +3,6 @@
 * Overlay: ovl_En_Thiefbird
 * Description: Takkuri
 */
-#include "prevent_bss_reordering.h"
 #include "z_en_thiefbird.h"
 #include "overlays/actors/ovl_En_Clear_Tag/z_en_clear_tag.h"

--- a/src/overlays/actors/ovl_En_Wood02/z_en_wood02.c
+++ b/src/overlays/actors/ovl_En_Wood02/z_en_wood02.c
@ -4,7 +4,6 @@
 * Description: Trees, shrubs
 */

-#include "prevent_bss_reordering.h"
 #include "z_en_wood02.h"
 #include "attributes.h"
 #include "assets/objects/object_wood02/object_wood02.h"
--- a/src/overlays/actors/ovl_Oceff_Wipe3/z_oceff_wipe3.c
+++ b/src/overlays/actors/ovl_Oceff_Wipe3/z_oceff_wipe3.c
@ -4,7 +4,6 @@
 * Description: Unused OoT Saria's Song Ocarina Effect
 */

-#include "prevent_bss_reordering.h"
 #include "z_oceff_wipe3.h"

 #define FLAGS (ACTOR_FLAG_UPDATE_CULLING_DISABLED | ACTOR_FLAG_UPDATE_DURING_OCARINA)
--- a/src/overlays/actors/ovl_player_actor/z_player.c
+++ b/src/overlays/actors/ovl_player_actor/z_player.c
@ -3,7 +3,6 @@
 * Overlay: ovl_player_actor
 * Description: Player
 */
-#include "prevent_bss_reordering.h"
 #include "z64player.h"

 #include "global.h"
@ -47,6 +46,8 @@
 #include "assets/objects/object_link_nuts/object_link_nuts.h"
 #include "assets/objects/object_link_child/object_link_child.h"

+#pragma increment_block_number "n64-us:128"
+
 void Player_Init(Actor* thisx, PlayState* play);
 void Player_Destroy(Actor* thisx, PlayState* play);
 void Player_Update(Actor* thisx, PlayState* play);
--- a/src/overlays/gamestates/ovl_file_choose/z_file_nameset_NES.c
+++ b/src/overlays/gamestates/ovl_file_choose/z_file_nameset_NES.c
@ -4,7 +4,6 @@
 * Description: Entering name on a new file, selecting options from the options menu
 */

-#include "prevent_bss_reordering.h"
 #include "z_file_select.h"
 #include "z64rumble.h"
 #include "assets/misc/title_static/title_static.h"
--- a/tools/.gitignore
+++ b/tools/.gitignore
@ -3,6 +3,7 @@
 mkdmadata
 mkldscript
 reloc_prereq
+preprocess_pragmas
 vtxdis

 ido_recomp/
--- a/tools/buildtools/Makefile
+++ b/tools/buildtools/Makefile
@ -1,5 +1,5 @@
 CFLAGS := -Wall -Wextra -Wpedantic -std=c99 -g -Os
-PROGRAMS := mkdmadata mkldscript reloc_prereq
+PROGRAMS := mkdmadata mkldscript reloc_prereq preprocess_pragmas

 ifeq ($(shell command -v clang >/dev/null 2>&1; echo $$?),0)
  CC := clang
@ -15,6 +15,7 @@ clean:
 mkdmadata_SOURCES    := mkdmadata.c spec.c util.c
 mkldscript_SOURCES   := mkldscript.c spec.c util.c
 reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
+preprocess_pragmas_SOURCES := preprocess_pragmas.c

 define COMPILE =
 $(1): $($1_SOURCES)
--- a/tools/buildtools/preprocess.sh
+++ b/tools/buildtools/preprocess.sh
@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: © 2024 ZeldaRET
+# SPDX-License-Identifier: CC0-1.0
+
+# Usage: preprocess [flags] -- [compile command minus input file...] [single input file]
+# Flags:
+#    -v VERSION (required)
+#    -i ICONV_PATH (optional, default: iconv)
+# Preprocess a C file to:
+# * Re-encode from UTF-8 to EUC-JP
+#   (the repo uses UTF-8 for text encoding, but the strings in the ROM are encoded in EUC-JP)
+# * Replace `#pragma increment_block_number` (see preprocess_pragmas)
+
+set -e
+set -o pipefail
+
+if [ "${VERBOSE-}" ]
+then
+    set -x
+fi
+
+for i in $(seq $#)
+do
+    if [[ "${!i}" = '--' ]]
+    then
+        # flags before --
+        flags=("${@:1:$(($i - 1))}")
+        # compile command, between -- and the input source file (an array, so each argument is passed on intact)
+        compilecmd=("${@:$(($i + 1)):$(($# - $i - 1))}")
+        # The last argument, the input source file to be compiled
+        srcfile="${@: -1}"
+        break
+    fi
+done
+
+if [ "${VERBOSE-}" ]
+then
+    echo flags="${flags[@]}"
+    echo compilecmd="${compilecmd[*]}"
+    echo srcfile="$srcfile"
+fi
+
+ICONV=iconv
+
+while getopts "v:i:" opt "${flags[@]}"
+do
+    case $opt in
+        v)
+            VERSION=$OPTARG
+            ;;
+        i)
+            ICONV=$OPTARG
+            ;;
+        ?)
+            echo "Error: Bad flags" >&2
+            exit 1
+            ;;
+    esac
+done
+
+if [[ "${!OPTIND}" != '--' ]]
+then
+    echo "Error: Positional arguments in flags not allowed" >&2
+    exit 1
+fi
+
+if [ -z "${VERSION-}" ]
+then
+    echo Missing -v >&2
+    exit 1  # a bare `exit` would return echo's status (0) and make the build treat this as success
+fi
+
+# Create a temporary directory, and remove it on script exit
+# We use a temp dir instead of a temp file because ido_block_numbers.py and fix_bss.py
+# need the symbol table .T file from IDO, which is always named like the input file.
+# So we use a file named like the original input file, inside a temp dir.
+tempdir=$(mktemp -d)
+tempfile="$tempdir/$(basename "$srcfile")"
+trap 'rm -rf "$tempdir"' EXIT
+
+# Preprocess pragmas and re-encode from UTF-8 to EUC-JP
+{
+    printf '#line 1 "%s"\n' "$srcfile"  # linemarker
+    ./tools/buildtools/preprocess_pragmas "$VERSION" "$srcfile" < "$srcfile"
+} | "${ICONV}" -f UTF-8 -t EUC-JP > "$tempfile"
+
+# Also include the source file's directory to have the include path as if we compiled the original source.
+# Pass the processed temporary file for compilation (paths are quoted so that spaces in them are preserved).
+"${compilecmd[@]}" -I "$(dirname "$srcfile")" "$tempfile"
--- a/tools/buildtools/preprocess_pragmas.c
+++ b/tools/buildtools/preprocess_pragmas.c
@ -0,0 +1,154 @@
+
+// SPDX-FileCopyrightText: © 2024 ZeldaRET
+// SPDX-License-Identifier: CC0-1.0
+
+// Usage: preprocess_pragmas VERSION filename < source.c
+// The filename argument is only used for linemarkers.
+// Preprocess C source on stdin, writes to stdout
+// Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering.
+// The names of these fake structs are expected to be increment_block_number_%d_%d with the first number indicating
+// the line number of the #pragma in the original source file. (this is for use by fix_bss.py)
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+const char str_pragma_increment_block_number[] = "#pragma increment_block_number";
+
+// Reads C source from stdin line by line and copies it to stdout, replacing every
+// `#pragma increment_block_number` directive (including its backslash-continued lines)
+// with fake struct declarations followed by a #line marker.
+// argv[1] is the version whose amount is looked up in the pragma,
+// argv[2] is the file name written in the #line markers.
+// Returns EXIT_FAILURE on bad usage, on I/O errors, or when the pragma names the version
+// without an integer amount; EXIT_SUCCESS otherwise.
+int main(int argc, char** argv) {
+    if (argc != 3) {
+        fprintf(stderr, "Usage: preprocess_pragmas VERSION filename < source.c\n");
+        return EXIT_FAILURE;
+    }
+    char* const version = argv[1];
+    char* const filename = argv[2];
+
+    // Build the string searched for in pragma lines: the version followed by ':'
+    const size_t len_version = strlen(version);
+    char version_needle[len_version + 2];
+    memcpy(version_needle, version, len_version);
+    version_needle[len_version] = ':';
+    version_needle[len_version + 1] = '\0';
+
+    char buf[32 * 1024];
+    char* const bufend = buf + sizeof(buf);
+    // End of the valid data in buf (where the next read is stored)
+    char* bufp = buf;
+    bool cont = true;
+    int line_num = 1;
+    // whether the current line follows a #pragma increment_block_number,
+    // including continuation lines (lines after a \-ending line)
+    bool is_in_pragma = false;
+    // the line where the #pragma increment_block_number is
+    int pragma_line_number = 0;
+    // how many fake structs to write to replace the current pragma
+    int n_fake_structs = 0;
+
+    while (cont) {
+        size_t nread = fread(bufp, 1, bufend - bufp, stdin);
+        bufp += nread;
+        if (nread == 0) {
+            if (!feof(stdin)) {
+                perror("fread");
+                fprintf(stderr, "Failed to read from stdin\n");
+                return EXIT_FAILURE;
+            }
+            cont = false;
+            if (bufp == buf) {
+                // All lines processed
+                break;
+            } else {
+                // The buffer contains the last line and that line isn't terminated with a newline.
+                // Add a final newline and do one last iteration.
+                assert(bufp < bufend);
+                *bufp = '\n';
+                bufp++;
+            }
+        }
+
+        // Only complete lines are processed: find the last newline currently in the buffer
+        char* last_newline = NULL;
+        for (char* p = bufp - 1; p >= buf; p--) {
+            if (*p == '\n') {
+                last_newline = p;
+                break;
+            }
+        }
+        if (last_newline == NULL) {
+            // No newline, read more data.
+            // Assert there is space for it (there should be no line long enough to not fit in buf).
+            assert(bufp < bufend);
+            continue;
+        }
+
+        // Process every complete line in the buffer, up to and including last_newline
+        char* line = buf;
+        while (true) {
+            char* line_end = line;
+            while (*line_end != '\n') {
+                line_end++;
+                assert(line_end <= last_newline);
+            }
+            if (!strncmp(line, str_pragma_increment_block_number, strlen(str_pragma_increment_block_number))) {
+                is_in_pragma = true;
+                pragma_line_number = line_num;
+                n_fake_structs = 0;
+            }
+            if (is_in_pragma) {
+                // Pragma lines are not copied to the output.
+                // Terminate the line so the string functions below stop at its end.
+                *line_end = '\0';
+                char* version_amount_item = strstr(line, version_needle);
+                if (version_amount_item != NULL) {
+                    char* version_amount_str_start = &version_amount_item[len_version + 1];
+                    char* version_amount_str_end;
+                    long amount = strtol(version_amount_str_start, &version_amount_str_end, 10);
+                    if (version_amount_str_start == version_amount_str_end) {
+                        fprintf(stderr, "Found version %s in pragma line but no amount integer\n", version);
+                        fprintf(stderr, "%s\n", line);
+                        return EXIT_FAILURE;
+                    }
+                    n_fake_structs = (int)amount;
+                }
+            } else {
+                // Regular line: copy it verbatim, newline included
+                char* p = line;
+                size_t sz = line_end + 1 - line;
+                while (sz != 0) {
+                    size_t nwritten = fwrite(p, 1, sz, stdout);
+                    if (nwritten == 0) {
+                        fprintf(stderr, "Failed to write to stdout\n");
+                        return EXIT_FAILURE;
+                    }
+                    p += nwritten;
+                    sz -= nwritten;
+                }
+            }
+            // The pragma ends on the first line that does not end with a backslash.
+            // An empty line cannot end with a backslash; checking for it first also avoids
+            // reading line_end[-1] before the start of buf when the empty line is the
+            // first line in the buffer.
+            if (is_in_pragma && (line_end == line || line_end[-1] != '\\')) {
+                is_in_pragma = false;
+
+                // Always generate at least one struct,
+                // so that fix_bss.py can know where the increment_block_number pragmas are
+                if (n_fake_structs == 0) {
+                    n_fake_structs = 256;
+                }
+
+                // Write fake structs for BSS ordering
+                // pragma_line_number is used for symbol uniqueness,
+                // and also by fix_bss.py to locate the pragma these symbols originate from.
+                for (int i = 0; i < n_fake_structs; i++)
+                    fprintf(stdout, "struct increment_block_number_%05d_%03d;\n", pragma_line_number, i);
+                // Resynchronize line numbers: the next output line is source line line_num + 1
+                fprintf(stdout, "#line %d \"%s\"\n", line_num + 1, filename);
+            }
+            line_num++;
+            if (line_end == last_newline)
+                break;
+            line = line_end + 1;
+        }
+        // Move the trailing incomplete line (if any) to the start of the buffer
+        assert(bufp <= bufend);
+        assert(bufp > last_newline);
+        char* next_incomplete_line_start = last_newline + 1;
+        ptrdiff_t next_incomplete_line_sz = bufp - next_incomplete_line_start;
+        assert(next_incomplete_line_sz >= 0);
+        memmove(buf, next_incomplete_line_start, next_incomplete_line_sz);
+        bufp = buf + next_incomplete_line_sz;
+    }
+
+    // stdout is buffered: flush explicitly so that a failed write is reported instead of lost
+    if (fflush(stdout) != 0) {
+        fprintf(stderr, "Failed to write to stdout\n");
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
--- a/tools/calc_bss.sh
+++ b/tools/calc_bss.sh
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # Given a list of header files, compute the bss index that results from
-# including them. (See prevent_bss_reordering.h for more information.)
+# including them.

 TEMPC=$(mktemp -t bss.XXXXXXX.c)
 TEMPO=$(mktemp -t bss.XXXXXXX.o)
--- a/tools/extract_assets.py
+++ b/tools/extract_assets.py
@ -26,7 +26,7 @@ def ExtractFile(xmlPath, outputPath, outputSourcePath):
            generateSourceFile = "0"
            break

-    execStr = f"tools/ZAPD/ZAPD.out e -eh -i {xmlPath} -b {globalBaseromSegmentsDir} -o {outputPath} -osf {outputSourcePath} -gsf {generateSourceFile} -rconf tools/ZAPDConfigs/MM/Config.xml {ZAPDArgs}"
+    execStr = f"tools/ZAPD/ZAPD.out e -eh -i {xmlPath} -b {globalBaseromSegmentsDir} -o {outputPath} -osf {outputSourcePath} -gsf {generateSourceFile} -rconf tools/ZAPDConfigs/MM/Config.xml --cs-float both {ZAPDArgs}"

    if globalUnaccounted:
        execStr += " -Wunaccounted"
--- a/tools/fix_bss.py
+++ b/tools/fix_bss.py
@ -0,0 +1,873 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: 2024 zeldaret
+# SPDX-License-Identifier: CC0-1.0
+
+
+from __future__ import annotations
+
+import argparse
+from collections import Counter
+import colorama
+from dataclasses import dataclass
+import io
+import multiprocessing
+import multiprocessing.pool
+from pathlib import Path
+import re
+import shlex
+import sys
+import time
+import traceback
+from typing import BinaryIO, Iterator, Optional, Tuple
+
+from ido_block_numbers import (
+    generate_make_log,
+    find_compiler_command_line,
+    run_cfe,
+    SymbolTableEntry,
+    UcodeOp,
+)
+
+import elftools.elf.elffile
+import mapfile_parser.mapfile
+
+
+# Set on program start since we replace sys.stdout in worker processes
+stdout_isatty = sys.stdout.isatty()
+
+
+def output(message: str = "", color: Optional[str] = None, end: str = "\n"):
+    if color and stdout_isatty:
+        print(f"{color}{message}{colorama.Fore.RESET}", end=end)
+    else:
+        print(message, end=end)
+
+
+def read_u32(f: BinaryIO, offset: int) -> int:
+    f.seek(offset)
+    return int.from_bytes(f.read(4), "big")
+
+
+def read_u16(f: BinaryIO, offset: int) -> int:
+    f.seek(offset)
+    return int.from_bytes(f.read(2), "big")
+
+
+def read_s16(f: BinaryIO, offset: int) -> int:
+    f.seek(offset)
+    return int.from_bytes(f.read(2), "big", signed=True)
+
+
+# Error raised by this script for failures that are reported to the user as a message
+# (missing input files, pointers that cannot be matched to a BSS symbol, no solution found, ...).
+class FixBssException(Exception):
+    pass
+
+
+# A relocation read from an object file section (see read_relocs).
+# Either offset_32 is set (R_MIPS_32), or both offset_hi16 and offset_lo16 are set
+# (R_MIPS_HI16 / R_MIPS_LO16 pair).
+@dataclass
+class Reloc:
+    # Name of the symbol the relocation is against
+    name: str
+    # Offset in the section of the 32-bit word to relocate, for R_MIPS_32
+    offset_32: int | None
+    # Offsets in the section of the two instructions of a HI16/LO16 pair
+    offset_hi16: int | None
+    offset_lo16: int | None
+    # Addend read from the section data at the relocation offset(s)
+    addend: int
+
+
+# The value found at a relocated location, in both the baserom and the build
+# (see get_file_pointers).
+@dataclass
+class Pointer:
+    # Name of the symbol or section the relocation is against
+    name: str
+    # Relocation addend for relocations against a section, 0 for relocations against a symbol
+    addend: int
+    # Value read from the baserom
+    base_value: int
+    # Value read from the build
+    build_value: int
+
+
+# The .bss section of one file, with the pointers found pointing into it
+# (see compare_pointers).
+@dataclass
+class BssSection:
+    # Address (vram) of the start of the section in the build
+    start_address: int
+    # Pointers whose build value lies inside the section
+    pointers: list[Pointer]
+
+
+# Read relocations from an ELF file section
+# Returns one Reloc per R_MIPS_32 relocation and one per R_MIPS_LO16 relocation (paired
+# with the most recently seen R_MIPS_HI16). R_MIPS_26 relocations are ignored.
+# Returns an empty list if the section has no matching .rel section.
+# Raises NotImplementedError for any other relocation type.
+def read_relocs(object_path: Path, section_name: str) -> list[Reloc]:
+    with open(object_path, "rb") as f:
+        elffile = elftools.elf.elffile.ELFFile(f)
+        symtab = elffile.get_section_by_name(".symtab")
+        data = elffile.get_section_by_name(section_name).data()
+
+        reloc_section = elffile.get_section_by_name(f".rel{section_name}")
+        if reloc_section is None:
+            return []
+
+        relocs = []
+        # Offset of the last R_MIPS_HI16 seen, used when the next R_MIPS_LO16 is found
+        offset_hi16 = 0
+        for reloc in reloc_section.iter_relocations():
+            reloc_offset = reloc.entry["r_offset"]
+            reloc_type = reloc.entry["r_info_type"]
+            reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name
+
+            if reloc_type == 2:  # R_MIPS_32
+                offset_32 = reloc_offset
+                # The addend is the (signed) 32-bit word stored at the relocation offset
+                addend = int.from_bytes(
+                    data[reloc_offset : reloc_offset + 4], "big", signed=True
+                )
+                relocs.append(Reloc(reloc_name, offset_32, None, None, addend))
+            elif reloc_type == 4:  # R_MIPS_26
+                pass
+            elif reloc_type == 5:  # R_MIPS_HI16
+                # Only remember the location, the Reloc is created with the matching LO16
+                offset_hi16 = reloc_offset
+            elif reloc_type == 6:  # R_MIPS_LO16
+                offset_lo16 = reloc_offset
+                # Each half of the addend is the last two bytes of its instruction word.
+                # The high half is read as unsigned, the low half as signed.
+                addend_hi16 = int.from_bytes(
+                    data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False
+                )
+                addend_lo16 = int.from_bytes(
+                    data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True
+                )
+                addend = (addend_hi16 << 16) + addend_lo16
+                relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend))
+            else:
+                raise NotImplementedError(f"Unsupported relocation type: {reloc_type}")
+
+        return relocs
+
+
+# For every relocation of `file` (one section of one object file in the map file), read
+# the relocated value from both the baserom (`base`) and the build (`build`), at
+# file.vrom plus the relocation offset.
+# Raises FixBssException if, for a HI16/LO16 pair, the first two bytes of either
+# instruction differ between the baserom and the build.
+def get_file_pointers(
+    file: mapfile_parser.mapfile.File,
+    base: BinaryIO,
+    build: BinaryIO,
+) -> list[Pointer]:
+    pointers = []
+    # TODO: open each ELF file only once instead of once per section?
+    for reloc in read_relocs(file.filepath, file.sectionType):
+        if reloc.offset_32 is not None:
+            base_value = read_u32(base, file.vrom + reloc.offset_32)
+            build_value = read_u32(build, file.vrom + reloc.offset_32)
+        elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None:
+            # The first two bytes of each instruction (the part that is not the 16-bit
+            # immediate read below) must be the same in the baserom and the build
+            if (
+                read_u16(base, file.vrom + reloc.offset_hi16)
+                != read_u16(build, file.vrom + reloc.offset_hi16)
+            ) or (
+                read_u16(base, file.vrom + reloc.offset_lo16)
+                != read_u16(build, file.vrom + reloc.offset_lo16)
+            ):
+                raise FixBssException(
+                    f"Reference to {reloc.name} in {file.filepath} is in a shifted or non-matching portion of the ROM.\n"
+                    "Please ensure that the only differences between the baserom and the current build are due to BSS ordering."
+                )
+
+            # Rebuild the 32-bit value from the two 16-bit immediates (the low one is signed)
+            base_value = (
+                read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16
+            ) + read_s16(base, file.vrom + reloc.offset_lo16 + 2)
+            build_value = (
+                read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16
+            ) + read_s16(build, file.vrom + reloc.offset_lo16 + 2)
+        else:
+            assert False, "Invalid relocation"
+
+        # For relocations against a global symbol, subtract the addend so that the pointer
+        # is for the start of the symbol. This can help deal with things like STACK_TOP
+        # (where the pointer is past the end of the symbol) or negative addends. If the
+        # relocation is against a section however, it's not useful to subtract the addend,
+        # so we keep it as-is and hope for the best.
+        if reloc.name.startswith("."):  # section
+            addend = reloc.addend
+        else:  # symbol
+            addend = 0
+            base_value -= reloc.addend
+            build_value -= reloc.addend
+
+        pointers.append(Pointer(reloc.name, addend, base_value, build_value))
+    return pointers
+
+
+# Open baserom and build ROM files, set by get_file_pointers_worker_init
+# (used as the initializer of the multiprocessing pool in compare_pointers)
+base = None
+build = None
+
+
+# Pool initializer: open both ROM files and keep them in the module globals
+def get_file_pointers_worker_init(base_path: Path, build_path: Path):
+    global base
+    global build
+    base = open(base_path, "rb")
+    build = open(build_path, "rb")
+
+
+# Pool task: run get_file_pointers for `file` using the ROM files opened by the initializer
+def get_file_pointers_worker(file: mapfile_parser.mapfile.File) -> list[Pointer]:
+    assert base is not None
+    assert build is not None
+    return get_file_pointers(file, base, build)
+
+
+# Compare pointers between the baserom and the current build, returning a dictionary from
+# C files to a list of pointers into their BSS sections
+# Reads the map file and ROM of the build in build/{version}/ and the decompressed baserom
+# in baseroms/{version}/, raising FixBssException if any of them does not exist.
+# If reading the pointers of a file raises FixBssException, the error is printed and the
+# program exits with status 1.
+def compare_pointers(version: str) -> dict[Path, BssSection]:
+    mapfile_path = Path(f"build/{version}/mm-{version}.map")
+    base_path = Path(f"baseroms/{version}/baserom-decompressed.z64")
+    build_path = Path(f"build/{version}/mm-{version}.z64")
+    if not mapfile_path.exists():
+        raise FixBssException(f"Could not open {mapfile_path}")
+    if not base_path.exists():
+        raise FixBssException(f"Could not open {base_path}")
+    if not build_path.exists():
+        raise FixBssException(f"Could not open {build_path}")
+
+    mapfile = mapfile_parser.mapfile.MapFile()
+    mapfile.readMapFile(mapfile_path)
+
+    # Segments built from source code (filtering out assets)
+    source_code_segments = []
+    for mapfile_segment in mapfile:
+        if not (
+            mapfile_segment.name.startswith("..boot")
+            or mapfile_segment.name.startswith("..code")
+            or mapfile_segment.name.startswith("..n64dd")
+            or mapfile_segment.name.startswith("..ovl_")
+        ):
+            continue
+        source_code_segments.append(mapfile_segment)
+
+    # Find all pointers with different values
+    # (when stdout is a terminal, the progress line below is printed instead)
+    if not stdout_isatty:
+        output(f"Comparing pointers between baserom and build ...")
+    pointers = []
+    file_results = []
+    with multiprocessing.Pool(
+        initializer=get_file_pointers_worker_init,
+        initargs=(base_path, build_path),
+    ) as p:
+        # Queue one task per non-.bss section of each .o file
+        for mapfile_segment in source_code_segments:
+            for file in mapfile_segment:
+                if not str(file.filepath).endswith(".o"):
+                    continue
+                if file.sectionType == ".bss":
+                    continue
+                file_result = p.apply_async(get_file_pointers_worker, (file,))
+                file_results.append(file_result)
+
+        # Report progress and wait until all files are done
+        num_files = len(file_results)
+        while True:
+            time.sleep(0.010)
+            num_files_done = sum(file_result.ready() for file_result in file_results)
+            if stdout_isatty:
+                # "\r" returns to the start of the line so the next update overwrites this one
+                output(
+                    f"Comparing pointers between baserom and build ... {num_files_done:>{len(f'{num_files}')}}/{num_files}",
+                    end="\r",
+                )
+            if num_files_done == num_files:
+                break
+        if stdout_isatty:
+            output("")
+
+        # Collect results and check for errors
+        for file_result in file_results:
+            try:
+                pointers.extend(file_result.get())
+            except FixBssException as e:
+                output(f"Error: {str(e)}", color=colorama.Fore.RED)
+                sys.exit(1)
+
+    # Remove duplicates and sort by baserom address
+    # (pointers are considered duplicates when they have the same baserom value)
+    pointers = list({p.base_value: p for p in pointers}.values())
+    pointers.sort(key=lambda p: p.base_value)
+
+    # Go through sections and collect differences
+    bss_sections = {}
+    for mapfile_segment in source_code_segments:
+        for file in mapfile_segment:
+            if not file.sectionType == ".bss":
+                continue
+
+            # Pointers whose value in the build is an address inside this .bss section
+            pointers_in_section = [
+                p
+                for p in pointers
+                if file.vram <= p.build_value < file.vram + file.size
+            ]
+
+            object_file = file.filepath.relative_to(f"build/{version}")
+            # Hack to handle the combined z_message_z_game_over.o file.
+            # Fortunately z_game_over has no BSS so we can just analyze z_message instead.
+            if str(object_file) == "src/code/z_message_z_game_over.o":
+                object_file = Path("src/code/z_message.o")
+
+            c_file = object_file.with_suffix(".c")
+            bss_sections[c_file] = BssSection(file.vram, pointers_in_section)
+
+    return bss_sections
+
+
+# A #pragma increment_block_number, as found from the fake struct names in the symbol table
+# (see find_pragmas)
+@dataclass
+class Pragma:
+    # Line number encoded in the fake struct names
+    line_number: int
+    # Index in the symbol table of the first fake struct of this pragma
+    block_number: int
+    # Number of fake structs of this pragma
+    amount: int
+
+
+# A BSS variable in the source code
+@dataclass
+class BssVariable:
+    # Index of the variable in the symbol table
+    block_number: int
+    # Variable name, prefixed with "function_name::" for function-level statics
+    name: str
+    # Size and alignment in bytes
+    size: int
+    align: int
+    # Whether the variable is referenced in .data or .rodata
+    referenced_in_data: bool
+
+
+# A BSS variable with its offset in the compiled .bss section
+@dataclass
+class BssSymbol:
+    name: str
+    # Offset in bytes from the start of the .bss section
+    offset: int
+    # Size and alignment in bytes
+    size: int
+    align: int
+    # Whether the variable is referenced in .data or .rodata
+    referenced_in_data: bool
+
+
+INCREMENT_BLOCK_NUMBER_RE = re.compile(r"increment_block_number_(\d+)_(\d+)")
+
+
+# Find increment_block_number pragmas by parsing the symbol names generated by preprocess.py.
+# This is pretty ugly but it seems more reliable than trying to determine the line numbers of
+# BSS variables in the C file.
+def find_pragmas(symbol_table: list[SymbolTableEntry]) -> list[Pragma]:
+    # Keep track of first block number and count for each line number
+    first_block_number = {}
+    amounts: Counter[int] = Counter()
+    for block_number, entry in enumerate(symbol_table):
+        if match := INCREMENT_BLOCK_NUMBER_RE.match(entry.name):
+            line_number = int(match.group(1))
+            if line_number not in first_block_number:
+                first_block_number[line_number] = block_number
+            amounts[line_number] += 1
+
+    pragmas = []
+    for line_number, block_number in sorted(first_block_number.items()):
+        pragmas.append(Pragma(line_number, block_number, amounts[line_number]))
+    return pragmas
+
+
+# Find all BSS variables from IDO's symbol table and U-Code output.
+# A symbol is considered to be in BSS if no "init" op refers to its block number.
+def find_bss_variables(
+    symbol_table: list[SymbolTableEntry], ucode: list[UcodeOp]
+) -> list[BssVariable]:
+    bss_variables = []
+    # Block numbers of symbols that have initialization data
+    init_block_numbers = set(op.i1 for op in ucode if op.opcode_name == "init")
+    # Name of the function of the last "ent" op seen, used to qualify function-level statics
+    last_function_name = None
+    # Block numbers referenced in .data or .rodata (in order of appearance)
+    referenced_in_data_block_numbers = []
+
+    for op in ucode:
+        # gsym: file-level global symbol
+        # lsym: file-level static symbol
+        # fsym: function-level static symbol
+        if op.opcode_name in ("gsym", "lsym", "fsym"):
+            block_number = op.i1
+            if block_number in init_block_numbers:
+                continue  # not BSS
+
+            name = symbol_table[block_number].name
+            if op.opcode_name == "fsym":
+                name = f"{last_function_name}::{name}"
+
+            size = op.args[0]
+            align = 1 << op.lexlev
+            # TODO: IDO seems to automatically align anything with size 8 or more to
+            # an 8-byte boundary in BSS. Is this correct?
+            if size >= 8:
+                align = 8
+
+            # NOTE(review): only "init" ops that appear before this op in the ucode are
+            # taken into account here - confirm references always come before the symbol op
+            referenced_in_data = block_number in referenced_in_data_block_numbers
+            bss_variables.append(
+                BssVariable(block_number, name, size, align, referenced_in_data)
+            )
+        elif op.opcode_name == "init":
+            if op.dtype == 10:  # Ndt, "non-local label"
+                assert op.const is not None
+                referenced_in_data_block_numbers.append(op.const)
+        elif op.opcode_name == "ent":
+            last_function_name = symbol_table[op.i1].name
+
+    # Sort any variables referenced in .data or .rodata first. For the others, sort by block number
+    # so it looks like the original ordering in the source code (it doesn't matter since
+    # predict_bss_ordering will sort them again anyway).
+    def sort_key(var: BssVariable) -> Tuple[int, int]:
+        if var.referenced_in_data:
+            # Position of the first reference to the variable
+            index = referenced_in_data_block_numbers.index(var.block_number)
+        else:
+            # Larger than any index above, so unreferenced variables come last
+            index = len(referenced_in_data_block_numbers)
+        return (index, var.block_number)
+
+    bss_variables.sort(key=sort_key)
+    return bss_variables
+
+
+# Predict offsets of BSS variables in the build.
+def predict_bss_ordering(variables: list[BssVariable]) -> list[BssSymbol]:
+    bss_symbols = []
+    offset = 0
+
+    # For variables referenced in .data or .rodata, keep the original order.
+    referenced_in_data = [var for var in variables if var.referenced_in_data]
+
+    # For the others, sort by block number mod 256. For ties, sort by block number.
+    not_referenced_in_data = [var for var in variables if not var.referenced_in_data]
+    not_referenced_in_data.sort(
+        key=lambda var: (var.block_number % 256, var.block_number)
+    )
+
+    sorted_variables = referenced_in_data + not_referenced_in_data
+    for var in sorted_variables:
+        size = var.size
+        align = var.align
+        offset = (offset + align - 1) & ~(align - 1)
+        bss_symbols.append(
+            BssSymbol(var.name, offset, size, align, var.referenced_in_data)
+        )
+        offset += size
+    return bss_symbols
+
+
+# Match up BSS variables between the baserom and the build using the pointers from relocations.
+# Note that we may not be able to match all variables if a variable is not referenced by any pointer.
+# Returns the matched symbols with their offsets in the baserom, sorted by offset.
+# Raises FixBssException if a pointer does not fall inside any build symbol, or if the
+# offsets deduced for a symbol are negative or inconsistent.
+def determine_base_bss_ordering(
+    build_bss_symbols: list[BssSymbol],
+    bss_section: BssSection,
+) -> list[BssSymbol]:
+    # The lowest pointer value in the baserom is used as the start of the section in the baserom
+    # (bss_section.pointers must not be empty, min() raises ValueError otherwise)
+    base_start_address = min(p.base_value for p in bss_section.pointers)
+
+    found_symbols: dict[str, BssSymbol] = {}
+    for p in bss_section.pointers:
+        base_offset = p.base_value - base_start_address
+        build_offset = p.build_value - bss_section.start_address
+
+        # Find the build symbol the pointer points into
+        new_symbol = None
+        new_offset = 0
+        for symbol in build_bss_symbols:
+            if (
+                symbol.offset <= build_offset
+                and build_offset < symbol.offset + symbol.size
+            ):
+                new_symbol = symbol
+                # Offset of the start of the symbol in the baserom: the pointer's baserom
+                # offset minus how far the pointer is into the symbol
+                new_offset = base_offset - (build_offset - symbol.offset)
+                break
+
+        if new_symbol is None:
+            if p.addend > 0:
+                addend_str = f"+0x{p.addend:X}"
+            elif p.addend < 0:
+                addend_str = f"-0x{-p.addend:X}"
+            else:
+                addend_str = ""
+            raise FixBssException(
+                f"Could not find BSS symbol for pointer {p.name}{addend_str} "
+                f"(base address 0x{p.base_value:08X}, build address 0x{p.build_value:08X}). Is the build up-to-date?"
+            )
+
+        if new_offset < 0:
+            raise FixBssException(
+                f"BSS symbol {new_symbol.name} found at negative offset in the baserom "
+                f"(-0x{-new_offset:04X}). Is the build up-to-date?"
+            )
+
+        if new_symbol.name in found_symbols:
+            # Sanity check that offsets agree
+            existing_offset = found_symbols[new_symbol.name].offset
+            if new_offset != existing_offset:
+                raise FixBssException(
+                    f"BSS symbol {new_symbol.name} found at conflicting offsets in the baserom "
+                    f"(0x{existing_offset:04X} and 0x{new_offset:04X}). Is the build up-to-date?"
+                )
+        else:
+            # Same symbol as in the build, but with its offset in the baserom
+            found_symbols[new_symbol.name] = BssSymbol(
+                new_symbol.name,
+                new_offset,
+                new_symbol.size,
+                new_symbol.align,
+                new_symbol.referenced_in_data,
+            )
+
+    return list(sorted(found_symbols.values(), key=lambda symbol: symbol.offset))
+
+
+# Generate a sequence of integers in the range [0, 256) with a 2-adic valuation of exactly `nu`.
+# The 2-adic valuation of an integer n is the largest k such that 2^k divides n
+# (see https://en.wikipedia.org/wiki/P-adic_valuation), and for convenience we define
+# the 2-adic valuation of 0 to be 8. Here's what the sequences look like for nu = 0..8:
+#   8: 0
+#   7: 128
+#   6: 64, 192
+#   5: 32, 96, 160, 224
+#   4: 16, 48, 80, 112, ...
+#   3: 8, 24, 40, 56, ...
+#   2: 4, 12, 20, 28, ...
+#   1: 2, 6, 10, 14, ...
+#   0: 1, 3, 5, 7, ...
+def gen_seq(nu: int) -> Iterator[int]:
+    if nu == 8:
+        yield 0
+    else:
+        for i in range(1 << (7 - nu)):
+            yield (2 * i + 1) * (1 << nu)
+
+
+# Yields all n-tuples of integers in the range [0, 256) with minimum 2-adic valuation
+# of exactly `min_nu`.
+def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]:
+    if n == 1:
+        for n in gen_seq(min_nu):
+            yield (n,)
+    else:
+        # (a, *b) has min 2-adic valuation = min_nu if and only if either:
+        #   a has 2-adic valuation >  min_nu and b has min 2-adic valuation == min_nu
+        #   a has 2-adic valuation == min_nu and b has min 2-adic valuation >= min_nu
+        for min_nu_a in reversed(range(min_nu + 1, 9)):
+            for a in gen_seq(min_nu_a):
+                for b in gen_candidates_impl(n - 1, min_nu):
+                    yield (a, *b)
+        for a in gen_seq(min_nu):
+            for min_nu_b in reversed(range(min_nu, 9)):
+                for b in gen_candidates_impl(n - 1, min_nu_b):
+                    yield (a, *b)
+
+
+# Yields all n-tuples of integers in the range [0, 256), ordered by descending minimum
+# 2-adic valuation of the elements in the tuple. For example, for n = 2 the sequence is:
+#   (0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192), ...
+def gen_candidates(n: int) -> Iterator[tuple[int, ...]]:
+    for nu in reversed(range(9)):
+        yield from gen_candidates_impl(n, nu)
+
+
+# Determine a new set of increment_block_number pragmas that will fix the BSS ordering.
+def solve_bss_ordering(
+    pragmas: list[Pragma],
+    bss_variables: list[BssVariable],
+    base_bss_symbols: list[BssSymbol],
+) -> list[Pragma]:
+    base_symbols_by_name = {symbol.name: symbol for symbol in base_bss_symbols}
+
+    # Our "algorithm" just tries all possible combinations of increment_block_number amounts,
+    # which can get very slow with more than a few pragmas. But, we order the candidates in a
+    # binary-search-esque way to try to find a solution faster.
+    for new_amounts in gen_candidates(len(pragmas)):
+        # Generate new block numbers
+        new_bss_variables = []
+        for var in bss_variables:
+            new_block_number = var.block_number
+            for pragma, new_amount in zip(pragmas, new_amounts):
+                if var.block_number >= pragma.block_number:
+                    new_block_number += new_amount - pragma.amount
+            new_bss_variables.append(
+                BssVariable(
+                    new_block_number,
+                    var.name,
+                    var.size,
+                    var.align,
+                    var.referenced_in_data,
+                )
+            )
+
+        # Predict new BSS and check if new ordering matches
+        new_bss_symbols = predict_bss_ordering(new_bss_variables)
+
+        bss_ordering_matches = True
+        for symbol in new_bss_symbols:
+            base_symbol = base_symbols_by_name.get(symbol.name)
+            if base_symbol is None:
+                continue
+            if symbol.offset != base_symbol.offset:
+                bss_ordering_matches = False
+                break
+
+        if bss_ordering_matches:
+            new_pragmas = []
+            for pragma, new_amount in zip(pragmas, new_amounts):
+                new_pragmas.append(
+                    Pragma(pragma.line_number, pragma.block_number, new_amount)
+                )
+            return new_pragmas
+
+    raise FixBssException("Could not find any solutions")
+
+
+# Parses #pragma increment_block_number (with line continuations already removed)
+def parse_pragma(pragma_string: str) -> dict[str, int]:
+    amounts = {}
+    for part in pragma_string.replace('"', "").split()[2:]:
+        kv = part.split(":")
+        if len(kv) != 2:
+            raise FixBssException(
+                "#pragma increment_block_number"
+                f' arguments must be version:amount pairs, not "{part}"'
+            )
+        try:
+            amount = int(kv[1])
+        except ValueError:
+            raise FixBssException(
+                "#pragma increment_block_number"
+                f' amount must be an integer, not "{kv[1]}" (in "{part}")'
+            )
+        amounts[kv[0]] = amount
+    return amounts
+
+
+# Formats #pragma increment_block_number as a list of lines
+def format_pragma(amounts: dict[str, int], max_line_length: int) -> list[str]:
+    lines = []
+    pragma_start = "#pragma increment_block_number "
+    current_line = pragma_start + '"'
+    first = True
+    for version, amount in sorted(amounts.items()):
+        part = f"{version}:{amount}"
+        if len(current_line) + len(" ") + len(part) + len('" \\') > max_line_length:
+            lines.append(current_line + '" ')
+            current_line = " " * len(pragma_start) + '"'
+            first = True
+        if not first:
+            current_line += " "
+        current_line += part
+        first = False
+    lines.append(current_line + '"\n')
+
+    if len(lines) >= 2:
+        # add and align vertically all continuation \ characters
+        n_align = max(map(len, lines[:-1]))
+        for i in range(len(lines) - 1):
+            lines[i] = f"{lines[i]:{n_align}}\\\n"
+
+    return lines
+
+
+# Rewrite in place the #pragma increment_block_number lines of `file` located at the line
+# numbers of `new_pragmas`, setting the amount for `version_to_update` to the amount of
+# each pragma and keeping the amounts of the other versions.
+# Raises FixBssException if a pragma is not found at its expected line, or cannot be parsed.
+def update_source_file(version_to_update: str, file: Path, new_pragmas: list[Pragma]):
+    with open(file, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+
+    # For each pragma: (start index, end index, new lines), with indices into `lines`
+    replace_lines: list[tuple[int, int, list[str]]] = []
+
+    for pragma in new_pragmas:
+        # Line numbers are 1-based, list indices are 0-based
+        i = pragma.line_number - 1
+        if not lines[i].startswith("#pragma increment_block_number"):
+            raise FixBssException(
+                f"Expected #pragma increment_block_number on line {pragma.line_number}"
+            )
+
+        # list the pragma line and any continuation line
+        pragma_lines = [lines[i]]
+        while pragma_lines[-1].endswith("\\\n"):
+            i += 1
+            pragma_lines.append(lines[i])
+
+        # concatenate all lines into one
+        pragma_string = "".join(s.replace("\\\n", "") for s in pragma_lines)
+
+        amounts = parse_pragma(pragma_string)
+
+        amounts[version_to_update] = pragma.amount
+
+        column_limit = 120  # matches .clang-format's ColumnLimit
+        new_pragma_lines = format_pragma(amounts, column_limit)
+
+        replace_lines.append(
+            (
+                pragma.line_number - 1,
+                pragma.line_number - 1 + len(pragma_lines),
+                new_pragma_lines,
+            )
+        )
+
+    # Replace the pragma lines starting from the end of the file, so the line numbers
+    # for pragmas earlier in the file stay accurate.
+    replace_lines.sort(key=lambda it: it[0], reverse=True)
+    for start, end, new_pragma_lines in replace_lines:
+        del lines[start:end]
+        lines[start:start] = new_pragma_lines
+
+    with open(file, "w", encoding="utf-8") as f:
+        f.writelines(lines)
+
+
+def process_file(
+    file: Path,
+    bss_section: BssSection,
+    make_log: list[str],
+    dry_run: bool,
+    version: str,
+):
+    """Compute new increment_block_number amounts for one C file.
+
+    Runs the compiler front end on `file`, predicts the BSS ordering of the
+    current build, determines the baserom ordering from `bss_section`, solves for
+    new pragma amounts and, unless `dry_run` is set, writes them to the source.
+
+    Raises FixBssException when the compiler command line cannot be found, when
+    there are no BSS pointers, or when the file has no pragmas or too many.
+    """
+
+    def dump_ordering(title, symbols):
+        # Shared printer for the "build" and "baserom" symbol listings
+        output(title)
+        for sym in symbols:
+            output(
+                f"  offset=0x{sym.offset:04X} size=0x{sym.size:04X} align=0x{sym.align:X} referenced_in_data={str(sym.referenced_in_data):<5} {sym.name}"
+            )
+
+    output(f"Processing {file} ...", color=colorama.Fore.CYAN)
+
+    command_line = find_compiler_command_line(make_log, file)
+    if command_line is None:
+        raise FixBssException(f"Could not determine compiler command line for {file}")
+    output(f"Compiler command: {shlex.join(command_line)}")
+
+    symbol_table, ucode = run_cfe(command_line, keep_files=False)
+
+    bss_variables = find_bss_variables(symbol_table, ucode)
+    output("BSS variables:")
+    for var in bss_variables:
+        block = var.block_number
+        output(
+            f"  {block:>6} [{block%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} referenced_in_data={str(var.referenced_in_data):<5} {var.name}"
+        )
+
+    build_bss_symbols = predict_bss_ordering(bss_variables)
+    dump_ordering("Current build BSS ordering:", build_bss_symbols)
+
+    if not bss_section.pointers:
+        raise FixBssException(f"No pointers to BSS found in ROM for {file}")
+
+    base_bss_symbols = determine_base_bss_ordering(build_bss_symbols, bss_section)
+    dump_ordering("Baserom BSS ordering:", base_bss_symbols)
+
+    pragmas = find_pragmas(symbol_table)
+    max_pragmas = 3
+    if not pragmas:
+        raise FixBssException(f"No increment_block_number pragmas found in {file}")
+    if len(pragmas) > max_pragmas:
+        raise FixBssException(
+            f"Too many increment_block_number pragmas found in {file} (found {len(pragmas)}, max {max_pragmas})"
+        )
+
+    output("Solving BSS ordering ...")
+    new_pragmas = solve_bss_ordering(pragmas, bss_variables, base_bss_symbols)
+    output("New increment_block_number amounts:")
+    for pragma in new_pragmas:
+        output(f"  line {pragma.line_number}: {pragma.amount}")
+
+    if dry_run:
+        return
+
+    update_source_file(version, file, new_pragmas)
+    output(f"Updated {file}", color=colorama.Fore.GREEN)
+
+
+def process_file_worker(*x):
+    """Run process_file(*x) with its output captured and emitted in one piece.
+
+    sys.stdout is swapped for an in-memory buffer while the file is processed, so
+    that output from files processed at the same time does not interleave. Errors
+    are reported into the buffer and then re-raised.
+    """
+    real_stdout = sys.stdout
+    captured = io.StringIO()
+    sys.stdout = captured
+    try:
+        process_file(*x)
+    except FixBssException as err:
+        # Expected failure: the message alone is meant for the user
+        output(f"Error: {err}", color=colorama.Fore.RED)
+        raise
+    except Exception as err:
+        # Unexpected failure: include a traceback for developers as well
+        output(f"Error: {err}", color=colorama.Fore.RED)
+        traceback.print_exc(file=sys.stdout)
+        raise
+    finally:
+        # Restore the real stdout before emitting everything that was captured
+        sys.stdout = real_stdout
+        output()
+        output(captured.getvalue(), end="")
+
+
+def main():
+    """Command-line entry point.
+
+    Compares BSS pointers for the requested version, lists the files whose BSS
+    ordering differs from the baserom, and processes either the files given on
+    the command line or all files with reordering in a multiprocessing pool.
+    Exits with status 1 if any file failed to process.
+    """
+    parser = argparse.ArgumentParser(
+        description="Automatically fix BSS ordering by editing increment_block_number pragmas. "
+        "Assumes that the build is up-to-date and that only differences between the baserom and "
+        "the current build are due to BSS ordering."
+    )
+    parser.add_argument(
+        "-v",
+        "--version",
+        dest="version",
+        type=str,
+        required=True,
+        help="OOT version",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print changes instead of editing source files",
+    )
+    parser.add_argument(
+        "files",
+        metavar="FILE",
+        nargs="*",
+        type=Path,
+        help="Fix BSS ordering for a particular C file (default: all files with BSS differences)",
+    )
+
+    args = parser.parse_args()
+    version = args.version
+
+    bss_sections = compare_pointers(version)
+
+    files_with_reordering = []
+    for file, bss_section in bss_sections.items():
+        if not bss_section.pointers:
+            continue
+        # The following heuristic doesn't work for session_config, since the first pointer into BSS is not
+        # at the start of the section so we skip it.
+        # The trailing comma matters: without it the parentheses hold a plain string and `in` becomes a
+        # substring test instead of a membership test.
+        if str(file) in ("src/audio/session_config.c",):
+            continue
+        # For the baserom, assume that the lowest address is the start of the BSS section. This might
+        # not be true if the first BSS variable is not referenced, but in practice this doesn't happen
+        # (except for session_config above).
+        base_min_address = min(p.base_value for p in bss_section.pointers)
+        build_min_address = bss_section.start_address
+        if not all(
+            p.build_value - build_min_address == p.base_value - base_min_address
+            for p in bss_section.pointers
+        ):
+            files_with_reordering.append(file)
+
+    if files_with_reordering:
+        output("Files with BSS reordering:")
+        for file in files_with_reordering:
+            output(f"  {file}")
+    else:
+        output("No BSS reordering found.")
+
+    if args.files:
+        # Ignore files that don't have a BSS section in the ROM
+        files_to_fix = [file for file in args.files if file in bss_sections]
+    else:
+        files_to_fix = files_with_reordering
+    if not files_to_fix:
+        return
+
+    output("Running make to find compiler command line ...")
+    make_log = generate_make_log(version)
+
+    with multiprocessing.Pool() as p:
+        file_results = []
+        for file in files_to_fix:
+            file_result = p.apply_async(
+                process_file_worker,
+                (
+                    file,
+                    bss_sections[file],
+                    make_log,
+                    args.dry_run,
+                    version,
+                ),
+            )
+            file_results.append(file_result)
+
+        # Wait until all files are done
+        while not all(file_result.ready() for file_result in file_results):
+            time.sleep(0.010)
+
+        # Collect results and check for errors; a worker that raised counts as a failure
+        num_successes = sum(file_result.successful() for file_result in file_results)
+        if num_successes == len(file_results):
+            output()
+            output(f"Processed {num_successes}/{len(file_results)} files.")
+        else:
+            output()
+            output(
+                f"Processed {num_successes}/{len(file_results)} files.",
+                color=colorama.Fore.RED,
+            )
+            sys.exit(1)
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
--- a/tools/ido_block_numbers.py
+++ b/tools/ido_block_numbers.py
@ -0,0 +1,567 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: © 2024 ZeldaRET
+# SPDX-License-Identifier: CC0-1.0
+
+# IDO symbol table parser for BSS ordering debugging. The compiler will assign
+# "block numbers" or "dense numbers" to symbols in order as it encounters them
+# in the source file, and the BSS section is sorted by this block number mod 256.
+# This script dumps the compiler-generated symbol table so you can see which
+# block numbers are assigned to each symbol.
+#
+# Resources:
+#   https://hackmd.io/@Roman971/BJ2DOyhBa
+#   https://github.com/decompals/ultralib/blob/main/tools/mdebug.py
+#   https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf
+#   https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h
+#   https://github.com/Synray/ido-ucode-utils
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+import itertools
+from pathlib import Path
+import platform
+import struct
+import subprocess
+import shlex
+import sys
+from typing import Optional, Tuple
+
+
+class Header:
+    """Symbol table header, decoded from SIZE bytes of big-endian data.
+
+    Every name in _FIELDS becomes an instance attribute holding the matching
+    unpacked integer.
+    """
+
+    SIZE = 0x60
+
+    # Big-endian: two 16-bit fields followed by twenty-three 32-bit fields
+    _FORMAT = ">2H23I"
+
+    # Attribute names, in the order the values appear in the packed data
+    _FIELDS = (
+        "magic",
+        "vstamp",
+        "ilineMax",
+        "cbLine",
+        "cbLineOffset",
+        "idnMax",
+        "cbDnOffset",
+        "ipdMax",
+        "cbPdOffset",
+        "isymMax",
+        "cbSymOffset",
+        "ioptMax",
+        "cbOptOffset",
+        "iauxMax",
+        "cbAuxOffset",
+        "issMax",
+        "cbSsOffset",
+        "issExtMax",
+        "cbSsExtOffset",
+        "ifdMax",
+        "cbFdOffset",
+        "crfd",
+        "cbRfdOffset",
+        "iextMax",
+        "cbExtOffset",
+    )
+
+    def __init__(self, data):
+        values = struct.unpack(self._FORMAT, data)
+        for field, value in zip(self._FIELDS, values):
+            setattr(self, field, value)
+
+
+class FileDescriptor:
+    """File descriptor entry, decoded from SIZE bytes of big-endian data.
+
+    Every name in _FIELDS becomes an instance attribute holding the matching
+    unpacked integer.
+    """
+
+    SIZE = 0x48
+
+    # Big-endian: ten 32-bit fields, two 16-bit fields, then seven 32-bit fields
+    _FORMAT = ">10I2H7I"
+
+    # Attribute names, in the order the values appear in the packed data
+    _FIELDS = (
+        "adr",
+        "rss",
+        "issBase",
+        "cbSs",
+        "isymBase",
+        "csym",
+        "ilineBase",
+        "cline",
+        "ioptBase",
+        "copt",
+        "ipdFirst",
+        "cpd",
+        "iauxBase",
+        "caux",
+        "rfdBase",
+        "crfd",
+        "flags",
+        "cbLineOffset",
+        "cbLine",
+    )
+
+    def __init__(self, data):
+        values = struct.unpack(self._FORMAT, data)
+        for field, value in zip(self._FIELDS, values):
+            setattr(self, field, value)
+
+
+class Symbol:
+    """Symbol entry, decoded from SIZE bytes holding three big-endian 32-bit words.
+
+    The symbol type is read from the top 6 bits of `flags` and the storage class
+    from the 5 bits below them.
+    """
+
+    SIZE = 0xC
+
+    # Names for the value of flags >> 26
+    _TYPE_NAMES = {
+        0: "nil",
+        1: "global",
+        2: "static",
+        3: "param",
+        4: "local",
+        5: "label",
+        6: "proc",
+        7: "block",
+        8: "end",
+        9: "member",
+        10: "typedef",
+        11: "file",
+        14: "staticproc",
+        15: "constant",
+        26: "struct",
+        27: "union",
+        28: "enum",
+        34: "indirect",
+    }
+
+    # Names for the value of (flags >> 21) & 0x1F
+    _STORAGE_CLASS_NAMES = {
+        0: "nil",
+        1: "text",
+        2: "data",
+        3: "bss",
+        4: "register",
+        5: "abs",
+        6: "undefined",
+        8: "bits",
+        9: "dbx",
+        10: "regimage",
+        11: "info",
+    }
+
+    def __init__(self, data):
+        self.iss, self.value, self.flags = struct.unpack(">3I", data)
+
+    def symbol_type(self):
+        """Return the name of the symbol type; raises KeyError for unlisted codes."""
+        type_code = self.flags >> 26
+        return self._TYPE_NAMES[type_code]
+
+    def symbol_storage_class(self):
+        """Return the name of the storage class; raises KeyError for unlisted codes."""
+        class_code = (self.flags >> 21) & 0x1F
+        return self._STORAGE_CLASS_NAMES[class_code]
+
+
+class ExternalSymbol:
+    """External symbol entry: two big-endian 16-bit fields followed by a Symbol."""
+
+    SIZE = 0x10
+
+    def __init__(self, data):
+        # The first 4 bytes hold flags and ifd; the remainder is the embedded Symbol
+        self.flags, self.ifd = struct.unpack_from(">2H", data, 0)
+        self.asym = Symbol(data[4:])
+
+
+def read_entry(data, base, offset, size):
+    """Return entry number `offset` of a table of `size`-byte entries starting at `base`."""
+    begin = base + size * offset
+    end = begin + size
+    return data[begin:end]
+
+
+def read_string(data, start):
+    """Return the NUL-terminated ASCII string that begins at offset `start` of `data`.
+
+    The terminating zero byte is not included in the result. Raises ValueError
+    if there is no zero byte at or after `start`.
+    """
+    # bytes.index does the scan for the terminator without a Python-level loop
+    end = data.index(b"\x00", start)
+    return data[start:end].decode("ascii")
+
+
+@dataclass
+class SymbolTableEntry:
+    """One dense-number entry of the symbol table, as built by parse_symbol_table."""
+
+    # Parsed symbol, or None when the entry's symbol index is the 0xFFFFF marker
+    symbol: Optional[Symbol]
+    # Symbol name; the empty string when `symbol` is None
+    name: str
+    # True when the entry's file index is 0x7FFFFFFF (looked up in the external symbols)
+    extern: bool
+
+
+def parse_symbol_table(data: bytes) -> list[SymbolTableEntry]:
+    """Parse a symbol table file into one SymbolTableEntry per dense number.
+
+    The position of an entry in the returned list is its dense ("block") number.
+    """
+    header = Header(data[: Header.SIZE])
+
+    # File descriptors
+    fds = [
+        FileDescriptor(read_entry(data, header.cbFdOffset, index, FileDescriptor.SIZE))
+        for index in range(header.ifdMax)
+    ]
+
+    # Symbol identifiers ("dense numbers"): each is a (file index, symbol index) pair
+    entries = []
+    for index in range(header.idnMax):
+        ifd, isym = struct.unpack(">II", read_entry(data, header.cbDnOffset, index, 8))
+
+        if isym == 0xFFFFF:
+            # Marker value: no symbol is attached to this dense number
+            entries.append(SymbolTableEntry(None, "", False))
+            continue
+
+        if ifd == 0x7FFFFFFF:
+            # Marker file index: look the symbol up in the external symbol table
+            external = ExternalSymbol(
+                read_entry(data, header.cbExtOffset, isym, ExternalSymbol.SIZE)
+            )
+            symbol = external.asym
+            name = read_string(data, header.cbSsExtOffset + symbol.iss)
+            entries.append(SymbolTableEntry(symbol, name, True))
+        else:
+            # Local symbol: indices are relative to the owning file descriptor
+            descriptor = fds[ifd]
+            symbol = Symbol(
+                read_entry(
+                    data, header.cbSymOffset, descriptor.isymBase + isym, Symbol.SIZE
+                )
+            )
+            name = read_string(
+                data, header.cbSsOffset + descriptor.issBase + symbol.iss
+            )
+            entries.append(SymbolTableEntry(symbol, name, False))
+
+    return entries
+
+
+def print_symbol_table(symbol_table: list[SymbolTableEntry]):
+    """Print one line per entry: block number, block number mod 256, linkage, type, class, name."""
+    print("block [mod 256]: linkage  type        class      name")
+    for block_number, entry in enumerate(symbol_table):
+        symbol = entry.symbol
+        if symbol:
+            type_name = symbol.symbol_type()
+            class_name = symbol.symbol_storage_class()
+        else:
+            # Entries without a symbol are labelled "string"
+            # TODO: is this always a string?
+            type_name = "string"
+            class_name = ""
+        linkage = "extern" if entry.extern else ""
+        print(
+            f"{block_number:>9} [{block_number % 256:>3}]: {linkage:<7}  {type_name:<10}  {class_name:<9}  {entry.name:<40}"
+        )
+
+
+@dataclass
+class UcodeOp:
+    """One decoded ucode instruction, as produced by parse_ucode."""
+
+    # Opcode byte; also the index of the instruction's entry in UCODE_OP_INFO
+    opcode: int
+    # Mnemonic taken from the UCODE_OP_INFO entry
+    opcode_name: str
+    # Top 3 bits of the instruction's second byte
+    mtype: int
+    # Low 5 bits of the instruction's second byte
+    dtype: int
+    # Big-endian 16-bit value at bytes 2-3 of the instruction
+    lexlev: int
+    # Big-endian 32-bit value at bytes 4-7 of the instruction
+    i1: int
+    # Remaining 32-bit words of the fixed-length part of the instruction
+    args: list[int]
+    # 32-bit constant word, or None for opcodes without one
+    const: Optional[int]
+    # Raw string payload of `const` bytes, or None when the instruction carries none
+    string: Optional[bytes]
+
+
+@dataclass
+class UcodeOpInfo:
+    """Static description of one ucode opcode, used by parse_ucode to decode it."""
+
+    # Opcode byte value
+    opcode: int
+    # Mnemonic
+    name: str
+    # Instruction length in 4-byte words, including the 2-word (8-byte) fixed header
+    length: int
+    # Whether a constant word (and possibly a string payload) follows the arguments
+    has_const: bool
+
+
+# Table of all known ucode opcodes as UcodeOpInfo(opcode, name, length, has_const).
+# parse_ucode looks entries up by list position (UCODE_OP_INFO[opcode]), so every
+# entry must stay at the index equal to its opcode value.
+UCODE_OP_INFO = [
+    UcodeOpInfo(0x00, "abs", 2, False),
+    UcodeOpInfo(0x01, "add", 2, False),
+    UcodeOpInfo(0x02, "adj", 4, False),
+    UcodeOpInfo(0x03, "aent", 4, False),
+    UcodeOpInfo(0x04, "and", 2, False),
+    UcodeOpInfo(0x05, "aos", 2, False),
+    UcodeOpInfo(0x06, "asym", 4, False),
+    UcodeOpInfo(0x07, "bgn", 4, False),
+    UcodeOpInfo(0x08, "bgnb", 2, False),
+    UcodeOpInfo(0x09, "bsub", 2, False),
+    UcodeOpInfo(0x0A, "cg1", 2, False),
+    UcodeOpInfo(0x0B, "cg2", 2, False),
+    UcodeOpInfo(0x0C, "chkh", 2, False),
+    UcodeOpInfo(0x0D, "chkl", 2, False),
+    UcodeOpInfo(0x0E, "chkn", 2, False),
+    UcodeOpInfo(0x0F, "chkt", 2, False),
+    UcodeOpInfo(0x10, "cia", 4, True),
+    UcodeOpInfo(0x11, "clab", 4, False),
+    UcodeOpInfo(0x12, "clbd", 2, False),
+    UcodeOpInfo(0x13, "comm", 4, True),
+    UcodeOpInfo(0x14, "csym", 4, False),
+    UcodeOpInfo(0x15, "ctrl", 4, False),
+    UcodeOpInfo(0x16, "cubd", 2, False),
+    UcodeOpInfo(0x17, "cup", 4, False),
+    UcodeOpInfo(0x18, "cvt", 4, False),
+    UcodeOpInfo(0x19, "cvtl", 2, False),
+    UcodeOpInfo(0x1A, "dec", 2, False),
+    UcodeOpInfo(0x1B, "def", 4, False),
+    UcodeOpInfo(0x1C, "dif", 4, False),
+    UcodeOpInfo(0x1D, "div", 2, False),
+    UcodeOpInfo(0x1E, "dup", 2, False),
+    UcodeOpInfo(0x1F, "end", 2, False),
+    UcodeOpInfo(0x20, "endb", 2, False),
+    UcodeOpInfo(0x21, "ent", 4, False),
+    UcodeOpInfo(0x22, "ueof", 2, False),
+    UcodeOpInfo(0x23, "equ", 2, False),
+    UcodeOpInfo(0x24, "esym", 4, False),
+    UcodeOpInfo(0x25, "fill", 4, False),
+    UcodeOpInfo(0x26, "fjp", 2, False),
+    UcodeOpInfo(0x27, "fsym", 4, False),
+    UcodeOpInfo(0x28, "geq", 2, False),
+    UcodeOpInfo(0x29, "grt", 2, False),
+    UcodeOpInfo(0x2A, "gsym", 4, False),
+    UcodeOpInfo(0x2B, "hsym", 4, False),
+    UcodeOpInfo(0x2C, "icuf", 4, False),
+    UcodeOpInfo(0x2D, "idx", 2, False),
+    UcodeOpInfo(0x2E, "iequ", 4, False),
+    UcodeOpInfo(0x2F, "igeq", 4, False),
+    UcodeOpInfo(0x30, "igrt", 4, False),
+    UcodeOpInfo(0x31, "ijp", 2, False),
+    UcodeOpInfo(0x32, "ilda", 6, False),
+    UcodeOpInfo(0x33, "ildv", 4, False),
+    UcodeOpInfo(0x34, "ileq", 4, False),
+    UcodeOpInfo(0x35, "iles", 4, False),
+    UcodeOpInfo(0x36, "ilod", 4, False),
+    UcodeOpInfo(0x37, "inc", 2, False),
+    UcodeOpInfo(0x38, "ineq", 4, False),
+    UcodeOpInfo(0x39, "init", 6, True),
+    UcodeOpInfo(0x3A, "inn", 4, False),
+    UcodeOpInfo(0x3B, "int", 4, False),
+    UcodeOpInfo(0x3C, "ior", 2, False),
+    UcodeOpInfo(0x3D, "isld", 4, False),
+    UcodeOpInfo(0x3E, "isst", 4, False),
+    UcodeOpInfo(0x3F, "istr", 4, False),
+    UcodeOpInfo(0x40, "istv", 4, False),
+    UcodeOpInfo(0x41, "ixa", 2, False),
+    UcodeOpInfo(0x42, "lab", 4, False),
+    UcodeOpInfo(0x43, "lbd", 2, False),
+    UcodeOpInfo(0x44, "lbdy", 2, False),
+    UcodeOpInfo(0x45, "lbgn", 2, False),
+    UcodeOpInfo(0x46, "lca", 4, True),
+    UcodeOpInfo(0x47, "lda", 6, False),
+    UcodeOpInfo(0x48, "ldap", 2, False),
+    UcodeOpInfo(0x49, "ldc", 4, True),
+    UcodeOpInfo(0x4A, "ldef", 4, False),
+    UcodeOpInfo(0x4B, "ldsp", 2, False),
+    UcodeOpInfo(0x4C, "lend", 2, False),
+    UcodeOpInfo(0x4D, "leq", 2, False),
+    UcodeOpInfo(0x4E, "les", 2, False),
+    UcodeOpInfo(0x4F, "lex", 2, False),
+    UcodeOpInfo(0x50, "lnot", 2, False),
+    UcodeOpInfo(0x51, "loc", 2, False),
+    UcodeOpInfo(0x52, "lod", 4, False),
+    UcodeOpInfo(0x53, "lsym", 4, False),
+    UcodeOpInfo(0x54, "ltrm", 2, False),
+    UcodeOpInfo(0x55, "max", 2, False),
+    UcodeOpInfo(0x56, "min", 2, False),
+    UcodeOpInfo(0x57, "mod", 2, False),
+    UcodeOpInfo(0x58, "mov", 4, False),
+    UcodeOpInfo(0x59, "movv", 2, False),
+    UcodeOpInfo(0x5A, "mpmv", 4, False),
+    UcodeOpInfo(0x5B, "mpy", 2, False),
+    UcodeOpInfo(0x5C, "mst", 2, False),
+    UcodeOpInfo(0x5D, "mus", 4, False),
+    UcodeOpInfo(0x5E, "neg", 2, False),
+    UcodeOpInfo(0x5F, "neq", 2, False),
+    UcodeOpInfo(0x60, "nop", 2, False),
+    UcodeOpInfo(0x61, "not", 2, False),
+    UcodeOpInfo(0x62, "odd", 2, False),
+    UcodeOpInfo(0x63, "optn", 4, False),
+    UcodeOpInfo(0x64, "par", 4, False),
+    UcodeOpInfo(0x65, "pdef", 4, False),
+    UcodeOpInfo(0x66, "pmov", 4, False),
+    UcodeOpInfo(0x67, "pop", 2, False),
+    UcodeOpInfo(0x68, "regs", 4, False),
+    UcodeOpInfo(0x69, "rem", 2, False),
+    UcodeOpInfo(0x6A, "ret", 2, False),
+    UcodeOpInfo(0x6B, "rlda", 4, False),
+    UcodeOpInfo(0x6C, "rldc", 4, True),
+    UcodeOpInfo(0x6D, "rlod", 4, False),
+    UcodeOpInfo(0x6E, "rnd", 4, False),
+    UcodeOpInfo(0x6F, "rpar", 4, False),
+    UcodeOpInfo(0x70, "rstr", 4, False),
+    UcodeOpInfo(0x71, "sdef", 4, False),
+    UcodeOpInfo(0x72, "sgs", 4, False),
+    UcodeOpInfo(0x73, "shl", 2, False),
+    UcodeOpInfo(0x74, "shr", 2, False),
+    UcodeOpInfo(0x75, "sign", 2, False),
+    UcodeOpInfo(0x76, "sqr", 2, False),
+    UcodeOpInfo(0x77, "sqrt", 2, False),
+    UcodeOpInfo(0x78, "ssym", 4, True),
+    UcodeOpInfo(0x79, "step", 2, False),
+    UcodeOpInfo(0x7A, "stp", 2, False),
+    UcodeOpInfo(0x7B, "str", 4, False),
+    UcodeOpInfo(0x7C, "stsp", 2, False),
+    UcodeOpInfo(0x7D, "sub", 2, False),
+    UcodeOpInfo(0x7E, "swp", 4, False),
+    UcodeOpInfo(0x7F, "tjp", 2, False),
+    UcodeOpInfo(0x80, "tpeq", 2, False),
+    UcodeOpInfo(0x81, "tpge", 2, False),
+    UcodeOpInfo(0x82, "tpgt", 2, False),
+    UcodeOpInfo(0x83, "tple", 2, False),
+    UcodeOpInfo(0x84, "tplt", 2, False),
+    UcodeOpInfo(0x85, "tpne", 2, False),
+    UcodeOpInfo(0x86, "typ", 4, False),
+    UcodeOpInfo(0x87, "ubd", 2, False),
+    UcodeOpInfo(0x88, "ujp", 2, False),
+    UcodeOpInfo(0x89, "unal", 2, False),
+    UcodeOpInfo(0x8A, "uni", 4, False),
+    UcodeOpInfo(0x8B, "vreg", 4, False),
+    UcodeOpInfo(0x8C, "xjp", 8, False),
+    UcodeOpInfo(0x8D, "xor", 2, False),
+    UcodeOpInfo(0x8E, "xpar", 2, False),
+    UcodeOpInfo(0x8F, "mtag", 2, False),
+    UcodeOpInfo(0x90, "alia", 2, False),
+    UcodeOpInfo(0x91, "ildi", 4, False),
+    UcodeOpInfo(0x92, "isti", 4, False),
+    UcodeOpInfo(0x93, "irld", 4, False),
+    UcodeOpInfo(0x94, "irst", 4, False),
+    UcodeOpInfo(0x95, "ldrc", 4, False),
+    UcodeOpInfo(0x96, "msym", 4, False),
+    UcodeOpInfo(0x97, "rcuf", 4, False),
+    UcodeOpInfo(0x98, "ksym", 4, False),
+    UcodeOpInfo(0x99, "osym", 4, False),
+    UcodeOpInfo(0x9A, "irlv", 2, False),
+    UcodeOpInfo(0x9B, "irsv", 2, False),
+]
+
+
+def parse_ucode(ucode: bytes) -> list[UcodeOp]:
+    """Decode a binary ucode stream into a list of UcodeOp, in stream order.
+
+    All multi-byte values are read as big-endian. Raises IndexError if an opcode
+    byte has no entry in UCODE_OP_INFO.
+    """
+    ops = []
+    pos = 0
+    while pos < len(ucode):
+        # Fixed 8-byte header: opcode, packed mtype/dtype byte, lexlev, i1
+        opcode = ucode[pos]
+        mtype = ucode[pos + 1] >> 5
+        dtype = ucode[pos + 1] & 0x1F
+        lexlev = int.from_bytes(ucode[pos + 2 : pos + 4], "big")
+        i1 = int.from_bytes(ucode[pos + 4 : pos + 8], "big")
+        pos += 8
+
+        info = UCODE_OP_INFO[opcode]
+
+        # info.length is in 4-byte words and includes the 2 header words read above
+        args = []
+        for _ in range(info.length - 2):
+            args.append(int.from_bytes(ucode[pos : pos + 4], "big"))
+            pos += 4
+
+        const = None
+        string = None
+        if info.has_const:
+            # The constant is read from the first 4 bytes of an 8-byte slot
+            const = int.from_bytes(ucode[pos : pos + 4], "big")
+            pos += 8
+            if dtype in (9, 12, 13, 14, 16) or info.name == "comm":
+                # For these, `const` is the byte length of a payload that follows,
+                # padded so the next instruction starts on a multiple of 8 bytes
+                string = ucode[pos : pos + const]
+                pos += (const + 7) & ~7
+
+        ops.append(
+            UcodeOp(
+                opcode,
+                info.name,
+                mtype,
+                dtype,
+                lexlev,
+                i1,
+                args,
+                const,
+                string,
+            )
+        )
+    return ops
+
+
+def print_ucode(ucode: list[UcodeOp]):
+    """Print one line per ucode instruction, with const/string shown only when present."""
+    for op in ucode:
+        hex_args = " ".join(f"0x{arg:X}" for arg in op.args)
+        pieces = [
+            f"{op.opcode_name:<4} mtype={op.mtype:X} dtype={op.dtype:X} lexlev={op.lexlev} i1={op.i1} args={hex_args}"
+        ]
+        if op.const is not None:
+            pieces.append(f" const=0x{op.const:X}")
+        if op.string is not None:
+            pieces.append(f" string={op.string!r}")
+        print("".join(pieces))
+
+
+def generate_make_log(version: str) -> list[str]:
+    """Return the lines printed by a dry-run make of everything for `version`.
+
+    Nothing is built: make is invoked with --always-make and --dry-run.
+    """
+    # On macOS the build is driven with gmake rather than make
+    make = "make"
+    if platform.system() == "Darwin":
+        make = "gmake"
+
+    raw_log = subprocess.check_output(
+        [make, "--always-make", "--dry-run", f"VERSION={version}"]
+    )
+    return raw_log.decode("utf-8").splitlines()
+
+
+def find_compiler_command_line(
+    make_log: list[str], filename: Path
+) -> Optional[list[str]]:
+    """Find the one make log line that compiles `filename`, split on whitespace.
+
+    A line matches when its words include both "-o" and `filename`. Returns None
+    unless exactly one line matches.
+    """
+    wanted = str(filename)
+    candidates = [
+        words
+        for words in (line.split() for line in make_log)
+        if "-o" in words and wanted in words
+    ]
+    return candidates[0] if len(candidates) == 1 else None
+
+
+def run_cfe(
+    command_line: list[str], keep_files: bool
+) -> Tuple[list[SymbolTableEntry], list[UcodeOp]]:
+    """Run the compiler front end only and return its symbol table and ucode.
+
+    The last element of `command_line` is taken to be the input file; everything
+    before it is the command to run. The outputs are read from "<stem>.T" and
+    "<stem>.B" in the current directory, and deleted afterwards unless
+    `keep_files` is set.
+    """
+    source = Path(command_line[-1])
+    compiler_args = command_line[:-1]
+
+    symbol_table_file = Path(source.stem + ".T")
+    ucode_file = Path(source.stem + ".B")
+
+    try:
+        # -Hf stops compilation after cfe so we can inspect the symbol table
+        subprocess.run(compiler_args + ["-Hf", source], check=True)
+
+        parsed_symbols = parse_symbol_table(symbol_table_file.read_bytes())
+        parsed_ucode = parse_ucode(ucode_file.read_bytes())
+        return (parsed_symbols, parsed_ucode)
+    finally:
+        # Remove the intermediate files even when the compiler or a parser failed
+        if not keep_files:
+            for leftover in (symbol_table_file, ucode_file):
+                leftover.unlink(missing_ok=True)
+
+
+def main():
+    """Command-line entry point: dump the IDO symbol table (and optionally ucode) of a C file.
+
+    Progress and error messages go to stderr; the symbol table and ucode go to
+    stdout. Exits with status 1 if the compiler command line cannot be found.
+    """
+    parser = argparse.ArgumentParser(
+        description="Dump IDO symbol table for debugging BSS ordering"
+    )
+    parser.add_argument("filename", metavar="FILE", type=Path, help="C source file")
+    parser.add_argument(
+        "-v",
+        "--version",
+        dest="version",
+        type=str,
+        default="gc-eu-mq-dbg",
+        help="OOT version (default: gc-eu-mq-dbg)",
+    )
+    parser.add_argument(
+        "--print-ucode", action="store_true", help="Print cfe ucode output"
+    )
+    parser.add_argument(
+        "--keep-files",
+        action="store_true",
+        help="Keep temporary files (symbol table and ucode)",
+    )
+
+    args = parser.parse_args()
+
+    print("Running make to find compiler command line ...", file=sys.stderr)
+    make_log = generate_make_log(args.version)
+
+    command_line = find_compiler_command_line(make_log, args.filename)
+    if command_line is None:
+        # The file name lives on `args`; a bare `filename` is not defined in this scope
+        print(
+            f"Error: could not determine compiler command line for {args.filename}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    print(f"Compiler command: {shlex.join(command_line)}", file=sys.stderr)
+
+    symbol_table, ucode = run_cfe(command_line, args.keep_files)
+    print_symbol_table(symbol_table)
+    if args.print_ucode:
+        print_ucode(ucode)
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()