From a0370e38b4451809f6dbf41bc3f4e1f423c91631 Mon Sep 17 00:00:00 2001
From: Anghelo Carvajal <angheloalf95@gmail.com>
Date: Sun, 19 Dec 2021 13:52:06 -0300
Subject: [PATCH] Fix bug in progress.py (#518)

* Fix autogenerated names regex

* Use sizes from map file instead of reading the asm directly

* Speed up

* Fix gameplay_object_exchange_static being in the wrong category

* Fix regex again
---
 tools/filelists/mm.us.rev1/objects.csv  |   1 -
 tools/filelists/mm.us.rev1/segments.csv |   1 +
 tools/progress.py                       | 227 +++++++++++-------------
 3 files changed, 102 insertions(+), 127 deletions(-)
diff --git a/tools/filelists/mm.us.rev1/objects.csv b/tools/filelists/mm.us.rev1/objects.csv
index 15d1f4ac26..187ec1c11a 100644
--- a/tools/filelists/mm.us.rev1/objects.csv
+++ b/tools/filelists/mm.us.rev1/objects.csv
@@ -1,7 +1,6 @@
 649,gameplay_keep
 650,gameplay_field_keep
 651,gameplay_dangeon_keep
-652,gameplay_object_exchange_static
 653,object_link_boy
 654,object_link_child
 655,object_link_goron
diff --git a/tools/filelists/mm.us.rev1/segments.csv b/tools/filelists/mm.us.rev1/segments.csv
index f397c8e1cf..b22534d94e 100644
--- a/tools/filelists/mm.us.rev1/segments.csv
+++ b/tools/filelists/mm.us.rev1/segments.csv
@@ -1,3 +1,4 @@
+652,gameplay_object_exchange_static
 1539,anime_model_1_static
 1540,anime_model_2_static
 1541,anime_model_3_static
diff --git a/tools/progress.py b/tools/progress.py
index 2ac68a076b..dccd743d78 100755
--- a/tools/progress.py
+++ b/tools/progress.py
@@ -14,7 +14,9 @@ NOT_ATTEMPTED_PATTERN = r'#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\)'
 
 # This is the format ZAPD uses to autogenerate variable names
 # It should not be used for properly documented variables
-AUTOGENERATED_ASSET_NAME = re.compile(r".+[0-9A-Fa-f]{6}$")
+AUTOGENERATED_ASSET_NAME = re.compile(r".+0[0-9A-Fa-f]{5}")
+
+ASM_JMP_LABEL = re.compile(r"^(?P<name>L[0-9A-F]{8})$")
 
 # TODO: consider making this a parameter of this script
 GAME_VERSION = "mm.us.rev1"
@@ -53,28 +55,12 @@ def GetCsvFilelist(version, filelist):
     with open(path, newline='') as f:
         return list(csv.reader(f, delimiter=','))
 
-def GetRemovableSize(functions_to_count):
+def GetRemovableSize(functionSizes, functions_to_count):
     size = 0
 
-    for asm_file_path in functions_to_count:
-        if "//" in asm_file_path:
-            raise RuntimeError(f"Invalid file path: {asm_file_path}")
-        file_size = 0
-        asm_lines = ReadAllLines(asm_file_path)
-        shouldCount = True
-
-        for asm_line in asm_lines:
-            if asm_line[0] == ".":
-                if asm_line.startswith(".text") or asm_line.startswith(".section .text"):
-                    shouldCount = True
-                elif ".rdata" in asm_line or ".late_rodata" in asm_line:
-                    shouldCount = False
-
-            if shouldCount:
-                if (asm_line[0:2] == "/*" and asm_line[28:30] == "*/"):
-                    file_size += 4
-
-        size += file_size
+    for func in functions_to_count:
+        if func in functionSizes:
+            size += functionSizes[func]
 
     return size
 
@@ -82,11 +68,6 @@ def CalculateMapSizes(mapFileList):
     for mapFile in mapFileList:
         accumulatedSize = 0
 
-        if mapFile["section"] != ".data":
-            continue
-        if not mapFile["name"].startswith("build/assets/"):
-            continue
-
         symbolCount = len(mapFile["symbols"])
         if symbolCount == 0:
             continue
@@ -107,6 +88,19 @@ def CalculateMapSizes(mapFileList):
         mapFile["symbols"][-1]["size"] = size
     return mapFileList
 
+def GetFunctionSizes(mapFileList):
+    functionSizes = dict()
+
+    for mapFile in mapFileList:
+        if mapFile["section"] != ".text":
+            continue
+
+        for symbol in mapFile["symbols"]:
+            symbolName = symbol["name"]
+            functionSizes[symbolName] = symbol["size"]
+
+    return functionSizes
+
 def CalculateNonNamedAssets(mapFileList, assetsTracker):
     for mapFile in mapFileList:
         if mapFile["section"] != ".data":
@@ -171,16 +165,31 @@ for assetCat in assetsCategories:
 
 
 # Initialize all the code values
-src = 0
-src_code = 0
-src_boot = 0
-src_ovl = 0
-src_libultra = 0
-asm = 0
-asm_code = 0
-asm_boot = 0
-asm_ovl = 0
-asm_libultra = 0
+srcCategories = [
+    "boot",
+    "libultra",
+    "code",
+    "overlays",
+]
+
+srcCategoriesFixer = {
+    "boot_O2": "boot",
+    "boot_O2_g3": "boot",
+}
+
+srcTracker = dict()
+asmTracker = dict()
+
+for srcCat in srcCategories:
+    srcTracker[srcCat] = dict()
+    srcTracker[srcCat]["currentSize"] = 0
+    srcTracker[srcCat]["totalSize"] = 0
+    srcTracker[srcCat]["percent"] = 0
+
+    asmTracker[srcCat] = dict()
+    asmTracker[srcCat]["currentSize"] = 0
+    asmTracker[srcCat]["totalSize"] = 0
+    asmTracker[srcCat]["percent"] = 0
 
 mapFileList = []
 
@@ -199,35 +208,20 @@ for line in map_file:
 
         if (section == ".text"):
             objFileName = objFileSplit[-1].split(".o")[0]
+            srcCat = obj_file.split("/")[2]
+            if srcCat in srcCategoriesFixer:
+                srcCat = srcCategoriesFixer[srcCat]
 
             if objFileName in fileSectionFixer:
                 correctSection = fileSectionFixer[objFileName]
-                if correctSection == "code":
-                    src_code += file_size
-                elif correctSection == "libultra":
-                    src_libultra += file_size
-                elif correctSection == "boot":
-                    src_boot += file_size
-                elif correctSection == "overlays":
-                    src_ovl += file_size
-            elif (obj_file.startswith("build/src")):
-                if (obj_file.startswith("build/src/code")):
-                    src_code += file_size
-                elif (obj_file.startswith("build/src/libultra")):
-                    src_libultra += file_size
-                elif (obj_file.startswith("build/src/boot")):
-                    src_boot += file_size
-                elif (obj_file.startswith("build/src/overlays")):
-                    src_ovl += file_size
+                if correctSection in srcTracker:
+                    srcTracker[correctSection]["totalSize"] += file_size
+            elif obj_file.startswith("build/src"):
+                if srcCat in srcTracker:
+                    srcTracker[srcCat]["totalSize"] += file_size
             elif (obj_file.startswith("build/asm")):
-                if (obj_file.startswith("build/asm/code")):
-                    asm_code += file_size
-                elif (obj_file.startswith("build/asm/libultra")):
-                    asm_libultra += file_size
-                elif (obj_file.startswith("build/asm/boot")):
-                    asm_boot += file_size
-                elif (obj_file.startswith("build/asm/overlays")):
-                    asm_ovl += file_size
+                if srcCat in asmTracker:
+                    asmTracker[srcCat]["totalSize"] += file_size
 
         if section == ".data":
             if obj_file.startswith("build/assets/"):
@@ -244,36 +238,41 @@ for line in map_file:
         varName = varName.strip()
         if varName == "0x0":
             continue
-        #print(varVram, varName)
+        if ASM_JMP_LABEL.search(varName) is not None:
+            # Filter out jump table's labels
+            continue
         symbolData = {"name": varName, "vram": varVram, "size": 0}
         mapFileList[-1]["symbols"].append(symbolData)
 
 mapFileList = CalculateMapSizes(mapFileList)
+functionSizes = GetFunctionSizes(mapFileList)
 
 assetsTracker = CalculateNonNamedAssets(mapFileList, assetsTracker)
 
 
 # Add libultra to boot.
-src_boot += src_libultra
-asm_boot += asm_libultra
+srcTracker["boot"]["totalSize"] += srcTracker["libultra"]["totalSize"]
+asmTracker["boot"]["totalSize"] += asmTracker["libultra"]["totalSize"]
+del srcTracker["libultra"]
+del asmTracker["libultra"]
 
 # Calculate Non-Matching
-non_matching_functions_ovl = list(filter(lambda x: "/overlays/" in x, non_matching_functions))
-non_matching_functions_code = list(filter(lambda x: "/code/" in x, non_matching_functions))
-non_matching_functions_boot = list(filter(lambda x: "/boot/" in x, non_matching_functions))
+non_matching_functions_ovl = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/overlays/" in x, non_matching_functions)))
+non_matching_functions_code = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/code/" in x, non_matching_functions)))
+non_matching_functions_boot = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/boot/" in x, non_matching_functions)))
 
-non_matching_asm_ovl = GetRemovableSize(non_matching_functions_ovl)
-non_matching_asm_code = GetRemovableSize(non_matching_functions_code)
-non_matching_asm_boot = GetRemovableSize(non_matching_functions_boot)
+non_matching_asm_ovl = GetRemovableSize(functionSizes, non_matching_functions_ovl)
+non_matching_asm_code = GetRemovableSize(functionSizes, non_matching_functions_code)
+non_matching_asm_boot = GetRemovableSize(functionSizes, non_matching_functions_boot)
 
 # Calculate Not Attempted
-not_attempted_functions_ovl = list(filter(lambda x: "/overlays/" in x, not_attempted_functions))
-not_attempted_functions_code = list(filter(lambda x: "/code/" in x, not_attempted_functions))
-not_attempted_functions_boot = list(filter(lambda x: "/boot/" in x, not_attempted_functions))
+not_attempted_functions_ovl = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/overlays/" in x, not_attempted_functions)))
+not_attempted_functions_code = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/code/" in x, not_attempted_functions)))
+not_attempted_functions_boot = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/boot/" in x, not_attempted_functions)))
 
-not_attempted_asm_ovl = GetRemovableSize(not_attempted_functions_ovl)
-not_attempted_asm_code = GetRemovableSize(not_attempted_functions_code)
-not_attempted_asm_boot = GetRemovableSize(not_attempted_functions_boot)
+not_attempted_asm_ovl = GetRemovableSize(functionSizes, not_attempted_functions_ovl)
+not_attempted_asm_code = GetRemovableSize(functionSizes, not_attempted_functions_code)
+not_attempted_asm_boot = GetRemovableSize(functionSizes, not_attempted_functions_boot)
 
 # All the non matching asm is the sum of non-matching code
 non_matching_asm = non_matching_asm_ovl + non_matching_asm_code + non_matching_asm_boot
@@ -282,18 +281,19 @@ non_matching_asm = non_matching_asm_ovl + non_matching_asm_code + non_matching_a
 not_attempted_asm = not_attempted_asm_ovl + not_attempted_asm_code + not_attempted_asm_boot
 
 # Calculate total decompiled for each bucket by taking out the non-matching and not attempted in ovl/code/boot buckets.
-code = src_code - (non_matching_asm_code + not_attempted_asm_code)
-boot = src_boot - (non_matching_asm_boot + not_attempted_asm_boot)
-ovl = src_ovl - (non_matching_asm_ovl + not_attempted_asm_ovl)
+srcTracker["code"]["currentSize"] = srcTracker["code"]["totalSize"] - (non_matching_asm_code + not_attempted_asm_code)
+srcTracker["boot"]["currentSize"] = srcTracker["boot"]["totalSize"] - (non_matching_asm_boot + not_attempted_asm_boot)
+srcTracker["overlays"]["currentSize"] = srcTracker["overlays"]["totalSize"] - (non_matching_asm_ovl + not_attempted_asm_ovl)
 
 # Total code bucket sizes
-code_size = src_code # + asm_code
-boot_size = src_boot # + asm_boot
-ovl_size  = src_ovl  # + asm_ovl
-handwritten = asm_code + asm_boot + asm_ovl
+handwritten = 0
+for srcCat in asmTracker:
+    handwritten += asmTracker[srcCat]["totalSize"]
 
 # Calculate the total amount of decompilable code
-total = code_size + boot_size + ovl_size
+total = 0
+for srcCat in asmTracker:
+    total += srcTracker[srcCat]["totalSize"]
 
 # Calculate size of all assets
 for assetCat in assetsTracker:
@@ -305,11 +305,12 @@ if args.matching:
         assetsTracker[assetCat]["currentSize"] -= assetsTracker[assetCat]["removableSize"]
 
 # Calculate asm and src totals
-src = src_code + src_boot + src_ovl
-asm = asm_code + asm_boot + asm_ovl
-
-# Take out the non-matchings and not attempted in grand totals
-src -= non_matching_asm + not_attempted_asm
+src = 0
+for srcCat in srcTracker:
+    src += srcTracker[srcCat]["currentSize"]
+asm = 0
+for srcCat in asmTracker:
+    asm += asmTracker[srcCat]["totalSize"]
 asm += non_matching_asm + not_attempted_asm
 
 # Calculate assets totals
@@ -319,9 +320,8 @@ assets_total = sum(x["totalSize"] for x in assetsTracker.values())
 # Convert vaules to percentages
 src_percent = 100 * src / total
 asm_percent = 100 * asm / total
-code_percent = 100 * code / code_size
-boot_percent = 100 * boot / boot_size
-ovl_percent = 100 * ovl / ovl_size
+for srcCat in ["boot", "code", "overlays"]:
+    srcTracker[srcCat]["percent"] = 100 * srcTracker[srcCat]["currentSize"] / srcTracker[srcCat]["totalSize"]
 
 assets_percent = 100 * assets / assets_total
 
@@ -336,36 +336,6 @@ bytes_per_rupee = bytes_per_mask / max_rupees
 masks = int(src / bytes_per_mask)
 rupees = int((src % bytes_per_mask) / bytes_per_rupee)
 
-# Debug print statements for the values
-#print("Total: ", total)
-#print("src: ", src)
-#print("asm: ", asm)
-#print("")
-#print("src_code: ", src_code)
-#print("src_boot: ", src_boot)
-#print("src_ovl: ", src_ovl)
-#print("")
-#print("asm_code: ", asm_code)
-#print("asm_boot: ", asm_boot)
-#print("asm_ovl: ", asm_ovl)
-#print("")
-#print("Nonmatching code: ", non_matching_asm_code)
-#print("Nonmatching boot: ", non_matching_asm_boot)
-#print("Nonmatching ovl: ", non_matching_asm_ovl)
-#print("")
-#print("Not attempted code: ", not_attempted_asm_code)
-#print("Not attempted boot: ", not_attempted_asm_boot)
-#print("Not attempted ovl: ", not_attempted_asm_ovl)
-#print("")
-#print("code_size: ", code_size)
-#print("boot_size: ", boot_size)
-#print("ovl_size: ", ovl_size)
-#print("")
-#print("code: ", code)
-#print("boot: ", boot)
-#print("ovl: ", ovl)
-#print("")
-
 if args.format == 'csv':
     version = 2
     git_object = git.Repo().head.object
@@ -373,7 +343,11 @@ if args.format == 'csv':
     git_hash = git_object.hexsha
     csv_list = [
         version, timestamp, git_hash, src, total,
-        boot, boot_size, code, code_size, ovl, ovl_size,
+    ]
+    for srcCat in ["boot", "code", "overlays"]:
+        csv_list += [srcTracker[srcCat]["currentSize"], srcTracker[srcCat]["totalSize"]]
+
+    csv_list += [
         asm, len(non_matching_functions),
     ]
     csv_list += [
@@ -396,9 +370,10 @@ elif args.format == 'text':
     assetsAdjective = "debinarized" if not args.matching else "identified"
 
     print("src:    {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
-    print("    boot:       {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4)))
-    print("    code:       {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4)))
-    print("    overlays:   {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4)))
+
+    for srcCat in ["boot", "code", "overlays"]:
+        src = srcTracker[srcCat]
+        print("    {:<10}  {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{srcCat}:", src["currentSize"], src["totalSize"], adjective, round(src["percent"], 4)))
     print()
 
     print("assets: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(assets, assets_total, assetsAdjective, round(assets_percent, 4)))