From a0370e38b4451809f6dbf41bc3f4e1f423c91631 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Sun, 19 Dec 2021 13:52:06 -0300 Subject: [PATCH] Fix bug in progress.py (#518) * Fix autogenerated names regex * Use sizes from map file instead of reading the asm directly * Speed up * Fix gameplay_object_exchange_static being in the wrong category * Fix regex again --- tools/filelists/mm.us.rev1/objects.csv | 1 - tools/filelists/mm.us.rev1/segments.csv | 1 + tools/progress.py | 227 +++++++++++------------- 3 files changed, 102 insertions(+), 127 deletions(-) diff --git a/tools/filelists/mm.us.rev1/objects.csv b/tools/filelists/mm.us.rev1/objects.csv index 15d1f4ac26..187ec1c11a 100644 --- a/tools/filelists/mm.us.rev1/objects.csv +++ b/tools/filelists/mm.us.rev1/objects.csv @@ -1,7 +1,6 @@ 649,gameplay_keep 650,gameplay_field_keep 651,gameplay_dangeon_keep -652,gameplay_object_exchange_static 653,object_link_boy 654,object_link_child 655,object_link_goron diff --git a/tools/filelists/mm.us.rev1/segments.csv b/tools/filelists/mm.us.rev1/segments.csv index f397c8e1cf..b22534d94e 100644 --- a/tools/filelists/mm.us.rev1/segments.csv +++ b/tools/filelists/mm.us.rev1/segments.csv @@ -1,3 +1,4 @@ +652,gameplay_object_exchange_static 1539,anime_model_1_static 1540,anime_model_2_static 1541,anime_model_3_static diff --git a/tools/progress.py b/tools/progress.py index 2ac68a076b..dccd743d78 100755 --- a/tools/progress.py +++ b/tools/progress.py @@ -14,7 +14,9 @@ NOT_ATTEMPTED_PATTERN = r'#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\)' # This is the format ZAPD uses to autogenerate variable names # It should not be used for properly documented variables -AUTOGENERATED_ASSET_NAME = re.compile(r".+[0-9A-Fa-f]{6}$") +AUTOGENERATED_ASSET_NAME = re.compile(r".+0[0-9A-Fa-f]{5}") + +ASM_JMP_LABEL = re.compile(r"^(?P<name>L[0-9A-F]{8})$") # TODO: consider making this a parameter of this script GAME_VERSION = "mm.us.rev1" @@ -53,28 +55,12 @@ def GetCsvFilelist(version, filelist): with open(path, 
newline='') as f: return list(csv.reader(f, delimiter=',')) -def GetRemovableSize(functions_to_count): +def GetRemovableSize(functionSizes, functions_to_count): size = 0 - for asm_file_path in functions_to_count: - if "//" in asm_file_path: - raise RuntimeError(f"Invalid file path: {asm_file_path}") - file_size = 0 - asm_lines = ReadAllLines(asm_file_path) - shouldCount = True - - for asm_line in asm_lines: - if asm_line[0] == ".": - if asm_line.startswith(".text") or asm_line.startswith(".section .text"): - shouldCount = True - elif ".rdata" in asm_line or ".late_rodata" in asm_line: - shouldCount = False - - if shouldCount: - if (asm_line[0:2] == "/*" and asm_line[28:30] == "*/"): - file_size += 4 - - size += file_size + for func in functions_to_count: + if func in functionSizes: + size += functionSizes[func] return size @@ -82,11 +68,6 @@ def CalculateMapSizes(mapFileList): for mapFile in mapFileList: accumulatedSize = 0 - if mapFile["section"] != ".data": - continue - if not mapFile["name"].startswith("build/assets/"): - continue - symbolCount = len(mapFile["symbols"]) if symbolCount == 0: continue @@ -107,6 +88,19 @@ def CalculateMapSizes(mapFileList): mapFile["symbols"][-1]["size"] = size return mapFileList +def GetFunctionSizes(mapFileList): + functionSizes = dict() + + for mapFile in mapFileList: + if mapFile["section"] != ".text": + continue + + for symbol in mapFile["symbols"]: + symbolName = symbol["name"] + functionSizes[symbolName] = symbol["size"] + + return functionSizes + def CalculateNonNamedAssets(mapFileList, assetsTracker): for mapFile in mapFileList: if mapFile["section"] != ".data": @@ -171,16 +165,31 @@ for assetCat in assetsCategories: # Initialize all the code values -src = 0 -src_code = 0 -src_boot = 0 -src_ovl = 0 -src_libultra = 0 -asm = 0 -asm_code = 0 -asm_boot = 0 -asm_ovl = 0 -asm_libultra = 0 +srcCategories = [ + "boot", + "libultra", + "code", + "overlays", +] + +srcCategoriesFixer = { + "boot_O2": "boot", + "boot_O2_g3": "boot", 
+} + +srcTracker = dict() +asmTracker = dict() + +for srcCat in srcCategories: + srcTracker[srcCat] = dict() + srcTracker[srcCat]["currentSize"] = 0 + srcTracker[srcCat]["totalSize"] = 0 + srcTracker[srcCat]["percent"] = 0 + + asmTracker[srcCat] = dict() + asmTracker[srcCat]["currentSize"] = 0 + asmTracker[srcCat]["totalSize"] = 0 + asmTracker[srcCat]["percent"] = 0 mapFileList = [] @@ -199,35 +208,20 @@ for line in map_file: if (section == ".text"): objFileName = objFileSplit[-1].split(".o")[0] + srcCat = obj_file.split("/")[2] + if srcCat in srcCategoriesFixer: + srcCat = srcCategoriesFixer[srcCat] if objFileName in fileSectionFixer: correctSection = fileSectionFixer[objFileName] - if correctSection == "code": - src_code += file_size - elif correctSection == "libultra": - src_libultra += file_size - elif correctSection == "boot": - src_boot += file_size - elif correctSection == "overlays": - src_ovl += file_size - elif (obj_file.startswith("build/src")): - if (obj_file.startswith("build/src/code")): - src_code += file_size - elif (obj_file.startswith("build/src/libultra")): - src_libultra += file_size - elif (obj_file.startswith("build/src/boot")): - src_boot += file_size - elif (obj_file.startswith("build/src/overlays")): - src_ovl += file_size + if correctSection in srcTracker: + srcTracker[correctSection]["totalSize"] += file_size + elif obj_file.startswith("build/src"): + if srcCat in srcTracker: + srcTracker[srcCat]["totalSize"] += file_size elif (obj_file.startswith("build/asm")): - if (obj_file.startswith("build/asm/code")): - asm_code += file_size - elif (obj_file.startswith("build/asm/libultra")): - asm_libultra += file_size - elif (obj_file.startswith("build/asm/boot")): - asm_boot += file_size - elif (obj_file.startswith("build/asm/overlays")): - asm_ovl += file_size + if srcCat in asmTracker: + asmTracker[srcCat]["totalSize"] += file_size if section == ".data": if obj_file.startswith("build/assets/"): @@ -244,36 +238,41 @@ for line in map_file: 
varName = varName.strip() if varName == "0x0": continue - #print(varVram, varName) + if ASM_JMP_LABEL.search(varName) is not None: + # Filter out jump table's labels + continue symbolData = {"name": varName, "vram": varVram, "size": 0} mapFileList[-1]["symbols"].append(symbolData) mapFileList = CalculateMapSizes(mapFileList) +functionSizes = GetFunctionSizes(mapFileList) assetsTracker = CalculateNonNamedAssets(mapFileList, assetsTracker) # Add libultra to boot. -src_boot += src_libultra -asm_boot += asm_libultra +srcTracker["boot"]["totalSize"] += srcTracker["libultra"]["totalSize"] +asmTracker["boot"]["totalSize"] += asmTracker["libultra"]["totalSize"] +del srcTracker["libultra"] +del asmTracker["libultra"] # Calculate Non-Matching -non_matching_functions_ovl = list(filter(lambda x: "/overlays/" in x, non_matching_functions)) -non_matching_functions_code = list(filter(lambda x: "/code/" in x, non_matching_functions)) -non_matching_functions_boot = list(filter(lambda x: "/boot/" in x, non_matching_functions)) +non_matching_functions_ovl = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/overlays/" in x, non_matching_functions))) +non_matching_functions_code = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/code/" in x, non_matching_functions))) +non_matching_functions_boot = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/boot/" in x, non_matching_functions))) -non_matching_asm_ovl = GetRemovableSize(non_matching_functions_ovl) -non_matching_asm_code = GetRemovableSize(non_matching_functions_code) -non_matching_asm_boot = GetRemovableSize(non_matching_functions_boot) +non_matching_asm_ovl = GetRemovableSize(functionSizes, non_matching_functions_ovl) +non_matching_asm_code = GetRemovableSize(functionSizes, non_matching_functions_code) +non_matching_asm_boot = GetRemovableSize(functionSizes, non_matching_functions_boot) # Calculate Not Attempted -not_attempted_functions_ovl = list(filter(lambda x: 
"/overlays/" in x, not_attempted_functions)) -not_attempted_functions_code = list(filter(lambda x: "/code/" in x, not_attempted_functions)) -not_attempted_functions_boot = list(filter(lambda x: "/boot/" in x, not_attempted_functions)) +not_attempted_functions_ovl = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/overlays/" in x, not_attempted_functions))) +not_attempted_functions_code = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/code/" in x, not_attempted_functions))) +not_attempted_functions_boot = list(map(lambda x: x.split("/")[-1].split(".")[0], filter(lambda x: "/boot/" in x, not_attempted_functions))) -not_attempted_asm_ovl = GetRemovableSize(not_attempted_functions_ovl) -not_attempted_asm_code = GetRemovableSize(not_attempted_functions_code) -not_attempted_asm_boot = GetRemovableSize(not_attempted_functions_boot) +not_attempted_asm_ovl = GetRemovableSize(functionSizes, not_attempted_functions_ovl) +not_attempted_asm_code = GetRemovableSize(functionSizes, not_attempted_functions_code) +not_attempted_asm_boot = GetRemovableSize(functionSizes, not_attempted_functions_boot) # All the non matching asm is the sum of non-matching code non_matching_asm = non_matching_asm_ovl + non_matching_asm_code + non_matching_asm_boot @@ -282,18 +281,19 @@ non_matching_asm = non_matching_asm_ovl + non_matching_asm_code + non_matching_a not_attempted_asm = not_attempted_asm_ovl + not_attempted_asm_code + not_attempted_asm_boot # Calculate total decompiled for each bucket by taking out the non-matching and not attempted in ovl/code/boot buckets. 
-code = src_code - (non_matching_asm_code + not_attempted_asm_code) -boot = src_boot - (non_matching_asm_boot + not_attempted_asm_boot) -ovl = src_ovl - (non_matching_asm_ovl + not_attempted_asm_ovl) +srcTracker["code"]["currentSize"] = srcTracker["code"]["totalSize"] - (non_matching_asm_code + not_attempted_asm_code) +srcTracker["boot"]["currentSize"] = srcTracker["boot"]["totalSize"] - (non_matching_asm_boot + not_attempted_asm_boot) +srcTracker["overlays"]["currentSize"] = srcTracker["overlays"]["totalSize"] - (non_matching_asm_ovl + not_attempted_asm_ovl) # Total code bucket sizes -code_size = src_code # + asm_code -boot_size = src_boot # + asm_boot -ovl_size = src_ovl # + asm_ovl -handwritten = asm_code + asm_boot + asm_ovl +handwritten = 0 +for srcCat in asmTracker: + handwritten += asmTracker[srcCat]["totalSize"] # Calculate the total amount of decompilable code -total = code_size + boot_size + ovl_size +total = 0 +for srcCat in asmTracker: + total += srcTracker[srcCat]["totalSize"] # Calculate size of all assets for assetCat in assetsTracker: @@ -305,11 +305,12 @@ if args.matching: assetsTracker[assetCat]["currentSize"] -= assetsTracker[assetCat]["removableSize"] # Calculate asm and src totals -src = src_code + src_boot + src_ovl -asm = asm_code + asm_boot + asm_ovl - -# Take out the non-matchings and not attempted in grand totals -src -= non_matching_asm + not_attempted_asm +src = 0 +for srcCat in srcTracker: + src += srcTracker[srcCat]["currentSize"] +asm = 0 +for srcCat in asmTracker: + asm += asmTracker[srcCat]["totalSize"] asm += non_matching_asm + not_attempted_asm # Calculate assets totals @@ -319,9 +320,8 @@ assets_total = sum(x["totalSize"] for x in assetsTracker.values()) # Convert vaules to percentages src_percent = 100 * src / total asm_percent = 100 * asm / total -code_percent = 100 * code / code_size -boot_percent = 100 * boot / boot_size -ovl_percent = 100 * ovl / ovl_size +for srcCat in ["boot", "code", "overlays"]: + 
srcTracker[srcCat]["percent"] = 100 * srcTracker[srcCat]["currentSize"] / srcTracker[srcCat]["totalSize"] assets_percent = 100 * assets / assets_total @@ -336,36 +336,6 @@ bytes_per_rupee = bytes_per_mask / max_rupees masks = int(src / bytes_per_mask) rupees = int((src % bytes_per_mask) / bytes_per_rupee) -# Debug print statements for the values -#print("Total: ", total) -#print("src: ", src) -#print("asm: ", asm) -#print("") -#print("src_code: ", src_code) -#print("src_boot: ", src_boot) -#print("src_ovl: ", src_ovl) -#print("") -#print("asm_code: ", asm_code) -#print("asm_boot: ", asm_boot) -#print("asm_ovl: ", asm_ovl) -#print("") -#print("Nonmatching code: ", non_matching_asm_code) -#print("Nonmatching boot: ", non_matching_asm_boot) -#print("Nonmatching ovl: ", non_matching_asm_ovl) -#print("") -#print("Not attempted code: ", not_attempted_asm_code) -#print("Not attempted boot: ", not_attempted_asm_boot) -#print("Not attempted ovl: ", not_attempted_asm_ovl) -#print("") -#print("code_size: ", code_size) -#print("boot_size: ", boot_size) -#print("ovl_size: ", ovl_size) -#print("") -#print("code: ", code) -#print("boot: ", boot) -#print("ovl: ", ovl) -#print("") - if args.format == 'csv': version = 2 git_object = git.Repo().head.object @@ -373,7 +343,11 @@ if args.format == 'csv': git_hash = git_object.hexsha csv_list = [ version, timestamp, git_hash, src, total, - boot, boot_size, code, code_size, ovl, ovl_size, + ] + for srcCat in ["boot", "code", "overlays"]: + csv_list += [srcTracker[srcCat]["currentSize"], srcTracker[srcCat]["totalSize"]] + + csv_list += [ asm, len(non_matching_functions), ] csv_list += [ @@ -396,9 +370,10 @@ elif args.format == 'text': assetsAdjective = "debinarized" if not args.matching else "identified" print("src: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4))) - print(" boot: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4))) - print(" code: 
{:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4))) - print(" overlays: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4))) + + for srcCat in ["boot", "code", "overlays"]: + src = srcTracker[srcCat] + print(" {:<10} {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{srcCat}:", src["currentSize"], src["totalSize"], adjective, round(src["percent"], 4))) print() print("assets: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(assets, assets_total, assetsAdjective, round(assets_percent, 4)))