diff --git a/.gitignore b/.gitignore index 9959bbd2..7c205bd7 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,4 @@ perf.data.old .gdb_history # Tooling -/toolchain/clang/ +/toolchain/clang-* diff --git a/.gitmodules b/.gitmodules index 0cbb85c4..214709a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,15 +7,12 @@ [submodule "agl"] path = lib/agl url = https://github.com/open-ead/agl -[submodule "asm-differ"] - path = tools/asm-differ - url = https://github.com/simonlindholm/asm-differ [submodule "lib/EventFlow"] path = lib/EventFlow url = https://github.com/open-ead/EventFlow +[submodule "tools/common"] + path = tools/common + url = https://github.com/open-ead/nx-decomp-tools [submodule "toolchain/musl"] path = toolchain/musl url = https://github.com/open-ead/botw-lib-musl -[submodule "tools/nx-decomp-tools-binaries"] - path = tools/nx-decomp-tools-binaries - url = https://github.com/open-ead/nx-decomp-tools-binaries diff --git a/Contributing.md b/Contributing.md index 17729f69..1709186a 100644 --- a/Contributing.md +++ b/Contributing.md @@ -4,7 +4,7 @@ To contribute to the project, you will need: * A disassembler or a decompiler such as Hex-Rays or Ghidra. * Python 3 and pip for the diff script -* These Python modules: `capstone colorama cxxfilt pyelftools` (install them with `pip install ...`) +* These Python modules: `capstone colorama cxxfilt pyelftools ansiwrap watchdog python-Levenshtein toml` (install them with `pip install ...`) Experience with reverse engineering optimized C++ code is very useful but not necessary if you already know how to decompile C code. @@ -145,7 +145,7 @@ public: 5. **Get the mangled name** of your function. For example, if you are decompiling BaseProcMgr::createInstance: ``` - $ tools/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance + $ tools/common/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance UNLISTED ksys::act::BaseProcMgr::createInstance(sead::Heap*) (_ZN4ksys3act11BaseProcMgr14createInstanceEPN4sead4HeapE) ``` @@ -206,15 +206,15 @@ This project sometimes uses small hacks to force particular code to be generated * Pass the `--source` flag to show source code interleaved with assembly code. * Add the `--inlines` flag to show inline function calls. This is not enabled by default because it usually produces too much output to be useful. * For more options, see [asm-differ](https://github.com/simonlindholm/asm-differ). -* To print progress: `tools/progress.py` +* To print progress: `tools/common/progress.py` * Note that progress is only approximate because of inline functions, templating and compiler-generated functions. * To print AI class decompilation status: `tools/ai_progress.py` * Use this to figure out which AI classes have not been decompiled yet. -* To dump symbols: `tools/print_decomp_symbols.py` +* To dump symbols: `tools/common/print_decomp_symbols.py` * Pass `-a` to list all symbols * Useful for getting the mangled name of a function. For example: ``` - $ tools/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance + $ tools/common/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance UNLISTED ksys::act::BaseProcMgr::createInstance(sead::Heap*) (_ZN4ksys3act11BaseProcMgr14createInstanceEPN4sead4HeapE) ``` diff --git a/README.md b/README.md index 97b16222..e087ed96 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ Additionally, you'll also need: 2. Run `git submodule update --init --recursive` -3. Run `cargo install --path tools/viking` +3. Run `cargo install --path tools/common/viking` Next, you'll need to acquire the **original 1.5.0 or 1.6.0 `main` NSO executable**. @@ -167,7 +167,7 @@ Additionally, you'll also need: * The decompressed 1.5.0 NSO has the following SHA256 hash: `d9fa308d0ee7c0ab081c66d987523385e1afe06f66731bbfa32628438521c106` * If you have a compressed NSO or a 1.6.0 executable, don't worry about this. -4. Run `tools/setup.py [path to the NSO]` +4. Run `tools/common/setup.py [path to the NSO]` * This will: * convert the executable if necessary * set up [Clang 4.0.1](https://releases.llvm.org/download.html#4.0.1) by downloading it from the official LLVM website diff --git a/diff_settings.py b/diff_settings.py index 5ce610cd..7af0f498 120000 --- a/diff_settings.py +++ b/diff_settings.py @@ -1 +1 @@ -tools/diff_settings.py \ No newline at end of file +tools/common/diff_settings.py \ No newline at end of file diff --git a/toolchain/ToolchainNX64.cmake b/toolchain/ToolchainNX64.cmake index 55cd5191..247e82e3 100644 --- a/toolchain/ToolchainNX64.cmake +++ b/toolchain/ToolchainNX64.cmake @@ -1,7 +1,7 @@ if (DEFINED ENV{UKING_CLANG}) set(UKING_CLANG "$ENV{UKING_CLANG}") else() - set(UKING_CLANG "${CMAKE_CURRENT_LIST_DIR}/clang") + set(UKING_CLANG "${CMAKE_CURRENT_LIST_DIR}/clang-4.0.1") endif() set(NX64_OPT_FLAGS "-O3 -g") diff --git a/tools/add_missing_functions.py b/tools/add_missing_functions.py deleted file mode 100755 index f07384fa..00000000 --- a/tools/add_missing_functions.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import csv -import sys -from pathlib import Path -from typing import Set, List - -from util import utils - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument("csv_path", help="Path to function CSV to merge") - args = parser.parse_args() - - csv_path = Path(args.csv_path) - - known_fn_addrs: Set[int] = {func.addr for func in utils.get_functions()} - new_fns: List[utils.FunctionInfo] = [] - for func in utils.get_functions(csv_path): - if func.addr not in known_fn_addrs: - new_fns.append(func) - - new_fn_list: List[utils.FunctionInfo] = [] - new_fn_list.extend(utils.get_functions()) - new_fn_list.extend(new_fns) - new_fn_list.sort(key=lambda func: func.addr) - - # Output the modified function CSV. - writer = csv.writer(sys.stdout, lineterminator="\n") - for func in new_fn_list: - writer.writerow(func.raw_row) - - -if __name__ == "__main__": - main() diff --git a/tools/asm-differ b/tools/asm-differ deleted file mode 160000 index 7cd33c4e..00000000 --- a/tools/asm-differ +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7cd33c4e7f5adadf5892261ad8a522efec51cac8 diff --git a/tools/common b/tools/common new file mode 160000 index 00000000..f0a952ce --- /dev/null +++ b/tools/common @@ -0,0 +1 @@ +Subproject commit f0a952ce32d81018ec9623922cd2726ddd07c423 diff --git a/tools/config.toml b/tools/config.toml new file mode 100644 index 00000000..9228fed9 --- /dev/null +++ b/tools/config.toml @@ -0,0 +1,2 @@ +functions_csv = "data/uking_functions.csv" +build_target = "uking" diff --git a/tools/diff_settings.py b/tools/diff_settings.py deleted file mode 100644 index d5f48269..00000000 --- a/tools/diff_settings.py +++ /dev/null @@ -1,36 +0,0 @@ -from pathlib import Path -import platform - - -ROOT = Path(__file__).resolve().parent.parent - - -def get_tools_bin_dir(): - path = ROOT / 'tools' / 'nx-decomp-tools-binaries' - system = platform.system() - if system == "Linux": - return str(path) + "/linux/" - if system == "Darwin": - return str(path) + "/macos/" - return "" - - -def apply(config, args): - config['arch'] = 'aarch64' - config['baseimg'] = 'data/main.elf' - config['myimg'] = 'build/uking' - config['source_directories'] = ['src', 'lib'] - config['objdump_executable'] = get_tools_bin_dir() + 'aarch64-none-elf-objdump' - - for dir in ('build', 'build/nx64-release'): - if (Path(dir) / 'build.ninja').is_file(): - config['make_command'] = ['ninja', '-C', dir] - - -def map_build_target(make_target: str): - if make_target == "build/uking": - return "uking" - - # TODO: When support for directly diffing object files is added, this needs to strip - # the build/ prefix from the object file targets. - return make_target diff --git a/tools/ghidra_scripts/RenameFunctionsInGhidra.java b/tools/ghidra_scripts/RenameFunctionsInGhidra.java deleted file mode 100644 index c2eedd7d..00000000 --- a/tools/ghidra_scripts/RenameFunctionsInGhidra.java +++ /dev/null @@ -1,119 +0,0 @@ -// Script to load BotW CSV data into Ghidra -//@author AlexApps99 -//@category BotW - -import ghidra.app.script.GhidraScript; -import ghidra.program.model.symbol.SourceType; -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.File; -import ghidra.program.model.address.Address; -import ghidra.program.model.listing.Function; -import ghidra.program.model.listing.FunctionManager; -import ghidra.app.cmd.label.DemanglerCmd; -import ghidra.program.model.address.AddressSet; -import ghidra.util.NumericUtilities; -import ghidra.app.cmd.label.DemanglerCmd; -import ghidra.program.model.listing.FunctionTag; -import ghidra.util.exception.DuplicateNameException; -import ghidra.program.model.listing.FunctionTagManager; - -public class RenameFunctionsInGhidra extends GhidraScript { - private FunctionManager func_mgr; - private FunctionTagManager func_tag_mgr; - private String ok; - private String minor; - private String major; - private String wip; - private String undecompiled; - private String lib; - - private FunctionTag getOrMake(String name) { - FunctionTag f = func_tag_mgr.getFunctionTag(name); - if (f == null) f = func_tag_mgr.createFunctionTag(name, null); - return f; - } - - @Override - public void run() throws Exception { - func_mgr = currentProgram.getFunctionManager(); - func_tag_mgr = func_mgr.getFunctionTagManager(); - ok = getOrMake("OK").getName(); - minor = getOrMake("MINOR").getName(); - major = getOrMake("MAJOR").getName(); - wip = getOrMake("WIP").getName(); - undecompiled = getOrMake("UNDECOMPILED").getName(); - lib = getOrMake("LIBRARY").getName(); - - - File input_csv = askFile("uking_functions.csv", "Go"); - try (BufferedReader br = new BufferedReader(new FileReader(input_csv))) { - // Skip header - String line = br.readLine(); - while ((line = br.readLine()) != null) { - String[] pieces = line.split(",", -4); // Don't skip empty last column - if (pieces.length != 4) throw new Exception("Invalid CSV row: " + line); - - Address addr = toAddr(pieces[0]); - String status = pieces[1]; - long func_size = func_size = NumericUtilities.parseLong(pieces[2].strip()); - - String name = pieces[3].strip(); - - Function func = applyFunction(addr, status, name, func_size); - } - } - } - - - // TODO the j_ prefix probably breaks demangling - private Function applyFunction(Address addr, String status, String name, long func_size) throws Exception { - if (name.isEmpty()) name = null; - - Function func = func_mgr.getFunctionAt(addr); - AddressSet body = new AddressSet(addr, addr.addNoWrap(func_size - 1)); - - - if (func != null) { - // Demangling can break this, hence the try-catch - try { - if (func.getName() != name) func.setName(name, SourceType.IMPORTED); - } catch (DuplicateNameException e) {} - if (!func.getBody().hasSameAddresses(body)) { - func.setBody(body); - } - } else { - func = func_mgr.createFunction(name, addr, body, SourceType.IMPORTED); - } - - if (name != null) { - DemanglerCmd cmd = new DemanglerCmd(addr, name); - if (!cmd.applyTo(currentProgram, monitor)) { - // Something that isn't mangled - } - } - - func.removeTag(ok); - func.removeTag(minor); - func.removeTag(major); - func.removeTag(wip); - func.removeTag(undecompiled); - func.removeTag(lib); - if (status.equals("O")) { - func.addTag(ok); - } else if (status.equals("m")) { - func.addTag(minor); - } else if (status.equals("M")) { - func.addTag(major); - } else if (status.equals("W")) { - func.addTag(wip); - } else if (status.equals("L")) { - func.addTag(lib); - func.addTag(undecompiled); - } else { - func.addTag(undecompiled); - } - - return func; - } -} \ No newline at end of file diff --git a/tools/ida_remove_function_tails.py b/tools/ida_remove_function_tails.py deleted file mode 100644 index 1fe590f0..00000000 --- a/tools/ida_remove_function_tails.py +++ /dev/null @@ -1,12 +0,0 @@ -import idaapi - -for i in range(idaapi.get_fchunk_qty()): - chunk = idaapi.getn_fchunk(i) - if not idaapi.is_func_tail(chunk): - continue - - ea = chunk.start_ea - print("removing tail 0x%016x" % ea) - parent = idaapi.get_func(ea) - idaapi.remove_func_tail(parent, ea) - idaapi.add_func(ea) diff --git a/tools/identify_matching_functions.py b/tools/identify_matching_functions.py deleted file mode 100755 index afb149d2..00000000 --- a/tools/identify_matching_functions.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -from colorama import Fore -import csv -import sys -from pathlib import Path -from typing import Dict - -import util.checker -import util.elf -from util import utils - - -def read_candidates(path: Path) -> Dict[str, util.elf.Function]: - candidates: Dict[str, util.elf.Function] = dict() - - for candidate in path.read_text().splitlines(): - columns = candidate.split() - if len(columns) == 3: - candidate = columns[2] - - candidates[candidate] = util.elf.get_fn_from_my_elf(candidate) - - return candidates - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument("csv_path", - help="Path to a list of functions to identify (in the same format as the main function CSV)") - parser.add_argument("candidates_path", - help="Path to a list of candidates (names only)") - args = parser.parse_args() - - csv_path = Path(args.csv_path) - candidates_path = Path(args.candidates_path) - - candidates = read_candidates(candidates_path) - - new_matches: Dict[int, str] = dict() - checker = util.checker.FunctionChecker() - - # Given a list L of functions to identify and a small list of candidates C, this tool will attempt to - # automatically identify matches by checking each function in L against each function in C. - # - # This matching algorithm is quite naive (quadratic time complexity if both lists have about the same size) - # but this should work well enough for short lists of candidates... - for func in utils.get_functions(csv_path): - if func.status != utils.FunctionStatus.NotDecompiled: - continue - - match_name = "" - - for candidate_name, candidate in candidates.items(): - if len(candidate.data) != func.size: - continue - if checker.check(util.elf.get_fn_from_base_elf(func.addr, func.size), candidate): - match_name = candidate_name - break - - if match_name: - new_matches[func.addr] = match_name - utils.print_note( - f"found new match: {Fore.BLUE}{match_name}{Fore.RESET} ({func.addr | 0x71_00000000:#018x})") - # This is no longer a candidate. - del candidates[match_name] - else: - utils.warn(f"no match found for {Fore.BLUE}{func.name}{Fore.RESET} ({func.addr | 0x71_00000000:#018x})") - - # Output the modified function CSV. - writer = csv.writer(sys.stdout, lineterminator="\n") - for func in utils.get_functions(): - if func.status == utils.FunctionStatus.NotDecompiled and func.addr in new_matches: - func.raw_row[3] = new_matches[func.addr] - writer.writerow(func.raw_row) - - -if __name__ == "__main__": - main() diff --git a/tools/identify_matching_functions_by_call.py b/tools/identify_matching_functions_by_call.py deleted file mode 100755 index 8496cc06..00000000 --- a/tools/identify_matching_functions_by_call.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -from typing import Dict, List -import argparse - -import cxxfilt -from colorama import Fore - -from util import utils, checker, elf - - -class Checker(checker.FunctionChecker): - def __init__(self): - super().__init__() - self.checking = "" - self.invalid_call_descriptions: List[str] = [] - self.addr_to_symbol = elf.build_addr_to_symbol_table(elf.my_symtab) - self._possible_calls: Dict[int, int] = dict() - - def reset(self) -> None: - self._possible_calls.clear() - - def get_possible_calls(self) -> Dict[int, int]: - return self._possible_calls - - def on_unknown_fn_call(self, orig_addr: int, decomp_addr: int): - existing_addr = self._possible_calls.get(orig_addr) - if existing_addr is not None and existing_addr != decomp_addr: - self.invalid_call_descriptions.append( - f"{orig_addr | 0x7100000000:#x} was mapped to {self.addr_to_symbol[existing_addr]} " - f"({existing_addr:#x}) " - f"but now maps to {self.addr_to_symbol[decomp_addr]} ({decomp_addr:#x})" - f" (while checking {self.checking})") - return - self._possible_calls[orig_addr] = decomp_addr - - -def main() -> None: - parser = argparse.ArgumentParser("Identifies matching functions by looking at function calls in matching functions") - parser.add_argument("-f", "--fn", help="Functions to analyze", nargs="*") - args = parser.parse_args() - - functions_to_analyze = set(args.fn) if args.fn else set() - - functions_by_addr: Dict[int, utils.FunctionInfo] = {fn.addr: fn for fn in utils.get_functions()} - fn_checker = Checker() - for fn in functions_by_addr.values(): - if functions_to_analyze and fn.decomp_name not in functions_to_analyze: - continue - - if fn.status != utils.FunctionStatus.Matching: - continue - - base_fn = elf.get_fn_from_base_elf(fn.addr, fn.size) - try: - my_fn = elf.get_fn_from_my_elf(fn.decomp_name) - except KeyError: - utils.warn(f"could not find function {fn.decomp_name}") - continue - - fn_checker.checking = fn.decomp_name - fn_checker.check(base_fn, my_fn) - - if fn_checker.invalid_call_descriptions: - for x in fn_checker.invalid_call_descriptions: - utils.print_note(x) - utils.fail("invalid calls detected") - - new_matches: Dict[int, str] = dict() - calls = fn_checker.get_possible_calls().copy() - for base_target, my_target in calls.items(): - target_info = functions_by_addr.get(base_target) - if target_info is None: - continue - if target_info.status != utils.FunctionStatus.NotDecompiled: - continue - - base_fn = elf.get_fn_from_base_elf(target_info.addr, target_info.size) - try: - name = fn_checker.addr_to_symbol[my_target] - my_fn = elf.get_fn_from_my_elf(name) - except KeyError: - continue - - if fn_checker.check(base_fn, my_fn): - new_matches[base_target] = name - utils.print_note(f"new match: {Fore.BLUE}{cxxfilt.demangle(name)}{Fore.RESET}") - - utils.add_decompiled_functions(new_matches) - - -if __name__ == '__main__': - main() diff --git a/tools/identify_matching_rtti_functions.py b/tools/identify_matching_rtti_functions.py deleted file mode 100755 index e49dd734..00000000 --- a/tools/identify_matching_rtti_functions.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -import struct -from typing import Dict, Set - -import capstone as cs -import cxxfilt -from colorama import Fore - -from util import utils, elf - - -def main() -> None: - new_matches: Dict[int, str] = dict() - functions_by_addr: Dict[int, utils.FunctionInfo] = {fn.addr: fn for fn in utils.get_functions()} - - md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM) - md.detail = True - decomp_addr_to_symbol = elf.build_addr_to_symbol_table(elf.my_symtab) - decomp_glob_data_table = elf.build_glob_data_table(elf.my_elf) - - processed: Set[int] = set() - for fn in functions_by_addr.values(): - if fn.status != utils.FunctionStatus.Matching: - continue - - if fn.size != 0x5C or (not fn.decomp_name.endswith("8getRuntimeTypeInfoEv") and not fn.name.endswith("rtti2")): - continue - - base_fn = elf.get_fn_from_base_elf(fn.addr, fn.size) - try: - my_fn = elf.get_fn_from_my_elf(fn.decomp_name) - except KeyError: - utils.warn(f"could not find function {fn.decomp_name}") - continue - - assert len(base_fn.data) == len(my_fn.data) - - vtable_ptr1 = 0 - vtable_ptr2 = 0 - for j, (i1, i2) in enumerate(zip(md.disasm(base_fn.data, base_fn.addr), md.disasm(my_fn.data, my_fn.addr))): - assert i1.mnemonic == i2.mnemonic - if j == 10: - assert i1.mnemonic == "adrp" - assert i1.operands[0].reg == i2.operands[0].reg - vtable_ptr1 = i1.operands[1].imm - vtable_ptr2 = i2.operands[1].imm - elif j == 11: - assert i1.mnemonic == "ldr" - assert i1.operands[0].reg == i2.operands[0].reg - assert i1.operands[1].value.mem.base == i2.operands[1].value.mem.base - vtable_ptr1 += i1.operands[1].value.mem.disp - vtable_ptr2 += i2.operands[1].value.mem.disp - break - - assert vtable_ptr1 != 0 and vtable_ptr2 != 0 - if vtable_ptr1 in processed: - continue - processed.add(vtable_ptr1) - ptr1, = struct.unpack("7d} {label}{Fore.RESET} ({percentage}% | size: {size_percentage}%)" - - -def format_progress_for_status(label: str, status: FunctionStatus): - return format_progress(label, counts[status], code_size[status]) - - -if args.csv: - import git - - version = 1 - git_object = git.Repo().head.object - timestamp = str(git_object.committed_date) - git_hash = git_object.hexsha - - fields = [ - str(version), - timestamp, - git_hash, - - str(num_total), - str(code_size_total), - - str(counts[FunctionStatus.Matching]), - str(code_size[FunctionStatus.Matching]), - - str(counts[FunctionStatus.Equivalent]), - str(code_size[FunctionStatus.Equivalent]), - - str(counts[FunctionStatus.NonMatching]), - str(code_size[FunctionStatus.NonMatching]), - ] - print(",".join(fields)) - -else: - print() - - print(f"{num_total:>7d} functions (size: ~{code_size_total} bytes)") - - count_decompiled = counts[FunctionStatus.Matching] + counts[FunctionStatus.Equivalent] + counts[ - FunctionStatus.NonMatching] - code_size_decompiled = code_size[FunctionStatus.Matching] + code_size[FunctionStatus.Equivalent] + code_size[ - FunctionStatus.NonMatching] - - print(format_progress(f"{Fore.CYAN}decompiled", count_decompiled, code_size_decompiled)) - print(format_progress_for_status(f"{Fore.GREEN}matching", FunctionStatus.Matching)) - print(format_progress_for_status(f"{Fore.YELLOW}non-matching (minor issues)", FunctionStatus.Equivalent)) - print(format_progress_for_status(f"{Fore.RED}non-matching (major issues)", FunctionStatus.NonMatching)) - print() diff --git a/tools/rename_functions_in_ida.py b/tools/rename_functions_in_ida.py deleted file mode 100644 index 0dbf2be3..00000000 --- a/tools/rename_functions_in_ida.py +++ /dev/null @@ -1,18 +0,0 @@ -# Renames functions in an IDA database to match the function names -# in the decompiled source code. - -import csv -import idc -import os - -csv_path = os.path.join(os.path.dirname(__file__), "../data/uking_functions.csv") - -with open(csv_path, "r") as f: - reader = csv.reader(f) - # Skip headers - next(reader) - for fn in reader: - addr = int(fn[0], 16) - name = fn[3] - if name and not name.startswith(("sub_", "nullsub_", "j_")): - idc.set_name(addr, name) diff --git a/tools/setup.py b/tools/setup.py index 227fd84b..a14b3d66 100755 --- a/tools/setup.py +++ b/tools/setup.py @@ -9,36 +9,7 @@ import sys import tarfile import tempfile import urllib.request - -ROOT = Path(__file__).parent.parent - - -def fail(error: str): - print(">>> " + error) - sys.exit(1) - - -def _get_tool_binary_path(): - base = ROOT / "tools" / "nx-decomp-tools-binaries" - system = platform.system() - if system == "Linux": - return str(base / "linux") + "/" - if system == "Darwin": - return str(base / "macos") + "/" - return "" - - -def _convert_nso_to_elf(nso_path: Path): - print(">>>> converting NSO to ELF...") - binpath = _get_tool_binary_path() - subprocess.check_call([binpath + "nx2elf", str(nso_path)]) - - -def _decompress_nso(nso_path: Path, dest_path: Path): - print(">>>> decompressing NSO...") - binpath = _get_tool_binary_path() - subprocess.check_call([binpath + "hactool", "-tnso", - "--uncompressed=" + str(dest_path), str(nso_path)]) +from common import setup_common as setup def _download_v160_to_v150_patch(dest: Path): @@ -46,14 +17,6 @@ def _download_v160_to_v150_patch(dest: Path): urllib.request.urlretrieve("https://s.botw.link/v150_downgrade/v160_to_v150.patch", dest) -def _apply_xdelta3_patch(input: Path, patch: Path, dest: Path): - print(">>>> applying patch...") - try: - subprocess.check_call(["xdelta3", "-d", "-s", str(input), str(patch), str(dest)]) - except FileNotFoundError: - fail("error: install xdelta3 and try again") - - def prepare_executable(original_nso: Path): COMPRESSED_V150_HASH = "898dc199301f7c419be5144bb5cb27e2fc346e22b27345ba3fb40c0060c2baf8" UNCOMPRESSED_V150_HASH = "d9fa308d0ee7c0ab081c66d987523385e1afe06f66731bbfa32628438521c106" @@ -62,26 +25,24 @@ def prepare_executable(original_nso: Path): # The uncompressed v1.5.0 main NSO. TARGET_HASH = UNCOMPRESSED_V150_HASH - TARGET_PATH = ROOT / "data" / "main.nso" - TARGET_ELF_PATH = ROOT / "data" / "main.elf" - if TARGET_PATH.is_file() and hashlib.sha256(TARGET_PATH.read_bytes()).hexdigest() == TARGET_HASH and TARGET_ELF_PATH.is_file(): + if setup.TARGET_PATH.is_file() and hashlib.sha256(setup.TARGET_PATH.read_bytes()).hexdigest() == TARGET_HASH and setup.TARGET_ELF_PATH.is_file(): print(">>> NSO is already set up") return if not original_nso.is_file(): - fail(f"{original_nso} is not a file") + setup.fail(f"{original_nso} is not a file") nso_data = original_nso.read_bytes() nso_hash = hashlib.sha256(nso_data).hexdigest() if nso_hash == UNCOMPRESSED_V150_HASH: print(">>> found uncompressed 1.5.0 NSO") - TARGET_PATH.write_bytes(nso_data) + setup.TARGET_PATH.write_bytes(nso_data) elif nso_hash == COMPRESSED_V150_HASH: print(">>> found compressed 1.5.0 NSO") - _decompress_nso(original_nso, TARGET_PATH) + setup._decompress_nso(original_nso, setup.TARGET_PATH) elif nso_hash == UNCOMPRESSED_V160_HASH: print(">>> found uncompressed 1.6.0 NSO") @@ -89,7 +50,7 @@ def prepare_executable(original_nso: Path): with tempfile.TemporaryDirectory() as tmpdir: patch_path = Path(tmpdir) / "patch" _download_v160_to_v150_patch(patch_path) - _apply_xdelta3_patch(original_nso, patch_path, TARGET_PATH) + setup._apply_xdelta3_patch(original_nso, patch_path, setup.TARGET_PATH) elif nso_hash == COMPRESSED_V160_HASH: print(">>> found compressed 1.6.0 NSO") @@ -98,86 +59,22 @@ def prepare_executable(original_nso: Path): patch_path = Path(tmpdir) / "patch" decompressed_nso_path = Path(tmpdir) / "v160.nso" - _decompress_nso(original_nso, decompressed_nso_path) + setup._decompress_nso(original_nso, decompressed_nso_path) _download_v160_to_v150_patch(patch_path) - _apply_xdelta3_patch(decompressed_nso_path, patch_path, TARGET_PATH) + setup._apply_xdelta3_patch(decompressed_nso_path, patch_path, setup.TARGET_PATH) else: - fail(f"unknown executable: {nso_hash}") + setup.fail(f"unknown executable: {nso_hash}") - if not TARGET_PATH.is_file(): - fail("internal error while preparing executable (missing NSO); please report") - if hashlib.sha256(TARGET_PATH.read_bytes()).hexdigest() != TARGET_HASH: - fail("internal error while preparing executable (wrong NSO hash); please report") + if not setup.TARGET_PATH.is_file(): + setup.fail("internal error while preparing executable (missing NSO); please report") + if hashlib.sha256(setup.TARGET_PATH.read_bytes()).hexdigest() != TARGET_HASH: + setup.fail("internal error while preparing executable (wrong NSO hash); please report") - _convert_nso_to_elf(TARGET_PATH) - - if not TARGET_ELF_PATH.is_file(): - fail("internal error while preparing executable (missing ELF); please report") - - -def set_up_compiler(): - compiler_dir = ROOT / "toolchain" / "clang" - if compiler_dir.is_dir(): - print(">>> clang is already set up: nothing to do") - return - - system = platform.system() - machine = platform.machine() - - builds = { - # Linux - ("Linux", "x86_64"): { - "url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-linux-gnu-Fedora-25.tar.xz", - "dir_name": "clang+llvm-4.0.1-x86_64-linux-gnu-Fedora-25", - }, - ("Linux", "aarch64"): { - "url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-aarch64-linux-gnu.tar.xz", - "dir_name": "clang+llvm-4.0.1-aarch64-linux-gnu", - }, - - # macOS - ("Darwin", "x86_64"): { - "url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-apple-darwin.tar.xz", - "dir_name": "clang+llvm-4.0.1-x86_64-apple-darwin", - }, - ("Darwin", "aarch64"): { - "url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-apple-darwin.tar.xz", - "dir_name": "clang+llvm-4.0.1-x86_64-apple-darwin", - }, - } - - build_info = builds.get((system, machine)) - if build_info is None: - fail( - f"unknown platform: {platform.platform()} (please report if you are on Linux and macOS)") - - url: str = build_info["url"] - dir_name: str = build_info["dir_name"] - - print(f">>> downloading Clang from {url}...") - with tempfile.TemporaryDirectory() as tmpdir: - path = tmpdir + "/" + url.split("/")[-1] - urllib.request.urlretrieve(url, path) - - print(f">>> extracting Clang...") - with tarfile.open(path) as f: - f.extractall(compiler_dir.parent) - (compiler_dir.parent / dir_name).rename(compiler_dir) - - print(">>> successfully set up Clang") - - -def create_build_dir(): - build_dir = ROOT / "build" - if build_dir.is_dir(): - print(">>> build directory already exists: nothing to do") - return - - subprocess.check_call( - "cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_TOOLCHAIN_FILE=toolchain/ToolchainNX64.cmake -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -B build/".split(" ")) - print(">>> created build directory") + setup._convert_nso_to_elf(setup.TARGET_PATH) + if not setup.TARGET_ELF_PATH.is_file(): + setup.fail("internal error while preparing executable (missing ELF); please report") def main(): parser = argparse.ArgumentParser( @@ -187,8 +84,8 @@ def main(): args = parser.parse_args() prepare_executable(args.original_nso) - set_up_compiler() - create_build_dir() + setup.set_up_compiler("4.0.1") + setup.create_build_dir() if __name__ == "__main__": diff --git a/tools/show_vtable.py b/tools/show_vtable.py deleted file mode 100755 index dfee5e13..00000000 --- a/tools/show_vtable.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import struct -from typing import Optional - -import cxxfilt -from colorama import Fore, Style - -import util.elf -from util import utils - - -def find_vtable(symtab, class_name: str) -> Optional[str]: - name_offset = len("vtable for ") - for sym in util.elf.iter_symbols(symtab): - if not sym.name.startswith("_ZTV"): - continue - if cxxfilt.demangle(sym.name)[name_offset:] == class_name: - return sym.name - return None - - -def bold(s) -> str: - return Style.BRIGHT + str(s) + Style.NORMAL - - -def dump_table(name: str) -> None: - try: - symbols = util.elf.build_addr_to_symbol_table(util.elf.my_symtab) - decomp_symbols = {fn.decomp_name for fn in utils.get_functions() if fn.decomp_name} - - offset, size = util.elf.get_symbol_file_offset_and_size(util.elf.my_elf, util.elf.my_symtab, name) - util.elf.my_elf.stream.seek(offset) - vtable_bytes = util.elf.my_elf.stream.read(size) - - if not vtable_bytes: - utils.fail( - "empty vtable; has the key function been implemented? (https://lld.llvm.org/missingkeyfunction.html)") - - print(f"{Fore.WHITE}{Style.BRIGHT}{cxxfilt.demangle(name)}{Style.RESET_ALL}") - print(f"{Fore.YELLOW}{Style.BRIGHT}vtable @ 0x0{Style.RESET_ALL}") - - assert size % 8 == 0 - for i in range(size // 8): - word: int = struct.unpack_from(" None: - parser = argparse.ArgumentParser() - parser.add_argument("symbol_name", help="Name of the vtable symbol (_ZTV...) or class name") - args = parser.parse_args() - - symbol_name: str = args.symbol_name - - if not symbol_name.startswith("_ZTV"): - symbol_name = find_vtable(util.elf.my_symtab, args.symbol_name) - - dump_table(symbol_name) - - -if __name__ == "__main__": - main() diff --git a/tools/translate_ida_types.py b/tools/translate_ida_types.py deleted file mode 100755 index 6320cd13..00000000 --- a/tools/translate_ida_types.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -from colorama import Back, Fore, Style -import sys - -mapping = { - "agl::utl::Parameter$uint$": "agl::utl::Parameter", - "agl::utl::Parameter$int$": "agl::utl::Parameter", - "agl::utl::Parameter$s32$": "agl::utl::Parameter", - "agl::utl::Parameter$float$": "agl::utl::Parameter", - "agl::utl::Parameter$f32$": "agl::utl::Parameter", - "agl::utl::Parameter$bool$": "agl::utl::Parameter", - "agl::utl::Parameter$sead::SafeString$": "agl::utl::Parameter", - "agl::utl::Parameter$sead::Vector3f$": "agl::utl::Parameter", - "agl::utl::Parameter$sead::FixedSafeString20$": "agl::utl::Parameter>", - "agl::utl::Parameter$sead::FixedSafeString40$": "agl::utl::Parameter>", - "agl::utl::Parameter$sead::FixedSafeString100$": "agl::utl::Parameter>", - "agl::utl::Parameter$sead::Color4f$": "agl::utl::Parameter", - "agl::utl::Parameter_String32": "agl::utl::Parameter>", - "agl::utl::Parameter_String64": "agl::utl::Parameter>", - "agl::utl::Parameter_String256": "agl::utl::Parameter>", -} - -lines = list(sys.stdin) - -sys.stderr.write(Back.BLUE + Fore.WHITE + Style.BRIGHT + "=" * 30 + " output " + "=" * 30 + Style.RESET_ALL + "\n") - -for line in lines: - for from_type, to_type in mapping.items(): - line = line.replace(from_type, to_type) - sys.stdout.write(line) diff --git a/tools/util/__init__.py b/tools/util/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tools/util/checker.py b/tools/util/checker.py deleted file mode 100644 index a78f1607..00000000 --- a/tools/util/checker.py +++ /dev/null @@ -1,223 +0,0 @@ -import struct -from collections import defaultdict -from typing import Set, DefaultDict, Dict, Optional, Tuple - -import capstone as cs - -from util import dsym, elf, utils - -_store_instructions = ("str", "strb", "strh", "stur", "sturb", "sturh") - - -class FunctionChecker: - def __init__(self, log_mismatch_cause: bool = False): - self.md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM) - self.md.detail = True - self.my_symtab = elf.build_name_to_symbol_table(elf.my_symtab) - self.dsymtab = dsym.DataSymbolContainer() - self.decompiled_fns: Dict[int, str] = dict() - - self._log_mismatch_cause = log_mismatch_cause - self._mismatch_addr1 = -1 - self._mismatch_addr2 = -1 - self._mismatch_cause = "" - self._base_got_section = elf.base_elf.get_section_by_name(".got") - self._decomp_glob_data_table = elf.build_glob_data_table(elf.my_elf) - self._got_data_symbol_check_cache: Dict[Tuple[int, int], bool] = dict() - - self.load_data_for_project() - - def _reset_mismatch(self) -> None: - self._mismatch_addr1 = -1 - self._mismatch_addr2 = -1 - self._mismatch_cause = "" - - def get_data_symtab(self) -> dsym.DataSymbolContainer: - return self.dsymtab - - def get_mismatch(self) -> (int, int, str): - return self._mismatch_addr1, self._mismatch_addr2, self._mismatch_cause - - def load_data_for_project(self) -> None: - self.decompiled_fns = {func.addr: func.decomp_name for func in utils.get_functions() if func.decomp_name} - self.get_data_symtab().load_from_csv(utils.get_repo_root() / "data" / "data_symbols.csv") - - def check(self, base_fn: elf.Function, my_fn: elf.Function) -> bool: - self._reset_mismatch() - gprs1: DefaultDict[int, int] = defaultdict(int) - gprs2: DefaultDict[int, int] = defaultdict(int) - adrp_pair_registers: Set[int] = set() - - size = len(base_fn) - if len(base_fn) != len(my_fn): - if self._log_mismatch_cause: - self._set_mismatch_cause(None, None, "different function length") - return False - - def forget_modified_registers(insn): - _, regs_write = insn.regs_access() - for reg in regs_write: - adrp_pair_registers.discard(reg) - - for i1, i2 in zip(self.md.disasm(base_fn.data, base_fn.addr), self.md.disasm(my_fn.data, my_fn.addr)): - if i1.bytes == i2.bytes: - if i1.mnemonic == 'adrp': - gprs1[i1.operands[0].reg] = i1.operands[1].imm - gprs2[i2.operands[0].reg] = i2.operands[1].imm - adrp_pair_registers.add(i1.operands[0].reg) - elif i1.mnemonic == 'b': - branch_target = i1.operands[0].imm - if not (base_fn.addr <= branch_target < base_fn.addr + size): - if not self._check_function_call(i1, i2, branch_target, i2.operands[0].imm): - return False - else: - forget_modified_registers(i1) - continue - - if i1.mnemonic != i2.mnemonic: - if self._log_mismatch_cause: - self._set_mismatch_cause(i1, i2, "mnemonics are different") - return False - - # Ignore some address differences until a fully matching executable can be generated. - - if i1.mnemonic == 'bl': - if not self._check_function_call(i1, i2, i1.operands[0].imm, i2.operands[0].imm): - return False - continue - - if i1.mnemonic == 'b': - branch_target = i1.operands[0].imm - # If we are branching outside the function, this is likely a tail call. - # Treat this as a function call. - if not (base_fn.addr <= branch_target < base_fn.addr + size): - if not self._check_function_call(i1, i2, branch_target, i2.operands[0].imm): - return False - continue - # Otherwise, it's a mismatch. - return False - - if i1.mnemonic == 'adrp': - if i1.operands[0].reg != i2.operands[0].reg: - return False - reg = i1.operands[0].reg - - gprs1[reg] = i1.operands[1].imm - gprs2[reg] = i2.operands[1].imm - - adrp_pair_registers.add(reg) - continue - - if i1.mnemonic == 'ldp' or i1.mnemonic == 'ldpsw' or i1.mnemonic == 'stp': - if i1.operands[0].reg != i2.operands[0].reg: - return False - if i1.operands[1].reg != i2.operands[1].reg: - return False - if i1.operands[2].value.mem.base != i2.operands[2].value.mem.base: - return False - reg = i1.operands[2].value.mem.base - if reg not in adrp_pair_registers: - return False - - gprs1[reg] += i1.operands[2].value.mem.disp - gprs2[reg] += i2.operands[2].value.mem.disp - if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]): - return False - - forget_modified_registers(i1) - continue - - if i1.mnemonic.startswith('ld') or i1.mnemonic in _store_instructions: - if i1.operands[0].reg != i2.operands[0].reg: - return False - if i1.operands[1].value.mem.base != i2.operands[1].value.mem.base: - return False - reg = i1.operands[1].value.mem.base - if reg not in adrp_pair_registers: - return False - - gprs1[reg] += i1.operands[1].value.mem.disp - gprs2[reg] += i2.operands[1].value.mem.disp - if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]): - return False - - forget_modified_registers(i1) - continue - - if i1.mnemonic == 'add': - if i1.operands[0].reg != i2.operands[0].reg: - return False - if i1.operands[1].reg != i2.operands[1].reg: - return False - reg = i1.operands[1].reg - if reg not in adrp_pair_registers: - return False - - gprs1[reg] += i1.operands[2].imm - gprs2[reg] += i2.operands[2].imm - if not self._check_data_symbol(i1, i2, gprs1[reg], gprs2[reg]): - return False - - forget_modified_registers(i1) - continue - - return False - - return True - - def _set_mismatch_cause(self, i1: Optional[any], i2: Optional[any], description: str) -> None: - self._mismatch_addr1 = i1.address if i1 else -1 - self._mismatch_addr2 = i2.address if i2 else -1 - self._mismatch_cause = description - - def _check_data_symbol(self, i1, i2, orig_addr: int, decomp_addr: int) -> bool: - symbol = self.dsymtab.get_symbol(orig_addr) - if symbol is None: - return True - - decomp_symbol = self.my_symtab[symbol.name] - if decomp_symbol.addr == decomp_addr: - return True - - if self._log_mismatch_cause: - self._set_mismatch_cause(i1, i2, f"data symbol mismatch: {symbol.name} (original address: {orig_addr:#x}, " - f"expected: {decomp_symbol.addr:#x}, " - f"actual: {decomp_addr:#x})") - - return False - - def _check_data_symbol_load(self, i1, i2, orig_addr: int, decomp_addr: int) -> bool: - cached_result = self._got_data_symbol_check_cache.get((orig_addr, decomp_addr), None) - if cached_result is not None: - return cached_result - - if not elf.is_in_section(self._base_got_section, orig_addr, 8): - return True - - ptr1, = struct.unpack(" bool: - name = self.decompiled_fns.get(orig_addr, None) - if name is None: - self.on_unknown_fn_call(orig_addr, decomp_addr) - return True - - decomp_symbol = self.my_symtab[name] - if decomp_symbol.addr == decomp_addr: - return True - - if self._log_mismatch_cause: - self._set_mismatch_cause(i1, i2, f"function call mismatch: {name}") - - return False - - def on_unknown_fn_call(self, orig_addr: int, decomp_addr: int) -> None: - pass diff --git a/tools/util/dsym.py b/tools/util/dsym.py deleted file mode 100644 index 26216323..00000000 --- a/tools/util/dsym.py +++ /dev/null @@ -1,62 +0,0 @@ -import csv -from pathlib import Path -import typing as tp - -import util.elf - - -class DataSymbol(tp.NamedTuple): - addr: int # without the 0x7100000000 base - name: str - size: int - - -_IDA_BASE = 0x7100000000 - - -class DataSymbolContainer: - def __init__(self) -> None: - self.symbols: tp.List[DataSymbol] = [] - - def load_from_csv(self, path: Path): - symtab = util.elf.build_name_to_symbol_table(util.elf.my_symtab) - - with path.open("r") as f: - for i, line in enumerate(csv.reader(f)): - if len(line) != 2: - raise RuntimeError(f"Invalid line format at line {i}") - - addr = int(line[0], 16) - _IDA_BASE - name = line[1] - if name not in symtab: - continue - size = symtab[name].size - - self.symbols.append(DataSymbol(addr, name, size)) - - # Sort the list, just in case the entries were not sorted in the CSV. - self.symbols.sort(key=lambda sym: sym.addr) - - def get_symbol(self, addr: int) -> tp.Optional[DataSymbol]: - """If addr is part of a known data symbol, this function returns the corresponding symbol.""" - - # Perform a binary search on self.symbols. - a = 0 - b = len(self.symbols) - 1 - while a <= b: - m = (a + b) // 2 - - symbol: DataSymbol = self.symbols[m] - addr_begin = symbol.addr - addr_end = addr_begin + symbol.size - - if addr_begin <= addr < addr_end: - return symbol - if addr <= addr_begin: - b = m - 1 - elif addr >= addr_end: - a = m + 1 - else: - return None - - return None diff --git a/tools/util/elf.py b/tools/util/elf.py deleted file mode 100644 index f2bfe892..00000000 --- a/tools/util/elf.py +++ /dev/null @@ -1,169 +0,0 @@ -import io -import struct -from typing import Any, Dict, NamedTuple, Tuple - -from elftools.elf.elffile import ELFFile -from elftools.elf.relocation import RelocationSection -from elftools.elf.sections import Section - -import diff_settings -from util import utils - -_config: Dict[str, Any] = {} -diff_settings.apply(_config, {}) - -_root = utils.get_repo_root() - -base_elf_data = io.BytesIO((_root / _config["baseimg"]).read_bytes()) -my_elf_data = io.BytesIO((_root / _config["myimg"]).read_bytes()) - -base_elf = ELFFile(base_elf_data) -my_elf = ELFFile(my_elf_data) -my_symtab = my_elf.get_section_by_name(".symtab") -if not my_symtab: - utils.fail(f'{_config["myimg"]} has no symbol table') - - -class Symbol(NamedTuple): - addr: int - name: str - size: int - - -class Function(NamedTuple): - data: bytes - addr: int - - -_ElfSymFormat = struct.Struct(" int: - for seg in elf.iter_segments(): - if seg.header["p_type"] != "PT_LOAD": - continue - if seg["p_vaddr"] <= addr < seg["p_vaddr"] + seg["p_filesz"]: - return addr - seg["p_vaddr"] + seg["p_offset"] - raise KeyError(f"No segment found for {addr:#x}") - - -def is_in_section(section: Section, addr: int, size: int) -> bool: - begin = section["sh_addr"] - end = begin + section["sh_size"] - return begin <= addr < end and begin <= addr + size < end - - -_TableCache = dict() - - -def make_table_cached(symtab): - table = _TableCache.get(id(symtab)) - if table is None: - table = build_name_to_symbol_table(symtab) - _TableCache[id(symtab)] = table - return table - - -def get_symbol(symtab, name: str) -> Symbol: - table = make_table_cached(symtab) - return table[name] - - -def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int): - sym = get_symbol(table, name) - return get_file_offset(elf, sym.addr), sym.size - - -def iter_symbols(symtab): - offset = symtab["sh_offset"] - entsize = symtab["sh_entsize"] - for i in range(symtab.num_symbols()): - symtab.stream.seek(offset + i * entsize) - entry = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)) - name = symtab.stringtable.get_string(entry.st_name) - yield Symbol(entry.st_value, name, entry.st_size) - - -def build_addr_to_symbol_table(symtab) -> Dict[int, str]: - table = dict() - for sym in iter_symbols(symtab): - addr = sym.addr - existing_value = table.get(addr, None) - if existing_value is None or not existing_value.startswith("_Z"): - table[addr] = sym.name - return table - - -def build_name_to_symbol_table(symtab) -> Dict[str, Symbol]: - return {sym.name: sym for sym in iter_symbols(symtab)} - - -def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes: - addr &= ~0x7100000000 - offset: int = get_file_offset(elf, addr) - elf.stream.seek(offset) - return elf.stream.read(size) - - -def get_fn_from_base_elf(addr: int, size: int) -> Function: - return Function(read_from_elf(base_elf, addr, size), addr) - - -def get_fn_from_my_elf(name: str) -> Function: - sym = get_symbol(my_symtab, name) - return Function(read_from_elf(my_elf, sym.addr, sym.size), sym.addr) - - -R_AARCH64_GLOB_DAT = 1025 -R_AARCH64_RELATIVE = 1027 - - -def build_glob_data_table(elf: ELFFile) -> Dict[int, int]: - table: Dict[int, int] = dict() - section = elf.get_section_by_name(".rela.dyn") - assert isinstance(section, RelocationSection) - - symtab = elf.get_section(section["sh_link"]) - offset = symtab["sh_offset"] - entsize = symtab["sh_entsize"] - - for reloc in section.iter_relocations(): - symtab.stream.seek(offset + reloc["r_info_sym"] * entsize) - sym_value = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)).st_value - info_type = reloc["r_info_type"] - if info_type == R_AARCH64_GLOB_DAT: - table[reloc["r_offset"]] = sym_value + reloc["r_addend"] - elif info_type == R_AARCH64_RELATIVE: - # FIXME: this should be Delta(S) + A - table[reloc["r_offset"]] = sym_value + reloc["r_addend"] - - return table - - -def unpack_vtable_fns(vtable_bytes: bytes, num_entries: int) -> Tuple[int, ...]: - return struct.unpack(f"<{num_entries}Q", vtable_bytes[:num_entries * 8]) - - -def get_vtable_fns_from_base_elf(vtable_addr: int, num_entries: int) -> Tuple[int, ...]: - vtable_bytes = read_from_elf(base_elf, vtable_addr, num_entries * 8) - return unpack_vtable_fns(vtable_bytes, num_entries) - - -def get_vtable_fns_from_my_elf(vtable_name: str, num_entries: int) -> Tuple[int, ...]: - offset, size = get_symbol_file_offset_and_size(my_elf, my_symtab, vtable_name) - my_elf.stream.seek(offset + 0x10) - vtable_bytes = my_elf.stream.read(size - 0x10) - return unpack_vtable_fns(vtable_bytes, num_entries) diff --git a/tools/util/graph.py b/tools/util/graph.py deleted file mode 100644 index 9657103f..00000000 --- a/tools/util/graph.py +++ /dev/null @@ -1,61 +0,0 @@ -from collections import defaultdict - -_Visiting = 0 -_Visited = 1 - - -class Graph: - def __init__(self): - self.nodes = defaultdict(set) - - def add_edge(self, a, b): - self.nodes[a].add(b) - - def find_connected_components(self): - nodes = defaultdict(list) - for u in self.nodes: - for v in self.nodes[u]: - nodes[u].append(v) - nodes[v].append(u) - cc = [] - visited = set() - - def dfs(start): - result = [] - to_visit = [start] - while to_visit: - x = to_visit.pop() - result.append(x) - visited.add(x) - for y in nodes[x]: - if y not in visited: - to_visit.append(y) - return result - - for u in nodes.keys(): - if u in visited: - continue - cc.append(dfs(u)) - return cc - - def topological_sort(self) -> list: - result = [] - statuses = dict() - - def dfs(node): - if statuses.get(node) == _Visiting: - raise RuntimeError("Graph is not acyclic") - if statuses.get(node) == _Visited: - return - - statuses[node] = _Visiting - for y in self.nodes.get(node, set()): - dfs(y) - - statuses[node] = _Visited - result.insert(0, node) - - for x in self.nodes: - dfs(x) - - return result diff --git a/tools/util/utils.py b/tools/util/utils.py deleted file mode 100644 index 9a0b4a57..00000000 --- a/tools/util/utils.py +++ /dev/null @@ -1,129 +0,0 @@ -import io - -from colorama import Fore, Style -import csv -import warnings -import enum -from pathlib import Path -import sys -import typing as tp - -try: - import cxxfilt -except: - # cxxfilt cannot be used on Windows. - warnings.warn("cxxfilt could not be imported; demangling functions will fail") - - -class FunctionStatus(enum.Enum): - Matching = 0 - Equivalent = 1 # semantically equivalent but not perfectly matching - NonMatching = 2 - Wip = 3 - NotDecompiled = 4 - - -class FunctionInfo(tp.NamedTuple): - addr: int # without the 0x7100000000 base - name: str - size: int - decomp_name: str - library: bool - status: FunctionStatus - raw_row: tp.List[str] - - -_markers = { - "O": FunctionStatus.Matching, - "m": FunctionStatus.Equivalent, - "M": FunctionStatus.NonMatching, - "W": FunctionStatus.Wip, - "U": FunctionStatus.NotDecompiled, - "L": FunctionStatus.NotDecompiled, -} - - -def parse_function_csv_entry(row) -> FunctionInfo: - ea, stat, size, name = row - status = _markers.get(stat, FunctionStatus.NotDecompiled) - decomp_name = "" - - if status != FunctionStatus.NotDecompiled: - decomp_name = name - - addr = int(ea, 16) - 0x7100000000 - return FunctionInfo(addr, name, int(size), decomp_name, stat == "L", status, row) - - -def get_functions_csv_path() -> Path: - return get_repo_root() / "data" / "uking_functions.csv" - - -def get_functions(path: tp.Optional[Path] = None) -> tp.Iterable[FunctionInfo]: - if path is None: - path = get_functions_csv_path() - with path.open() as f: - reader = csv.reader(f) - # Skip headers - next(reader) - for row in reader: - try: - entry = parse_function_csv_entry(row) - # excluded library function - if entry.library: - continue - yield entry - except ValueError as e: - raise Exception(f"Failed to parse line {reader.line_num}") from e - - -def add_decompiled_functions(new_matches: tp.Dict[int, str], - new_orig_names: tp.Optional[tp.Dict[int, str]] = None) -> None: - buffer = io.StringIO() - writer = csv.writer(buffer, lineterminator="\n") - for func in get_functions(): - if new_orig_names is not None and func.status == FunctionStatus.NotDecompiled and func.addr in new_orig_names: - func.raw_row[3] = new_orig_names[func.addr] - if func.status == FunctionStatus.NotDecompiled and func.addr in new_matches: - func.raw_row[3] = new_matches[func.addr] - writer.writerow(func.raw_row) - get_functions_csv_path().write_text(buffer.getvalue()) - - -def format_symbol_name(name: str) -> str: - try: - return f"{cxxfilt.demangle(name)} {Style.DIM}({name}){Style.RESET_ALL}" - except: - return name - - -def format_symbol_name_for_msg(name: str) -> str: - try: - return f"{Fore.BLUE}{cxxfilt.demangle(name)}{Fore.RESET} {Style.DIM}({name}){Style.RESET_ALL}{Style.BRIGHT}" - except: - return name - - -def are_demangled_names_equal(name1: str, name2: str): - return cxxfilt.demangle(name1) == cxxfilt.demangle(name2) - - -def print_note(msg: str, prefix: str = ""): - sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.CYAN}note:{Fore.RESET} {msg}{Style.RESET_ALL}\n") - - -def warn(msg: str, prefix: str = ""): - sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.MAGENTA}warning:{Fore.RESET} {msg}{Style.RESET_ALL}\n") - - -def print_error(msg: str, prefix: str = ""): - sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.RED}error:{Fore.RESET} {msg}{Style.RESET_ALL}\n") - - -def fail(msg: str, prefix: str = ""): - print_error(msg, prefix) - sys.exit(1) - - -def get_repo_root() -> Path: - return Path(__file__).parent.parent.parent diff --git a/tools/viking/.gitignore b/tools/viking/.gitignore deleted file mode 100644 index ea8c4bf7..00000000 --- a/tools/viking/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target diff --git a/tools/viking/Cargo.lock b/tools/viking/Cargo.lock deleted file mode 100644 index 93534165..00000000 --- a/tools/viking/Cargo.lock +++ /dev/null @@ -1,506 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "aho-corasick" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] - -[[package]] -name = "anyhow" -version = "1.0.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486" - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - -[[package]] -name = "bstr" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "capstone" -version = "0.9.0" -source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07" -dependencies = [ - "capstone-sys", - "libc", -] - -[[package]] -name = "capstone-sys" -version = "0.13.0" -source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "cc" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "colored" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" -dependencies = [ - "atty", - "lazy_static", - "winapi", -] - -[[package]] -name = "cpp_demangle" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea47428dc9d2237f3c6bc134472edfd63ebba0af932e783506dcfd66f10d18a" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" -dependencies = [ - "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - -[[package]] -name = "goblin" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b1800b95efee8ad4ef04517d4d69f8e209e763b1668f1179aeeedd0e454da55" -dependencies = [ - "log", - "plain", - "scroll", -] - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "itertools" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" - -[[package]] -name = "lazy-init" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23517540be87a91d06324e6bf6286ba8214171123ee8862ae9a5e7d938d71815" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1b8479c593dba88c2741fc50b92e13dbabbbe0bd504d979f244ccc1a5b1c01" -dependencies = [ - "cc", -] - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "memchr" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" - -[[package]] -name = "memmap" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "memoffset" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" -dependencies = [ - "autocfg", -] - -[[package]] -name = "mimalloc" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb74897ce508e6c49156fd1476fc5922cbc6e75183c65e399c765a09122e5130" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "owning_ref" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce" -dependencies = [ - "stable_deref_trait", -] - -[[package]] -name = "plain" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" - -[[package]] -name = "proc-macro2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rayon" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - -[[package]] -name = "regex" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "ryu" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "scroll" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec" -dependencies = [ - "scroll_derive", -] - -[[package]] -name = "scroll_derive" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaaae8f38bb311444cfb7f1979af0bc9240d95795f75f9ceddf6a59b79ceffa0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde" -version = "1.0.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" - -[[package]] -name = "smawk" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "syn" -version = "1.0.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "textwrap" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" -dependencies = [ - "smawk", - "unicode-linebreak", - "unicode-width", -] - -[[package]] -name = "unicode-linebreak" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a52dcaab0c48d931f7cc8ef826fa51690a08e1ea55117ef26f89864f532383f" -dependencies = [ - "regex", -] - -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "viking" -version = "1.0.0" -dependencies = [ - "anyhow", - "capstone", - "colored", - "cpp_demangle", - "csv", - "goblin", - "itertools", - "lazy-init", - "memmap", - "mimalloc", - "owning_ref", - "rayon", - "rustc-hash", - "textwrap", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/tools/viking/Cargo.toml b/tools/viking/Cargo.toml deleted file mode 100644 index b2d5f80e..00000000 --- a/tools/viking/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -name = "viking" -version = "1.0.0" -edition = "2018" - -[profile.release] -debug = 1 -lto = "thin" - -[dependencies] -anyhow = "1.0" -capstone = { git = "https://github.com/leoetlino/capstone-rs" } -colored = "2" -cpp_demangle = "0.3.3" -csv = "1.1" -goblin = "0.4" -itertools = "0.10.1" -lazy-init = "0.5.0" -memmap = "0.6.1" -mimalloc = { version = "*", default-features = false } -owning_ref = "0.4.1" -rayon = "1.5.1" -rustc-hash = "1.1.0" -textwrap = "0.14.2" - -[[bin]] -name = "botw-check" -path = "src/tools/check.rs" diff --git a/tools/viking/LICENSE b/tools/viking/LICENSE deleted file mode 100644 index b14590d0..00000000 --- a/tools/viking/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2021 leoetlino - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/tools/viking/src/capstone_utils.rs b/tools/viking/src/capstone_utils.rs deleted file mode 100644 index e1f19e00..00000000 --- a/tools/viking/src/capstone_utils.rs +++ /dev/null @@ -1,89 +0,0 @@ -use anyhow::{bail, Result}; -use capstone as cs; -use cs::arch::arm64::{Arm64Insn, Arm64OpMem, Arm64Operand, Arm64OperandType}; -use cs::{arch::ArchOperand, RegId}; - -pub fn translate_cs_error(err: cs::Error) -> Result { - bail!("capstone error: {}", err) -} - -#[inline] -pub fn map_two<'a, T, R, F: FnMut(&'a T) -> R>(x: &'a T, y: &'a T, mut f: F) -> (R, R) { - (f(x), f(y)) -} - -#[inline] -pub fn map_pair<'a, T, R, F: FnMut(&'a T) -> R>(pair: &'a (T, T), f: F) -> (R, R) { - map_two(&pair.0, &pair.1, f) -} - -#[inline] -pub fn try_map_two<'a, T, R, F: FnMut(&'a T) -> Result>( - x: &'a T, - y: &'a T, - mut f: F, -) -> Result<(R, R)> { - Ok(( - f(x).or_else(translate_cs_error)?, - f(y).or_else(translate_cs_error)?, - )) -} - -/// Checks if `id` is in [start, end] (inclusive range). -#[inline] -pub fn is_id_in_range(start: Arm64Insn, end: Arm64Insn, id: Arm64Insn) -> bool { - let range = (start as u32)..=(end as u32); - range.contains(&(id as u32)) -} - -/// Used to make accessing arch-specific data less cumbersome. -pub trait CsArchOperandUtil { - fn arm64(&self) -> &Arm64Operand; -} - -impl CsArchOperandUtil for ArchOperand { - fn arm64(&self) -> &Arm64Operand { - match self { - Self::Arm64Operand(x) => x, - _ => unreachable!(), - } - } -} - -/// Used to make accessing arch-specific data less cumbersome. -pub trait CsArm64OperandTypeUtil { - fn reg(&self) -> RegId; - fn imm(&self) -> i64; - fn try_mem(&self) -> Option; - fn mem(&self) -> Arm64OpMem; -} - -impl CsArm64OperandTypeUtil for Arm64OperandType { - fn reg(&self) -> RegId { - match self { - Self::Reg(x) => *x, - _ => panic!("expected Reg, got {:#?}", &self), - } - } - - fn imm(&self) -> i64 { - match self { - Self::Imm(x) => *x, - _ => panic!("expected Imm, got {:#?}", &self), - } - } - - fn try_mem(&self) -> Option { - match self { - Self::Mem(x) => Some(*x), - _ => None, - } - } - - fn mem(&self) -> Arm64OpMem { - match self { - Self::Mem(x) => *x, - _ => panic!("expected Mem, got {:#?}", &self), - } - } -} diff --git a/tools/viking/src/checks.rs b/tools/viking/src/checks.rs deleted file mode 100644 index ccdc8a38..00000000 --- a/tools/viking/src/checks.rs +++ /dev/null @@ -1,518 +0,0 @@ -use anyhow::{ensure, Result}; -use capstone as cs; -use cs::arch::arm64::{Arm64Insn, Arm64Operand, Arm64OperandType}; -use itertools::zip; -use lazy_init::Lazy; -use rustc_hash::FxHashMap; -use std::collections::{HashMap, HashSet}; -use std::convert::TryInto; -use std::path::{Path, PathBuf}; - -use crate::{capstone_utils::*, elf, functions, repo, ui}; - -struct DataSymbol { - /// Address of the symbol in the original executable. - pub addr: u64, - /// Name of the symbol in our source code. - pub name: String, - /// Size of the symbol in our source code (according to ELF info). - pub size: u64, -} - -/// Keeps track of known data symbols so that data loads can be validated. -#[derive(Default)] -struct KnownDataSymbolMap { - /// Symbols. Must be sorted by address. - symbols: Vec, -} - -impl KnownDataSymbolMap { - fn new() -> Self { - Default::default() - } - - fn load(&mut self, csv_path: &Path, decomp_symtab: &elf::SymbolTableByName) -> Result<()> { - let mut reader = csv::ReaderBuilder::new() - .has_headers(false) - .quoting(false) - .from_path(csv_path)?; - for (line, maybe_record) in reader.records().enumerate() { - let record = &maybe_record?; - ensure!( - record.len() == 2, - "invalid number of fields on line {}", - line - ); - - let addr = functions::parse_address(&record[0])?; - let name = &record[1]; - - let symbol = decomp_symtab.get(name); - // Ignore missing symbols. - if symbol.is_none() { - continue; - } - let symbol = symbol.unwrap(); - - self.symbols.push(DataSymbol { - addr, - name: name.to_string(), - size: symbol.st_size, - }); - } - self.symbols.sort_by_key(|sym| sym.addr); - Ok(()) - } - - /// If addr is part of a known data symbol, this function returns the corresponding symbol. - fn get_symbol(&self, addr: u64) -> Option<&DataSymbol> { - // Perform a binary search since `symbols` is sorted. - let mut a: isize = 0; - let mut b: isize = self.symbols.len() as isize - 1; - while a <= b { - let m = a + (b - a) / 2; - - let mid_symbol = &self.symbols[m as usize]; - let mid_addr_begin = mid_symbol.addr; - let mid_addr_end = mid_addr_begin + mid_symbol.size as u64; - - if mid_addr_begin <= addr && addr < mid_addr_end { - return Some(mid_symbol); - } - if addr <= mid_addr_begin { - b = m - 1; - } else if addr >= mid_addr_end { - a = m + 1; - } else { - break; - } - } - None - } -} - -fn get_data_symbol_csv_path() -> Result { - let mut path = repo::get_repo_root()?; - path.push("data"); - path.push("data_symbols.csv"); - Ok(path) -} - -#[derive(Debug)] -pub struct ReferenceDiff { - pub referenced_symbol: u64, - pub expected_ref_in_decomp: u64, - pub actual_ref_in_decomp: u64, - - pub expected_symbol_name: String, - pub actual_symbol_name: String, -} - -impl std::fmt::Display for ReferenceDiff { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "wrong reference to {ref} {ref_name}\n\ - --> decomp source code is referencing {actual} {actual_name}\n\ - --> expected to see {expected} to match original code", - ref=ui::format_address(self.referenced_symbol), - ref_name=ui::format_symbol_name(&self.expected_symbol_name), - expected=ui::format_address(self.expected_ref_in_decomp), - actual=ui::format_address(self.actual_ref_in_decomp), - actual_name=ui::format_symbol_name(&self.actual_symbol_name), - ) - } -} - -#[derive(Debug)] -pub enum MismatchCause { - FunctionSize, - Register, - Mnemonic, - BranchTarget, - FunctionCall(ReferenceDiff), - DataReference(ReferenceDiff), - Immediate, - Unknown, -} - -impl std::fmt::Display for MismatchCause { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self { - Self::FunctionSize => write!(f, "wrong function size"), - Self::Register => write!(f, "wrong register"), - Self::Mnemonic => write!(f, "wrong mnemonic"), - Self::BranchTarget => write!(f, "wrong branch target"), - Self::FunctionCall(diff) => write!(f, "wrong function call\n{}", diff), - Self::DataReference(diff) => write!(f, "wrong data reference\n{}", diff), - Self::Immediate => write!(f, "wrong immediate"), - Self::Unknown => write!(f, "unknown reason; check diff.py"), - } - } -} - -#[derive(Debug)] -pub struct Mismatch { - pub addr_orig: u64, - pub addr_decomp: u64, - pub cause: MismatchCause, -} - -impl std::fmt::Display for Mismatch { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "mismatch at {}: {}", - ui::format_address(self.addr_orig), - self.cause, - ) - } -} - -pub struct FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf> { - decomp_elf: &'decomp_elf elf::OwnedElf, - decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>, - decomp_glob_data_table: elf::GlobDataTable, - - // Optional, only initialized when a mismatch is detected. - decomp_addr_to_name_map: Lazy>, - - known_data_symbols: KnownDataSymbolMap, - known_functions: FxHashMap, - - orig_elf: &'orig_elf elf::OwnedElf, - orig_got_section: &'orig_elf goblin::elf::SectionHeader, -} - -impl<'a, 'functions, 'orig_elf, 'decomp_elf> - FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf> -{ - pub fn new( - orig_elf: &'orig_elf elf::OwnedElf, - decomp_elf: &'decomp_elf elf::OwnedElf, - decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>, - decomp_glob_data_table: elf::GlobDataTable, - functions: &'functions [functions::Info], - ) -> Result { - let mut known_data_symbols = KnownDataSymbolMap::new(); - known_data_symbols.load(get_data_symbol_csv_path()?.as_path(), &decomp_symtab)?; - - let known_functions = functions::make_known_function_map(functions); - let orig_got_section = elf::find_section(orig_elf, ".got")?; - - Ok(FunctionChecker { - decomp_elf, - decomp_symtab, - decomp_glob_data_table, - decomp_addr_to_name_map: Lazy::new(), - - known_data_symbols, - known_functions, - - orig_elf, - orig_got_section, - }) - } - - pub fn check( - &self, - cs: &mut cs::Capstone, - orig_fn: &elf::Function, - decomp_fn: &elf::Function, - ) -> Result> { - // Keep track of registers that are used with ADRP so that we can check global data - // references even when data is not placed at the same addresses - // as in the original executable. - #[derive(Default)] - struct State { - gprs1: HashMap, - gprs2: HashMap, - adrp_pair_registers: HashSet, - } - - impl State { - fn forget_modified_registers(&mut self, detail: &cs::InsnDetail) { - for reg in detail.regs_write() { - self.adrp_pair_registers.remove(®); - } - } - } - - let mut state = State::default(); - - if orig_fn.code.len() != decomp_fn.code.len() { - return Ok(Some(Mismatch { - addr_orig: orig_fn.addr, - addr_decomp: decomp_fn.addr, - cause: MismatchCause::FunctionSize, - })); - } - - let mut instructions = try_map_two(&orig_fn, &decomp_fn, |func| { - cs.disasm_iter(func.code, func.addr) - })?; - - // Check every pair of instructions. - while let (Some(i1), Some(i2)) = (instructions.0.next(), instructions.1.next()) { - let ids = map_two(&i1, &i2, |i| i.id().0); - let detail = try_map_two(&i1, &i2, |insn| cs.insn_detail(&insn))?; - let arch_detail = map_pair(&detail, |d| d.arch_detail()); - let ops = map_pair(&arch_detail, |a| a.arm64().unwrap().operands_ref()); - - if ids.0 != ids.1 { - return Self::make_mismatch(&i1, &i2, MismatchCause::Mnemonic); - } - - let id = ids.0; - - match id.into() { - // Branches or function calls. - Arm64Insn::ARM64_INS_B | Arm64Insn::ARM64_INS_BL => { - let target = - map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.imm() as u64); - - // If we are branching outside the function, this is likely a tail call. - // Treat it as a function call. - if !orig_fn.get_addr_range().contains(&target.0) { - if let Some(mismatch_cause) = self.check_function_call(target.0, target.1) { - return Self::make_mismatch(&i1, &i2, mismatch_cause); - } - } else { - // Otherwise, it's a simple branch, and both targets must match. - if i1.bytes() != i2.bytes() { - return Self::make_mismatch(&i1, &i2, MismatchCause::BranchTarget); - } - } - } - - // Catch ADRP + (ADD/load/store) instruction pairs. - Arm64Insn::ARM64_INS_ADRP => { - let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg()); - let imm = - map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.imm() as u64); - - if reg.0 != reg.1 { - return Self::make_mismatch(&i1, &i2, MismatchCause::Register); - } - - state.gprs1.insert(reg.0, imm.0); - state.gprs2.insert(reg.1, imm.1); - state.adrp_pair_registers.insert(reg.0); - } - - // Catch ADRP + ADD instruction pairs. - Arm64Insn::ARM64_INS_ADD => { - let mut diff_ok = false; - - if ops.0.len() == 3 { - let dest_reg = - map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg()); - let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.reg()); - - if let Arm64OperandType::Imm(_) = Arm64Operand::from(&ops.0[2]).op_type { - let imm = - map_pair(&ops, |ops| Arm64Operand::from(&ops[2]).op_type.imm()); - - if dest_reg.0 != dest_reg.1 || reg.0 != reg.1 { - return Self::make_mismatch(&i1, &i2, MismatchCause::Register); - } - - // Is this an ADRP pair we can check? - if state.adrp_pair_registers.contains(®.0) { - let orig_addr = state.gprs1[®.0] + imm.0 as u64; - let decomp_addr = state.gprs2[®.1] + imm.1 as u64; - - if let Some(mismatch_cause) = - self.check_data_symbol(orig_addr, decomp_addr) - { - return Self::make_mismatch(&i1, &i2, mismatch_cause); - } - - // If the data symbol reference matches, allow the instructions to be different. - diff_ok = true; - } - } - } - - if !diff_ok && i1.bytes() != i2.bytes() { - return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown); - } - - state.forget_modified_registers(&detail.0); - } - - // Loads and stores (single or paired). - id if is_id_in_range(Arm64Insn::ARM64_INS_LD1, Arm64Insn::ARM64_INS_LDXRH, id) - || is_id_in_range(Arm64Insn::ARM64_INS_ST1, Arm64Insn::ARM64_INS_STXR, id) => - { - let mut diff_ok = false; - - // Check all operands for mismatches, except the Arm64OpMem which will be checked later. - let mut mem = (None, None); - for (op1, op2) in zip(ops.0, ops.1) { - let op1 = Arm64Operand::from(op1); - let op2 = Arm64Operand::from(op2); - if let Some(mem1) = op1.op_type.try_mem() { - if let Some(mem2) = op2.op_type.try_mem() { - ensure!( - mem.0.is_none() && mem.1.is_none(), - "found more than one OpMem" - ); - mem.0 = Some(mem1); - mem.1 = Some(mem2); - continue; - } - } - - if op1 != op2 { - return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown); - } - } - - ensure!(mem.0.is_some() && mem.1.is_some(), "didn't find an OpMem"); - - let mem = (mem.0.unwrap(), mem.1.unwrap()); - - if mem.0.base() != mem.1.base() { - return Self::make_mismatch(&i1, &i2, MismatchCause::Register); - } - - let reg = mem.0.base(); - - // Is this an ADRP pair we can check? - if state.adrp_pair_registers.contains(®) { - let orig_addr_ptr = (state.gprs1[®] as i64 + mem.0.disp() as i64) as u64; - let decomp_addr_ptr = - (state.gprs2[®] as i64 + mem.1.disp() as i64) as u64; - - if let Some(mismatch_cause) = - self.check_data_symbol_ptr(orig_addr_ptr, decomp_addr_ptr) - { - return Self::make_mismatch(&i1, &i2, mismatch_cause); - } - - // If the data symbol reference matches, allow the instructions to be different. - diff_ok = true; - } - - if !diff_ok && i1.bytes() != i2.bytes() { - return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown); - } - - state.forget_modified_registers(&detail.0); - } - - // Anything else. - _ => { - if i1.bytes() != i2.bytes() { - return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown); - } - - state.forget_modified_registers(&detail.0); - } - } - } - - Ok(None) - } - - /// Returns None on success and a MismatchCause on failure. - fn check_function_call(&self, orig_addr: u64, decomp_addr: u64) -> Option { - let info = *self.known_functions.get(&orig_addr)?; - let name = info.name.as_str(); - let decomp_symbol = self.decomp_symtab.get(name)?; - let expected = decomp_symbol.st_value; - - if decomp_addr == expected { - None - } else { - let actual_symbol_name = self.translate_decomp_addr_to_name(decomp_addr); - - Some(MismatchCause::FunctionCall(ReferenceDiff { - referenced_symbol: orig_addr, - expected_ref_in_decomp: expected, - actual_ref_in_decomp: decomp_addr, - expected_symbol_name: name.to_string(), - actual_symbol_name: actual_symbol_name.unwrap_or("unknown").to_string(), - })) - } - } - - /// Returns None on success and a MismatchCause on failure. - fn check_data_symbol_ex( - &self, - orig_addr: u64, - decomp_addr: u64, - symbol: &DataSymbol, - ) -> Option { - let decomp_symbol = self.decomp_symtab.get(symbol.name.as_str())?; - let expected = decomp_symbol.st_value; - - if decomp_addr == expected { - None - } else { - let actual_symbol_name = self.translate_decomp_addr_to_name(decomp_addr); - - Some(MismatchCause::DataReference(ReferenceDiff { - referenced_symbol: orig_addr, - expected_ref_in_decomp: expected, - actual_ref_in_decomp: decomp_addr, - expected_symbol_name: symbol.name.to_string(), - actual_symbol_name: actual_symbol_name.unwrap_or("unknown").to_string(), - })) - } - } - - /// Returns None on success and a MismatchCause on failure. - fn check_data_symbol(&self, orig_addr: u64, decomp_addr: u64) -> Option { - let symbol = self.known_data_symbols.get_symbol(orig_addr)?; - self.check_data_symbol_ex(orig_addr, decomp_addr, symbol) - } - - /// Returns None on success and a MismatchCause on failure. - /// Unlike check_data_symbol, this function takes the addresses of *pointers to* possible data symbols, - /// not the symbols themselves. - fn check_data_symbol_ptr( - &self, - orig_addr_ptr: u64, - decomp_addr_ptr: u64, - ) -> Option { - if !elf::is_in_section(&self.orig_got_section, orig_addr_ptr, 8) { - return None; - } - - let orig_offset = elf::get_offset_in_file(&self.orig_elf, orig_addr_ptr).ok()? as u64; - let orig_addr = u64::from_le_bytes( - elf::get_elf_bytes(&self.orig_elf, orig_offset, 8) - .ok()? - .try_into() - .ok()?, - ); - - let data_symbol = self.known_data_symbols.get_symbol(orig_addr)?; - let decomp_addr = *self.decomp_glob_data_table.get(&decomp_addr_ptr)?; - self.check_data_symbol_ex(orig_addr, decomp_addr, &data_symbol) - } - - fn make_mismatch( - i1: &cs::Insn, - i2: &cs::Insn, - cause: MismatchCause, - ) -> Result> { - Ok(Some(Mismatch { - addr_orig: i1.address(), - addr_decomp: i2.address(), - cause, - })) - } - - #[cold] - #[inline(never)] - fn translate_decomp_addr_to_name(&self, decomp_addr: u64) -> Option<&'decomp_elf str> { - let map = self.decomp_addr_to_name_map.get_or_create(|| { - let map = elf::make_addr_to_name_map(&self.decomp_elf).ok(); - map.unwrap_or_default() - }); - map.get(&decomp_addr).copied() - } -} diff --git a/tools/viking/src/elf.rs b/tools/viking/src/elf.rs deleted file mode 100644 index b289c8a4..00000000 --- a/tools/viking/src/elf.rs +++ /dev/null @@ -1,311 +0,0 @@ -use std::{collections::HashMap, ffi::CStr, fs::File, ops::Range, path::Path}; - -use anyhow::{anyhow, bail, Context, Result}; -use goblin::{ - container, - elf::{ - dynamic, reloc, section_header, sym, Dynamic, Elf, ProgramHeader, RelocSection, - SectionHeader, Sym, Symtab, - }, - elf64::program_header::PT_LOAD, - strtab::Strtab, -}; -use memmap::{Mmap, MmapOptions}; -use owning_ref::OwningHandle; -use rustc_hash::FxHashMap; - -use crate::repo; - -pub type OwnedElf = OwningHandle, Mmap)>, Box>>; -pub type SymbolTableByName<'a> = HashMap<&'a str, goblin::elf::Sym>; -pub type SymbolTableByAddr = FxHashMap; -pub type AddrToNameMap<'a> = FxHashMap; -pub type GlobDataTable = FxHashMap; - -pub struct Function<'a> { - /// The virtual address of the function in its containing executable. - /// *Note*: does not contain the IDA base (0x7100000000). - pub addr: u64, - /// The bytes that make up the code for this function. - pub code: &'a [u8], -} - -impl<'a> Function<'a> { - #[inline] - pub fn get_addr_range(&self) -> Range { - self.addr..(self.addr + self.code.len() as u64) - } -} - -#[inline] -fn make_goblin_ctx() -> container::Ctx { - // 64-bit, little endian - container::Ctx::new(container::Container::Big, container::Endian::Little) -} - -/// A stripped down version of `goblin::elf::Elf::parse`, parsing only the sections that we need. -/// -/// *Warning*: In particular, `strtab`, `dynstrtab`, `soname` and `libraries` are **not** parsed. -fn parse_elf_faster(bytes: &[u8]) -> Result { - let header = Elf::parse_header(bytes)?; - let mut elf = Elf::lazy_parse(header)?; - let ctx = make_goblin_ctx(); - - elf.program_headers = - ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?; - - elf.section_headers = - SectionHeader::parse(bytes, header.e_shoff as usize, header.e_shnum as usize, ctx)?; - - let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| { - if section_idx >= section_headers.len() { - Ok(Strtab::default()) - } else { - let shdr = §ion_headers[section_idx]; - shdr.check_size(bytes.len())?; - Strtab::parse(bytes, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0) - } - }; - - let strtab_idx = header.e_shstrndx as usize; - elf.shdr_strtab = get_strtab(&elf.section_headers, strtab_idx)?; - - for shdr in &elf.section_headers { - if shdr.sh_type as u32 == section_header::SHT_SYMTAB { - let size = shdr.sh_entsize; - let count = if size == 0 { 0 } else { shdr.sh_size / size }; - elf.syms = Symtab::parse(bytes, shdr.sh_offset as usize, count as usize, ctx)?; - } - } - - elf.dynamic = Dynamic::parse(bytes, &elf.program_headers, ctx)?; - if let Some(ref dynamic) = elf.dynamic { - let dyn_info = &dynamic.info; - // parse the dynamic relocations - elf.dynrelas = RelocSection::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?; - elf.dynrels = RelocSection::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?; - let is_rela = dyn_info.pltrel as u64 == dynamic::DT_RELA; - elf.pltrelocs = - RelocSection::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?; - } - - Ok(elf) -} - -pub fn load_elf(path: &Path) -> Result { - let file = Box::new(File::open(path)?); - let mmap = unsafe { MmapOptions::new().map(&file)? }; - - OwningHandle::try_new(Box::new((file, mmap)), |pair| unsafe { - let elf = parse_elf_faster(&(*pair).1).with_context(|| "failed to load ELF")?; - Ok(Box::new(elf)) - }) -} - -pub fn load_orig_elf() -> Result { - let mut path = repo::get_repo_root()?; - path.push("data"); - path.push("main.elf"); - load_elf(path.as_path()) -} - -pub fn load_decomp_elf() -> Result { - let mut path = repo::get_repo_root()?; - path.push("build"); - path.push("uking"); - load_elf(path.as_path()) -} - -struct SymbolStringTable<'elf> { - bytes: &'elf [u8], -} - -impl<'elf> SymbolStringTable<'elf> { - pub fn from_elf(elf: &'elf OwnedElf) -> Result { - let bytes = &*elf.as_owner().1; - for shdr in &elf.section_headers { - if shdr.sh_type as u32 == section_header::SHT_SYMTAB { - let table_hdr = elf - .section_headers - .get(shdr.sh_link as usize) - .ok_or_else(|| anyhow!("symbol string table index out of bounds"))?; - - table_hdr.check_size(bytes.len())?; - - let start = table_hdr.sh_offset as usize; - let end = start + table_hdr.sh_size as usize; - return Ok(SymbolStringTable { - bytes: &bytes[start..end], - }); - } - } - bail!("couldn't find symbol string table") - } - - pub fn get_string(&self, offset: usize) -> &'elf str { - unsafe { - std::str::from_utf8_unchecked( - CStr::from_ptr(self.bytes[offset..self.bytes.len()].as_ptr() as *const i8) - .to_bytes(), - ) - } - } -} - -fn filter_out_useless_syms(sym: &Sym) -> bool { - matches!( - sym.st_type(), - sym::STT_OBJECT | sym::STT_FUNC | sym::STT_COMMON | sym::STT_TLS - ) -} - -pub fn make_symbol_map_by_name(elf: &OwnedElf) -> Result { - let mut map = SymbolTableByName::with_capacity_and_hasher( - elf.syms.iter().filter(filter_out_useless_syms).count(), - Default::default(), - ); - - let strtab = SymbolStringTable::from_elf(&elf)?; - - for symbol in elf.syms.iter().filter(filter_out_useless_syms) { - map.entry(strtab.get_string(symbol.st_name)) - .or_insert(symbol); - } - Ok(map) -} - -pub fn make_symbol_map_by_addr(elf: &OwnedElf) -> SymbolTableByAddr { - let mut map = SymbolTableByAddr::with_capacity_and_hasher( - elf.syms.iter().filter(filter_out_useless_syms).count(), - Default::default(), - ); - for symbol in elf.syms.iter().filter(filter_out_useless_syms) { - map.entry(symbol.st_value).or_insert(symbol); - } - map -} - -pub fn make_addr_to_name_map(elf: &OwnedElf) -> Result { - let mut map = AddrToNameMap::with_capacity_and_hasher( - elf.syms.iter().filter(filter_out_useless_syms).count(), - Default::default(), - ); - - let strtab = SymbolStringTable::from_elf(&elf)?; - - for symbol in elf.syms.iter().filter(filter_out_useless_syms) { - map.entry(symbol.st_value) - .or_insert_with(|| strtab.get_string(symbol.st_name)); - } - Ok(map) -} - -fn parse_symtab<'a>(elf: &'a OwnedElf, shdr: &'a SectionHeader) -> Result> { - let bytes = &elf.as_owner().1; - let size = shdr.sh_entsize; - let count = if size == 0 { 0 } else { shdr.sh_size / size }; - - let syms = Symtab::parse( - bytes, - shdr.sh_offset as usize, - count as usize, - make_goblin_ctx(), - )?; - Ok(syms) -} - -pub fn find_section<'a>(elf: &'a OwnedElf, name: &str) -> Result<&'a SectionHeader> { - elf.section_headers - .iter() - .find(|&header| &elf.shdr_strtab[header.sh_name] == name) - .ok_or_else(|| anyhow!("failed to find {} section", name)) -} - -pub fn get_linked_section<'a>( - elf: &'a OwnedElf, - shdr: &'a SectionHeader, -) -> Result<&'a SectionHeader> { - elf.section_headers - .get(shdr.sh_link as usize) - .ok_or_else(|| anyhow!("could not get linked section")) -} - -#[inline] -pub fn is_in_section(section: &SectionHeader, addr: u64, size: u64) -> bool { - let begin = section.sh_addr; - let end = begin + section.sh_size; - (begin..end).contains(&addr) && (begin..=end).contains(&(addr + size)) -} - -pub fn build_glob_data_table(elf: &OwnedElf) -> Result { - let section = &elf.dynrelas; - let section_hdr = find_section(elf, ".rela.dyn")?; - // The corresponding symbol table. - let symtab = parse_symtab(elf, get_linked_section(elf, §ion_hdr)?)?; - - let mut table = GlobDataTable::with_capacity_and_hasher(section.len(), Default::default()); - - for reloc in section.iter() { - let symbol_value: u64 = symtab - .get(reloc.r_sym) - .ok_or_else(|| anyhow!("invalid symbol index"))? - .st_value; - - match reloc.r_type { - reloc::R_AARCH64_GLOB_DAT => { - table.insert( - reloc.r_offset, - (symbol_value as i64 + reloc.r_addend.unwrap()) as u64, - ); - } - reloc::R_AARCH64_RELATIVE => { - // FIXME: this should be Delta(S) + A. - table.insert( - reloc.r_offset, - (symbol_value as i64 + reloc.r_addend.unwrap()) as u64, - ); - } - _ => (), - } - } - - Ok(table) -} - -pub fn get_offset_in_file(elf: &OwnedElf, addr: u64) -> Result { - let addr = addr as usize; - for segment in elf.program_headers.iter() { - if segment.p_type != PT_LOAD { - continue; - } - - if segment.vm_range().contains(&addr) { - return Ok(segment.file_range().start + addr - segment.vm_range().start); - } - } - bail!("{:#x} doesn't belong to any segment", addr) -} - -pub fn get_elf_bytes(elf: &OwnedElf, addr: u64, size: u64) -> Result<&[u8]> { - let offset = get_offset_in_file(&elf, addr)?; - let size = size as usize; - Ok(&elf.as_owner().1[offset..(offset + size)]) -} - -pub fn get_function(elf: &OwnedElf, addr: u64, size: u64) -> Result { - Ok(Function { - addr, - code: get_elf_bytes(&elf, addr, size)?, - }) -} - -pub fn get_function_by_name<'a>( - elf: &'a OwnedElf, - symbols: &SymbolTableByName, - name: &str, -) -> Result> { - let symbol = symbols - .get(&name) - .ok_or_else(|| anyhow!("unknown function: {}", name))?; - get_function(&elf, symbol.st_value, symbol.st_size) -} diff --git a/tools/viking/src/functions.rs b/tools/viking/src/functions.rs deleted file mode 100644 index e8799d93..00000000 --- a/tools/viking/src/functions.rs +++ /dev/null @@ -1,221 +0,0 @@ -use crate::repo; -use anyhow::{bail, ensure, Context, Result}; -use rayon::prelude::*; -use rustc_hash::FxHashMap; -use std::{ - collections::HashSet, - path::{Path, PathBuf}, -}; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Status { - Matching, - NonMatchingMinor, - NonMatchingMajor, - NotDecompiled, - Wip, - Library, -} - -impl Status { - pub fn description(&self) -> &'static str { - match &self { - Status::Matching => "matching", - Status::NonMatchingMinor => "non-matching (minor)", - Status::NonMatchingMajor => "non-matching (major)", - Status::NotDecompiled => "not decompiled", - Status::Wip => "WIP", - Status::Library => "library function", - } - } -} - -#[derive(Clone, Debug)] -pub struct Info { - pub addr: u64, - pub size: u32, - pub name: String, - pub status: Status, -} - -impl Info { - pub fn is_decompiled(&self) -> bool { - !matches!(self.status, Status::NotDecompiled | Status::Library) - } -} - -pub const CSV_HEADER: &[&str] = &["Address", "Quality", "Size", "Name"]; -pub const ADDRESS_BASE: u64 = 0x71_0000_0000; - -fn parse_base_16(value: &str) -> Result { - if let Some(stripped) = value.strip_prefix("0x") { - Ok(u64::from_str_radix(stripped, 16)?) - } else { - Ok(u64::from_str_radix(value, 16)?) - } -} - -pub fn parse_address(value: &str) -> Result { - Ok(parse_base_16(value)? - ADDRESS_BASE) -} - -fn parse_function_csv_entry(record: &csv::StringRecord) -> Result { - ensure!(record.len() == 4, "invalid record"); - - let addr = parse_address(&record[0])?; - let status_code = record[1].chars().next(); - let size = record[2].parse::()?; - let decomp_name = record[3].to_string(); - - let status = match status_code { - Some('m') => Status::NonMatchingMinor, - Some('M') => Status::NonMatchingMajor, - Some('O') => Status::Matching, - Some('U') => Status::NotDecompiled, - Some('W') => Status::Wip, - Some('L') => Status::Library, - Some(code) => bail!("unexpected status code: {}", code), - None => bail!("missing status code"), - }; - - Ok(Info { - addr, - size, - name: decomp_name, - status, - }) -} - -pub fn get_functions_csv_path() -> Result { - let mut path = repo::get_repo_root()?; - path.push("data"); - path.push("uking_functions.csv"); - Ok(path) -} - -/// Returns a Vec of all functions that are listed in the specified CSV. -pub fn get_functions_for_path(csv_path: &Path) -> Result> { - let mut reader = csv::ReaderBuilder::new() - .has_headers(false) - .quoting(false) - .from_path(csv_path)?; - - // We build the result array manually without using csv iterators for performance reasons. - let mut result = Vec::with_capacity(110_000); - let mut record = csv::StringRecord::new(); - let mut line_number = 1; - let mut num_names = 0; - if reader.read_record(&mut record)? { - // Verify that the CSV has the correct format. - ensure!(record.len() == 4, "invalid record; expected 4 fields"); - ensure!(record == *CSV_HEADER, - "wrong CSV format; this program only works with the new function list format (added in commit 1d4c815fbae3)" - ); - line_number += 1; - } - - while reader.read_record(&mut record)? { - let entry = parse_function_csv_entry(&record) - .with_context(|| format!("failed to parse CSV record at line {}", line_number))?; - - if !entry.name.is_empty() { - num_names += 1; - } - - result.push(entry); - line_number += 1; - } - - // Check for duplicate names in the CSV. - let mut known_names = HashSet::with_capacity(num_names); - let mut duplicates = Vec::new(); - for entry in &result { - if entry.is_decompiled() && entry.name.is_empty() { - bail!( - "function at {:016x} is marked as O/M/m but has an empty name", - entry.addr | ADDRESS_BASE - ); - } - - if !entry.name.is_empty() && !known_names.insert(&entry.name) { - duplicates.push(&entry.name); - } - } - if !duplicates.is_empty() { - bail!("found duplicates: {:#?}", duplicates); - } - - Ok(result) -} - -pub fn write_functions_to_path(csv_path: &Path, functions: &[Info]) -> Result<()> { - let mut writer = csv::Writer::from_path(csv_path)?; - writer.write_record(CSV_HEADER)?; - - for function in functions { - let addr = format!("0x{:016x}", function.addr | ADDRESS_BASE); - let status = match function.status { - Status::Matching => "O", - Status::NonMatchingMinor => "m", - Status::NonMatchingMajor => "M", - Status::NotDecompiled => "U", - Status::Wip => "W", - Status::Library => "L", - } - .to_string(); - let size = format!("{:06}", function.size); - let name = function.name.clone(); - writer.write_record(&[addr, status, size, name])?; - } - - Ok(()) -} - -/// Returns a Vec of all known functions in the executable. -pub fn get_functions() -> Result> { - get_functions_for_path(get_functions_csv_path()?.as_path()) -} - -pub fn write_functions(functions: &[Info]) -> Result<()> { - write_functions_to_path(get_functions_csv_path()?.as_path(), functions) -} - -pub fn make_known_function_map(functions: &[Info]) -> FxHashMap { - let mut known_functions = - FxHashMap::with_capacity_and_hasher(functions.len(), Default::default()); - - for function in functions { - if function.name.is_empty() { - continue; - } - known_functions.insert(function.addr, function); - } - - known_functions -} - -/// Demangle a C++ symbol. -pub fn demangle_str(name: &str) -> Result { - if !name.starts_with("_Z") { - bail!("not an external mangled name"); - } - - let symbol = cpp_demangle::Symbol::new(name)?; - let options = cpp_demangle::DemangleOptions::new(); - Ok(symbol.demangle(&options)?) -} - -pub fn find_function_fuzzy<'a>(functions: &'a [Info], name: &str) -> Option<&'a Info> { - functions - .par_iter() - .find_first(|function| function.name == name) - .or_else(|| { - // Comparing the demangled names is more expensive than a simple string comparison, - // so only do this as a last resort. - functions.par_iter().find_first(|function| { - demangle_str(&function.name) - .unwrap_or_else(|_| "".to_string()) - .contains(name) - }) - }) -} diff --git a/tools/viking/src/lib.rs b/tools/viking/src/lib.rs deleted file mode 100644 index cf6378b6..00000000 --- a/tools/viking/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod capstone_utils; -pub mod checks; -pub mod elf; -pub mod functions; -pub mod repo; -pub mod ui; diff --git a/tools/viking/src/repo.rs b/tools/viking/src/repo.rs deleted file mode 100644 index 2530d1d5..00000000 --- a/tools/viking/src/repo.rs +++ /dev/null @@ -1,24 +0,0 @@ -use anyhow::{bail, Result}; -use std::path::PathBuf; - -pub fn get_repo_root() -> Result { - let current_dir = std::env::current_dir()?; - let mut dir = current_dir.as_path(); - - loop { - if ["data", "src"].iter().all(|name| dir.join(name).is_dir()) { - return Ok(dir.to_path_buf()); - } - - match dir.parent() { - None => { - bail!("failed to find repo root -- run this program inside the repo"); - } - Some(parent) => dir = parent, - }; - } -} - -pub fn get_tools_path() -> Result { - Ok(get_repo_root()?.join("tools")) -} diff --git a/tools/viking/src/tools/check.rs b/tools/viking/src/tools/check.rs deleted file mode 100644 index 6cab54aa..00000000 --- a/tools/viking/src/tools/check.rs +++ /dev/null @@ -1,315 +0,0 @@ -use anyhow::bail; -use anyhow::ensure; -use anyhow::Context; -use anyhow::Result; -use capstone as cs; -use capstone::arch::BuildsCapstone; -use colored::*; -use itertools::Itertools; -use rayon::prelude::*; -use std::cell::RefCell; -use std::sync::atomic::AtomicBool; -use viking::checks::FunctionChecker; -use viking::elf; -use viking::functions; -use viking::functions::Status; -use viking::repo; -use viking::ui; - -use mimalloc::MiMalloc; - -#[global_allocator] -static GLOBAL: MiMalloc = MiMalloc; - -/// Returns false if the program should exit with a failure code at the end. -fn check_function( - checker: &FunctionChecker, - mut cs: &mut capstone::Capstone, - orig_elf: &elf::OwnedElf, - decomp_elf: &elf::OwnedElf, - decomp_symtab: &elf::SymbolTableByName, - function: &functions::Info, -) -> Result { - let name = function.name.as_str(); - let decomp_fn = elf::get_function_by_name(&decomp_elf, &decomp_symtab, &name); - - match function.status { - Status::NotDecompiled if decomp_fn.is_err() => return Ok(true), - Status::Library => return Ok(true), - _ => (), - } - - if decomp_fn.is_err() { - let error = decomp_fn.err().unwrap(); - ui::print_warning(&format!( - "couldn't check {}: {}", - ui::format_symbol_name(name), - error.to_string().dimmed(), - )); - return Ok(true); - } - - let decomp_fn = decomp_fn.unwrap(); - - let get_orig_fn = || { - elf::get_function(&orig_elf, function.addr, function.size as u64).with_context(|| { - format!( - "failed to get function {} ({}) from the original executable", - name, - ui::format_address(function.addr), - ) - }) - }; - - match function.status { - Status::Matching => { - let orig_fn = get_orig_fn()?; - - let result = checker - .check(&mut cs, &orig_fn, &decomp_fn) - .with_context(|| format!("checking {}", name))?; - - if let Some(mismatch) = result { - let stderr = std::io::stderr(); - let mut lock = stderr.lock(); - ui::print_error_ex( - &mut lock, - &format!( - "function {} is marked as matching but does not match", - ui::format_symbol_name(name), - ), - ); - ui::print_detail_ex(&mut lock, &format!("{}", mismatch)); - return Ok(false); - } - } - - Status::NotDecompiled - | Status::NonMatchingMinor - | Status::NonMatchingMajor - | Status::Wip => { - let orig_fn = get_orig_fn()?; - - let result = checker - .check(&mut cs, &orig_fn, &decomp_fn) - .with_context(|| format!("checking {}", name))?; - - if result.is_none() { - ui::print_note(&format!( - "function {} is marked as {} but matches", - ui::format_symbol_name(name), - function.status.description(), - )); - } - } - - Status::Library => unreachable!(), - }; - - Ok(true) -} - -#[cold] -#[inline(never)] -fn make_cs() -> Result { - cs::Capstone::new() - .arm64() - .mode(cs::arch::arm64::ArchMode::Arm) - .detail(true) - .build() - .or_else(viking::capstone_utils::translate_cs_error) -} - -thread_local! { - static CAPSTONE: RefCell = RefCell::new(make_cs().unwrap()); -} - -fn check_all( - functions: &[functions::Info], - checker: &FunctionChecker, - orig_elf: &elf::OwnedElf, - decomp_elf: &elf::OwnedElf, - decomp_symtab: &elf::SymbolTableByName, -) -> Result<()> { - let failed = AtomicBool::new(false); - - functions.par_iter().try_for_each(|function| { - CAPSTONE.with(|cs| -> Result<()> { - let mut cs = cs.borrow_mut(); - let ok = check_function( - &checker, - &mut cs, - &orig_elf, - &decomp_elf, - &decomp_symtab, - function, - )?; - if !ok { - failed.store(true, std::sync::atomic::Ordering::Relaxed); - } - - Ok(()) - }) - })?; - - if failed.load(std::sync::atomic::Ordering::Relaxed) { - bail!("found at least one error"); - } else { - Ok(()) - } -} - -fn get_function_to_check_from_args(args: &[String]) -> Result { - let mut maybe_fn_to_check: Vec = args - .iter() - .filter(|s| !s.starts_with("-")) - .map(|s| s.clone()) - .collect(); - - ensure!( - maybe_fn_to_check.len() == 1, - "expected only one function name (one argument that isn't prefixed with '-')" - ); - - Ok(maybe_fn_to_check.remove(0)) -} - -fn check_single( - functions: &[functions::Info], - checker: &FunctionChecker, - orig_elf: &elf::OwnedElf, - decomp_elf: &elf::OwnedElf, - decomp_symtab: &elf::SymbolTableByName, - args: &Vec, -) -> Result<()> { - let fn_to_check = get_function_to_check_from_args(&args)?; - let function = functions::find_function_fuzzy(&functions, &fn_to_check) - .with_context(|| format!("unknown function: {}", ui::format_symbol_name(&fn_to_check)))?; - let name = function.name.as_str(); - - eprintln!("{}", ui::format_symbol_name(name).bold()); - - if matches!(function.status, Status::Library) { - bail!("L functions should not be decompiled"); - } - - let decomp_fn = - elf::get_function_by_name(&decomp_elf, &decomp_symtab, &name).with_context(|| { - format!( - "failed to get decomp function: {}", - ui::format_symbol_name(name) - ) - })?; - - let orig_fn = elf::get_function(&orig_elf, function.addr, function.size as u64)?; - - let maybe_mismatch = checker - .check(&mut make_cs()?, &orig_fn, &decomp_fn) - .with_context(|| format!("checking {}", name))?; - - let mut should_show_diff = args - .iter() - .find(|s| s.as_str() == "--always-diff") - .is_some(); - - if let Some(mismatch) = &maybe_mismatch { - eprintln!("{}\n{}", "mismatch".red().bold(), &mismatch); - should_show_diff = true; - } else { - eprintln!("{}", "OK".green().bold()); - } - - if should_show_diff { - let diff_args = args - .iter() - .filter(|s| s.as_str() != &fn_to_check && s.as_str() != "--always-diff"); - - std::process::Command::new(repo::get_tools_path()?.join("asm-differ").join("diff.py")) - .arg("-I") - .arg("-e") - .arg(name) - .arg(format!("0x{:016x}", function.addr)) - .arg(format!("0x{:016x}", function.addr + function.size as u64)) - .args(diff_args) - .status()?; - } - - let new_status = match maybe_mismatch { - None => Status::Matching, - Some(_) => Status::Wip, - }; - - // Update the function status if needed. - if function.status != new_status { - ui::print_note(&format!( - "changing status from {:?} to {:?}", - function.status, new_status - )); - - let mut new_functions = functions.iter().cloned().collect_vec(); - new_functions - .iter_mut() - .find(|info| info.addr == function.addr) - .unwrap() - .status = new_status; - functions::write_functions(&new_functions)?; - } - - Ok(()) -} - -fn main() -> Result<()> { - let args: Vec = std::env::args().skip(1).collect(); - - let orig_elf = elf::load_orig_elf().with_context(|| "failed to load original ELF")?; - let decomp_elf = elf::load_decomp_elf().with_context(|| "failed to load decomp ELF")?; - - // Load these in parallel. - let mut decomp_symtab = None; - let mut decomp_glob_data_table = None; - let mut functions = None; - - rayon::scope(|s| { - s.spawn(|_| decomp_symtab = Some(elf::make_symbol_map_by_name(&decomp_elf))); - s.spawn(|_| decomp_glob_data_table = Some(elf::build_glob_data_table(&decomp_elf))); - s.spawn(|_| functions = Some(functions::get_functions())); - }); - - let decomp_symtab = decomp_symtab - .unwrap() - .with_context(|| "failed to make symbol map")?; - - let decomp_glob_data_table = decomp_glob_data_table - .unwrap() - .with_context(|| "failed to make global data table")?; - - let functions = functions - .unwrap() - .with_context(|| "failed to load function CSV")?; - - let checker = FunctionChecker::new( - &orig_elf, - &decomp_elf, - &decomp_symtab, - decomp_glob_data_table, - &functions, - ) - .with_context(|| "failed to construct FunctionChecker")?; - - if args.len() >= 1 { - // Single function mode. - check_single( - &functions, - &checker, - &orig_elf, - &decomp_elf, - &decomp_symtab, - &args, - )?; - } else { - // Normal check mode. - check_all(&functions, &checker, &orig_elf, &decomp_elf, &decomp_symtab)?; - } - - Ok(()) -} diff --git a/tools/viking/src/ui.rs b/tools/viking/src/ui.rs deleted file mode 100644 index 725dc6d0..00000000 --- a/tools/viking/src/ui.rs +++ /dev/null @@ -1,59 +0,0 @@ -use colored::*; -use std::io::StderrLock; -use std::io::Write; -use textwrap::indent; - -use crate::functions; - -pub fn print_note(msg: &str) { - eprintln!("{}{}{}", "note".bold().cyan(), ": ".bold(), msg.bold()) -} - -pub fn print_warning(msg: &str) { - eprintln!("{}{}{}", "warning".bold().yellow(), ": ".bold(), msg.bold()) -} - -pub fn print_error(msg: &str) { - let stderr = std::io::stderr(); - let mut lock = stderr.lock(); - print_error_ex(&mut lock, msg); -} - -pub fn print_error_ex(lock: &mut StderrLock, msg: &str) { - writeln!( - lock, - "{}{}{}", - "error".bold().red(), - ": ".bold(), - msg.bold() - ) - .unwrap(); -} - -pub fn format_symbol_name(name: &str) -> String { - functions::demangle_str(name).map_or(name.blue().to_string(), |demangled| { - format!("{} ({})", demangled.blue(), name.blue().dimmed(),) - }) -} - -pub fn format_address(addr: u64) -> String { - format!("{:#x}", addr).green().to_string() -} - -pub fn print_detail(msg: &str) { - let stderr = std::io::stderr(); - let mut lock = stderr.lock(); - print_detail_ex(&mut lock, msg); -} - -pub fn print_detail_ex(lock: &mut StderrLock, msg: &str) { - writeln!( - lock, - "{}\n", - indent( - &msg.clear().to_string(), - &" │ ".bold().dimmed().to_string() - ) - ) - .unwrap(); -} diff --git a/workflow.sh b/workflow.sh index 8e0cee7b..67a48136 100755 --- a/workflow.sh +++ b/workflow.sh @@ -21,14 +21,14 @@ FILE_SYNC_PY=../file-sync.py WORKING_FILES=../file-list.txt #Path to uking_functions.csv UKING_FUNCTIONS=data/uking_functions.csv -#Path to temporarily put the output of tools/print_decomp_symbols.py +#Path to temporarily put the output of tools/common/print_decomp_symbols.py SYMBOL_OUT=build/symbols.txt #Path to diff.py DIFF_PY=./diff.py #Path to print_decomp_symbols.py -PRINT_DECOMP_SYMBOLS_PY=tools/print_decomp_symbols.py +PRINT_DECOMP_SYMBOLS_PY=tools/common/print_decomp_symbols.py #Path to check.py -CHECK_PY=tools/check.py +CHECK_PY=tools/common/check.py #clang-format CLANG_FORMAT=clang-format @@ -231,7 +231,7 @@ check|c ) echo " [-f] Search only, do not update function list" echo "d|diff Diff function" echo " [-v] verbose, show source when diffing" - echo "c|check Format source code and run tools/check.py" + echo "c|check Format source code and run tools/common/check.py" if [[ ${FILE_SYNC} == "ON" ]] then echo " [-w] sync formatted code"