From 218a8da8ba4a10c702a5ca8c03af935f17116938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Lam?=
Date: Thu, 24 Sep 2020 01:30:12 +0200
Subject: [PATCH] tools: Add a tool to check all matchings for diffs

And fix some incorrect function entries
---
 data/uking_functions.csv |  14 ++--
 lib/agl                  |   2 +-
 lib/sead                 |   2 +-
 tools/check.py           | 156 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+), 9 deletions(-)
 create mode 100755 tools/check.py

diff --git a/data/uking_functions.csv b/data/uking_functions.csv
index fb9cef5e..341b2648 100644
--- a/data/uking_functions.csv
+++ b/data/uking_functions.csv
@@ -48353,7 +48353,7 @@
 0x00000071007d04b8,j__ZdlPv_354,4,
 0x00000071007d04bc,_ZN4sead15FixedSafeStringILi24EEaSERKNS_14SafeStringBaseIcEE,240,_ZN4sead15FixedSafeStringILi24EEaSERKNS_14SafeStringBaseIcEE
 0x00000071007d05ac,j__ZdlPv_355,4,
-0x00000071007d05b0,_ZN4sead15FixedSafeStringILi96EEaSERKNS_14SafeStringBaseIcEE,240,_ZN4sead15FixedSafeStringILi96EEaSERKNS_14SafeStringBaseIcEE
+0x00000071007d05b0,_ZN4sead15FixedSafeStringILi96EEaSERKNS_14SafeStringBaseIcEE,240,
 0x00000071007d06a0,j__ZdlPv_356,4,
 0x00000071007d06a4,sub_71007D06A4,40,
 0x00000071007d06cc,sub_71007D06CC,24,
@@ -51869,7 +51869,7 @@
 0x000000710089f83c,_ZN3agl3utl13IParameterObj8preCopy_Ev,8,_ZN3agl3utl13IParameterObj8preCopy_Ev
 0x000000710089f844,_ZN3agl3utl13IParameterObj9postCopy_Ev,4,_ZN3agl3utl13IParameterObj9postCopy_Ev
 0x000000710089f848,_ZNK3agl3utl13IParameterObj8isApply_ENS0_15ResParameterObjE,20,_ZNK3agl3utl13IParameterObj8isApply_ENS0_15ResParameterObjE
-0x000000710089f85c,_ZN3agl3utl13IParameterObjD2Ev,20,_ZN3agl3utl13IParameterObjD1Ev
+0x000000710089f85c,_ZN3agl3utl13IParameterObjD2Ev,20,_ZN3agl3utl13IParameterObjD2Ev
 0x000000710089f870,_ZN3agl3utl13IParameterObjD0Ev,4,_ZN3agl3utl13IParameterObjD0Ev
 0x000000710089f874,_ZN3agl3utl14IParameterListD2Ev,20,_ZN3agl3utl14IParameterListD2Ev
 0x000000710089f888,_ZN3agl3utl14IParameterListD0Ev,4,_ZN3agl3utl14IParameterListD0Ev
@@ -61956,8 +61956,8 @@
 0x0000007100b09d4c,bswap32_,8,
 0x0000007100b09d54,nullsub_3220,4,
 0x0000007100b09d58,bswap64_,8,
-0x0000007100b09d60,_ZN4sead8EnumUtil15getParseTextCS_Ev,116,_ZN4sead8EnumUtil15getParseTextCS_Ev
-0x0000007100b09dd4,sub_7100B09DD4,100,_ZN4sead8EnumUtil20getInitValueArrayCS_Ev
+0x0000007100b09d60,_ZN4sead8EnumUtil15getParseTextCS_Ev,116,_ZN4sead8EnumUtil15getParseTextCS_Ev?
+0x0000007100b09dd4,sub_7100B09DD4,100,_ZN4sead8EnumUtil20getInitValueArrayCS_Ev?
 0x0000007100b09e38,_ZN4sead8EnumUtil10parseText_EPPcS1_i,248,_ZN4sead8EnumUtil10parseText_EPPcS1_i
 0x0000007100b09f30,sub_7100B09F30,188,
 0x0000007100b09fec,_ZN4sead17RegionLanguageMgr18SingletonDisposer_D2Ev,100,
@@ -63256,7 +63256,7 @@
 0x0000007100b4db88,_ZN3agl3utl13ParameterBase19genMessageParameterEPN4sead6hostio7ContextERKNS2_14SafeStringBaseIcEE,188,
 0x0000007100b4dc44,_ZN3agl3utl13ParameterBaseD0Ev,4,_ZN3agl3utl13ParameterBaseD0Ev
 0x0000007100b4dc48,_ZNK3agl3utl13ParameterBase16calcBinarizeSizeEv,12,_ZNK3agl3utl13ParameterBase16calcBinarizeSizeEv
-0x0000007100b4dc54,_ZN3agl3utl9ParameterIbED2Ev,20,_ZN3agl3utl9ParameterIbED1Ev
+0x0000007100b4dc54,_ZN3agl3utl9ParameterIbED2Ev,20,_ZN3agl3utl9ParameterIbED2Ev
 0x0000007100b4dc68,_ZN3agl3utl9ParameterIbED0Ev,4,_ZN3agl3utl9ParameterIbED0Ev
 0x0000007100b4dc6c,_ZNK3agl3utl9ParameterIbE16getParameterTypeEv,8,_ZNK3agl3utl9ParameterIbE16getParameterTypeEv
 0x0000007100b4dc74,_ZNK3agl3utl9ParameterIbE3ptrEv,8,_ZNK3agl3utl9ParameterIbE3ptrEv
@@ -89518,7 +89518,7 @@
 0x00000071011bbc0c,BaseProcHandle::wakeUpActorAndReleaseUnit,184,
 0x00000071011bbcc4,BaseProcHandle::getBaseProcEvent,48,
 0x00000071011bbcf4,BaseProcHandle::allocUnit,228,
-0x00000071011bbdd8,BaseProcUnit::setActor,540,_ZN4ksys3act12BaseProcUnit7setProcEPNS0_8BaseProcE
+0x00000071011bbdd8,BaseProcUnit::setActor,540,_ZN4ksys3act12BaseProcUnit7setProcEPNS0_8BaseProcE?
 0x00000071011bbff4,BaseProcUnit::cleanUp,512,
 0x00000071011bc1f4,BaseProcUnit::unlinkActor,412,
 0x00000071011bc390,BaseProcUnit::isParentHandleDefault,24,_ZNK4ksys3act12BaseProcUnit21isParentHandleDefaultEv
@@ -90682,7 +90682,7 @@
 0x00000071011fc9c0,OverlayArenaSystem::Struct1::dtor,52,_ZN4ksys12OverlayArenaD1Ev
 0x00000071011fc9f4,OverlayArenaSystem::Struct1::destroy,268,
 0x00000071011fcb00,OverlayArenaSystem::Struct1::dtorDelete,64,_ZN4ksys12OverlayArenaD0Ev
-0x00000071011fcb40,OverlayArena::makeHeap,384,_ZN4ksys12OverlayArena4initERKNS0_7InitArgE
+0x00000071011fcb40,OverlayArena::makeHeap,384,_ZN4ksys12OverlayArena4initERKNS0_7InitArgE?
 0x00000071011fccc0,nullsub_4690,4,
 0x00000071011fccc4,sub_71011FCCC4,48,
 0x00000071011fccf4,OverlayArenaSystem::Struct1::callResMgrClearCacheForSync,180,
diff --git a/lib/agl b/lib/agl
index 0e143787..1c7c9e30 160000
--- a/lib/agl
+++ b/lib/agl
@@ -1 +1 @@
-Subproject commit 0e14378755a859ef307405d8b1c1860949990e65
+Subproject commit 1c7c9e30033b0ad51fe3c0ce3aaabe18476e4046
diff --git a/lib/sead b/lib/sead
index 1993212e..8edaa1b2 160000
--- a/lib/sead
+++ b/lib/sead
@@ -1 +1 @@
-Subproject commit 1993212edd22edcd3e7f89efadd16c27a508eeb8
+Subproject commit 8edaa1b2f55eac91301db8231285668b7d8c7b33
diff --git a/tools/check.py b/tools/check.py
new file mode 100755
index 00000000..15648727
--- /dev/null
+++ b/tools/check.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Check that every function marked as matching still matches the base executable."""
+
+from itertools import zip_longest
+from pathlib import Path
+from typing import Any, Dict, Set
+
+import capstone as cs
+from elftools.elf.elffile import ELFFile
+
+import diff_settings
+import utils
+
+config: Dict[str, Any] = {}
+diff_settings.apply(config, {})
+
+# The ELF streams are left open on purpose: function bytes are read from them on demand.
+base_elf = ELFFile((Path(__file__).parent.parent / config["baseimg"]).open("rb"))
+my_elf = ELFFile((Path(__file__).parent.parent / config["myimg"]).open("rb"))
+my_symtab = my_elf.get_section_by_name(".symtab")
+if not my_symtab:
+    utils.fail(f'{config["myimg"]} has no symbol table')
+
+# Instructions that may consume the register set by a preceding ADRP. For each mnemonic:
+# (number of leading register operands that must be identical on both sides,
+#  accessor returning the register that holds the ADRP result).
+_ADRP_PAIR_USERS = {
+    "ldr": (1, lambda insn: insn.operands[1].value.mem.base),
+    "ldp": (2, lambda insn: insn.operands[2].value.mem.base),
+    "add": (2, lambda insn: insn.operands[1].reg),
+}
+
+
+def get_file_offset(elf, addr: int) -> int:
+    """Translate the virtual address *addr* into an offset in the file backing *elf*.
+
+    Raises KeyError if no PT_LOAD segment contains *addr*.
+    """
+    for seg in elf.iter_segments():
+        if seg.header["p_type"] != "PT_LOAD":
+            continue
+        if seg["p_vaddr"] <= addr < seg["p_vaddr"] + seg["p_filesz"]:
+            return addr - seg["p_vaddr"] + seg["p_offset"]
+    # A real exception rather than `assert False`: asserts vanish under -O, and
+    # check_function() relies on KeyError to report an address it cannot locate.
+    raise KeyError(f"0x{addr:016x}")
+
+
+def get_symbol_file_offset(elf, table, name: str) -> int:
+    """Return the file offset of the symbol *name* looked up in symbol table *table*.
+
+    Raises KeyError if the symbol is missing or if the name is not unique.
+    """
+    syms = table.get_symbol_by_name(name)
+    if not syms or len(syms) != 1:
+        raise KeyError(name)
+    return get_file_offset(elf, syms[0]["st_value"])
+
+
+def get_fn_from_base_elf(addr: int, size: int) -> bytes:
+    """Read *size* bytes of the base executable starting at virtual address *addr*."""
+    offset = get_file_offset(base_elf, addr)
+    base_elf.stream.seek(offset)
+    return base_elf.stream.read(size)
+
+
+def get_fn_from_my_elf(name: str, size: int) -> bytes:
+    """Read *size* bytes of the decompiled executable starting at symbol *name*."""
+    offset = get_symbol_file_offset(my_elf, my_symtab, name)
+    my_elf.stream.seek(offset)
+    return my_elf.stream.read(size)
+
+
+def check_function(addr: int, size: int, name: str) -> bool:
+    """Return whether the decompiled function *name* matches the base function at *addr*.
+
+    Both functions are disassembled as AArch64 and compared instruction by instruction.
+    Differing encodings are tolerated only for `bl`, for `b` to a target outside the
+    function, and for `adrp` plus the `ldr`/`ldp`/`add` that consumes its register.
+    A decompiled function that cannot be found only triggers a warning and counts as
+    matching.
+    """
+    try:
+        base_fn = get_fn_from_base_elf(addr, size)
+    except KeyError:
+        utils.fail(f"couldn't find base function 0x{addr:016x} for {name}")
+        return False
+
+    try:
+        my_fn = get_fn_from_my_elf(name, size)
+    except KeyError:
+        utils.warn(f"couldn't find decompiled function {name}")
+        return True
+
+    md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM)
+    md.detail = True
+    adrp_pair_registers: Set[int] = set()
+
+    # zip_longest instead of zip: a side that decodes to fewer instructions is a mismatch,
+    # not something to ignore silently.
+    for i1, i2 in zip_longest(md.disasm(base_fn, addr), md.disasm(my_fn, addr)):
+        if i1 is None or i2 is None:
+            return False
+
+        if i1.bytes == i2.bytes:
+            continue
+
+        if i1.mnemonic != i2.mnemonic:
+            return False
+
+        # Ignore some address differences until a fully matching executable can be generated.
+
+        if i1.mnemonic == "bl":
+            continue
+
+        if i1.mnemonic == "b":
+            # Needed for tail calls.
+            branch_target = int(i1.op_str[1:], 16)
+            if not (addr <= branch_target < addr + size):
+                continue
+            return False
+
+        if i1.mnemonic == "adrp":
+            if i1.operands[0].reg != i2.operands[0].reg:
+                return False
+            adrp_pair_registers.add(i1.operands[0].reg)
+            continue
+
+        pair_user = _ADRP_PAIR_USERS.get(i1.mnemonic)
+        if pair_user is None:
+            return False
+        num_regs, get_base = pair_user
+        if any(i1.operands[k].reg != i2.operands[k].reg for k in range(num_regs)):
+            return False
+        reg = get_base(i1)
+        if reg != get_base(i2) or reg not in adrp_pair_registers:
+            return False
+        adrp_pair_registers.remove(reg)
+
+    return True
+
+
+def main() -> None:
+    """Check every function marked as matching and stop at the first mismatch."""
+    for func in utils.get_functions():
+        if not func.decomp_name:
+            continue
+
+        if func.status == utils.FunctionStatus.Matching:
+            if not check_function(func.addr, func.size, func.decomp_name):
+                utils.fail(f"{func.decomp_name} was marked as matching but does not match")
+                return
+
+
+if __name__ == "__main__":
+    main()