diff --git a/data/data_symbols.csv b/data/data_symbols.csv index a05715ca..9dbc0bee 100644 --- a/data/data_symbols.csv +++ b/data/data_symbols.csv @@ -1,3 +1,4 @@ +0x00000071023556B0,_ZTVN4sead14SafeStringBaseIcEE 0x000000710246F9E0,_ZN4ksys3gdt6detail13sCommonFlags0E 0x00000071024709E0,_ZN4ksys3gdt6detail13sCommonFlags1E 0x00000071024719E0,_ZN4ksys3gdt6detail13sCommonFlags2E diff --git a/tools/util/checker.py b/tools/util/checker.py index 261d7c96..5d7c37d7 100644 --- a/tools/util/checker.py +++ b/tools/util/checker.py @@ -1,5 +1,6 @@ +import struct from collections import defaultdict -from typing import Set, DefaultDict, Dict, Optional +from typing import Set, DefaultDict, Dict, Optional, Tuple import capstone as cs @@ -18,6 +19,9 @@ class FunctionChecker: self._mismatch_addr1 = -1 self._mismatch_addr2 = -1 self._mismatch_cause = "" + self._base_got_section = elf.base_elf.get_section_by_name(".got") + self._decomp_glob_data_table = elf.build_glob_data_table(elf.my_elf) + self._got_data_symbol_check_cache: Dict[Tuple[int, int], bool] = dict() self.load_data_for_project() @@ -113,7 +117,7 @@ class FunctionChecker: gprs1[reg] += i1.operands[1].value.mem.disp gprs2[reg] += i2.operands[1].value.mem.disp - if not self._check_data_symbol(i1, i2, gprs1[reg], gprs2[reg]): + if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]): return False adrp_pair_registers.remove(reg) @@ -130,9 +134,9 @@ class FunctionChecker: if reg not in adrp_pair_registers: return False - gprs1[reg] += i1.operands[1].value.mem.disp - gprs2[reg] += i2.operands[1].value.mem.disp - if not self._check_data_symbol(i1, i2, gprs1[reg], gprs2[reg]): + gprs1[reg] += i1.operands[2].value.mem.disp + gprs2[reg] += i2.operands[2].value.mem.disp + if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]): return False adrp_pair_registers.remove(reg) @@ -174,10 +178,30 @@ class FunctionChecker: return True if self._log_mismatch_cause: - self._set_mismatch_cause(i1, i2, f"data symbol 
def is_in_section(section: Section, addr: int, size: int) -> bool:
    """Return whether ``size`` bytes starting at ``addr`` lie inside ``section``.

    ``section`` only needs to support ``["sh_addr"]`` and ``["sh_size"]``
    lookups. The section covers the half-open address range
    ``[begin, end)`` with ``end = sh_addr + sh_size``.

    Args:
        section: Section header mapping providing ``sh_addr`` and ``sh_size``.
        addr: Address of the first byte of the access.
        size: Number of bytes accessed.

    Returns:
        True when ``addr`` is inside the section and the access does not run
        past the section's end, False otherwise.
    """
    begin = section["sh_addr"]
    end = begin + section["sh_size"]
    # `addr + size` is the exclusive end of the access, so it may equal `end`.
    # Comparing it with `<` wrongly rejected an access that finishes exactly
    # on the section's last byte (e.g. the final 8-byte slot of the section).
    return begin <= addr < end and begin <= addr + size <= end


def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes:
    """Read ``size`` bytes from ``elf``'s stream at the file offset of ``addr``.

    The address is translated with ``get_file_offset``; any exception raised
    by that call propagates to the caller.
    """
    offset: int = get_file_offset(elf, addr)
    elf.stream.seek(offset)
    return elf.stream.read(size)
def get_fn_from_my_elf(name: str) -> Function:
    """Return the function called ``name`` as stored in ``my_elf``.

    The symbol is looked up in ``my_symtab``; its address and size select the
    bytes that are read through ``read_from_elf``.
    """
    sym = get_symbol(my_symtab, name)
    return Function(read_from_elf(my_elf, sym.addr, sym.size), sym.addr)


# Relocation type number that build_glob_data_table selects via `r_info_type`.
R_AARCH64_GLOB_DAT = 1025


def build_glob_data_table(elf: ELFFile) -> Dict[int, int]:
    """Build a table of ``R_AARCH64_GLOB_DAT`` relocation targets for ``elf``.

    Iterates over the relocations of the ``.rela.dyn`` section and, for each
    entry whose type is ``R_AARCH64_GLOB_DAT``, maps the relocation's
    ``r_offset`` to ``st_value + r_addend`` of the symbol it refers to.
    Symbols are resolved in the section that ``.rela.dyn`` links to through
    ``sh_link``.

    Args:
        elf: The ELF file whose ``.rela.dyn`` section is scanned.

    Returns:
        A dict from relocation offset to resolved symbol value plus addend.

    Raises:
        AssertionError: If ``.rela.dyn`` is missing or is not a relocation
            section.
    """
    section = elf.get_section_by_name(".rela.dyn")
    assert isinstance(section, RelocationSection), "no .rela.dyn relocation section found"

    symtab = elf.get_section(section["sh_link"])

    table: Dict[int, int] = {}
    for reloc in section.iter_relocations():
        if reloc["r_info_type"] != R_AARCH64_GLOB_DAT:
            continue
        # Resolve the symbol only for entries that are kept; doing it before
        # the type check parsed a symbol for every relocation in the section.
        symbol = symtab.get_symbol(reloc["r_info_sym"])
        table[reloc["r_offset"]] = symbol["st_value"] + reloc["r_addend"]

    return table