From 834f44e55638aee63da8dd378d5d65dda8adfd15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 29 Dec 2020 14:49:38 +0100 Subject: [PATCH] tools: Optimize symbol parsing --- tools/show_vtable.py | 2 +- tools/util/elf.py | 56 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/tools/show_vtable.py b/tools/show_vtable.py index b659cacc..dfee5e13 100755 --- a/tools/show_vtable.py +++ b/tools/show_vtable.py @@ -13,7 +13,7 @@ from util import utils def find_vtable(symtab, class_name: str) -> Optional[str]: name_offset = len("vtable for ") - for sym in symtab.iter_symbols(): + for sym in util.elf.iter_symbols(symtab): if not sym.name.startswith("_ZTV"): continue if cxxfilt.demangle(sym.name)[name_offset:] == class_name: diff --git a/tools/util/elf.py b/tools/util/elf.py index 841aff47..8f00a120 100644 --- a/tools/util/elf.py +++ b/tools/util/elf.py @@ -35,6 +35,22 @@ class Function(NamedTuple): addr: int +_ElfSymFormat = struct.Struct(" int: for seg in elf.iter_segments(): if seg.header["p_type"] != "PT_LOAD": @@ -50,11 +66,20 @@ def is_in_section(section: Section, addr: int, size: int) -> bool: return begin <= addr < end and begin <= addr + size < end -def get_symbol(table, name: str) -> Symbol: - syms = table.get_symbol_by_name(name) - if not syms or len(syms) != 1: - raise KeyError(name) - return Symbol(syms[0]["st_value"], name, syms[0]["st_size"]) +_TableCache = dict() + + +def make_table_cached(symtab): + table = _TableCache.get(id(symtab)) + if table is None: + table = build_name_to_symbol_table(symtab) + _TableCache[id(symtab)] = table + return table + + +def get_symbol(symtab, name: str) -> Symbol: + table = make_table_cached(symtab) + return table[name] def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int): @@ -62,10 +87,20 @@ def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int): return get_file_offset(elf, sym.addr), sym.size +def iter_symbols(symtab): + offset = symtab["sh_offset"] + entsize = symtab["sh_entsize"] + for i in range(symtab.num_symbols()): + symtab.stream.seek(offset + i * entsize) + entry = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)) + name = symtab.stringtable.get_string(entry.st_name) + yield Symbol(entry.st_value, name, entry.st_size) + + def build_addr_to_symbol_table(symtab) -> Dict[int, str]: table = dict() - for sym in symtab.iter_symbols(): - addr = sym["st_value"] + for sym in iter_symbols(symtab): + addr = sym.addr existing_value = table.get(addr, None) if existing_value is None or not existing_value.startswith("_Z"): table[addr] = sym.name @@ -73,7 +108,7 @@ def build_addr_to_symbol_table(symtab) -> Dict[int, str]: def build_name_to_symbol_table(symtab) -> Dict[str, Symbol]: - return {sym.name: Symbol(sym["st_value"], sym.name, sym["st_size"]) for sym in symtab.iter_symbols()} + return {sym.name: sym for sym in iter_symbols(symtab)} def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes: @@ -101,9 +136,12 @@ def build_glob_data_table(elf: ELFFile) -> Dict[int, int]: assert isinstance(section, RelocationSection) symtab = elf.get_section(section["sh_link"]) + offset = symtab["sh_offset"] + entsize = symtab["sh_entsize"] for reloc in section.iter_relocations(): - sym_value = symtab.get_symbol(reloc["r_info_sym"])["st_value"] + symtab.stream.seek(offset + reloc["r_info_sym"] * entsize) + sym_value = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)).st_value if reloc["r_info_type"] == R_AARCH64_GLOB_DAT: table[reloc["r_offset"]] = sym_value + reloc["r_addend"]