tools: Optimize symbol parsing

This commit is contained in:
Léo Lam 2020-12-29 14:49:38 +01:00
parent 8b41b83228
commit 834f44e556
No known key found for this signature in database
GPG Key ID: 0DF30F9081000741
2 changed files with 48 additions and 10 deletions

View File

@ -13,7 +13,7 @@ from util import utils
def find_vtable(symtab, class_name: str) -> Optional[str]: def find_vtable(symtab, class_name: str) -> Optional[str]:
name_offset = len("vtable for ") name_offset = len("vtable for ")
for sym in symtab.iter_symbols(): for sym in util.elf.iter_symbols(symtab):
if not sym.name.startswith("_ZTV"): if not sym.name.startswith("_ZTV"):
continue continue
if cxxfilt.demangle(sym.name)[name_offset:] == class_name: if cxxfilt.demangle(sym.name)[name_offset:] == class_name:

View File

@ -35,6 +35,22 @@ class Function(NamedTuple):
addr: int addr: int
_ElfSymFormat = struct.Struct("<IBBHQQ")
class _ElfSym(NamedTuple):
st_name: int
info: int
other: int
shndx: int
st_value: int
st_size: int
@staticmethod
def parse(d: bytes):
return _ElfSym._make(_ElfSymFormat.unpack(d))
def get_file_offset(elf, addr: int) -> int: def get_file_offset(elf, addr: int) -> int:
for seg in elf.iter_segments(): for seg in elf.iter_segments():
if seg.header["p_type"] != "PT_LOAD": if seg.header["p_type"] != "PT_LOAD":
@ -50,11 +66,20 @@ def is_in_section(section: Section, addr: int, size: int) -> bool:
return begin <= addr < end and begin <= addr + size < end return begin <= addr < end and begin <= addr + size < end
def get_symbol(table, name: str) -> Symbol: _TableCache = dict()
syms = table.get_symbol_by_name(name)
if not syms or len(syms) != 1:
raise KeyError(name) def make_table_cached(symtab):
return Symbol(syms[0]["st_value"], name, syms[0]["st_size"]) table = _TableCache.get(id(symtab))
if table is None:
table = build_name_to_symbol_table(symtab)
_TableCache[id(symtab)] = table
return table
def get_symbol(symtab, name: str) -> Symbol:
table = make_table_cached(symtab)
return table[name]
def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int): def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int):
@ -62,10 +87,20 @@ def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int):
return get_file_offset(elf, sym.addr), sym.size return get_file_offset(elf, sym.addr), sym.size
def iter_symbols(symtab):
offset = symtab["sh_offset"]
entsize = symtab["sh_entsize"]
for i in range(symtab.num_symbols()):
symtab.stream.seek(offset + i * entsize)
entry = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size))
name = symtab.stringtable.get_string(entry.st_name)
yield Symbol(entry.st_value, name, entry.st_size)
def build_addr_to_symbol_table(symtab) -> Dict[int, str]: def build_addr_to_symbol_table(symtab) -> Dict[int, str]:
table = dict() table = dict()
for sym in symtab.iter_symbols(): for sym in iter_symbols(symtab):
addr = sym["st_value"] addr = sym.addr
existing_value = table.get(addr, None) existing_value = table.get(addr, None)
if existing_value is None or not existing_value.startswith("_Z"): if existing_value is None or not existing_value.startswith("_Z"):
table[addr] = sym.name table[addr] = sym.name
@ -73,7 +108,7 @@ def build_addr_to_symbol_table(symtab) -> Dict[int, str]:
def build_name_to_symbol_table(symtab) -> Dict[str, Symbol]: def build_name_to_symbol_table(symtab) -> Dict[str, Symbol]:
return {sym.name: Symbol(sym["st_value"], sym.name, sym["st_size"]) for sym in symtab.iter_symbols()} return {sym.name: sym for sym in iter_symbols(symtab)}
def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes: def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes:
@ -101,9 +136,12 @@ def build_glob_data_table(elf: ELFFile) -> Dict[int, int]:
assert isinstance(section, RelocationSection) assert isinstance(section, RelocationSection)
symtab = elf.get_section(section["sh_link"]) symtab = elf.get_section(section["sh_link"])
offset = symtab["sh_offset"]
entsize = symtab["sh_entsize"]
for reloc in section.iter_relocations(): for reloc in section.iter_relocations():
sym_value = symtab.get_symbol(reloc["r_info_sym"])["st_value"] symtab.stream.seek(offset + reloc["r_info_sym"] * entsize)
sym_value = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)).st_value
if reloc["r_info_type"] == R_AARCH64_GLOB_DAT: if reloc["r_info_type"] == R_AARCH64_GLOB_DAT:
table[reloc["r_offset"]] = sym_value + reloc["r_addend"] table[reloc["r_offset"]] = sym_value + reloc["r_addend"]