tp/tools/libdol2asm/symbol_finder.py

457 lines
20 KiB
Python

import librel
from dataclasses import dataclass, field
from collections import defaultdict
from typing import Dict, List
from pathlib import Path
from intervaltree import Interval, IntervalTree
from .context import Context
from .disassemble import Access, BranchAccess, FloatLoadAccess, DoubleLoadAccess
from .data import *
from . import util
from . import linker_map
from . import binary
from . import sort_translation_units
from . import generate_symbols
from . import generate_functions
from . import settings
from . import disassemble
def insert_access_as_symbol(context: Context,
module_id: int,
sections: Dict[int, ExecutableSection],
map_sections: Dict[str, linker_map.Section],
map_addrs: Dict[str, Dict[int, linker_map.Symbol]],
ait: Dict[str, IntervalTree],
relocations,
access: Access) -> bool:
"""Insert new symbol from the access data"""
# determine what sections the access addr are in
in_sections = [x for x in sections if access.addr in x]
if len(in_sections) == 0:
return False
if len(in_sections) != 1:
context.warning("multiple section for symbol at 0x%08X" %
(access.addr & 0xFFFFFFFF))
context.warning([(x.name, x.start, x.end) for x in sections])
context.warning([x.name for x in in_sections])
return False
# check that we don't already have a symbol for the access address
section = in_sections[0]
relative_addr = access.addr - section.start
if relative_addr in map_addrs[section.name]:
map_addrs[section.name][relative_addr].access = access
return False
#if map_sections[section.name].index in relocations:
# for relocation in relocations[map_sections[section.name].index]:
# if relocation.replace_addr == relative_addr:
# relocation.access = access
# return False
overlap = ait[section.name].at(relative_addr)
if len(overlap) > 0:
overlap_symbol = list(overlap)[0].data
if overlap_symbol.name == "@stringBase0":
return False
obj = None
lib = None
name = None
# because this is an "access" we have no other information about the name, here we try to set names for known addresses
if module_id == 0:
if section.name == ".init":
obj = "init.o"
if access.addr in settings.PREDEFINED_SYMBOLS:
name = settings.PREDEFINED_SYMBOLS[access.addr]
# create new linker map symbol
symbol = linker_map.Symbol(relative_addr, 0, 0, name, lib, obj)
symbol.source = f"insert_access_as_symbol/{access.addr:08X}"
symbol.access = access
map_sections[section.name].symbols.append(symbol)
map_addrs[section.name][relative_addr] = symbol
return True
def infer_location_from_other_symbols(section: linker_map.Section, symbols: List[linker_map.Symbol]):
""" """
symbols.sort(key=lambda x: x.addr)
obj = None
lib = None
symbols_without_obj = []
for symbol in symbols:
# often, code and data are not stored in the same section (except .init). thus, if the symbol is in any code section
# set the 'is_function' flag so we later can parse the symbol as a function.
if section.is_addr_code(symbol.addr) and symbol.name:
symbol.is_function = True
# TODO: Not sure if the FAKE_FUNCTIONS are used anymore
if symbol.is_function and symbol.addr in settings.FAKE_FUNCTIONS:
symbol.is_function = False
if not symbol.obj:
# assign symbols to the previous object and library
if obj:
symbol.obj = obj
symbol.lib = lib
else:
symbols_without_obj.append(symbol)
else:
# assign previous symbols to the same object and library as this symbol
for rsym in symbols_without_obj:
rsym.obj = symbol.obj
rsym.lib = symbol.lib
symbols_without_obj = []
if symbol.obj:
obj = symbol.obj
lib = symbol.lib
if symbols_without_obj:
# there are no other symbol which we can use to infer the object and library file from,
# create a fake translation unit
for rsym in symbols_without_obj:
rsym.obj = f"unknown_translation_unit_{section.name.replace('.','')}.o"
def calculate_symbol_sizes(section: linker_map.Section, symbols: List[linker_map.Symbol]):
""" Calculate symbol sizes taking into account the section and near-by symbols """
symbols.sort(key=lambda x: (x.addr, x.size))
# Calculate the size (and padding) of the symbol
for curr, next in util.mapOverlap(symbols, 2):
if not curr:
continue
if next:
caddr = curr.addr + section.addr
naddr = next.addr + section.addr
if curr.size == 0:
# Assume all data from current symbol to next is for this symbol (we cannot determine the padding)
curr.size = naddr - caddr
else:
# The difference between current symbol end and next is the padding
curr_addr = caddr + curr.size
next_addr = naddr
if curr_addr > next_addr:
# There are functions that have sub-function within themself. (See __save_gpr)
# Truncate symbol size.
curr.size = naddr - caddr
else:
curr.padding = next_addr - curr_addr
else:
caddr = curr.addr + section.addr
if curr.size == 0:
# Assume the symbol goes to the end of the section
curr.size = section.end - caddr
else:
curr_addr = caddr + curr.size
curr.padding = section.end - curr_addr
assert curr.padding >= 0
# Some section have their object files aligned to 8 bytes. This hacks will move symbols,
# which are not aligned, to the next object file. Not sure if the frameworkF.map have them listed
# in the wrong source file or if a different alignment setting was used.
if section.name == ".bss" or section.name == ".sdata" or section.name == ".sbss":
for i, curr in enumerate(symbols):
begin_aligned = ((curr.addr) % 8) == 0
end_aligned = ((curr.addr + curr.size + curr.padding) % 8) == 0
if not begin_aligned:
continue
if end_aligned:
continue
j = i+1
next = None
for n in symbols[i+1:]:
if ((n.addr + n.size + n.padding) % 8) == 0:
next = n
break
j += 1
if not next:
continue
if curr.obj == next.obj and curr.lib == next.lib:
continue
for sym in symbols[i:j]:
sym.obj = next.obj
sym.lib = next.lib
# check that we're using all the space of the section
if len(symbols) > 1:
last_symbol = symbols[-1]
endp = last_symbol.end + last_symbol.padding
assert section.addr + endp == section.end
last_symbol.padding = 0
def search(context: Context,
module_id: int,
name: str,
map_path: Path,
sections: List[ExecutableSection],
relocations: Dict[int, "rel.Relocation"],
all_relocations: Dict[int, "librel.Relocation"],
cache: bool) -> Module:
""" Search for symbols from executable sections and the linker map. """
# Get symbolsby reading the linker map
map_sections, map_addrs = linker_map.parse(
context,
module_id,
map_path,
sections,
base_folder=(module_id == 0))
# Find accesses/symbols by analyzing the code
accesses, highLink = binary.analyze(
context,
module_id,
sections,
cache=cache)
ait_sections = dict()
for name, section in map_sections.items():
ait_sections[name] = IntervalTree([
Interval(symbol.start, symbol.end, symbol)
for symbol in section.symbols
if symbol.size > 0
])
# TODO: do we really need to sort?
sorted_accesses = list(accesses.items())
sorted_accesses.sort(key=lambda x: x[0])
for relative_addr, access in sorted_accesses:
# add access as symbol, the check if the address is already a symbol is done inside 'insert_access_as_symbol'
insert_access_as_symbol(context, module_id, sections,
map_sections, map_addrs, ait_sections, relocations, access)
# add entrypoint to the right section. the entrypoint is required as it is not included in the linker map.
if module_id == 0:
for section in sections:
if not settings.ENTRY_POINT in section:
continue
branch_access = BranchAccess(at=0x00000000, addr=settings.ENTRY_POINT)
insert_access_as_symbol(
context, module_id, sections, map_sections, map_addrs, ait_sections, relocations, branch_access)
break
# insert relocation that are not already symbol from the linker map
if len(all_relocations) > 0:
table = dict()
for section in map_sections.values():
if not section.name in table:
table[section.name] = dict()
for symbol in section.symbols:
table[section.name][symbol.addr] = symbol
# iterate over all relocations (from all modules) that wants to relocation something inside _this_ module.
for r in all_relocations[module_id]:
section = sections[r.section]
if section.name in table:
addr = r.addend
if module_id == 0: # relative address are used for symbols
addr -= section.addr
overlap = ait_sections[section.name].at(addr)
if len(overlap) > 0:
overlap_symbol = list(overlap)[0].data
if overlap_symbol.name == "@stringBase0":
continue
access = None
if r.parent.data and r.parent.executable_flag and module_id == 372:
inst_addr = r.parent.addr + r.offset
if r.type == librel.R_PPC_ADDR16_LO:
inst_addr -= 2
elif r.type == librel.R_PPC_ADDR16_HI:
inst_addr -= 2
elif r.type == librel.R_PPC_ADDR16_HA:
inst_addr -= 2
if inst_addr in highLink:
high_inst_data = r.parent.data[highLink[inst_addr] - r.parent.addr:][:4]
high_insts = list(disassemble.cs.disasm(high_inst_data, highLink[inst_addr]))
inst_data = r.parent.data[inst_addr - r.parent.addr:][:4]
insts = list(disassemble.cs.disasm(inst_data, inst_addr))
if len(insts) == 1 and len(high_insts) == 1:
high_inst = high_insts[0]
inst = insts[0]
if high_inst.id == disassemble.PPC_INS_LIS:
if inst.id in disassemble.FLOAT_INST:
access = FloatLoadAccess(r.offset, section.addr + addr)
elif inst.id in disassemble.DOUBLE_INST:
access = DoubleLoadAccess(r.offset, section.addr + addr)
if not addr in table[section.name]:
symbol = linker_map.Symbol(addr, 0, 0, None, None, None)
symbol.source = f"relocation/{section.name}/{r.addend:08X}"
symbol.access = access
table[section.name][addr] = symbol
map_sections[section.name].symbols.append(symbol)
elif access:
table[section.name][addr].access = access
else:
context.error(f"{section.name} not in module {module_id}")
# build a tree
section_count = defaultdict(int)
tree = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
tree_order = defaultdict(lambda: defaultdict(list))
for section in map_sections.values():
# calculate the size of symbols and determine where symbols without a library and object file should be located.
infer_location_from_other_symbols(section, section.symbols)
calculate_symbol_sizes(section, section.symbols)
#for symbol in section.symbols:
# if symbol.addr >= 0x80450c90 and symbol.addr < 0x80450ca0:
# context.debug(f"{symbol.addr:08X} {symbol.size:04X} {symbol.name} ({symbol.source})")
if section.symbols:
for symbol in section.symbols:
if not section.name in tree[symbol.lib][symbol.obj]:
tree[symbol.lib][symbol.obj][section.name] = []
else:
tree[None][None][section.name] = []
#
for symbol in section.symbols:
symbol.relative_addr = symbol.addr
symbol.addr += section.addr + section.first_padding
section_count[section.name] += 1
tree[symbol.lib][symbol.obj][section.name].append(symbol)
tree_order[symbol.lib][section.name].append(symbol.obj)
# create the structure with all neccessary information, such as, libraries, translation units, sections, and symbols
module = Module(module_id)
module.executable_sections = sections
module.start = min([x.start for x in sections if x.size > 0])
module.end = max([x.end for x in sections if x.size > 0])
for k, v in tree.items():
library_name = k
if library_name:
library_name = library_name.replace(".a", "")
library = Library(library_name)
module.add_library(library)
# sort object files. each section will have its own order of the object files, combine everything to find a god-order
order_sections = tree_order[k]
order = sort_translation_units.sort(context, v.keys(), order_sections)
for tuk in order:
translation_name = tuk
if not translation_name:
translation_name = "unknown_translation_unit.o"
translation_unit = TranslationUnit(
translation_name.replace(".o", ""))
library.add_translation_unit(translation_unit)
# global_destructor_chain.o will be generate from template
if module_id > 0 and translation_name == "global_destructor_chain.o":
translation_unit.generate = False
# executor.o will be generate from template
if module_id > 0 and translation_name == "executor.o":
translation_unit.generate = False
for sk, sv in v[tuk].items():
map_section = map_sections[sk]
exe_section = sections[map_section.index]
section = Section(sk,
map_section.addr, map_section.size,
map_section.data,
base_addr=exe_section.base_addr,
index=map_section.index,
alignment=exe_section.alignment)
if map_section.index in relocations:
for relocation in relocations[map_section.index]:
section.relocations[map_section.addr + relocation.replace_addr] = relocation
translation_unit.add_section(section)
# If the section contains data but there are no symbols for it, we need to create
# a "fake" symbol or otherwise the data will not be included in the final elf.
if map_section.size > 0 and section_count[sk] == 0:
name = f"_section_symbol_{sk.replace('.', '')}"
symbol = linker_map.Symbol(
section.addr, map_section.size, 0, name, k, tuk)
symbol.source = f"section_symbol/{tuk}/{section.name}/{section.addr:08X}"
sv.append(symbol)
section_count[sk] += 1
context.debug(f"added symbol '{name}' for unreferenced section '{sk}'")
# group symbols together, e.g., functions will be a group of [header, label1, label2, ...]
groups = generate_symbols.groups_from_symbols(sv)
for group in groups:
first = group[0]
new_symbols = []
if first.is_function:
# take the group and generate a function
assert section.data
new_symbols.extend(generate_functions.from_group(section, group))
else:
# take the group of symbols and generate "real" symbols
new_symbols.extend(generate_symbols.from_group(section, group))
for symbol in new_symbols:
section.add_symbol(symbol)
for symbol in section.symbols:
symbol.set_mlts(module.index,library.name, translation_unit.name, section.name)
# clear data
section.data = None
if module_id > 0 and translation_name == "global_destructor_chain.o":
translation_unit.special = "rel"
for section in translation_unit.sections.values():
if section.name == ".dtors":
assert len(section.symbols) > 0
first = section.symbols[0]
length = sum([ x.size+x.padding for x in section.symbols ])
_dtors = LinkerGenerated(
identifier=Identifier("_xx", symbol.addr + 4, "_dtors"),
addr=first.addr,
size=length,
data=[],
data_type=PointerType(VOID),
padding=0,
padding_data=[],
zero_length=True,
always_extern=True)
_dtors.set_mlts(module.index,library.name, translation_unit.name, section.name)
section.symbols = [_dtors]
elif section.name == ".text":
for symbol in section.symbols:
if symbol.identifier.name == "__register_global_object":
symbol.argument_types.extend([
PointerType(VOID), # object
PointerType(VOID), # dtor
PointerType(VOID), # chain
])
elif symbol.identifier.name == "__destroy_global_chain":
pass
if module_id > 0 and translation_name == "executor.o":
translation_unit.special = "rel"
return module