# Mirror of https://github.com/zeldaret/tp.git
import librel
|
|
|
|
from dataclasses import dataclass, field
|
|
from collections import defaultdict
|
|
from typing import Dict, List
|
|
from pathlib import Path
|
|
from intervaltree import Interval, IntervalTree
|
|
|
|
from .context import Context
|
|
from .disassemble import Access, BranchAccess, FloatLoadAccess, DoubleLoadAccess
|
|
from .data import *
|
|
|
|
from . import util
|
|
from . import linker_map
|
|
from . import binary
|
|
from . import sort_translation_units
|
|
from . import generate_symbols
|
|
from . import generate_functions
|
|
from . import settings
|
|
from . import disassemble
|
|
|
|
|
|
def insert_access_as_symbol(context: Context,
                            module_id: int,
                            sections: List[ExecutableSection],
                            map_sections: Dict[str, linker_map.Section],
                            map_addrs: Dict[str, Dict[int, linker_map.Symbol]],
                            ait: Dict[str, IntervalTree],
                            relocations,
                            access: Access) -> bool:
    """Insert a new linker-map symbol for the address referenced by `access`.

    Args:
        context: used to report warnings.
        module_id: id of the module being processed; the known-name lookups
            below only run for module 0.
        sections: executable sections, each supporting `addr in section`.
        map_sections: linker-map sections by name; the new symbol is appended
            to the matching section's `symbols` list.
        map_addrs: per section name, existing symbols keyed by their address
            relative to the section start; updated with the new symbol.
        ait: per section name, an interval tree over the already known symbols.
        relocations: currently unused by this function.
        access: the access whose `addr` should become a symbol.

    Returns:
        True when a new symbol was created. False when the address lies in no
        section or in more than one, when a symbol already exists at that
        address (its `access` is replaced instead), or when the address falls
        inside the "@stringBase0" symbol.
    """

    # determine what sections the access addr are in
    in_sections = [x for x in sections if access.addr in x]
    if not in_sections:
        return False
    if len(in_sections) != 1:
        context.warning("multiple section for symbol at 0x%08X" %
                        (access.addr & 0xFFFFFFFF))
        context.warning([(x.name, x.start, x.end) for x in sections])
        context.warning([x.name for x in in_sections])
        return False

    # check that we don't already have a symbol for the access address
    section = in_sections[0]
    relative_addr = access.addr - section.start
    if relative_addr in map_addrs[section.name]:
        map_addrs[section.name][relative_addr].access = access
        return False

    # addresses inside the string pool symbol never become separate symbols
    overlap = ait[section.name].at(relative_addr)
    if overlap:
        overlap_symbol = next(iter(overlap)).data
        if overlap_symbol.name == "@stringBase0":
            return False

    obj = None
    lib = None
    name = None

    # because this is an "access" we have no other information about the
    # name, here we try to set names for known addresses
    if module_id == 0:
        if section.name == ".init":
            obj = "init.o"
        if access.addr in settings.PREDEFINED_SYMBOLS:
            name = settings.PREDEFINED_SYMBOLS[access.addr]

    # create new linker map symbol
    symbol = linker_map.Symbol(relative_addr, 0, 0, name, lib, obj)
    symbol.source = f"insert_access_as_symbol/{access.addr:08X}"
    symbol.access = access

    map_sections[section.name].symbols.append(symbol)
    map_addrs[section.name][relative_addr] = symbol
    return True


def infer_location_from_other_symbols(section: "linker_map.Section", symbols: List["linker_map.Symbol"]):
    """Give every symbol an object file (and library) based on its neighbours.

    `symbols` is sorted in place by address. A symbol without an object file
    inherits the object/library of the closest preceding symbol that has one.
    Symbols that come before the first located symbol inherit from that first
    located symbol instead. If no symbol in the list has an object file, all
    of them are placed in a fake translation unit named after the section.

    While walking the list, named symbols located in code (according to
    `section.is_addr_code`) are flagged as functions, unless their address is
    listed in `settings.FAKE_FUNCTIONS`.
    """
    symbols.sort(key=lambda x: x.addr)

    current_obj = None
    current_lib = None
    pending = []  # symbols seen before any symbol with a known object file
    for symbol in symbols:
        # often, code and data are not stored in the same section (except
        # .init). thus, if the symbol is in any code section set the
        # 'is_function' flag so we later can parse the symbol as a function.
        if section.is_addr_code(symbol.addr) and symbol.name:
            symbol.is_function = True
        # TODO: Not sure if the FAKE_FUNCTIONS are used anymore
        if symbol.is_function and symbol.addr in settings.FAKE_FUNCTIONS:
            symbol.is_function = False

        if symbol.obj:
            # assign previous symbols to the same object and library as this symbol
            for earlier in pending:
                earlier.obj = symbol.obj
                earlier.lib = symbol.lib
            pending.clear()
            current_obj = symbol.obj
            current_lib = symbol.lib
        elif current_obj:
            # assign symbols to the previous object and library
            symbol.obj = current_obj
            symbol.lib = current_lib
        else:
            pending.append(symbol)

    if pending:
        # there are no other symbol which we can use to infer the object and
        # library file from, create a fake translation unit
        fallback_obj = f"unknown_translation_unit_{section.name.replace('.','')}.o"
        for orphan in pending:
            orphan.obj = fallback_obj


def calculate_symbol_sizes(section: linker_map.Section, symbols: List[linker_map.Symbol]):
    """Calculate symbol sizes taking into account the section and near-by symbols.

    `symbols` is sorted in place by (addr, size). Afterwards:

    * a symbol with size 0 is stretched up to the next symbol, or to the end
      of the section when it is the last one;
    * a symbol that already has a size gets the gap to the next symbol (or to
      the end of the section) as padding, and is truncated if it would run
      into the next symbol;
    * in ".bss", ".sdata" and ".sbss", runs of symbols that start 8-byte
      aligned but do not end 8-byte aligned are moved to the object file of
      the next symbol that ends on an 8-byte boundary;
    * when there is more than one symbol, the last symbol must end exactly at
      the end of the section, and its padding is reset to 0.

    Raises:
        AssertionError: if a padding becomes negative or the symbols do not
            cover the section up to its end.
    """

    symbols.sort(key=lambda x: (x.addr, x.size))

    # Calculate the size (and padding) of the symbol
    for curr, following in util.mapOverlap(symbols, 2):
        if not curr:
            continue

        caddr = curr.addr + section.addr
        if following:
            naddr = following.addr + section.addr
            if curr.size == 0:
                # Assume all data from current symbol to next is for this
                # symbol (we cannot determine the padding)
                curr.size = naddr - caddr
            else:
                curr_end = caddr + curr.size
                if curr_end > naddr:
                    # There are functions that have sub-function within
                    # themself. (See __save_gpr) Truncate symbol size.
                    curr.size = naddr - caddr
                else:
                    # The difference between current symbol end and next is
                    # the padding
                    curr.padding = naddr - curr_end
        else:
            if curr.size == 0:
                # Assume the symbol goes to the end of the section
                curr.size = section.end - caddr
            else:
                curr.padding = section.end - (caddr + curr.size)
        assert curr.padding >= 0

    # Some section have their object files aligned to 8 bytes. This hacks will
    # move symbols, which are not aligned, to the next object file. Not sure
    # if the frameworkF.map have them listed in the wrong source file or if a
    # different alignment setting was used.
    if section.name in (".bss", ".sdata", ".sbss"):
        for i, curr in enumerate(symbols):
            begin_aligned = (curr.addr % 8) == 0
            end_aligned = ((curr.addr + curr.size + curr.padding) % 8) == 0
            if not begin_aligned or end_aligned:
                continue

            # find the first later symbol that ends on an 8-byte boundary;
            # j is its index, so symbols[i:j] is the run in front of it
            aligned_symbol = None
            j = i + 1
            for j, candidate in enumerate(symbols[i + 1:], start=i + 1):
                if ((candidate.addr + candidate.size + candidate.padding) % 8) == 0:
                    aligned_symbol = candidate
                    break

            if not aligned_symbol:
                continue
            if curr.obj == aligned_symbol.obj and curr.lib == aligned_symbol.lib:
                continue

            for sym in symbols[i:j]:
                sym.obj = aligned_symbol.obj
                sym.lib = aligned_symbol.lib

    # check that we're using all the space of the section
    if len(symbols) > 1:
        last_symbol = symbols[-1]
        endp = last_symbol.end + last_symbol.padding
        assert section.addr + endp == section.end
        last_symbol.padding = 0


def search(context: Context,
           module_id: int,
           name: str,
           map_path: Path,
           sections: List[ExecutableSection],
           relocations: Dict[int, "rel.Relocation"],
           all_relocations: Dict[int, "librel.Relocation"],
           cache: bool) -> Module:
    """Search for symbols from executable sections and the linker map.

    The steps, in order:

    1. parse the linker map into sections and symbols;
    2. analyze the code for accesses and insert each access as a symbol
       (plus the entry point for module 0);
    3. insert a symbol for every relocation targeting this module that does
       not already have one;
    4. infer object files and sizes for the symbols of every map section and
       group them by library, object file and section;
    5. build the `Module` with its libraries, translation units, sections and
       generated symbols.

    Args:
        context: used for warning/error/debug output.
        module_id: id of the module; 0 gets entry-point and absolute-address
            handling, ids above 0 get the "rel" special cases.
        name: not used by this function.
        map_path: path of the linker map to parse.
        sections: executable sections of the module, indexed by section index.
        relocations: relocations per section index, attached to the created
            sections.
        all_relocations: relocations per target module id.
        cache: forwarded to `binary.analyze`.

    Returns:
        The populated `Module`.
    """

    # Get symbols by reading the linker map
    map_sections, map_addrs = linker_map.parse(
        context,
        module_id,
        map_path,
        sections,
        base_folder=(module_id == 0))

    # Find accesses/symbols by analyzing the code
    accesses, highLink = binary.analyze(
        context,
        module_id,
        sections,
        cache=cache)

    # one interval tree per section over the symbols that have a size
    ait_sections = dict()
    for section_name, section in map_sections.items():
        ait_sections[section_name] = IntervalTree([
            Interval(symbol.start, symbol.end, symbol)
            for symbol in section.symbols
            if symbol.size > 0
        ])

    # TODO: do we really need to sort?
    sorted_accesses = sorted(accesses.items(), key=lambda x: x[0])
    for _, access in sorted_accesses:
        # add access as symbol, the check if the address is already a symbol
        # is done inside 'insert_access_as_symbol'
        insert_access_as_symbol(context, module_id, sections,
                                map_sections, map_addrs, ait_sections, relocations, access)

    # add entrypoint to the right section. the entrypoint is required as it is
    # not included in the linker map.
    if module_id == 0:
        for section in sections:
            if settings.ENTRY_POINT not in section:
                continue

            branch_access = BranchAccess(at=0x00000000, addr=settings.ENTRY_POINT)
            insert_access_as_symbol(
                context, module_id, sections, map_sections, map_addrs, ait_sections, relocations, branch_access)
            break

    # insert relocation that are not already symbol from the linker map
    if len(all_relocations) > 0:
        table = dict()
        for section in map_sections.values():
            if section.name not in table:
                table[section.name] = dict()
            for symbol in section.symbols:
                table[section.name][symbol.addr] = symbol

        # iterate over all relocations (from all modules) that want to
        # relocate something inside _this_ module.
        # NOTE(review): assumes `all_relocations` has an entry for
        # `module_id` whenever it is non-empty - confirm against the caller.
        for r in all_relocations[module_id]:
            section = sections[r.section]
            if section.name in table:
                addr = r.addend
                if module_id == 0:  # relative address are used for symbols
                    addr -= section.addr

                overlap = ait_sections[section.name].at(addr)
                if len(overlap) > 0:
                    overlap_symbol = list(overlap)[0].data
                    if overlap_symbol.name == "@stringBase0":
                        continue

                access = None
                if r.parent.data and r.parent.executable_flag and module_id == 372:
                    inst_addr = r.parent.addr + r.offset
                    # for the 16-bit relocation types the instruction starts
                    # two bytes before the relocated halfword
                    if r.type in (librel.R_PPC_ADDR16_LO,
                                  librel.R_PPC_ADDR16_HI,
                                  librel.R_PPC_ADDR16_HA):
                        inst_addr -= 2
                    if inst_addr in highLink:
                        high_inst_data = r.parent.data[highLink[inst_addr] - r.parent.addr:][:4]
                        high_insts = list(disassemble.cs.disasm(high_inst_data, highLink[inst_addr]))

                        inst_data = r.parent.data[inst_addr - r.parent.addr:][:4]
                        insts = list(disassemble.cs.disasm(inst_data, inst_addr))

                        if len(insts) == 1 and len(high_insts) == 1:
                            high_inst = high_insts[0]
                            inst = insts[0]
                            if high_inst.id == disassemble.PPC_INS_LIS:
                                if inst.id in disassemble.FLOAT_INST:
                                    access = FloatLoadAccess(r.offset, section.addr + addr)
                                elif inst.id in disassemble.DOUBLE_INST:
                                    access = DoubleLoadAccess(r.offset, section.addr + addr)

                if addr not in table[section.name]:
                    symbol = linker_map.Symbol(addr, 0, 0, None, None, None)
                    symbol.source = f"relocation/{section.name}/{r.addend:08X}"
                    symbol.access = access
                    table[section.name][addr] = symbol
                    map_sections[section.name].symbols.append(symbol)
                elif access:
                    table[section.name][addr].access = access
            else:
                context.error(f"{section.name} not in module {module_id}")

    # build a tree: library -> object file -> section name -> symbols
    section_count = defaultdict(int)
    tree = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    tree_order = defaultdict(lambda: defaultdict(list))
    for section in map_sections.values():

        # calculate the size of symbols and determine where symbols without a
        # library and object file should be located.
        infer_location_from_other_symbols(section, section.symbols)
        calculate_symbol_sizes(section, section.symbols)

        if section.symbols:
            for symbol in section.symbols:
                if section.name not in tree[symbol.lib][symbol.obj]:
                    tree[symbol.lib][symbol.obj][section.name] = []
        else:
            # sections without symbols are kept under the unnamed library/object
            tree[None][None][section.name] = []

        # convert section-relative symbol addresses to absolute addresses
        for symbol in section.symbols:
            symbol.relative_addr = symbol.addr
            symbol.addr += section.addr + section.first_padding
            section_count[section.name] += 1
            tree[symbol.lib][symbol.obj][section.name].append(symbol)
            tree_order[symbol.lib][section.name].append(symbol.obj)

    # create the structure with all necessary information, such as,
    # libraries, translation units, sections, and symbols
    module = Module(module_id)
    module.executable_sections = sections
    module.start = min([x.start for x in sections if x.size > 0])
    module.end = max([x.end for x in sections if x.size > 0])
    for k, v in tree.items():
        library_name = k
        if library_name:
            library_name = library_name.replace(".a", "")
        library = Library(library_name)
        module.add_library(library)

        # sort object files. each section will have its own order of the
        # object files, combine everything to find a god-order
        order_sections = tree_order[k]
        order = sort_translation_units.sort(context, v.keys(), order_sections)

        for tuk in order:
            translation_name = tuk
            if not translation_name:
                translation_name = "unknown_translation_unit.o"
            translation_unit = TranslationUnit(
                translation_name.replace(".o", ""))
            library.add_translation_unit(translation_unit)

            # global_destructor_chain.o will be generate from template
            if module_id > 0 and translation_name == "global_destructor_chain.o":
                translation_unit.generate = False

            # executor.o will be generate from template
            if module_id > 0 and translation_name == "executor.o":
                translation_unit.generate = False

            for sk, sv in v[tuk].items():
                map_section = map_sections[sk]
                exe_section = sections[map_section.index]
                section = Section(sk,
                                  map_section.addr, map_section.size,
                                  map_section.data,
                                  base_addr=exe_section.base_addr,
                                  index=map_section.index,
                                  alignment=exe_section.alignment)

                if map_section.index in relocations:
                    for relocation in relocations[map_section.index]:
                        section.relocations[map_section.addr + relocation.replace_addr] = relocation
                translation_unit.add_section(section)

                # If the section contains data but there are no symbols for
                # it, we need to create a "fake" symbol or otherwise the data
                # will not be included in the final elf.
                if map_section.size > 0 and section_count[sk] == 0:
                    symbol_name = f"_section_symbol_{sk.replace('.', '')}"
                    symbol = linker_map.Symbol(
                        section.addr, map_section.size, 0, symbol_name, k, tuk)
                    symbol.source = f"section_symbol/{tuk}/{section.name}/{section.addr:08X}"
                    sv.append(symbol)
                    section_count[sk] += 1
                    context.debug(f"added symbol '{symbol_name}' for unreferenced section '{sk}'")

                # group symbols together, e.g., functions will be a group of
                # [header, label1, label2, ...]
                groups = generate_symbols.groups_from_symbols(sv)
                for group in groups:
                    first = group[0]

                    new_symbols = []
                    if first.is_function:
                        # take the group and generate a function
                        assert section.data
                        new_symbols.extend(generate_functions.from_group(section, group))
                    else:
                        # take the group of symbols and generate "real" symbols
                        new_symbols.extend(generate_symbols.from_group(section, group))

                    for symbol in new_symbols:
                        section.add_symbol(symbol)

                for symbol in section.symbols:
                    symbol.set_mlts(module.index, library.name, translation_unit.name, section.name)

                # clear data
                section.data = None

            if module_id > 0 and translation_name == "global_destructor_chain.o":
                translation_unit.special = "rel"

                for section in translation_unit.sections.values():
                    if section.name == ".dtors":
                        # replace all symbols of .dtors with one
                        # linker-generated "_dtors" symbol covering them
                        assert len(section.symbols) > 0
                        first = section.symbols[0]
                        length = sum([x.size + x.padding for x in section.symbols])
                        # NOTE(review): `symbol` below is whatever an earlier
                        # loop left bound, not a symbol chosen here; possibly
                        # `first` was intended - confirm before changing.
                        _dtors = LinkerGenerated(
                            identifier=Identifier("_xx", symbol.addr + 4, "_dtors"),
                            addr=first.addr,
                            size=length,
                            data=[],
                            data_type=PointerType(VOID),
                            padding=0,
                            padding_data=[],
                            zero_length=True,
                            always_extern=True)
                        _dtors.set_mlts(module.index, library.name, translation_unit.name, section.name)
                        section.symbols = [_dtors]
                    elif section.name == ".text":
                        for symbol in section.symbols:
                            # "__destroy_global_chain" needs no adjustment
                            if symbol.identifier.name == "__register_global_object":
                                symbol.argument_types.extend([
                                    PointerType(VOID),  # object
                                    PointerType(VOID),  # dtor
                                    PointerType(VOID),  # chain
                                ])

            if module_id > 0 and translation_name == "executor.o":
                translation_unit.special = "rel"

    return module