tp/tools/libdol2asm/generate_symbols.py

335 lines
13 KiB
Python

from . import util
from . import linker_map
from .disassemble import Access, BranchAccess, FloatLoadAccess, DoubleLoadAccess
from .data import *
def string_decode(data: bytearray) -> Tuple[str, str]:
    """Try to decode the data using utf-8 or shift-jis.

    The final byte of ``data`` is dropped before decoding (the caller in this
    file passes strings that carry a trailing 0 terminator).  UTF-8 is tried
    first, then Shift-JIS through the ``shift_jisx0213`` codec.

    Returns:
        ``(text, encoding)`` where ``encoding`` is ``"utf-8"`` or
        ``"shift-jis"``; ``(None, None)`` if neither codec can decode the
        bytes.
    """
    payload = data[:-1]
    # (codec passed to .decode(), label reported to the caller)
    for codec, label in (("utf-8", "utf-8"), ("shift_jisx0213", "shift-jis")):
        try:
            return payload.decode(codec), label
        except UnicodeDecodeError:
            # Only a failed decode means "try the next codec"; any other
            # exception is a real error and must not be swallowed.
            continue
    return None, None
def string_from_data(addr: int, data: bytearray, string_base: StringBase) -> String:
    """Create a string symbol from an address and its raw bytes.

    Args:
        addr: Address of the string; used for the identifier and the symbol.
        data: Raw bytes of the string, including its final terminator byte
            (``string_decode`` drops the last byte before decoding).
        string_base: The ``StringBase`` symbol this string is attached to.

    Returns:
        A ``String`` of size ``len(data)`` carrying the decoded text and the
        name of the encoding that succeeded.

    Raises:
        AssertionError: If the bytes decode as neither UTF-8 nor Shift-JIS.
    """
    string, encoding = string_decode(data)
    assert encoding is not None, \
        "string at 0x%08X is neither valid utf-8 nor shift-jis" % addr
    return String(
        Identifier("stringBase", addr, None),
        addr,
        len(data),
        data_type=PointerType(ConstType(CHAR)),
        encoding=encoding,
        decoded_string=string,
        string_base=string_base)
def zero_initialized_symbol(section: Section,
                            identifier: Identifier,
                            symbol: linker_map.Symbol,
                            padding: int) -> List[Symbol]:
    """Create the symbol for a linker-map entry that has no data.

    The kind of symbol is chosen from how the entry is accessed:

    * loaded as a float, size a non-zero multiple of 4, and the identifier
      name does not start with ``__float_``  -> f32 symbol without data
    * loaded as a double, size a non-zero multiple of 8 -> f64 symbol
      without data
    * anything else -> arbitrary data without data

    ``section`` is accepted for symmetry with ``value_initialized_symbol``
    but is not consulted here.

    Returns:
        A single-element list with the created symbol.
    """
    access = symbol.access
    size = symbol.size
    addr = symbol.addr

    if isinstance(access, FloatLoadAccess):
        name = identifier.name
        is_float_constant = name and name.startswith("__float_")
        fits_f32 = size >= 4 and size % 4 == 0
        if fits_f32 and not is_float_constant:
            return [FloatingPoint.create_f32_without_data(
                identifier, addr, size, padding)]

    if isinstance(access, DoubleLoadAccess):
        fits_f64 = size >= 8 and size % 8 == 0
        if fits_f64:
            return [FloatingPoint.create_f64_without_data(
                identifier, addr, size, padding)]

    # no typed interpretation applies
    return [ArbitraryData.create_without_data(identifier, addr, size, padding)]
def _linker_generated_table(name, addr: int, size: int):
    """Build the zero-length, always-extern ``LinkerGenerated`` placeholder used for the ``_ctors``/``_dtors`` tables."""
    return LinkerGenerated(
        identifier=Identifier("_xx", addr, name),
        addr=addr,
        size=size,
        data=[],
        data_type=PointerType(VOID),
        padding=0,
        padding_data=[],
        zero_length=True,
        always_extern=True)


def value_initialized_symbol(section: Section,
                             identifier: Identifier,
                             offset: int,
                             data: bytearray,
                             padding_data: bytearray,
                             symbol: linker_map.Symbol) -> List[Symbol]:
    """Create symbols from data.

    This will try to find strings, integers, floats, and other special
    symbols.  The checks run in this order and the first match wins:

    1. names starting with ``__vt``           -> virtual table
    2. ``@stringBase0`` in ``.rodata``         -> string base + one string per
       0-terminated chunk of ``data``
    3. ``_rom_copy_info`` / ``_bss_init_info`` in ``.init`` -> linker generated
    4. special ``.ctors`` / ``.dtors`` entries -> reference arrays and
       linker generated ``_ctors`` / ``_dtors`` tables
    5. float / double load access              -> f32 / f64 values
    6. 4-byte and 2-byte symbols with a non-zero value -> f32, u32 or u16
    7. everything else                         -> raw initialized data

    Args:
        section: Section the symbol lives in; only its ``name`` is read here.
        identifier: Identifier given to the created symbol.
        offset: Not used by this function.
        data: The symbol's bytes.
        padding_data: The bytes of the padding that follows the symbol.
        symbol: Linker-map entry (``name``, ``addr``, ``size``, ``access``).

    Returns:
        The list of created symbols (more than one for string bases and for
        some ``.ctors`` / ``.dtors`` entries).

    Raises:
        AssertionError: If a special symbol does not have the expected
            section, size or padding.
    """
    # all virtual tables begin with "__vt"
    if symbol.name and symbol.name.startswith("__vt"):
        assert section.name == ".data"
        assert symbol.size % 4 == 0
        assert len(padding_data) % 4 == 0
        assert sum(padding_data) == 0
        return [VirtualTable.create(
            identifier,
            symbol.addr,
            data,
            padding_data)]

    # strings will always be in rodata
    if section.name == ".rodata":
        if symbol.name == "@stringBase0":
            string_base = StringBase(
                Identifier("stringBase", symbol.addr, symbol.name),
                symbol.addr,
                0,
                data=bytes(),
                data_type=PointerType(ConstType(CHAR)),
                padding=0,
                padding_data=bytes(),
                strings=[])
            strings = [string_base]
            split_data = list(util.magicsplit(data, 0))
            x_offset = 0
            # the last chunk (whatever follows the final 0) is not a string
            for x in split_data[:-1]:
                str_addr = symbol.addr + x_offset
                str_length = len(x) + 1
                # re-attach the 0 terminator that the split removed
                str_data = bytes(x + [0])
                strings.append(string_from_data(
                    str_addr, str_data, string_base))
                x_offset += str_length
            # the padding of the whole blob is carried by the last symbol
            strings[-1].padding = len(padding_data)
            strings[-1].padding_data = padding_data
            return strings

    if section.name == ".init":
        if symbol.name == "_rom_copy_info" or symbol.name == "_bss_init_info":
            assert len(padding_data) == 0
            return [LinkerGenerated.create(identifier, symbol.addr, symbol.size)]

    # both .ctors and .dtors symbols are special
    if section.name == ".ctors":
        if symbol.name == "__init_cpp_exceptions_reference":
            assert len(data) == 4
            assert len(padding_data) % 4 == 0
            constructor_count = len(padding_data) // 4
            constructors = list(struct.unpack(
                ">" + "I" * constructor_count, padding_data))
            # the padding holds big-endian words; count them up to the
            # first zero word
            count = 0
            for x in constructors:
                if x == 0:
                    break
                count += 1
            _ctors_data = padding_data[0:count * 4]
            __init_cpp_exceptions_reference = ReferenceArray.create(
                identifier,
                symbol.addr,
                data,
                bytearray(),
                always_extern=True)
            # instead of creating the _ctors ourself we let the linker do it
            _ctors = _linker_generated_table(
                "_ctors", symbol.addr + 4, len(_ctors_data))
            return [
                __init_cpp_exceptions_reference,
                _ctors
            ]
        elif not symbol.name:
            assert len(data) % 4 == 0
            assert len(padding_data) == 0
            return [_linker_generated_table("_ctors", symbol.addr, len(data))]

    if section.name == ".dtors":
        if symbol.name == "__destroy_global_chain_reference":
            assert len(data) == 4
            __destroy_global_chain_reference = ReferenceArray.create(
                identifier, symbol.addr, data, bytearray(),
                always_extern=True)
            if len(padding_data) == 0:
                return [__destroy_global_chain_reference]
            # _dtors
            _dtors = _linker_generated_table(
                "_dtors", symbol.addr + 4, len(padding_data))
            return [
                __destroy_global_chain_reference,
                _dtors
            ]
        elif symbol.name == "__fini_cpp_exceptions_reference":
            assert len(data) == 4
            assert len(padding_data) == 0
            __fini_cpp_exceptions_reference = ReferenceArray.create(
                identifier, symbol.addr, data, bytearray(),
                always_extern=True)
            __dtors_null_terminator = ReferenceArray.create(
                Identifier("_xx", symbol.addr + 4, "__dtors_null_terminator"),
                symbol.addr + 4, bytearray([0, 0, 0, 0]), bytearray(),
                always_extern=True)
            return [
                __fini_cpp_exceptions_reference,
                __dtors_null_terminator,
            ]
        elif not symbol.name:
            assert len(data) % 4 == 0
            assert len(padding_data) == 0
            return [_linker_generated_table("_dtors", symbol.addr, len(data))]

    if isinstance(symbol.access, FloatLoadAccess):
        is_float_constant = identifier.name and identifier.name.startswith(
            "__float_")
        if symbol.size >= 4 and symbol.size % 4 == 0 and not is_float_constant:
            values = FloatingPoint.f32_from(data)
            padding_values = FloatingPoint.f32_from(padding_data)
            # Metrowerks is very smart... if you initialize a float with 0.0f, the storage will be moved to the one of the .bss sections.
            # Generated literals will always be (for floats and doubles) in the .sdata2 section. Thus, if we are a literal, we cannot
            # use the value 0.0f.
            is_zero = (len(values) == 1 and values[0][0] == 0)
            is_zero_and_no_padding = is_zero and len(padding_values) == 0
            if len(values) > 0 and not is_zero_and_no_padding:
                return [FloatingPoint.create_f32(identifier, symbol.addr, values, padding_values)]

    if isinstance(symbol.access, DoubleLoadAccess):
        if symbol.size >= 8 and symbol.size % 8 == 0:
            values = FloatingPoint.f64_from(data)
            padding_values = FloatingPoint.f64_from(padding_data)
            # Same comments as for the float case.
            is_zero = (len(values) == 1 and values[0][0] == 0)
            is_zero_and_no_padding = is_zero and len(padding_values) == 0
            if len(values) > 0 and not is_zero_and_no_padding:
                return [FloatingPoint.create_f64(identifier, symbol.addr, values, padding_values)]

    if symbol.size == 4 and len(padding_data) % 4 == 0:
        if identifier.name and "$" in identifier.name:
            # TODO: symbols with the character $ are often arrays.
            pass
        else:
            values = Integer.u32_from(data)
            float_values = FloatingPoint.f32_from(data)
            # a zero word falls through to the raw-data export below
            if values[0] != 0:
                f32 = float_values[0][1]
                if util.is_nice_float32(f32) or f32 in util.float32_exact:
                    padding_values = FloatingPoint.f32_from(padding_data)
                    return [FloatingPoint.create_f32(identifier, symbol.addr, float_values, padding_values)]
                else:
                    padding_values = Integer.u32_from(padding_data)
                    return [Integer.create_u32(identifier, symbol.addr, data, values, padding_data, padding_values)]

    if symbol.size == 2 and len(padding_data) % 2 == 0:
        if identifier.name and "$" in identifier.name:
            # TODO: symbols with the character $ are often arrays.
            pass
        else:
            values = Integer.u16_from(data)
            padding_values = Integer.u16_from(padding_data)
            # a zero halfword falls through to the raw-data export below
            if values[0] != 0:
                return [Integer.create_u16(identifier, symbol.addr, data, values, padding_data, padding_values)]

    # otherwise export it as raw initialized data
    return [ArbitraryData.create_with_data(identifier, symbol.addr, data, padding_data)]
def from_group(section: Section, group: List[linker_map.Symbol]) -> List[Symbol]:
    """Create symbols from a group of linker map symbols.

    Only single-element groups are supported.  A symbol whose size is not
    positive yields no symbols.  If the section has no data the symbol is
    created as zero-initialized; otherwise the symbol's bytes and the bytes of
    its trailing padding are read from the section and interpreted by
    ``value_initialized_symbol``.

    Raises:
        AssertionError: If the group does not hold exactly one symbol, or the
            bytes read from the section do not match the symbol's size or
            padding.
    """
    assert len(group) == 1
    first = group[0]
    if first.size <= 0:
        return []

    identifier = Identifier("data", first.addr, first.name)
    if not section.data:
        return zero_initialized_symbol(section, identifier, first, first.padding)

    data = section.get_data(first.start, first.end)
    # no padding -> empty bytes, so the length check below still holds
    padding_data = bytes()
    if first.padding > 0:
        padding_data = section.get_data(first.end, first.end + first.padding)
    assert len(data) == first.size
    assert len(padding_data) == first.padding
    return value_initialized_symbol(section, identifier, first.addr, data, padding_data, first)
def groups_from_symbols(symbols: List[linker_map.Symbol]) -> List[List[linker_map.Symbol]]:
    """Group symbols based on the 'is_function' flag.

    A function symbol opens a new group.  Following symbols without a name
    (``name is None``) are added to the open group.  Any other non-function
    symbol closes the open group and is emitted as a group of its own.  The
    input order is preserved.

    Returns:
        The list of groups; every input symbol appears in exactly one group.
    """
    groups = []
    current = []  # group opened by the most recent function symbol, if any
    for symbol in symbols:
        if symbol.is_function:
            # a new function closes the previous function's group
            if current:
                groups.append(current)
            current = [symbol]
        elif current and symbol.name is None:
            current.append(symbol)
        else:
            if current:
                groups.append(current)
                current = []
            groups.append([symbol])
    if current:
        groups.append(current)
    return groups