tp/tools/libdol2asm/exporter/cpp_asm_exporter.py

222 lines
9.7 KiB
Python

import struct
from capstone import *
from capstone.ppc import *
from ..disassemble import *
from ..data.function import *
from ..context import Context
from ..builder import AsyncBuilder
class CPPDisassembler(Disassembler):
def __init__(self, builder: AsyncBuilder, function: Function, block_map, symbol_table, relocations, context: Context):
super().__init__([])
self.builder = builder
self.function = function
self.block_map = block_map
self.symbol_table = symbol_table
self.last_address = 0
self.relocations = relocations
self.unregister_references = set()
self.context = context
def get_symbol(self, addr):
if addr == self.function.addr:
return self.function
if addr in self.block_map:
return self.block_map[addr]
return self.symbol_table.always_get(addr)
def addr_to_label(self, addr) -> str:
symbol = self.get_symbol(addr)
if not symbol:
return None
return symbol.asm_reference(addr)
async def callback(self, address, insn, bytes):
if address in self.block_map:
block = self.block_map[address]
await self.builder.write("%s:" % block.identifier.label)
self.last_address = address
asm = None
raw = struct.unpack(">I", bytes)[0]
if insn != None:
asm = self.insn_to_text(address, insn, raw)
else:
# Capstone couldn't disassemble it
idx = (raw & 0xfc000000) >> 26
idx2 = (raw & 0x000007fe) >> 1
if idx == 31 and idx2 == 467: # mtspr
asm = disasm_mspr(raw, 1)
elif idx == 31 and idx2 == 339: # mfspr
asm = disasm_mspr(raw, 0)
elif idx == 31 and idx2 == 512: # mcrxr
asm = disasm_mcrxr(raw)
elif idx == 63 and idx2 == 32: # fcmpo
asm = disasm_fcmp(raw)
elif idx == 4:
asm = disasm_ps(raw) # Paired singles
elif idx in {56, 57, 60, 61}:
asm = disasm_ps_mem(raw, idx)
elif idx == 58:
asm = disasm_ld(raw) # ld
if asm == None:
asm = f".4byte 0x{raw:08X} /* unknown instruction */"
prefixComment = '/* %08X %02X %02X %02X %02X */' % (
address, bytes[0], bytes[1], bytes[2], bytes[3])
await self.builder.write(f"{prefixComment}\t{asm}")
def insn_to_text(self, addr, insn, raw) -> str:
id = insn.id
split_load = addr in self.splitDataLoads
offset_load = is_load_store_reg_offset(insn, None)
r2_addr = self.r2AddrInsns[addr]
r13_addr = self.r13AddrInsns[addr]
# Branch instruction replace immediate value with label
if id in {PPC_INS_B, PPC_INS_BL, PPC_INS_BDZ, PPC_INS_BDNZ}:
label = self.addr_to_label(insn.operands[0].imm)
if not label:
k = " ".join([f"{x:02X}" for x in insn.bytes])
self.context.warning(
f"{k}| '{label}' {addr:08X} to {insn.operands[0].imm:08X}, branch to unknown addr: {insn}")
sys.exit(1)
assert label
return f"{insn.mnemonic} {label}"
elif id == PPC_INS_BC:
branchPred = '+' if (insn.bytes[1] & 0x20) else ''
if insn.operands[0].type == PPC_OP_IMM:
label = self.addr_to_label(insn.operands[0].imm)
return f"{insn.mnemonic}{branchPred} {label}"
elif insn.operands[1].type == PPC_OP_IMM:
label = self.addr_to_label(insn.operands[1].imm)
reg = insn.reg_name(insn.operands[0].value.reg)
return f"{insn.mnemonic}{branchPred} {reg}, {label}"
# Handle split loads (high part)
elif id == PPC_INS_LIS and split_load:
loLoadInsn = self.linkedInsns[addr]
value = self.splitDataLoads[addr]
suffix = 'h' if loLoadInsn.id == PPC_INS_ORI else 'ha'
reg = insn.reg_name(insn.operands[0].reg)
name = self.addr_to_label(value)
if name:
return f"{insn.mnemonic} {reg}, {name}@{suffix} /* 0x{value:08X}@{suffix} */"
else:
if suffix == 'h':
fixed_value = value >> 16
fixed_value &= 0xFFFF
else:
fixed_value = value >> 16
fixed_value += (1 if (value & 0x8000) else 0)
fixed_value &= 0xFFFF
return f"{insn.mnemonic} {reg}, 0x{fixed_value:04X} /* 0x{value:08X}@{suffix} */"
# Handle split loads (low part)
elif id in {PPC_INS_ADDI, PPC_INS_ADDIC, PPC_INS_ORI} and split_load:
# NOTE: PPC_INS_ADDIC only works with a patched compiler
value = self.splitDataLoads[addr]
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].reg)
name = self.addr_to_label(value)
if name:
return f"{insn.mnemonic} {rA}, {rB}, {name}@l /* 0x{value:08X}@l */"
else:
fixed_value = value & 0xFFFF
return f"{insn.mnemonic} {rA}, {rB}, 0x{fixed_value:04X} /* 0x{value:08X}@l */"
elif offset_load and split_load:
value = self.splitDataLoads[addr]
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].mem.base)
name = self.addr_to_label(value)
if name:
return f"{insn.mnemonic} {rA}, {name}@l({rB}) /* 0x{value:08X}@l */"
else:
fixed_value = value & 0xFFFF
return f"{insn.mnemonic} {rA}, 0x{fixed_value:04X}({rB}) /* 0x{value:08X}@l */"
if r13_addr == self.common_r13_addr:
if id == PPC_INS_ADDI and insn.operands[1].reg == PPC_REG_R13:
imm = insn.operands[2].imm
value = r13_addr + sign_extend_16(imm)
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].reg)
name = self.addr_to_label(value)
if name:
# simplified mnemonic for: la rD,d(rA) equivalent to addi rD,rA,d
# https://www.nxp.com/docs/en/application-note/AN2491.pdf
return f"la {rA}, {name}({rB}) /* {value:08X}-_SDA_BASE_ */"
if is_load_store_reg_offset(insn, PPC_REG_R13):
value = r13_addr + \
sign_extend_16(insn.operands[1].mem.disp)
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].mem.base)
name = self.addr_to_label(value)
if name:
return f"{insn.mnemonic} {rA}, {name}({rB})"
if r2_addr == self.common_r2_addr:
if id == PPC_INS_ADDI and insn.operands[1].reg == PPC_REG_R2:
value = r2_addr + sign_extend_16(insn.operands[2].imm)
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].reg)
name = self.addr_to_label(value)
if name:
# simplified mnemonic for: la rD,d(rA) equivalent to addi rD,rA,d
# https://www.nxp.com/docs/en/application-note/AN2491.pdf
return f"la {rA}, {name}({rB}) /* {value:08X}-_SDA2_BASE_ */"
if is_load_store_reg_offset(insn, PPC_REG_R2):
value = r2_addr + \
sign_extend_16(insn.operands[1].mem.disp)
rA = insn.reg_name(insn.operands[0].reg)
rB = insn.reg_name(insn.operands[1].mem.base)
name = self.addr_to_label(value)
if name:
return f"{insn.mnemonic} {rA}, {name}({rB})"
# Sign-extend immediate values because Capstone is an idiot and doesn't do that automatically
if id in {PPC_INS_ADDI, PPC_INS_ADDIC, PPC_INS_SUBFIC, PPC_INS_MULLI} and (insn.operands[2].imm & 0x8000):
return "%s %s, %s, %i" % (insn.mnemonic, insn.reg_name(insn.operands[0].reg), insn.reg_name(insn.operands[1].value.reg), insn.operands[2].imm - 0x10000)
elif id in {PPC_INS_LI, PPC_INS_CMPWI} and (insn.operands[1].imm & 0x8000):
return "%s %s, %i" % (insn.mnemonic, insn.reg_name(insn.operands[0].reg), insn.operands[1].imm - 0x10000)
elif id == PPC_INS_CNTLZW:
return f"cntlzw {insn.op_str}"
elif id == PPC_INS_MTICCR:
A = 0x3fb
rB = insn.reg_name(insn.operands[0].reg)
return f"mtspr 0x{A:X}, {rB}"
elif id == PPC_INS_SYNC:
assert insn.operands[0].value.imm == 0
return 'sync'
elif id == PPC_INS_LWZ and insn.operands[0].type == PPC_OP_REG and insn.operands[0].reg == PPC_REG_R4 and insn.operands[1].type == PPC_OP_MEM and insn.operands[1].value.mem.base == PPC_REG_R0:
value = insn.operands[1].value.mem.disp
insn_str = 'lwz %s, %s0x%x(r0)' % (insn.reg_name(insn.operands[0].reg), '-' if (
value < 0) else '', ((-value) & 0xFFFF) if value < 0 else (value & 0xFFFF))
return insn_str
# "twui X" is shorthand for "twi 31, X"
elif id == PPC_INS_TWUI:
assert insn.operands[0].type == PPC_OP_REG
assert insn.operands[1].type == PPC_OP_IMM
rA = insn.reg_name(insn.operands[0].reg)
S = insn.operands[1].value.imm
insn_str = 'twi %i, %s, 0x%x' % (31, rA, S)
return insn_str
else:
if addr in self.registerLoads:
return f"{insn.mnemonic} {insn.op_str}\t/* effective address: {self.registerLoads[addr]:08X} */"
return f"{insn.mnemonic} {insn.op_str}"