ABI FPR names (#767)

* Add o32 to disassembler and update macro.inc

* Add a variable for asm processor to makefile
and improve objdump flags

* Update diff settings

* git subrepo pull --force tools/asm-differ

subrepo:
  subdir:   "tools/asm-differ"
  merged:   "1236288d1"
upstream:
  origin:   "https://github.com/simonlindholm/asm-differ"
  branch:   "main"
  commit:   "1236288d1"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

* Remove * import and implement option

* Fix some stuff in the makefile

* Update asm-processor

* Review

* Fix old var name
This commit is contained in:
EllipticEllipsis 2022-03-31 18:22:19 +01:00 committed by GitHub
parent 54a4d1eb12
commit 15dfaf0862
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 513 additions and 280 deletions

View File

@ -21,9 +21,9 @@ ifeq ($(NON_MATCHING),1)
COMPARE := 0
endif
DISASM_FLAGS ?=
DISASM_FLAGS := --reg-names=o32
ifneq ($(FULL_DISASM), 0)
DISASM_FLAGS += --full
DISASM_FLAGS += --all
endif
PROJECT_DIR := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
@ -73,6 +73,9 @@ AS := $(MIPS_BINUTILS_PREFIX)as
LD := $(MIPS_BINUTILS_PREFIX)ld
OBJCOPY := $(MIPS_BINUTILS_PREFIX)objcopy
OBJDUMP := $(MIPS_BINUTILS_PREFIX)objdump
ASM_PROC := python3 tools/asm-processor/build.py
ASM_PROC_FLAGS := --input-enc=utf-8 --output-enc=euc-jp
IINC := -Iinclude -Isrc -Iassets -Ibuild -I.
@ -99,6 +102,9 @@ MIPS_VERSION := -mips2
# We support Microsoft extensions such as anonymous structs, which the compiler supports but warns about. Suppress the warnings with -woff.
CFLAGS += -G 0 -non_shared -Xfullwarn -Xcpluscomm $(IINC) -nostdinc -Wab,-r4300_mul -woff 624,649,838,712
# Show relocations and use ABI FPR names in the dump
OBJDUMP_FLAGS := -d -r -z -Mreg-names=32
ifeq ($(shell getconf LONG_BIT), 32)
# Work around memory allocation bug in QEMU
export QEMU_GUEST_BASE := 1
@ -178,6 +184,7 @@ build/src/libultra/flash/%.o: MIPS_VERSION := -mips1
build/src/code/audio/%.o: OPTFLAGS := -O2
build/assets/%.o: OPTFLAGS := -O1
build/assets/%.o: ASM_PROC_FLAGS :=
# file flags
build/src/boot_O2_g3/fault.o: CFLAGS += -trapuv
@ -194,21 +201,19 @@ build/src/libultra/libc/llcvt.o: OPTFLAGS := -O1
build/src/libultra/libc/llcvt.o: MIPS_VERSION := -mips3 -32
# cc & asm-processor
build/src/boot_O2/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/boot_O2_g3/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/boot_O2/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
build/src/boot_O2_g3/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
build/src/libultra/%.o: CC := python3 tools/asm-processor/build.py $(CC_OLD) -- $(AS) $(ASFLAGS) --
build/src/libultra/%.o: CC := $(CC_OLD)
# Needed at least until voice is decompiled
build/src/libultra/voice/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC_OLD) -- $(AS) $(ASFLAGS) --
build/src/code/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/code/audio/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/code/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
build/src/code/audio/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/actors/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/effects/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/fbdemos/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/gamestates/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/kaleido_scope/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/src/overlays/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
build/assets/%.o: CC := python3 tools/asm-processor/build.py $(CC) -- $(AS) $(ASFLAGS) --
build/assets/%.o: CC := $(ASM_PROC) $(ASM_PROC_FLAGS) $(CC) -- $(AS) $(ASFLAGS) --
#### Main Targets ###
@ -303,7 +308,7 @@ build/data/%.o: data/%.s
build/src/overlays/%.o: src/overlays/%.c
$(CC_CHECK) $<
$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $<
@$(OBJDUMP) -d $@ > $(@:.o=.s)
@$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s)
# TODO: `() || true` is currently necessary to suppress `Error 1 (ignored)` make warnings caused by `test`, but this will go away if
# the following is moved to a separate rule that is only run once when all the required objects have been compiled.
$(ZAPD) bovl -eh -i $@ -cfg $< --outputpath $(@D)/$(notdir $(@D))_reloc.s
@ -313,21 +318,21 @@ build/src/overlays/%.o: src/overlays/%.c
build/src/%.o: src/%.c
$(CC_CHECK) $<
$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $<
@$(OBJDUMP) -d $@ > $(@:.o=.s)
@$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s)
$(RM_MDEBUG)
build/src/libultra/libc/ll.o: src/libultra/libc/ll.c
$(CC_CHECK) $<
$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $<
python3 tools/set_o32abi_bit.py $@
@$(OBJDUMP) -d $@ > $(@:.o=.s)
@$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s)
$(RM_MDEBUG)
build/src/libultra/libc/llcvt.o: src/libultra/libc/llcvt.c
$(CC_CHECK) $<
$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $<
python3 tools/set_o32abi_bit.py $@
@$(OBJDUMP) -d $@ > $(@:.o=.s)
@$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s)
$(RM_MDEBUG)
# Build C files from assets

View File

@ -5,4 +5,5 @@ def apply(config, args):
config['myimg'] = 'mm.us.rev1.rom_uncompressed.z64'
config['mapfile'] = 'build/mm.map'
config['source_directories'] = ['./src','./include']
config['objdump_flags'] = ['-M','reg-names=32']
config['makeflags'] = ['KEEP_MDEBUG=1']

View File

@ -37,3 +37,39 @@
.set TagHi, $29
.set ErrorEPC, $30
.set Reserved31, $31
# Float register aliases (o32 ABI, odd ones are rarely used)
.set $fv0, $f0
.set $fv0f, $f1
.set $fv1, $f2
.set $fv1f, $f3
.set $ft0, $f4
.set $ft0f, $f5
.set $ft1, $f6
.set $ft1f, $f7
.set $ft2, $f8
.set $ft2f, $f9
.set $ft3, $f10
.set $ft3f, $f11
.set $fa0, $f12
.set $fa0f, $f13
.set $fa1, $f14
.set $fa1f, $f15
.set $ft4, $f16
.set $ft4f, $f17
.set $ft5, $f18
.set $ft5f, $f19
.set $fs0, $f20
.set $fs0f, $f21
.set $fs1, $f22
.set $fs1f, $f23
.set $fs2, $f24
.set $fs2f, $f25
.set $fs3, $f26
.set $fs3f, $f27
.set $fs4, $f28
.set $fs4f, $f29
.set $fs5, $f30
.set $fs5f, $f31

View File

@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/simonlindholm/asm-differ
branch = main
commit = 6f8f80b719359d018a2b734288c977aae6538870
parent = 91a6e9f647d2035eba281d286c78f089f70f269f
commit = 1236288d1520335c2bfb672078fec65084d7cb5c
parent = 2c5690701a350c7e7c3d6252dff925ad65d59910
method = merge
cmdver = 0.4.3

View File

@ -1,6 +1,5 @@
repos:
- repo: https://github.com/psf/black
rev: 20.8b1
rev: 22.1.0
hooks:
- id: black
language_version: python3.6

View File

@ -11,10 +11,10 @@ Nice differ for assembly code. Currently supports MIPS, PPC, AArch64, and ARM32;
## Usage
Create a file `diff_settings.sh` in some directory (see the one in this repo for an example). Then from that directory, run
Create a file `diff_settings.py` in some directory (see the one in this repo for an example). Then from that directory, run
```bash
/path/to/diff.sh [flags] (function|rom addr)
/path/to/diff.py [flags] (function|rom addr)
```
Recommended flags are `-mwo` (automatically run `make` on source file changes, and include symbols in diff). See `--help` for more details.

View File

@ -109,6 +109,15 @@ if __name__ == "__main__":
help="""Diff .o files rather than a whole binary. This makes it possible to
see symbol names. (Recommended)""",
)
parser.add_argument(
"-f",
"--objfile",
dest="objfile",
type=str,
help="""File path for an object file being diffed. When used
the map file isn't searched for the function given. Useful for dynamically
linked libraries.""",
)
parser.add_argument(
"-e",
"--elf",
@ -132,6 +141,14 @@ if __name__ == "__main__":
help="""Tweak --source handling to make it work with binutils < 2.33.
Implies --source.""",
)
parser.add_argument(
"-j",
"--section",
dest="diff_section",
default=".text",
metavar="SECTION",
help="Diff restricted to a given output section.",
)
parser.add_argument(
"-L",
"--line-numbers",
@ -356,6 +373,7 @@ except ModuleNotFoundError as e:
class ProjectSettings:
arch_str: str
objdump_executable: str
objdump_flags: List[str]
build_command: List[str]
map_format: str
mw_build_dir: str
@ -365,6 +383,7 @@ class ProjectSettings:
source_directories: Optional[List[str]]
source_extensions: List[str]
show_line_numbers_default: bool
disassemble_all: bool
@dataclass
@ -379,8 +398,10 @@ class Config:
# Build/objdump options
diff_obj: bool
objfile: Optional[str]
make: bool
source_old_binutils: bool
diff_section: str
inlines: bool
max_function_size_lines: int
max_function_size_bytes: int
@ -422,9 +443,11 @@ def create_project_settings(settings: Dict[str, Any]) -> ProjectSettings:
"source_extensions", [".c", ".h", ".cpp", ".hpp", ".s"]
),
objdump_executable=get_objdump_executable(settings.get("objdump_executable")),
objdump_flags=settings.get("objdump_flags", []),
map_format=settings.get("map_format", "gnu"),
mw_build_dir=settings.get("mw_build_dir", "build/"),
show_line_numbers_default=settings.get("show_line_numbers_default", True),
disassemble_all=settings.get("disassemble_all", False),
)
@ -461,8 +484,10 @@ def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config:
arch=arch,
# Build/objdump options
diff_obj=args.diff_obj,
objfile=args.objfile,
make=args.make,
source_old_binutils=args.source_old_binutils,
diff_section=args.diff_section,
inlines=args.inlines,
max_function_size_lines=args.max_lines,
max_function_size_bytes=args.max_lines * 4,
@ -518,7 +543,7 @@ def get_arch(arch_str: str) -> "ArchSettings":
raise ValueError(f"Unknown architecture: {arch_str}")
BUFFER_CMD: List[str] = ["tail", "-c", str(10 ** 9)]
BUFFER_CMD: List[str] = ["tail", "-c", str(10**9)]
# -S truncates long lines instead of wrapping them
# -R interprets color escape sequences
@ -809,12 +834,7 @@ class JsonFormatter(Formatter):
return {"text": s, "format": f.name.lower()}
elif isinstance(f, RotationFormat):
attrs = asdict(f)
attrs.update(
{
"text": s,
"format": "rotation",
}
)
attrs.update({"text": s, "format": "rotation"})
return attrs
else:
static_assert_unreachable(f)
@ -962,7 +982,7 @@ def restrict_to_function(dump: str, fn_name: str) -> str:
return ""
def serialize_data_references(references: List[Tuple[int, int, str]]) -> str:
def serialize_rodata_references(references: List[Tuple[int, int, str]]) -> str:
return "".join(
f"DATAREF {text_offset} {from_offset} {from_section}\n"
for (text_offset, from_offset, from_section) in references
@ -991,7 +1011,11 @@ def run_objdump(cmd: ObjdumpCommand, config: Config, project: ProjectSettings) -
flags, target, restrict = cmd
try:
out = subprocess.run(
[project.objdump_executable] + config.arch.arch_flags + flags + [target],
[project.objdump_executable]
+ config.arch.arch_flags
+ project.objdump_flags
+ flags
+ [target],
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@ -1009,11 +1033,11 @@ def run_objdump(cmd: ObjdumpCommand, config: Config, project: ProjectSettings) -
with open(target, "rb") as f:
obj_data = f.read()
return preprocess_objdump_out(restrict, obj_data, out)
return preprocess_objdump_out(restrict, obj_data, out, config)
def preprocess_objdump_out(
restrict: Optional[str], obj_data: Optional[bytes], objdump_out: str
restrict: Optional[str], obj_data: Optional[bytes], objdump_out: str, config: Config
) -> str:
"""
Preprocess the output of objdump into a format that `process()` expects.
@ -1033,13 +1057,16 @@ def preprocess_objdump_out(
out = out.rstrip("\n")
if obj_data:
out = serialize_data_references(parse_elf_data_references(obj_data)) + out
out = (
serialize_rodata_references(parse_elf_rodata_references(obj_data, config))
+ out
)
return out
def search_map_file(
fn_name: str, project: ProjectSettings
fn_name: str, project: ProjectSettings, config: Config
) -> Tuple[Optional[str], Optional[int]]:
if not project.mapfile:
fail(f"No map file configured; cannot find function {fn_name}.")
@ -1059,7 +1086,7 @@ def search_map_file(
cands = []
last_line = ""
for line in lines:
if line.startswith(" .text"):
if line.startswith(" " + config.diff_section):
cur_objfile = line.split()[3]
if "load address" in line:
tokens = last_line.split() + line.split()
@ -1084,9 +1111,12 @@ def search_map_file(
re.compile(
# ram elf rom
r" \S+ \S+ (\S+) (\S+) . "
+ fn_name
# object name
+ r"(?: \(entry of \.(?:init|text)\))? \t(\S+)"
+ re.escape(fn_name)
+ r"(?: \(entry of "
+ re.escape(config.diff_section)
+ r"\))? \t"
# object name
+ "(\S+)"
),
contents,
)
@ -1121,7 +1151,9 @@ def search_map_file(
return None, None
def parse_elf_data_references(data: bytes) -> List[Tuple[int, int, str]]:
def parse_elf_rodata_references(
data: bytes, config: Config
) -> List[Tuple[int, int, str]]:
e_ident = data[:16]
if e_ident[:4] != b"\x7FELF":
return []
@ -1134,7 +1166,6 @@ def parse_elf_data_references(data: bytes) -> List[Tuple[int, int, str]]:
is_little_endian = e_ident[5] == 1
str_end = "<" if is_little_endian else ">"
str_off = "I" if is_32bit else "Q"
sym_size = {"B": 1, "H": 2, "I": 4, "Q": 8}
def read(spec: str, offset: int) -> Tuple[int, ...]:
spec = spec.replace("P", str_off)
@ -1186,7 +1217,12 @@ def parse_elf_data_references(data: bytes) -> List[Tuple[int, int, str]]:
assert len(symtab_sections) == 1
symtab = sections[symtab_sections[0]]
text_sections = [i for i in range(e_shnum) if sec_names[i] == b".text" and sections[i].sh_size != 0]
section_name = config.diff_section.encode("utf-8")
text_sections = [
i
for i in range(e_shnum)
if sec_names[i] == section_name and sections[i].sh_size != 0
]
if len(text_sections) != 1:
return []
text_section = text_sections[0]
@ -1195,11 +1231,10 @@ def parse_elf_data_references(data: bytes) -> List[Tuple[int, int, str]]:
for s in sections:
if s.sh_type == SHT_REL or s.sh_type == SHT_RELA:
if s.sh_info == text_section:
# Skip .text -> .text references
# Skip section_name -> section_name references
continue
sec_name = sec_names[s.sh_info].decode("latin1")
if sec_name == ".mwcats.text":
# Skip Metrowerks CATS Utility section
if sec_name != ".rodata":
continue
sec_base = sections[s.sh_info].sh_offset
for i in range(0, s.sh_size, s.sh_entsize):
@ -1257,11 +1292,16 @@ def dump_elf(
f"--stop-address={end_addr}",
]
if project.disassemble_all:
disassemble_flag = "-D"
else:
disassemble_flag = "-d"
flags2 = [
f"--disassemble={diff_elf_symbol}",
]
objdump_flags = ["-drz", "-j", ".text"]
objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section]
return (
project.myimg,
(objdump_flags + flags1, project.baseimg, None),
@ -1283,7 +1323,10 @@ def dump_objfile(
if start.startswith("0"):
fail("numerical start address not supported with -o; pass a function name")
objfile, _ = search_map_file(start, project)
objfile = config.objfile
if not objfile:
objfile, _ = search_map_file(start, project, config)
if not objfile:
fail("Not able to find .o file for function.")
@ -1297,7 +1340,12 @@ def dump_objfile(
if not os.path.isfile(refobjfile):
fail(f'Please ensure an OK .o file exists at "{refobjfile}".')
objdump_flags = ["-drz", "-j", ".text"]
if project.disassemble_all:
disassemble_flag = "-D"
else:
disassemble_flag = "-d"
objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section]
return (
objfile,
(objdump_flags, refobjfile, start),
@ -1314,7 +1362,7 @@ def dump_binary(
run_make(project.myimg, project)
start_addr = maybe_eval_int(start)
if start_addr is None:
_, start_addr = search_map_file(start, project)
_, start_addr = search_map_file(start, project, config)
if start_addr is None:
fail("Not able to find function in map file.")
if end is not None:
@ -1333,6 +1381,7 @@ def dump_binary(
(objdump_flags + flags2, project.myimg, None),
)
# Example: "ldr r4, [pc, #56] ; (4c <AddCoins+0x4c>)"
ARM32_LOAD_POOL_PATTERN = r"(ldr\s+r([0-9]|1[0-3]),\s+\[pc,.*;\s*)(\([a-fA-F0-9]+.*\))"
@ -1363,7 +1412,7 @@ class AsmProcessor:
class AsmProcessorMIPS(AsmProcessor):
def process_reloc(self, row: str, prev: str) -> str:
arch = self.config.arch
if "R_MIPS_NONE" in row:
if "R_MIPS_NONE" in row or "R_MIPS_JALR" in row:
# GNU as emits no-op relocations immediately after real ones when
# assembling with -mabi=64. Return without trying to parse 'imm' as an
# integer.
@ -1378,12 +1427,9 @@ class AsmProcessorMIPS(AsmProcessor):
# TODO: handle unambiguous cases where all addends for a symbol are the
# same, or show "+???".
mnemonic = prev.split()[0]
if (
mnemonic in arch.instructions_with_address_immediates
and not imm.startswith("0x")
):
imm = "0x" + imm
repl += "+" + imm if int(imm, 0) > 0 else imm
if mnemonic in arch.instructions_with_address_immediates:
imm = hex(int(imm, 16))
repl += ("" if imm.startswith("-") else "+") + imm
if "R_MIPS_LO16" in row:
repl = f"%lo({repl})"
elif "R_MIPS_HI16" in row:
@ -1398,6 +1444,12 @@ class AsmProcessorMIPS(AsmProcessor):
# Branch to glabel. This gives confusing output, but there's not much
# we can do here.
pass
elif "R_MIPS_GPREL16" in row:
repl = f"%gp_rel({repl})"
elif "R_MIPS_GOT16" in row:
repl = f"%got({repl})"
elif "R_MIPS_CALL16" in row:
repl = f"%call16({repl})"
else:
assert False, f"unknown relocation type '{row}' for line '{prev}'"
return before + repl + after
@ -1544,6 +1596,7 @@ class ArchSettings:
big_endian: Optional[bool] = True
delay_slot_instructions: Set[str] = field(default_factory=set)
MIPS_BRANCH_LIKELY_INSTRUCTIONS = {
"beql",
"bnel",
@ -1651,10 +1704,13 @@ PPC_BRANCH_INSTRUCTIONS = {
MIPS_SETTINGS = ArchSettings(
name="mips",
re_int=re.compile(r"[0-9]+"),
re_comment=re.compile(r"<.*?>"),
re_reg=re.compile(
r"\$?\b(a[0-7]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|kt?[01]|fp|ra|zero)\b"
),
re_comment=re.compile(r"<.*>"),
# Includes:
# - General purpose registers v0..1, a0..7, t0..9, s0..8, zero, at, fp, k0..1/kt0..1
# - Float registers f0..31, or fv0..1, fa0..7, ft0..15, fs0..8 plus odd complements
# (actually used number depends on ABI)
# sp, gp should not be in this list
re_reg=re.compile(r"\$?\b([astv][0-9]|at|f[astv]?[0-9]+f?|kt?[01]|fp|ra|zero)\b"),
re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"),
re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)"),
@ -1672,7 +1728,7 @@ MIPSEL_SETTINGS = replace(MIPS_SETTINGS, name="mipsel", big_endian=False)
ARM32_SETTINGS = ArchSettings(
name="arm32",
re_int=re.compile(r"[0-9]+"),
re_comment=re.compile(r"(<.*?>|//.*$)"),
re_comment=re.compile(r"(<.*>|//.*$)"),
# Includes:
# - General purpose registers: r0..13
# - Frame pointer registers: lr (r14), pc (r15)
@ -1690,26 +1746,33 @@ ARM32_SETTINGS = ArchSettings(
proc=AsmProcessorARM32,
)
ARMEL_SETTINGS = replace(ARM32_SETTINGS, name="armel", big_endian=False)
AARCH64_SETTINGS = ArchSettings(
name="aarch64",
re_int=re.compile(r"[0-9]+"),
re_comment=re.compile(r"(<.*?>|//.*$)"),
# GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
re_comment=re.compile(r"(<.*>|//.*$)"),
# GPRs and FP registers: X0-X30, W0-W30, [BHSDVQ]0..31
# (FP registers may be followed by data width and number of elements, e.g. V0.4S)
# The zero registers and SP should not be in this list.
re_reg=re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b"),
re_reg=re.compile(
r"\$?\b([bhsdvq]([12]?[0-9]|3[01])(\.\d\d?[bhsdvq])?|[xw][12]?[0-9]|[xw]30)\b"
),
re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"),
re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
re_imm=re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b"),
re_reloc=re.compile(r"R_AARCH64_"),
branch_instructions=AARCH64_BRANCH_INSTRUCTIONS,
instructions_with_address_immediates=AARCH64_BRANCH_INSTRUCTIONS.union({"bl", "adrp"}),
instructions_with_address_immediates=AARCH64_BRANCH_INSTRUCTIONS.union(
{"bl", "adrp"}
),
proc=AsmProcessorAArch64,
)
PPC_SETTINGS = ArchSettings(
name="ppc",
re_int=re.compile(r"[0-9]+"),
re_comment=re.compile(r"(<.*?>|//.*$)"),
re_comment=re.compile(r"(<.*>|//.*$)"),
re_reg=re.compile(r"\$?\b([rf][0-9]+)\b"),
re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"),
re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
@ -1724,6 +1787,7 @@ ARCH_SETTINGS = [
MIPS_SETTINGS,
MIPSEL_SETTINGS,
ARM32_SETTINGS,
ARMEL_SETTINGS,
AARCH64_SETTINGS,
PPC_SETTINGS,
]
@ -1743,6 +1807,7 @@ def hexify_int(row: str, pat: Match[str], arch: ArchSettings) -> str:
def parse_relocated_line(line: str) -> Tuple[str, str, str]:
# Pick out the last argument
for c in ",\t ":
if c in line:
ind2 = line.rindex(c)
@ -1751,6 +1816,7 @@ def parse_relocated_line(line: str) -> Tuple[str, str, str]:
raise Exception(f"failed to parse relocated line: {line}")
before = line[: ind2 + 1]
after = line[ind2 + 1 :]
# Move an optional ($reg) part of it to 'after'
ind2 = after.find("(")
if ind2 == -1:
imm, after = after, ""
@ -1879,8 +1945,15 @@ def process(dump: str, config: Config) -> List[Line]:
row_parts = [part.lstrip() for part in row.split(" ", 1)]
mnemonic = row_parts[0].strip()
if mnemonic not in arch.instructions_with_address_immediates:
row = re.sub(arch.re_int, lambda m: hexify_int(row, m, arch), row)
addr = ""
if mnemonic in arch.instructions_with_address_immediates:
row, addr = split_off_address(row)
# objdump prefixes addresses with 0x/-0x if they don't resolve to some
# symbol + offset. Strip that.
addr = addr.replace("0x", "")
row = re.sub(arch.re_int, lambda m: hexify_int(row, m, arch), row)
row += addr
# Let 'original' be 'row' with relocations applied, while we continue
# transforming 'row' into a coarser version that ignores registers and
@ -1900,9 +1973,6 @@ def process(dump: str, config: Config) -> List[Line]:
scorable_line = normalized_original
if not config.score_stack_differences:
scorable_line = re.sub(arch.re_sprel, "addr(sp)", scorable_line)
if mnemonic in arch.branch_instructions:
# Replace the final argument with "<target>"
scorable_line = re.sub(r"[^, \t]+$", "<target>", scorable_line)
if skip_next:
skip_next = False
@ -1925,8 +1995,6 @@ def process(dump: str, config: Config) -> List[Line]:
branch_target = None
if mnemonic in arch.branch_instructions:
branch_target = int(row_parts[1].strip().split(",")[-1], 16)
if mnemonic in arch.branch_likely_instructions:
branch_target -= 4
output.append(
Line(
@ -1974,7 +2042,9 @@ def split_off_address(line: str) -> Tuple[str, str]:
parts = line.split(",")
if len(parts) < 2:
parts = line.split(None, 1)
off = len(line) - len(parts[-1])
if len(parts) < 2:
parts.append("")
off = len(line) - len(parts[-1].strip())
return line[:off], line[off:]
@ -1990,7 +2060,7 @@ def diff_sequences(
) -> List[Tuple[str, int, int, int, int]]:
if (
algorithm != "levenshtein"
or len(seq1) * len(seq2) > 4 * 10 ** 8
or len(seq1) * len(seq2) > 4 * 10**8
or len(seq1) + len(seq2) >= 0x110000
):
return diff_sequences_difflib(seq1, seq2)
@ -2177,10 +2247,15 @@ class Diff:
def trim_nops(lines: List[Line], arch: ArchSettings) -> List[Line]:
lines = lines[:]
while lines and lines[-1].mnemonic == "nop" and (len(lines) == 1 or lines[-2].mnemonic not in arch.delay_slot_instructions):
while (
lines
and lines[-1].mnemonic == "nop"
and (len(lines) == 1 or lines[-2].mnemonic not in arch.delay_slot_instructions)
):
lines.pop()
return lines
def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
if config.show_source:
import cxxfilt
@ -2212,7 +2287,6 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
lines2 = trim_nops(lines2, arch)
diffed_lines = diff_lines(lines1, lines2, config.algorithm)
score = score_diff_lines(diffed_lines, config)
max_score = len(lines1) * config.penalty_deletion
line_num_base = -1
@ -2281,10 +2355,15 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
line2_line = line_num_2to1[line2.line_num]
line2_target = (line2_line[0] + (target - line2.line_num), 0)
# Set the key for three-way diffing to a normalized version.
# Adjust the branch target for scoring and three-way diffing.
norm2, norm_branch2 = split_off_address(line2.normalized_original)
if norm_branch2 != "<ign>":
line2.normalized_original = norm2 + str(line2_target)
if norm_branch2 != "<ignore>":
retargetted = hex(line2_target[0]).replace("0x", "")
if line2_target[1] != 0:
retargetted += f"+{line2_target[1]}"
line2.normalized_original = norm2 + retargetted
sc_base, _ = split_off_address(line2.scorable_line)
line2.scorable_line = sc_base + retargetted
same_target = line2_target == (line1.branch_target, 0)
else:
# Do a naive comparison for non-branches (e.g. function calls).
@ -2444,6 +2523,7 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
)
)
score = score_diff_lines(diffed_lines, config)
output = output[config.skip_lines :]
return Diff(lines=output, score=score, max_score=max_score)

View File

@ -783,6 +783,13 @@ def parse_source(f, opt, framepointer, mips1, input_enc, output_enc, out_depende
else:
min_instr_count = 2
skip_instr_count = 1
elif opt == 'O0':
if framepointer:
min_instr_count = 8
skip_instr_count = 8
else:
min_instr_count = 4
skip_instr_count = 4
elif opt == 'g':
if framepointer:
min_instr_count = 7
@ -792,7 +799,7 @@ def parse_source(f, opt, framepointer, mips1, input_enc, output_enc, out_depende
skip_instr_count = 4
else:
if opt != 'g3':
raise Failure("must pass one of -g, -O1, -O2, -O2 -g3")
raise Failure("must pass one of -g, -O0, -O1, -O2, -O2 -g3")
if framepointer:
min_instr_count = 4
skip_instr_count = 4
@ -813,6 +820,7 @@ def parse_source(f, opt, framepointer, mips1, input_enc, output_enc, out_depende
]
is_cutscene_data = False
is_early_include = False
for line_no, raw_line in enumerate(f, 1):
raw_line = raw_line.rstrip()
@ -832,44 +840,51 @@ def parse_source(f, opt, framepointer, mips1, input_enc, output_enc, out_depende
global_asm = None
else:
global_asm.process_line(raw_line, output_enc)
elif line in ['GLOBAL_ASM(', '#pragma GLOBAL_ASM(']:
global_asm = GlobalAsmBlock("GLOBAL_ASM block at line " + str(line_no))
start_index = len(output_lines)
elif ((line.startswith('GLOBAL_ASM("') or line.startswith('#pragma GLOBAL_ASM("'))
and line.endswith('")')):
fname = line[line.index('(') + 2 : -2]
out_dependencies.append(fname)
global_asm = GlobalAsmBlock(fname)
with open(fname, encoding=input_enc) as f:
for line2 in f:
global_asm.process_line(line2.rstrip(), output_enc)
src, fn = global_asm.finish(state)
output_lines[-1] = ''.join(src)
asm_functions.append(fn)
global_asm = None
elif line == '#pragma asmproc recurse':
# C includes qualified as
# #pragma asmproc recurse
# #include "file.c"
# will be processed recursively when encountered
is_early_include = True
elif is_early_include:
# Previous line was a #pragma asmproc recurse
is_early_include = False
if not line.startswith("#include "):
raise Failure("#pragma asmproc recurse must be followed by an #include ")
fpath = os.path.dirname(f.name)
fname = os.path.join(fpath, line[line.index(' ') + 2 : -1])
out_dependencies.append(fname)
include_src = StringIO()
with open(fname, encoding=input_enc) as include_file:
parse_source(include_file, opt, framepointer, mips1, input_enc, output_enc, out_dependencies, include_src)
include_src.write('#line ' + str(line_no + 1) + ' "' + f.name + '"')
output_lines[-1] = include_src.getvalue()
include_src.close()
else:
if line in ['GLOBAL_ASM(', '#pragma GLOBAL_ASM(']:
global_asm = GlobalAsmBlock("GLOBAL_ASM block at line " + str(line_no))
start_index = len(output_lines)
elif ((line.startswith('GLOBAL_ASM("') or line.startswith('#pragma GLOBAL_ASM("'))
and line.endswith('")')):
fname = line[line.index('(') + 2 : -2]
out_dependencies.append(fname)
global_asm = GlobalAsmBlock(fname)
with open(fname, encoding=input_enc) as f:
for line2 in f:
global_asm.process_line(line2.rstrip(), output_enc)
src, fn = global_asm.finish(state)
output_lines[-1] = ''.join(src)
asm_functions.append(fn)
global_asm = None
elif line.startswith('#include "') and line.endswith('" EARLY'):
# C includes qualified with EARLY (i.e. #include "file.c" EARLY) will be
# processed recursively when encountered
fpath = os.path.dirname(f.name)
fname = os.path.join(fpath, line[line.index(' ') + 2 : -7])
out_dependencies.append(fname)
include_src = StringIO()
with open(fname, encoding=input_enc) as include_file:
parse_source(include_file, opt, framepointer, mips1, input_enc, output_enc, out_dependencies, include_src)
include_src.write('#line ' + str(line_no + 1) + ' "' + f.name + '"')
output_lines[-1] = include_src.getvalue()
include_src.close()
else:
# This is a hack to replace all floating-point numbers in an array of a particular type
# (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation
if cutscene_data_regexpr.search(line) is not None:
is_cutscene_data = True
elif line.endswith("};"):
is_cutscene_data = False
if is_cutscene_data:
raw_line = re.sub(float_regexpr, repl_float_hex, raw_line)
output_lines[-1] = raw_line
# This is a hack to replace all floating-point numbers in an array of a particular type
# (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation
if cutscene_data_regexpr.search(line) is not None:
is_cutscene_data = True
elif line.endswith("};"):
is_cutscene_data = False
if is_cutscene_data:
raw_line = re.sub(float_regexpr, repl_float_hex, raw_line)
output_lines[-1] = raw_line
if print_source:
if isinstance(print_source, StringIO):
@ -877,7 +892,14 @@ def parse_source(f, opt, framepointer, mips1, input_enc, output_enc, out_depende
print_source.write(line + '\n')
else:
for line in output_lines:
print_source.write(line.encode(output_enc) + b'\n')
try:
line_encoded = line.encode(output_enc)
except UnicodeEncodeError:
print("Failed to encode a line to", output_enc)
print("The line:", line)
print("The line, utf-8-encoded:", line.encode("utf-8"))
raise
print_source.write(line_encoded + b'\n')
print_source.flush()
if print_source != sys.stdout.buffer:
print_source.close()
@ -1239,6 +1261,7 @@ def run_wrapped(argv, outfile, functions):
parser.add_argument('-mips1', dest='mips1', action='store_true')
parser.add_argument('-g3', dest='g3', action='store_true')
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-O0', dest='opt', action='store_const', const='O0')
group.add_argument('-O1', dest='opt', action='store_const', const='O1')
group.add_argument('-O2', dest='opt', action='store_const', const='O2')
group.add_argument('-g', dest='opt', action='store_const', const='g')

View File

@ -10,30 +10,49 @@ dir_path = os.path.dirname(os.path.realpath(__file__))
prelude = os.path.join(dir_path, "prelude.inc")
all_args = sys.argv[1:]
sep1 = all_args.index('--')
sep2 = all_args.index('--', sep1+1)
sep0 = [index for index, arg in enumerate(all_args) if not arg.startswith("-")][0]
sep1 = all_args.index("--")
sep2 = all_args.index("--", sep1 + 1)
compiler = all_args[:sep1]
asmproc_flags = all_args[:sep0]
compiler = all_args[sep0:sep1]
assembler = all_args[sep1+1:sep2]
assembler_sh = ' '.join(shlex.quote(x) for x in assembler)
assembler = all_args[sep1 + 1 : sep2]
assembler_sh = " ".join(shlex.quote(x) for x in assembler)
compile_args = all_args[sep2+1:]
compile_args = all_args[sep2 + 1 :]
in_file = compile_args[-1]
out_ind = compile_args.index('-o')
out_ind = compile_args.index("-o")
out_file = compile_args[out_ind + 1]
del compile_args[-1]
del compile_args[out_ind + 1]
del compile_args[out_ind]
in_dir = os.path.split(os.path.realpath(in_file))[0]
opt_flags = [x for x in compile_args if x in ['-g3', '-g', '-O1', '-O2', '-framepointer']]
opt_flags = [
x for x in compile_args if x in ["-g3", "-g", "-O0", "-O1", "-O2", "-framepointer"]
]
if "-mips2" not in compile_args:
opt_flags.append("-mips1")
preprocessed_file = tempfile.NamedTemporaryFile(prefix='preprocessed', suffix='.c', delete=False)
asmproc_flags += opt_flags + [in_file]
# Drop .mdebug and .gptab sections from resulting binaries. This makes
# resulting .o files much smaller and speeds up builds, but loses line
# number debug data.
# asmproc_flags += ["--drop-mdebug-gptab"]
# Convert encoding before compiling.
# asmproc_flags += ["--input-enc", "utf-8", "--output-enc", "euc-jp"]
preprocessed_file = tempfile.NamedTemporaryFile(
prefix="preprocessed", suffix=".c", delete=False
)
try:
asmproc_flags = opt_flags + [in_file, '--input-enc', 'utf-8', '--output-enc', 'euc-jp']
compile_cmdline = compiler + compile_args + ['-I', in_dir, '-o', out_file, preprocessed_file.name]
compile_cmdline = (
compiler + compile_args + ["-I", in_dir, "-o", out_file, preprocessed_file.name]
)
functions, deps = asm_processor.run(asmproc_flags, outfile=preprocessed_file)
try:
@ -41,13 +60,24 @@ try:
except subprocess.CalledProcessError as e:
print("Failed to compile file " + in_file + ". Command line:")
print()
print(' '.join(shlex.quote(x) for x in compile_cmdline))
print(" ".join(shlex.quote(x) for x in compile_cmdline))
print()
sys.exit(55)
# To keep the preprocessed file:
# os._exit(1)
asm_processor.run(asmproc_flags + ['--post-process', out_file, '--assembler', assembler_sh, '--asm-prelude', prelude], functions=functions)
asm_processor.run(
asmproc_flags
+ [
"--post-process",
out_file,
"--assembler",
assembler_sh,
"--asm-prelude",
prelude,
],
functions=functions,
)
deps_file = out_file[:-2] + ".asmproc.d"
if deps:

View File

@ -1,5 +1,43 @@
.set noat
.set noreorder
.set gp=64
.include "macro.inc"
.macro glabel label
.global \label
\label:
.endm
# Float register aliases (o32 ABI, odd ones are rarely used)
# Each .set below binds one alias to one numbered FPR, covering $f0 through
# $f31 in ascending order. Aliases come in pairs: a base name for the
# even-numbered register and the same name with an "f" suffix for the
# odd-numbered register that immediately follows it.
# NOTE(review): the prefixes presumably follow the o32 register roles
# (fv = return value, ft = temporary, fa = argument, fs = callee-saved);
# confirm against the ABI documentation.
# $f0-$f3
.set $fv0, $f0
.set $fv0f, $f1
.set $fv1, $f2
.set $fv1f, $f3
# $f4-$f11
.set $ft0, $f4
.set $ft0f, $f5
.set $ft1, $f6
.set $ft1f, $f7
.set $ft2, $f8
.set $ft2f, $f9
.set $ft3, $f10
.set $ft3f, $f11
# $f12-$f15
.set $fa0, $f12
.set $fa0f, $f13
.set $fa1, $f14
.set $fa1f, $f15
# $f16-$f19 (continues the ft numbering from $ft3)
.set $ft4, $f16
.set $ft4f, $f17
.set $ft5, $f18
.set $ft5f, $f19
# $f20-$f31
.set $fs0, $f20
.set $fs0f, $f21
.set $fs1, $f22
.set $fs1f, $f23
.set $fs2, $f24
.set $fs2f, $f25
.set $fs3, $f26
.set $fs3f, $f27
.set $fs4, $f28
.set $fs4f, $f29
.set $fs5, $f30
.set $fs5f, $f31

View File

@ -2,34 +2,44 @@
import argparse, ast, math, os, re, struct
import bisect
from mips_isa import *
from multiprocessing import *
import multiprocessing
from pathlib import Path
import mips_isa
# Floating-point register naming schemes, keyed by scheme name.
# GPR naming is not selectable here: GPRs already use ABI names by default.
# Consider offering the same choice for them.
fpr_name_options = dict(
    numeric=mips_isa.numeric_fpr_names,
    o32=mips_isa.o32_fpr_names,
)
parser = argparse.ArgumentParser()
parser.add_argument(
"-j", dest="jobs", type=int, default=1, help="number of processes to run at once"
)
parser.add_argument(
"--full",
"-f",
dest="full",
"-a",
"--all",
dest="all",
action="store_true",
default=False,
help="Decompile all files regardless of whether they are used or not",
)
parser.add_argument(
"-files",
"-f",
"--files",
dest="files",
nargs="+",
required=False,
help="Optional list of files to diassemble separated by a space. This is a whitelist, all files will be skipped besides the ones listed here if used.",
)
parser.add_argument("--reg-names", choices=fpr_name_options.keys(), help="How to name registers in the output")
args = parser.parse_args()
jobs = args.jobs
mips_isa.mips_fpr_names = fpr_name_options.get(args.reg_names, mips_isa.mips_fpr_names)
ASM_OUT = "asm/"
DATA_OUT = "data/"
@ -122,71 +132,71 @@ def discard_decomped_files(files_spec, include_files):
MIPS_BRANCH_LIKELY_INSNS = [
MIPS_INS_BEQL,
MIPS_INS_BGEZALL,
MIPS_INS_BGEZL,
MIPS_INS_BGTZL,
MIPS_INS_BLEZL,
MIPS_INS_BLTZALL,
MIPS_INS_BLTZL,
MIPS_INS_BNEL,
MIPS_INS_BC1TL,
MIPS_INS_BC1FL,
mips_isa.MIPS_INS_BEQL,
mips_isa.MIPS_INS_BGEZALL,
mips_isa.MIPS_INS_BGEZL,
mips_isa.MIPS_INS_BGTZL,
mips_isa.MIPS_INS_BLEZL,
mips_isa.MIPS_INS_BLTZALL,
mips_isa.MIPS_INS_BLTZL,
mips_isa.MIPS_INS_BNEL,
mips_isa.MIPS_INS_BC1TL,
mips_isa.MIPS_INS_BC1FL,
]
MIPS_BRANCH_INSNS = [
*MIPS_BRANCH_LIKELY_INSNS,
MIPS_INS_BEQ,
MIPS_INS_BGEZ,
MIPS_INS_BGEZAL,
MIPS_INS_BGTZ,
MIPS_INS_BNE,
MIPS_INS_BLTZ,
MIPS_INS_BLTZAL,
MIPS_INS_BLEZ,
MIPS_INS_BC1T,
MIPS_INS_BC1F,
MIPS_INS_BEQZ,
MIPS_INS_BNEZ,
MIPS_INS_B,
mips_isa.MIPS_INS_BEQ,
mips_isa.MIPS_INS_BGEZ,
mips_isa.MIPS_INS_BGEZAL,
mips_isa.MIPS_INS_BGTZ,
mips_isa.MIPS_INS_BNE,
mips_isa.MIPS_INS_BLTZ,
mips_isa.MIPS_INS_BLTZAL,
mips_isa.MIPS_INS_BLEZ,
mips_isa.MIPS_INS_BC1T,
mips_isa.MIPS_INS_BC1F,
mips_isa.MIPS_INS_BEQZ,
mips_isa.MIPS_INS_BNEZ,
mips_isa.MIPS_INS_B,
]
MIPS_JUMP_INSNS = [MIPS_INS_JAL, MIPS_INS_JALR, MIPS_INS_J, MIPS_INS_JR]
MIPS_JUMP_INSNS = [mips_isa.MIPS_INS_JAL, mips_isa.MIPS_INS_JALR, mips_isa.MIPS_INS_J, mips_isa.MIPS_INS_JR]
MIPS_FP_LOAD_INSNS = [MIPS_INS_LWC1, MIPS_INS_LDC1]
MIPS_FP_LOAD_INSNS = [mips_isa.MIPS_INS_LWC1, mips_isa.MIPS_INS_LDC1]
MIPS_FP_STORE_INSNS = [MIPS_INS_SWC1, MIPS_INS_SDC1]
MIPS_FP_STORE_INSNS = [mips_isa.MIPS_INS_SWC1, mips_isa.MIPS_INS_SDC1]
MIPS_FP_LOAD_STORE_INSNS = [*MIPS_FP_LOAD_INSNS, *MIPS_FP_STORE_INSNS]
MIPS_STORE_INSNS = [
MIPS_INS_SB,
MIPS_INS_SH,
MIPS_INS_SW,
MIPS_INS_SWL,
MIPS_INS_SWR,
MIPS_INS_SD,
MIPS_INS_SDL,
MIPS_INS_SDR,
MIPS_INS_SC,
MIPS_INS_SCD,
mips_isa.MIPS_INS_SB,
mips_isa.MIPS_INS_SH,
mips_isa.MIPS_INS_SW,
mips_isa.MIPS_INS_SWL,
mips_isa.MIPS_INS_SWR,
mips_isa.MIPS_INS_SD,
mips_isa.MIPS_INS_SDL,
mips_isa.MIPS_INS_SDR,
mips_isa.MIPS_INS_SC,
mips_isa.MIPS_INS_SCD,
*MIPS_FP_STORE_INSNS,
]
MIPS_LOAD_STORE_INSNS = [
MIPS_INS_LB,
MIPS_INS_LBU,
MIPS_INS_LH,
MIPS_INS_LHU,
MIPS_INS_LW,
MIPS_INS_LWL,
MIPS_INS_LWR,
MIPS_INS_LWU,
MIPS_INS_LD,
MIPS_INS_LDL,
MIPS_INS_LDR,
MIPS_INS_LL,
MIPS_INS_LLD,
mips_isa.MIPS_INS_LB,
mips_isa.MIPS_INS_LBU,
mips_isa.MIPS_INS_LH,
mips_isa.MIPS_INS_LHU,
mips_isa.MIPS_INS_LW,
mips_isa.MIPS_INS_LWL,
mips_isa.MIPS_INS_LWR,
mips_isa.MIPS_INS_LWU,
mips_isa.MIPS_INS_LD,
mips_isa.MIPS_INS_LDL,
mips_isa.MIPS_INS_LDR,
mips_isa.MIPS_INS_LL,
mips_isa.MIPS_INS_LLD,
*MIPS_STORE_INSNS,
*MIPS_FP_LOAD_STORE_INSNS,
]
@ -615,7 +625,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
# assert insn.value_forname(insn.fields[0]) is not None
# print(f"insn: {insn.mnemonic}, rt: {insn.rt}, first: {insn.value_forname(insn.fields[0])}")
# assert insn.id not in [MIPS_INS_ORI, MIPS_INS_ADDIU, *MIPS_LOAD_STORE_INSNS] or insn.rt == insn.value_forname(insn.fields[0])
# assert insn.id not in [MIPS_INS_ORI, mips_isa.MIPS_INS_ADDIU, *MIPS_LOAD_STORE_INSNS] or insn.rt == insn.value_forname(insn.fields[0])
if (
delay_slot
@ -636,9 +646,9 @@ def find_symbols_in_text(section, rodata_section, data_regions):
insns = []
for i, raw_insn in enumerate(raw_insns, 0):
insn = decode_insn(raw_insn, vram + i * 4)
insn = mips_isa.decode_insn(raw_insn, vram + i * 4)
if insn.id == MIPS_INS_JR and insn.rs != MIPS_REG_RA:
if insn.id == mips_isa.MIPS_INS_JR and insn.rs != mips_isa.MIPS_REG_RA:
# It's hard to find when two jump tables next to each other end, so do a naive first pass
# to try and find as many jump tables as possible.
# Luckily IDO has a very homogeneous output for jump tables for which it is very unlikely
@ -651,9 +661,9 @@ def find_symbols_in_text(section, rodata_section, data_regions):
insn_m2 = insns[-2]
insn_m3 = insns[-3]
if (
insn_m1.id == MIPS_INS_LW
and insn_m2.id == MIPS_INS_ADDU
and insn_m3.id == MIPS_INS_LUI
insn_m1.id == mips_isa.MIPS_INS_LW
and insn_m2.id == mips_isa.MIPS_INS_ADDU
and insn_m3.id == mips_isa.MIPS_INS_LUI
):
prospective_jtbls.add((insn_m3.imm << 0x10) + insn_m1.imm)
insns.append(insn)
@ -674,7 +684,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
Relocated jump targets give us functions in this section
"""
assert (
insn.id == MIPS_INS_JAL
insn.id == mips_isa.MIPS_INS_JAL
), f"R_MIPS_26 applied to {insn.mnemonic} when it should be JAL"
put_symbol(symbols_dict, "functions", insn.target)
elif reloc[1] == 5: # R_MIPS_HI16
@ -682,7 +692,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
Relocated %hi gives us %hi values to match with associated %lo
"""
assert (
insn.id == MIPS_INS_LUI
insn.id == mips_isa.MIPS_INS_LUI
), f"R_MIPS_HI16 applied to {insn.mnemonic} when it should be LUI"
prev_hi = insn.imm
hi_vram = vram + reloc[2]
@ -691,7 +701,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
Relocated %lo + a %hi to match with gives us relocated symbols in data sections
"""
assert (
insn.id == MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS
insn.id == mips_isa.MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS
), f"R_MIPS_HI16 applied to {insn.mnemonic} when it should be ADDIU or a load/store"
symbol_value = (prev_hi << 0x10) + insn.imm
put_symbols(symbols_dict, "symbols", {hi_vram: symbol_value})
@ -757,16 +767,16 @@ def find_symbols_in_text(section, rodata_section, data_regions):
if insn.id in MIPS_BRANCH_INSNS:
func_branch_labels.add(insn.offset)
delayed_insn = insn
elif insn.id == MIPS_INS_ERET:
elif insn.id == mips_isa.MIPS_INS_ERET:
put_symbol(symbols_dict, "functions", vaddr + 4)
elif insn.id in MIPS_JUMP_INSNS:
if insn.id == MIPS_INS_JAL:
if insn.id == mips_isa.MIPS_INS_JAL:
# mark function at target
put_symbol(symbols_dict, "functions", insn.target)
elif insn.id == MIPS_INS_J:
elif insn.id == mips_isa.MIPS_INS_J:
# mark label at target
func_branch_labels.add(insn.target)
elif insn.id == MIPS_INS_JR:
elif insn.id == mips_isa.MIPS_INS_JR:
# check if anything branches past it in either branch or jtbl labels
if vaddr >= max(func_branch_labels, default=0) and vaddr >= max(
func_jtbl_labels, default=0
@ -792,7 +802,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
symbols_dict, "functions", vaddr + 8 + n_padding * 4
)
delayed_insn = insn
elif insn.id == MIPS_INS_LUI:
elif insn.id == mips_isa.MIPS_INS_LUI:
"""
Process LUI instruction
@ -809,34 +819,34 @@ def find_symbols_in_text(section, rodata_section, data_regions):
save_tracker(delayed_insn.offset, {insn.rt: (vaddr, insn.imm)})
else:
lui_tracker.update({insn.rt: (vaddr, insn.imm)})
elif insn.id == MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
elif insn.id == mips_isa.MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
# try match with tracked lui and mark symbol
hi_vram, imm_value = lui_tracker.get(
insn.rs if insn.id == MIPS_INS_ADDIU else insn.base, (None, None)
insn.rs if insn.id == mips_isa.MIPS_INS_ADDIU else insn.base, (None, None)
)
# if a match was found, validate and record the symbol, TODO improve validation
if hi_vram != None and (
(((imm_value >> 0x8) & 0xF) != 0 and imm_value < 0x1000)
or (imm_value >= 0x8000 and imm_value < 0x80D0)
or (imm_value >= 0xA400 and imm_value < 0xA480)
or (imm_value < 0x0400 and insn.id == MIPS_INS_ADDIU)
or (imm_value < 0x0400 and insn.id == mips_isa.MIPS_INS_ADDIU)
):
lo_vram = vaddr
symbol_value = (imm_value << 0x10) + insn.imm
put_symbols(symbols_dict, "symbols", {hi_vram: symbol_value})
put_symbols(symbols_dict, "symbols", {lo_vram: symbol_value})
if insn.id == MIPS_INS_LW:
if insn.id == mips_isa.MIPS_INS_LW:
# try to find jr within the same block
cur_idx = i
lookahead_insn = insns[cur_idx] # TODO fix vaddr here
# still in the same block unless one of these instructions is found
while lookahead_insn.id not in [
*MIPS_BRANCH_INSNS,
MIPS_INS_JAL,
MIPS_INS_JALR,
MIPS_INS_J,
mips_isa.MIPS_INS_JAL,
mips_isa.MIPS_INS_JALR,
mips_isa.MIPS_INS_J,
]:
if lookahead_insn.id == MIPS_INS_JR:
if lookahead_insn.id == mips_isa.MIPS_INS_JR:
if lookahead_insn.rs == (
insn.ft
if insn.id in MIPS_FP_LOAD_STORE_INSNS
@ -884,24 +894,24 @@ def find_symbols_in_text(section, rodata_section, data_regions):
if cur_idx >= len(raw_insns):
break
lookahead_insn = insns[cur_idx] # TODO fix vaddr here
elif insn.id == MIPS_INS_LD: # doubleword loads
elif insn.id == mips_isa.MIPS_INS_LD: # doubleword loads
put_symbol(symbols_dict, "dwords", symbol_value)
elif insn.id in [MIPS_INS_LWC1, MIPS_INS_SWC1]: # float load/stores
elif insn.id in [mips_isa.MIPS_INS_LWC1, mips_isa.MIPS_INS_SWC1]: # float load/stores
# add float
put_symbol(symbols_dict, "floats", symbol_value)
elif insn.id in [MIPS_INS_LDC1, MIPS_INS_SDC1]: # double load/stores
elif insn.id in [mips_isa.MIPS_INS_LDC1, mips_isa.MIPS_INS_SDC1]: # double load/stores
# add double
put_symbol(symbols_dict, "doubles", symbol_value)
elif (
insn.id == MIPS_INS_ADDIU and vaddr % 4 == 0
insn.id == mips_isa.MIPS_INS_ADDIU and vaddr % 4 == 0
): # strings seem to only ever be 4-byte aligned
# add possible string
put_symbol(symbols_dict, "prospective_strings", symbol_value)
# clear lui tracking state if register is clobbered by the addiu/load instruction itself
# insn.rt == (insn.rs if insn.id == MIPS_INS_ADDIU else insn.base) and
# insn.rt == (insn.rs if insn.id == mips_isa.MIPS_INS_ADDIU else insn.base) and
if insn.id not in MIPS_STORE_INSNS and insn.id not in MIPS_FP_LOAD_INSNS:
clobber_conditionally(insn)
elif insn.id == MIPS_INS_ORI:
elif insn.id == mips_isa.MIPS_INS_ORI:
# try match with tracked lui and mark constant
hi_vram, imm_value = lui_tracker.get(insn.rs, (None, None))
if hi_vram != None: # found match
@ -916,7 +926,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
else:
# clear lui tracking if register is clobbered by something unrelated
if (
insn.id == MIPS_INS_ADDU
insn.id == mips_isa.MIPS_INS_ADDU
and insn.rs in lui_tracker.keys()
and (insn.rd == insn.rs)
):
@ -929,35 +939,35 @@ def find_symbols_in_text(section, rodata_section, data_regions):
pass
# insns listed either write to fprs/cop0 or don't write to any
elif insn.id not in [
MIPS_INS_MTC0,
MIPS_INS_MTC1,
MIPS_INS_DMTC1,
MIPS_INS_MULT,
MIPS_INS_MULTU,
MIPS_INS_DMULT,
MIPS_INS_DMULTU,
MIPS_INS_DIV,
MIPS_INS_DIVU,
MIPS_INS_DDIV,
MIPS_INS_DDIVU,
MIPS_INS_MTHI,
MIPS_INS_MTLO,
MIPS_INS_CTC1,
MIPS_INS_NOP,
MIPS_INS_BREAK,
MIPS_INS_TLBP,
MIPS_INS_TLBR,
MIPS_INS_TLBWI,
MIPS_INS_MOV_S,
MIPS_INS_MOV_D,
MIPS_INS_C_LT_S,
MIPS_INS_C_LT_D,
MIPS_INS_DIV_S,
MIPS_INS_MUL_S,
MIPS_INS_TRUNC_W_S,
MIPS_INS_CVT_S_W,
MIPS_INS_SUB_S,
MIPS_INS_ADD_S,
mips_isa.MIPS_INS_MTC0,
mips_isa.MIPS_INS_MTC1,
mips_isa.MIPS_INS_DMTC1,
mips_isa.MIPS_INS_MULT,
mips_isa.MIPS_INS_MULTU,
mips_isa.MIPS_INS_DMULT,
mips_isa.MIPS_INS_DMULTU,
mips_isa.MIPS_INS_DIV,
mips_isa.MIPS_INS_DIVU,
mips_isa.MIPS_INS_DDIV,
mips_isa.MIPS_INS_DDIVU,
mips_isa.MIPS_INS_MTHI,
mips_isa.MIPS_INS_MTLO,
mips_isa.MIPS_INS_CTC1,
mips_isa.MIPS_INS_NOP,
mips_isa.MIPS_INS_BREAK,
mips_isa.MIPS_INS_TLBP,
mips_isa.MIPS_INS_TLBR,
mips_isa.MIPS_INS_TLBWI,
mips_isa.MIPS_INS_MOV_S,
mips_isa.MIPS_INS_MOV_D,
mips_isa.MIPS_INS_C_LT_S,
mips_isa.MIPS_INS_C_LT_D,
mips_isa.MIPS_INS_DIV_S,
mips_isa.MIPS_INS_MUL_S,
mips_isa.MIPS_INS_TRUNC_W_S,
mips_isa.MIPS_INS_CVT_S_W,
mips_isa.MIPS_INS_SUB_S,
mips_isa.MIPS_INS_ADD_S,
]:
clobber_conditionally(insn)
@ -1000,18 +1010,18 @@ def find_symbols_in_text(section, rodata_section, data_regions):
part += ", "
op_str.append(part)
elif insn.id == MIPS_INS_LUI:
elif insn.id == mips_isa.MIPS_INS_LUI:
op_str = [op_str]
instr["rt"] = insn.rt
elif insn.id == MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
elif insn.id == mips_isa.MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
op_str = [op_str]
instr["rt"] = insn.rt
instr["rs"] = insn.rs
instr["ft"] = insn.ft
instr["base"] = insn.base
elif insn.id == MIPS_INS_ORI:
elif insn.id == mips_isa.MIPS_INS_ORI:
op_str = [op_str]
instr["rt"] = insn.rt
instr["rs"] = insn.rs
@ -1031,13 +1041,13 @@ def find_symbols_in_text(section, rodata_section, data_regions):
if delay_slot and delayed_insn is not None:
if (
delayed_insn.id == MIPS_INS_JAL
or delayed_insn.id == MIPS_INS_JALR
or (delayed_insn.id == MIPS_INS_JR and delayed_insn.rs == MIPS_REG_RA)
delayed_insn.id == mips_isa.MIPS_INS_JAL
or delayed_insn.id == mips_isa.MIPS_INS_JALR
or (delayed_insn.id == mips_isa.MIPS_INS_JR and delayed_insn.rs == mips_isa.MIPS_REG_RA)
):
# destroy lui tracking state
lui_tracker.clear()
elif delayed_insn.id == MIPS_INS_JR and vaddr == next_jtbl_jr + 4:
elif delayed_insn.id == mips_isa.MIPS_INS_JR and vaddr == next_jtbl_jr + 4:
# save lui tracking state for each jtbl label
for label in individual_jtbl_labels[next_jtbl_jr]:
save_tracker(label, lui_tracker.copy())
@ -1046,7 +1056,7 @@ def find_symbols_in_text(section, rodata_section, data_regions):
# save lui tracking state
save_tracker(delayed_insn.offset, lui_tracker.copy())
# destroy current lui tracking state for unconditional branches
if delayed_insn.id == MIPS_INS_B:
if delayed_insn.id == mips_isa.MIPS_INS_B:
lui_tracker.clear()
delayed_insn = None
@ -1265,35 +1275,35 @@ def fixup_text_symbols(data, vram, data_regions, info):
else:
line += op_part
elif insn["id"] == MIPS_INS_LUI:
elif insn["id"] == mips_isa.MIPS_INS_LUI:
symbol_value = symbols.get(vaddr, None)
if symbol_value is not None:
line += (
f"{mips_gpr_names[insn['rt']]}, %hi({proper_name(symbol_value)})"
f"{ mips_isa.mips_gpr_names[insn['rt']]}, %hi({proper_name(symbol_value)})"
)
else:
constant_value = constants.get(vaddr, None)
if constant_value is not None:
line += (
f"{mips_gpr_names[insn['rt']]}, (0x{constant_value:08X} >> 16)"
f"{ mips_isa.mips_gpr_names[insn['rt']]}, (0x{constant_value:08X} >> 16)"
)
else:
line += entry["op"][0]
elif insn["id"] == MIPS_INS_ADDIU or insn["id"] in MIPS_LOAD_STORE_INSNS:
elif insn["id"] == mips_isa.MIPS_INS_ADDIU or insn["id"] in MIPS_LOAD_STORE_INSNS:
symbol_value = symbols.get(vaddr, None)
if symbol_value is not None:
if insn["id"] == MIPS_INS_ADDIU:
line += f"{mips_gpr_names[insn['rt']]}, {mips_gpr_names[insn['rs']]}, %lo({proper_name(symbol_value)})"
if insn["id"] == mips_isa.MIPS_INS_ADDIU:
line += f"{ mips_isa.mips_gpr_names[insn['rt']]}, { mips_isa.mips_gpr_names[insn['rs']]}, %lo({proper_name(symbol_value)})"
else:
line += f"{mips_fpr_names[insn['ft']] if insn['id'] in MIPS_FP_LOAD_STORE_INSNS else mips_gpr_names[insn['rt']]}, %lo({proper_name(symbol_value)})({mips_gpr_names[insn['base']]})"
line += f"{mips_isa.mips_fpr_names[insn['ft']] if insn['id'] in MIPS_FP_LOAD_STORE_INSNS else mips_isa.mips_gpr_names[insn['rt']]}, %lo({proper_name(symbol_value)})({ mips_isa.mips_gpr_names[insn['base']]})"
else:
line += entry["op"][0]
elif insn["id"] == MIPS_INS_ORI:
elif insn["id"] == mips_isa.MIPS_INS_ORI:
constant_value = constants.get(vaddr, None)
if constant_value is not None:
line += f"{mips_gpr_names[insn['rt']]}, {mips_gpr_names[insn['rs']]}, (0x{constant_value:08X} & 0xFFFF)"
line += f"{ mips_isa.mips_gpr_names[insn['rt']]}, { mips_isa.mips_gpr_names[insn['rs']]}, (0x{constant_value:08X} & 0xFFFF)"
else:
line += entry["op"][0]
@ -1329,7 +1339,7 @@ def disassemble_text(data, vram, data_regions, info):
for i, raw_insn in enumerate(raw_insns, 0):
i *= 4
vaddr = vram + i
insn = decode_insn(raw_insns[i // 4], vaddr)
insn = mips_isa.decode_insn(raw_insns[i // 4], vaddr)
mnemonic = insn.mnemonic
op_str = insn.op_str
@ -1389,27 +1399,27 @@ def disassemble_text(data, vram, data_regions, info):
op_str_parts.append(insn.format_field(field))
op_str = ", ".join(op_str_parts)
delayed_insn = insn
elif insn.id == MIPS_INS_LUI:
elif insn.id == mips_isa.MIPS_INS_LUI:
symbol_value = symbols.get(vaddr, None)
if symbol_value is not None:
op_str = f"{mips_gpr_names[insn.rt]}, %hi({proper_name(symbol_value)})"
op_str = f"{ mips_isa.mips_gpr_names[insn.rt]}, %hi({proper_name(symbol_value)})"
else:
constant_value = constants.get(vaddr, None)
if constant_value is not None:
op_str = (
f"{mips_gpr_names[insn.rt]}, (0x{constant_value:08X} >> 16)"
f"{ mips_isa.mips_gpr_names[insn.rt]}, (0x{constant_value:08X} >> 16)"
)
elif insn.id == MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
elif insn.id == mips_isa.MIPS_INS_ADDIU or insn.id in MIPS_LOAD_STORE_INSNS:
symbol_value = symbols.get(vaddr, None)
if symbol_value is not None:
if insn.id == MIPS_INS_ADDIU:
op_str = f"{mips_gpr_names[insn.rt]}, {mips_gpr_names[insn.rs]}, %lo({proper_name(symbol_value)})"
if insn.id == mips_isa.MIPS_INS_ADDIU:
op_str = f"{ mips_isa.mips_gpr_names[insn.rt]}, { mips_isa.mips_gpr_names[insn.rs]}, %lo({proper_name(symbol_value)})"
else:
op_str = f"{mips_fpr_names[insn.ft] if insn.id in MIPS_FP_LOAD_STORE_INSNS else mips_gpr_names[insn.rt]}, %lo({proper_name(symbol_value)})({mips_gpr_names[insn.base]})"
elif insn.id == MIPS_INS_ORI:
op_str = f"{mips_isa.mips_fpr_names[insn.ft] if insn.id in MIPS_FP_LOAD_STORE_INSNS else mips_isa.mips_gpr_names[insn.rt]}, %lo({proper_name(symbol_value)})({ mips_isa.mips_gpr_names[insn.base]})"
elif insn.id == mips_isa.MIPS_INS_ORI:
constant_value = constants.get(vaddr, None)
if constant_value is not None:
op_str = f"{mips_gpr_names[insn.rt]}, {mips_gpr_names[insn.rs]}, (0x{constant_value:08X} & 0xFFFF)"
op_str = f"{ mips_isa.mips_gpr_names[insn.rt]}, { mips_isa.mips_gpr_names[insn.rs]}, (0x{constant_value:08X} & 0xFFFF)"
if delay_slot:
mnemonic = " " + mnemonic
@ -2205,7 +2215,7 @@ for segment in files_spec:
new[offset] = name
full_file_list[segment[0]] = new
if args.full:
if args.all:
new_spec = []
for segment in files_spec:
if args.files and not any(
@ -2357,7 +2367,7 @@ for segment in files_spec:
del files_spec[:]
pool = get_context("fork").Pool(jobs)
pool = multiprocessing.get_context("fork").Pool(jobs)
# Find symbols for each segment
for section in all_sections:
if section[-1]["name"] == "makerom":
@ -2393,7 +2403,7 @@ for section in all_sections:
pool.close()
pool.join()
pool = get_context("fork").Pool(jobs)
pool = multiprocessing.get_context("fork").Pool(jobs)
for section in all_sections:
if section[-1]["type"] == "makerom":
continue
@ -2418,7 +2428,7 @@ for section in all_sections:
vrom_addrs = {addr for _, addr in vrom_variables}
# Textual disassembly for each segment
with get_context("fork").Pool(jobs) as p:
with multiprocessing.get_context("fork").Pool(jobs) as p:
p.map(
disassemble_segment,
[

View File

@ -286,15 +286,26 @@ mips_cop0_names = [
"TagLo" , "TagHi" , "ErrorEPC" , "Reserved31",
]
mips_fpr_names = [
numeric_fpr_names = [
"$f0", "$f1", "$f2", "$f3",
"$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
"$f12", "$f13", "$f14", "$f15",
"$f16", "$f17", "$f18", "$f19",
"$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30",
"$31",
"$31", # Floating-point control/status register
]
# FPR names for the "o32" naming scheme, in register-number order.
# Each row holds an even-numbered register followed by the next odd-numbered
# one, whose name is the even one's with an "f" suffix.
o32_fpr_names = [
    "$fv0", "$fv0f",
    "$fv1", "$fv1f",
    "$ft0", "$ft0f",
    "$ft1", "$ft1f",
    "$ft2", "$ft2f",
    "$ft3", "$ft3f",
    "$fa0", "$fa0f",
    "$fa1", "$fa1f",
    "$ft4", "$ft4f",
    "$ft5", "$ft5f",
    "$fs0", "$fs0f",
    "$fs1", "$fs1f",
    "$fs2", "$fs2f",
    "$fs3", "$fs3f",
    "$fs4", "$fs4f",
    "$fs5",
    "$31",  # Floating-point control/status register
]
mips_fpr_names = numeric_fpr_names
# Instruction field fetching
def sign_extend_16(value):