diff --git a/tools/ZAPD/.gitrepo b/tools/ZAPD/.gitrepo
index 3144cab206..131ccdc889 100644
--- a/tools/ZAPD/.gitrepo
+++ b/tools/ZAPD/.gitrepo
@@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/zeldaret/ZAPD.git
branch = master
- commit = 5786abbdd2d0fd14907e03575a0bd972c1fe9b28
- parent = e451a103ba5d14af3dd14acfa54a6b4999fd951f
+ commit = 23929ec9373d28cb298daad4ad7cb468e09c0a46
+ parent = 2e487b5008c129031ab311a3a7bfd42adeb4916b
method = merge
- cmdver = 0.4.5
+ cmdver = 0.4.3
diff --git a/tools/ZAPD/ZAPD/ZTextureAnimation.cpp b/tools/ZAPD/ZAPD/ZTextureAnimation.cpp
index 698054fa87..0c55b26097 100644
--- a/tools/ZAPD/ZAPD/ZTextureAnimation.cpp
+++ b/tools/ZAPD/ZAPD/ZTextureAnimation.cpp
@@ -569,6 +569,7 @@ void ZTextureAnimation::DeclareReferences(const std::string& prefix)
count = 2;
}
params = new TextureScrollingParams(parent);
+ params->type = entry.type;
params->ExtractFromBinary(paramsOffset, count);
break;
@@ -582,6 +583,7 @@ void ZTextureAnimation::DeclareReferences(const std::string& prefix)
case TextureAnimationParamsType::TextureCycle:
params = new TextureCyclingParams(parent);
+ params->type = entry.type;
params->ExtractFromBinary(paramsOffset);
break;
diff --git a/tools/asm-differ/.gitignore b/tools/asm-differ/.gitignore
index eb176dc56b..a2b216fea6 100644
--- a/tools/asm-differ/.gitignore
+++ b/tools/asm-differ/.gitignore
@@ -1,2 +1,4 @@
.mypy_cache/
__pycache__/
+.vscode/
+poetry.lock
diff --git a/tools/asm-differ/.gitrepo b/tools/asm-differ/.gitrepo
index 8d0a6445dc..a23a9c1e4a 100644
--- a/tools/asm-differ/.gitrepo
+++ b/tools/asm-differ/.gitrepo
@@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/simonlindholm/asm-differ
branch = main
- commit = 1236288d1520335c2bfb672078fec65084d7cb5c
- parent = 2c5690701a350c7e7c3d6252dff925ad65d59910
+ commit = ae408664a89ea4dc70d005d0afc69ac26c938cbb
+ parent = c833969ea79ba31c3103e25e94ef88098c0287de
method = merge
cmdver = 0.4.3
diff --git a/tools/asm-differ/.pre-commit-config.yaml b/tools/asm-differ/.pre-commit-config.yaml
index ccc4afc0ee..67ba03d124 100644
--- a/tools/asm-differ/.pre-commit-config.yaml
+++ b/tools/asm-differ/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
repos:
- repo: https://github.com/psf/black
- rev: 22.1.0
+ rev: 22.3.0
hooks:
- id: black
diff --git a/tools/asm-differ/README.md b/tools/asm-differ/README.md
index 205310d394..1d0932c9ba 100644
--- a/tools/asm-differ/README.md
+++ b/tools/asm-differ/README.md
@@ -7,7 +7,7 @@ Nice differ for assembly code. Currently supports MIPS, PPC, AArch64, and ARM32;
## Dependencies
- Python >= 3.6
-- `python3 -m pip install --user colorama watchdog python-Levenshtein` (also `dataclasses` if on 3.6)
+- `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` (also `dataclasses` if on 3.6)
## Usage
diff --git a/tools/asm-differ/diff.py b/tools/asm-differ/diff.py
index 4b5bc892ee..230dbaed70 100755
--- a/tools/asm-differ/diff.py
+++ b/tools/asm-differ/diff.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# PYTHON_ARGCOMPLETE_OK
import argparse
+import enum
import sys
from typing import (
Any,
@@ -28,6 +29,14 @@ def static_assert_unreachable(x: NoReturn) -> NoReturn:
raise Exception("Unreachable! " + repr(x))
+class DiffMode(enum.Enum): # Display mode; set by the -0 / -1 / -3 / -b flags (all use dest="diff_mode").
+ SINGLE = "single" # -1: view the current asm only (not a diff)
+ SINGLE_BASE = "single_base" # -0: view the base asm only (not a diff)
+ NORMAL = "normal" # substituted by create_config when no mode flag was passed
+ THREEWAY_PREV = "3prev" # -3: three-way diff against the asm prior to the -w rebuild
+ THREEWAY_BASE = "3base" # -b: three-way diff against the asm from when diff.py was started
+
+
# ==== COMMAND-LINE ====
if __name__ == "__main__":
@@ -201,11 +210,11 @@ if __name__ == "__main__":
)
parser.add_argument(
"-s",
- "--stop-jr-ra",
- dest="stop_jrra",
+ "--stop-at-ret",
+ dest="stop_at_ret",
action="store_true",
- help="""Stop disassembling at the first 'jr ra'. Some functions have
- multiple return points, so use with care!""",
+ help="""Stop disassembling at the first return instruction.
+ Some functions have multiple return points, so use with care!""",
)
parser.add_argument(
"-i",
@@ -228,6 +237,13 @@ if __name__ == "__main__":
action="store_false",
help="Don't visualize branches/branch targets.",
)
+ parser.add_argument(
+ "-R",
+ "--no-show-rodata-refs",
+ dest="show_rodata_refs",
+ action="store_false",
+ help="Don't show .rodata -> .text references (typically from jump tables).",
+ )
parser.add_argument(
"-S",
"--base-shift",
@@ -248,21 +264,37 @@ if __name__ == "__main__":
help="""Automatically update when source/object files change.
Recommended in combination with -m.""",
)
+ parser.add_argument(
+ "-0",
+ "--diff_mode=single_base",
+ dest="diff_mode",
+ action="store_const",
+ const=DiffMode.SINGLE_BASE,
+ help="""View the base asm only (not a diff).""",
+ )
+ parser.add_argument(
+ "-1",
+ "--diff_mode=single",
+ dest="diff_mode",
+ action="store_const",
+ const=DiffMode.SINGLE,
+ help="""View the current asm only (not a diff).""",
+ )
parser.add_argument(
"-3",
"--threeway=prev",
- dest="threeway",
+ dest="diff_mode",
action="store_const",
- const="prev",
+ const=DiffMode.THREEWAY_PREV,
help="""Show a three-way diff between target asm, current asm, and asm
prior to -w rebuild. Requires -w.""",
)
parser.add_argument(
"-b",
"--threeway=base",
- dest="threeway",
+ dest="diff_mode",
action="store_const",
- const="base",
+ const=DiffMode.THREEWAY_BASE,
help="""Show a three-way diff between target asm, current asm, and asm
when diff.py was started. Requires -w.""",
)
@@ -336,11 +368,9 @@ if __name__ == "__main__":
# (We do imports late to optimize auto-complete performance.)
import abc
-import ast
from collections import Counter, defaultdict
from dataclasses import asdict, dataclass, field, replace
import difflib
-import enum
import html
import itertools
import json
@@ -357,7 +387,7 @@ import traceback
MISSING_PREREQUISITES = (
"Missing prerequisite python module {}. "
- "Run `python3 -m pip install --user colorama watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
+ "Run `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
)
try:
@@ -376,7 +406,8 @@ class ProjectSettings:
objdump_flags: List[str]
build_command: List[str]
map_format: str
- mw_build_dir: str
+ build_dir: str
+ ms_map_address_offset: int
baseimg: Optional[str]
myimg: Optional[str]
mapfile: Optional[str]
@@ -384,6 +415,8 @@ class ProjectSettings:
source_extensions: List[str]
show_line_numbers_default: bool
disassemble_all: bool
+ reg_categories: Dict[str, int]
+ expected_dir: str
@dataclass
@@ -408,17 +441,19 @@ class Config:
# Display options
formatter: "Formatter"
- threeway: Optional[str]
+ diff_mode: DiffMode
base_shift: int
skip_lines: int
compress: Optional[Compress]
+ show_rodata_refs: bool
show_branches: bool
show_line_numbers: bool
show_source: bool
- stop_jrra: bool
+ stop_at_ret: bool
ignore_large_imms: bool
ignore_addr_diffs: bool
algorithm: str
+ reg_categories: Dict[str, int]
# Score options
score_stack_differences = True
@@ -444,10 +479,13 @@ def create_project_settings(settings: Dict[str, Any]) -> ProjectSettings:
),
objdump_executable=get_objdump_executable(settings.get("objdump_executable")),
objdump_flags=settings.get("objdump_flags", []),
+ expected_dir=settings.get("expected_dir", "expected/"),
map_format=settings.get("map_format", "gnu"),
- mw_build_dir=settings.get("mw_build_dir", "build/"),
+ ms_map_address_offset=settings.get("ms_map_address_offset", 0),
+ build_dir=settings.get("build_dir", settings.get("mw_build_dir", "build/")),
show_line_numbers_default=settings.get("show_line_numbers_default", True),
disassemble_all=settings.get("disassemble_all", False),
+ reg_categories=settings.get("reg_categories", {}),
)
@@ -493,19 +531,21 @@ def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config:
max_function_size_bytes=args.max_lines * 4,
# Display options
formatter=formatter,
- threeway=args.threeway,
+ diff_mode=args.diff_mode or DiffMode.NORMAL,
base_shift=eval_int(
args.base_shift, "Failed to parse --base-shift (-S) argument as an integer."
),
skip_lines=args.skip_lines,
compress=compress,
+ show_rodata_refs=args.show_rodata_refs,
show_branches=args.show_branches,
show_line_numbers=show_line_numbers,
show_source=args.show_source or args.source_old_binutils,
- stop_jrra=args.stop_jrra,
+ stop_at_ret=args.stop_at_ret,
ignore_large_imms=args.ignore_large_imms,
ignore_addr_diffs=args.ignore_addr_diffs,
algorithm=args.algorithm,
+ reg_categories=project.reg_categories,
)
@@ -563,6 +603,7 @@ class BasicFormat(enum.Enum):
IMMEDIATE = enum.auto()
STACK = enum.auto()
REGISTER = enum.auto()
+ REGISTER_CATEGORY = enum.auto()
DELAY_SLOT = enum.auto()
DIFF_CHANGE = enum.auto()
DIFF_ADD = enum.auto()
@@ -662,11 +703,19 @@ class Text:
@dataclass
-class TableMetadata:
+class TableLine:
+ key: Optional[str]
+ is_data_ref: bool
+ cells: Tuple[Tuple[Text, Optional["Line"]], ...]
+
+
+@dataclass
+class TableData:
headers: Tuple[Text, ...]
current_score: int
max_score: int
previous_score: Optional[int]
+ lines: List[TableLine]
class Formatter(abc.ABC):
@@ -676,7 +725,7 @@ class Formatter(abc.ABC):
...
@abc.abstractmethod
- def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str:
+ def table(self, data: TableData) -> str:
"""Format a multi-column table with metadata"""
...
@@ -684,8 +733,8 @@ class Formatter(abc.ABC):
return "".join(self.apply_format(chunk, f) for chunk, f in text.segments)
@staticmethod
- def outputline_texts(lines: Tuple["OutputLine", ...]) -> Tuple[Text, ...]:
- return tuple([lines[0].base or Text()] + [line.fmt2 for line in lines[1:]])
+ def outputline_texts(line: TableLine) -> Tuple[Text, ...]:
+ return tuple(cell[0] for cell in line.cells)
@dataclass
@@ -695,8 +744,8 @@ class PlainFormatter(Formatter):
def apply_format(self, chunk: str, f: Format) -> str:
return chunk
- def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str:
- rows = [meta.headers] + [self.outputline_texts(ls) for ls in lines]
+ def table(self, data: TableData) -> str:
+ rows = [data.headers] + [self.outputline_texts(line) for line in data.lines]
return "\n".join(
"".join(self.apply(x.ljust(self.column_width)) for x in row) for row in rows
)
@@ -709,12 +758,14 @@ class AnsiFormatter(Formatter):
STYLE_UNDERLINE = "\x1b[4m"
STYLE_NO_UNDERLINE = "\x1b[24m"
STYLE_INVERT = "\x1b[7m"
+ STYLE_RESET = "\x1b[0m"
BASIC_ANSI_CODES = {
BasicFormat.NONE: "",
BasicFormat.IMMEDIATE: Fore.LIGHTBLUE_EX,
BasicFormat.STACK: Fore.YELLOW,
BasicFormat.REGISTER: Fore.YELLOW,
+ BasicFormat.REGISTER_CATEGORY: Fore.LIGHTYELLOW_EX,
BasicFormat.DELAY_SLOT: Fore.LIGHTBLACK_EX,
BasicFormat.DIFF_CHANGE: Fore.LIGHTBLUE_EX,
BasicFormat.DIFF_ADD: Fore.GREEN,
@@ -761,14 +812,19 @@ class AnsiFormatter(Formatter):
static_assert_unreachable(f)
return f"{ansi_code}{chunk}{undo_ansi_code}"
- def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str:
- rows = [(meta.headers, False)] + [
- (self.outputline_texts(line), line[1].is_data_ref) for line in lines
+ def table(self, data: TableData) -> str:
+ rows = [(data.headers, False)] + [
+ (
+ self.outputline_texts(line),
+ line.is_data_ref,
+ )
+ for line in data.lines
]
return "\n".join(
"".join(
(self.STYLE_INVERT if is_data_ref else "")
+ self.apply(x.ljust(self.column_width))
+ + (self.STYLE_RESET if is_data_ref else "")
for x in row
)
for (row, is_data_ref) in rows
@@ -794,7 +850,7 @@ class HtmlFormatter(Formatter):
static_assert_unreachable(f)
return f"{chunk}"
- def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str:
+ def table(self, data: TableData) -> str:
def table_row(line: Tuple[Text, ...], is_data_ref: bool, cell_el: str) -> str:
tr_attrs = " class='data-ref'" if is_data_ref else ""
output_row = f"
"
@@ -806,12 +862,12 @@ class HtmlFormatter(Formatter):
output = "\n"
output += " \n"
- output += table_row(meta.headers, False, "th")
+ output += table_row(data.headers, False, "th")
output += " \n"
output += " \n"
output += "".join(
- table_row(self.outputline_texts(line), line[1].is_data_ref, "td")
- for line in lines
+ table_row(self.outputline_texts(line), line.is_data_ref, "td")
+ for line in data.lines
)
output += " \n"
output += "
\n"
@@ -826,7 +882,7 @@ class JsonFormatter(Formatter):
# This method is unused by this formatter
return NotImplemented
- def table(self, meta: TableMetadata, rows: List[Tuple["OutputLine", ...]]) -> str:
+ def table(self, data: TableData) -> str:
def serialize_format(s: str, f: Format) -> Dict[str, Any]:
if f == BasicFormat.NONE:
return {"text": s}
@@ -844,29 +900,25 @@ class JsonFormatter(Formatter):
return []
return [serialize_format(s, f) for s, f in text.segments]
- is_threeway = len(meta.headers) == 3
-
output: Dict[str, Any] = {}
output["arch_str"] = self.arch_str
output["header"] = {
name: serialize(h)
- for h, name in zip(meta.headers, ("base", "current", "previous"))
+ for h, name in zip(data.headers, ("base", "current", "previous"))
}
- output["current_score"] = meta.current_score
- output["max_score"] = meta.max_score
- if meta.previous_score is not None:
- output["previous_score"] = meta.previous_score
+ output["current_score"] = data.current_score
+ output["max_score"] = data.max_score
+ if data.previous_score is not None:
+ output["previous_score"] = data.previous_score
output_rows: List[Dict[str, Any]] = []
- for row in rows:
+ for row in data.lines:
output_row: Dict[str, Any] = {}
- output_row["key"] = row[0].key2
- output_row["is_data_ref"] = row[1].is_data_ref
- iters = [
- ("base", row[0].base, row[0].line1),
- ("current", row[1].fmt2, row[1].line2),
+ output_row["key"] = row.key
+ output_row["is_data_ref"] = row.is_data_ref
+ iters: List[Tuple[str, Text, Optional[Line]]] = [
+ (label, *cell)
+ for label, cell in zip(("base", "current", "previous"), row.cells)
]
- if is_threeway:
- iters.append(("previous", row[2].fmt2, row[2].line2))
if all(line is None for _, _, line in iters):
# Skip rows that were only for displaying source code
continue
@@ -935,10 +987,41 @@ def symbol_formatter(group: str, base_index: int) -> FormatFunction:
ObjdumpCommand = Tuple[List[str], str, Optional[str]]
+# eval_expr adapted from https://stackoverflow.com/a/9558001
+
+import ast
+import operator as op
+
+# supported operators
+operators: Dict[Type[Union[ast.operator, ast.unaryop]], Any] = {
+ ast.Add: op.add,
+ ast.Sub: op.sub,
+ ast.Mult: op.mul,
+ ast.Div: op.truediv,
+ ast.Pow: op.pow,
+ ast.BitXor: op.xor,
+ ast.USub: op.neg,
+}
+
+
+def eval_expr(expr: str) -> Any:
+ return eval_(ast.parse(expr, mode="eval").body)
+
+
+def eval_(node: ast.AST) -> Any:  # evaluate a numeric literal or an operator from `operators`; TypeError/KeyError otherwise
+    if isinstance(node, ast.BinOp):  # e.g., 0x80000000 + 0x10
+        return operators[type(node.op)](eval_(node.left), eval_(node.right))
+    elif isinstance(node, ast.UnaryOp):  # e.g., -1
+        return operators[type(node.op)](eval_(node.operand))
+    value = node.value if hasattr(node, "value") else getattr(node, "n", None)  # ast.Constant on 3.8+, ast.Num.n on 3.6/3.7; avoids ast.Num (removed in 3.14)
+    if type(value) not in (int, float, complex):  # exact-type check: rejects bool, str, None and nested AST nodes
+        raise TypeError(node)
+    return value
+
def maybe_eval_int(expr: str) -> Optional[int]:
try:
- ret = ast.literal_eval(expr)
+ ret = eval_expr(expr)
if not isinstance(ret, int):
raise Exception("not an integer")
return ret
@@ -1056,7 +1139,7 @@ def preprocess_objdump_out(
out = out[out.find("\n") + 1 :]
out = out.rstrip("\n")
- if obj_data:
+ if obj_data and config.show_rodata_refs:
out = (
serialize_rodata_references(parse_elf_rodata_references(obj_data, config))
+ out
@@ -1065,8 +1148,27 @@ def preprocess_objdump_out(
return out
+def search_build_objects(objname: str, project: ProjectSettings) -> Optional[str]: # path of the single file named objname under project.build_dir, else None
+ objfiles = [
+ os.path.join(dirpath, f)
+ for dirpath, _, filenames in os.walk(project.build_dir) # recursive walk of the build directory
+ for f in filenames
+ if f == objname # exact file-name match only
+ ]
+ if len(objfiles) > 1:
+ all_objects = "\n".join(objfiles)
+ fail( # ambiguous match: the message lists every candidate path
+ f"Found multiple objects of the same name {objname} in {project.build_dir}, "
+ f"cannot determine which to diff against: \n{all_objects}"
+ )
+ if len(objfiles) == 1:
+ return objfiles[0]
+
+ return None # no file with that name was found
+
+
def search_map_file(
- fn_name: str, project: ProjectSettings, config: Config
+ fn_name: str, project: ProjectSettings, config: Config, *, for_binary: bool
) -> Tuple[Optional[str], Optional[int]]:
if not project.mapfile:
fail(f"No map file configured; cannot find function {fn_name}.")
@@ -1078,6 +1180,12 @@ def search_map_file(
fail(f"Failed to open map file {project.mapfile} for reading.")
if project.map_format == "gnu":
+ if for_binary and "load address" not in contents:
+ fail(
+ 'Failed to find "load address" in map file. Maybe you need to add\n'
+ '"export LANG := C" to your Makefile to avoid localized output?'
+ )
+
lines = contents.split("\n")
try:
@@ -1093,10 +1201,12 @@ def search_map_file(
ram = int(tokens[1], 0)
rom = int(tokens[5], 0)
ram_to_rom = rom - ram
- if line.endswith(" " + fn_name):
+ if line.endswith(" " + fn_name) or f" {fn_name} = 0x" in line:
ram = int(line.split()[0], 0)
- if cur_objfile is not None and ram_to_rom is not None:
- cands.append((cur_objfile, ram + ram_to_rom))
+ if (for_binary and ram_to_rom is not None) or (
+ not for_binary and cur_objfile is not None
+ ):
+ cands.append((cur_objfile, ram + (ram_to_rom or 0)))
last_line = line
except Exception as e:
traceback.print_exc()
@@ -1108,16 +1218,14 @@ def search_map_file(
return cands[0]
elif project.map_format == "mw":
find = re.findall(
- re.compile(
- # ram elf rom
- r" \S+ \S+ (\S+) (\S+) . "
- + re.escape(fn_name)
- + r"(?: \(entry of "
- + re.escape(config.diff_section)
- + r"\))? \t"
- # object name
- + "(\S+)"
- ),
+ # ram elf rom alignment
+ r" \S+ \S+ (\S+) (\S+) +\S+ "
+ + re.escape(fn_name)
+ + r"(?: \(entry of "
+ + re.escape(config.diff_section)
+ + r"\))? \t"
+ # object name
+ + "(\S+)",
contents,
)
if len(find) > 1:
@@ -1125,27 +1233,56 @@ def search_map_file(
if len(find) == 1:
rom = int(find[0][1], 16)
objname = find[0][2]
- # The metrowerks linker map format does not contain the full object path,
- # so we must complete it manually.
- objfiles = [
- os.path.join(dirpath, f)
- for dirpath, _, filenames in os.walk(project.mw_build_dir)
- for f in filenames
- if f == objname
- ]
- if len(objfiles) > 1:
- all_objects = "\n".join(objfiles)
- fail(
- f"Found multiple objects of the same name {objname} in {project.mw_build_dir}, "
- f"cannot determine which to diff against: \n{all_objects}"
- )
- if len(objfiles) == 1:
- objfile = objfiles[0]
- # TODO Currently the ram-rom conversion only works for diffing ELF
- # executables, but it would likely be more convenient to diff DOLs.
- # At this time it is recommended to always use -o when running the diff
- # script as this mode does not make use of the ram-rom conversion.
+ objfile = search_build_objects(objname, project)
+
+ # TODO Currently the ram-rom conversion only works for diffing ELF
+ # executables, but it would likely be more convenient to diff DOLs.
+ # At this time it is recommended to always use -o when running the diff
+ # script as this mode does not make use of the ram-rom conversion.
+ if objfile is not None:
return objfile, rom
+ elif project.map_format == "ms":
+ load_address_find = re.search(
+ r"Preferred load address is ([0-9a-f]+)",
+ contents,
+ )
+ if not load_address_find:
+ fail(f"Couldn't find module load address in map file.")
+ load_address = int(load_address_find.group(1), 16)
+
+ diff_segment_find = re.search(
+ r"([0-9a-f]+):[0-9a-f]+ [0-9a-f]+H " + re.escape(config.diff_section),
+ contents,
+ )
+ if not diff_segment_find:
+ fail(f"Couldn't find segment for section in map file.")
+ diff_segment = diff_segment_find.group(1)
+
+ find = re.findall(
+ r" (?:"
+ + re.escape(diff_segment)
+ + r")\S+\s+(?:"
+ + re.escape(fn_name)
+ + r")\s+\S+ ... \S+",
+ contents,
+ )
+ if len(find) > 1:
+ fail(f"Found multiple occurrences of function {fn_name} in map file.")
+ if len(find) == 1:
+ names_find = re.search(r"(\S+) ... (\S+)", find[0])
+ assert names_find is not None
+ fileofs = (
+ int(names_find.group(1), 16)
+ - load_address
+ + project.ms_map_address_offset
+ )
+ if for_binary:
+ return None, fileofs
+
+ objname = names_find.group(2)
+ objfile = search_build_objects(objname, project)
+ if objfile is not None:
+ return objfile, fileofs
else:
fail(f"Linker map format {project.map_format} unrecognised.")
return None, None
@@ -1161,6 +1298,8 @@ def parse_elf_rodata_references(
SHT_SYMTAB = 2
SHT_REL = 9
SHT_RELA = 4
+ R_MIPS_32 = 2
+ R_MIPS_GPREL32 = 12
is_32bit = e_ident[4] == 1
is_little_endian = e_ident[5] == 1
@@ -1234,7 +1373,7 @@ def parse_elf_rodata_references(
# Skip section_name -> section_name references
continue
sec_name = sec_names[s.sh_info].decode("latin1")
- if sec_name != ".rodata":
+ if sec_name not in (".rodata", ".late_rodata"):
continue
sec_base = sections[s.sh_info].sh_offset
for i in range(0, s.sh_size, s.sh_entsize):
@@ -1259,7 +1398,7 @@ def parse_elf_rodata_references(
)
if st_shndx == text_section:
if s.sh_type == SHT_REL:
- if e_machine == 8 and r_type == 2: # R_MIPS_32
+ if e_machine == 8 and r_type in (R_MIPS_32, R_MIPS_GPREL32):
(r_addend,) = read("I", sec_base + r_offset)
else:
continue
@@ -1325,7 +1464,7 @@ def dump_objfile(
objfile = config.objfile
if not objfile:
- objfile, _ = search_map_file(start, project, config)
+ objfile, _ = search_map_file(start, project, config, for_binary=False)
if not objfile:
fail("Not able to find .o file for function.")
@@ -1336,8 +1475,8 @@ def dump_objfile(
if not os.path.isfile(objfile):
fail(f"Not able to find .o file for function: {objfile} is not a file.")
- refobjfile = "expected/" + objfile
- if not os.path.isfile(refobjfile):
+ refobjfile = os.path.join(project.expected_dir, objfile)
+ if config.diff_mode != DiffMode.SINGLE and not os.path.isfile(refobjfile):
fail(f'Please ensure an OK .o file exists at "{refobjfile}".')
if project.disassemble_all:
@@ -1362,7 +1501,7 @@ def dump_binary(
run_make(project.myimg, project)
start_addr = maybe_eval_int(start)
if start_addr is None:
- _, start_addr = search_map_file(start, project, config)
+ _, start_addr = search_map_file(start, project, config, for_binary=True)
if start_addr is None:
fail("Not able to find function in map file.")
if end is not None:
@@ -1391,8 +1530,13 @@ class AsmProcessor:
def __init__(self, config: Config) -> None:
self.config = config
- def process_reloc(self, row: str, prev: str) -> str:
- return prev
+ def pre_process(
+ self, mnemonic: str, args: str, next_row: Optional[str]
+ ) -> Tuple[str, str]:
+ return mnemonic, args
+
+ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
+ return prev, None
def normalize(self, mnemonic: str, row: str) -> str:
"""This should be called exactly once for each line."""
@@ -1408,28 +1552,24 @@ class AsmProcessor:
def post_process(self, lines: List["Line"]) -> None:
return
+ def is_end_of_function(self, mnemonic: str, args: str) -> bool:
+ return False
+
class AsmProcessorMIPS(AsmProcessor):
- def process_reloc(self, row: str, prev: str) -> str:
+ def __init__(self, config: Config) -> None:
+ super().__init__(config)
+ self.seen_jr_ra = False
+
+ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
arch = self.config.arch
if "R_MIPS_NONE" in row or "R_MIPS_JALR" in row:
# GNU as emits no-op relocations immediately after real ones when
# assembling with -mabi=64. Return without trying to parse 'imm' as an
# integer.
- return prev
+ return prev, None
before, imm, after = parse_relocated_line(prev)
- repl = row.split()[-1]
- if imm != "0":
- # MIPS uses relocations with addends embedded in the code as immediates.
- # If there is an immediate, show it as part of the relocation. Ideally
- # we'd show this addend in both %lo/%hi, but annoyingly objdump's output
- # doesn't include enough information to pair up %lo's and %hi's...
- # TODO: handle unambiguous cases where all addends for a symbol are the
- # same, or show "+???".
- mnemonic = prev.split()[0]
- if mnemonic in arch.instructions_with_address_immediates:
- imm = hex(int(imm, 16))
- repl += ("" if imm.startswith("-") else "+") + imm
+ repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch)
if "R_MIPS_LO16" in row:
repl = f"%lo({repl})"
elif "R_MIPS_HI16" in row:
@@ -1452,20 +1592,57 @@ class AsmProcessorMIPS(AsmProcessor):
repl = f"%call16({repl})"
else:
assert False, f"unknown relocation type '{row}' for line '{prev}'"
- return before + repl + after
+ return before + repl + after, repl
+
+ def is_end_of_function(self, mnemonic: str, args: str) -> bool: # stateful: True only on calls made after a "jr ra" was seen
+ if self.seen_jr_ra:
+ return True # a "jr ra" was recorded by an earlier call
+ if mnemonic == "jr" and args == "ra":
+ self.seen_jr_ra = True # remember it; the "jr ra" line itself still reports False (presumably so its delay slot is kept - confirm in caller)
+ return False
class AsmProcessorPPC(AsmProcessor):
- def process_reloc(self, row: str, prev: str) -> str:
+ def pre_process(
+ self, mnemonic: str, args: str, next_row: Optional[str]
+ ) -> Tuple[str, str]: # returns the (possibly rewritten) mnemonic and args
+
+ if next_row and "R_PPC_EMB_SDA21" in next_row: # next_row is only inspected for the reloc name
+ # With sda21 relocs, the linker transforms `r0` into `r2`/`r13`, and
+ # we may encounter this in either pre-transformed or post-transformed
+ # versions depending on if the .o file comes from compiler output or
+ # from disassembly. Normalize, to make sure both forms are treated as
+ # equivalent.
+
+ args = args.replace("(r2)", "(0)")
+ args = args.replace("(r13)", "(0)")
+ args = args.replace(",r2,", ",0,")
+ args = args.replace(",r13,", ",0,")
+
+ # Also convert li and lis that carry an sda21 reloc: the r0 -> r2/r13
+ # transformation turns an li/lis into an addi/addis with an r2/r13
+ # argument, so normalize every form to addi/addis with a 0 argument
+ # (the args "rD,X" become "rD,0,X").
+ if mnemonic in {"li", "lis"}:
+ mnemonic = mnemonic.replace("li", "addi") # li -> addi, lis -> addis
+ args_parts = args.split(",")
+ args = args_parts[0] + ",0," + args_parts[1]
+
+ return mnemonic, args
+
+ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
arch = self.config.arch
assert any(
- r in row for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21"]
+ r in row
+ for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21", "R_PPC_REL14"]
), f"unknown relocation type '{row}' for line '{prev}'"
before, imm, after = parse_relocated_line(prev)
repl = row.split()[-1]
if "R_PPC_REL24" in row:
# function calls
pass
+ if "R_PPC_REL14" in row:
+ pass
elif "R_PPC_ADDR16_HI" in row:
# absolute hi of addr
repl = f"{repl}@h"
@@ -1483,17 +1660,30 @@ class AsmProcessorPPC(AsmProcessor):
if int(repl.split("+")[1], 16) > 0x70000000:
repl = repl.split("+")[0]
elif "R_PPC_EMB_SDA21" in row:
- # small data area
- pass
- return before + repl + after
+ # sda21 relocations; r2/r13 --> 0 swaps are performed in pre_process
+ repl = f"{repl}@sda21"
+
+ return before + repl + after, repl
+
+ def is_end_of_function(self, mnemonic: str, args: str) -> bool:
+ return mnemonic == "blr"
class AsmProcessorARM32(AsmProcessor):
- def process_reloc(self, row: str, prev: str) -> str:
+ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
arch = self.config.arch
+ if "R_ARM_V4BX" in row:
+ # R_ARM_V4BX converts "bx " to "mov pc," for some targets.
+ # Ignore for now.
+ return prev, None
+ if "R_ARM_ABS32" in row and not prev.startswith(".word"):
+ # Don't crash on R_ARM_ABS32 relocations incorrectly applied to code.
+ # (We may want to do something more fancy here that actually shows the
+ # related symbol, but this serves as a stop-gap.)
+ return prev, None
before, imm, after = parse_relocated_line(prev)
- repl = row.split()[-1]
- return before + repl + after
+ repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch)
+ return before + repl + after, repl
def _normalize_arch_specific(self, mnemonic: str, row: str) -> str:
if self.config.ignore_addr_diffs:
@@ -1577,6 +1767,51 @@ class AsmProcessorAArch64(AsmProcessor):
return row
+class AsmProcessorI686(AsmProcessor):
+ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]:
+ repl = row.split()[-1]
+ mnemonic, args = prev.split(maxsplit=1)
+
+ addr_imm = re.search(r"(? bool:
+ return mnemonic == "ret"
+
+
@dataclass
class ArchSettings:
name: str
@@ -1701,6 +1936,76 @@ PPC_BRANCH_INSTRUCTIONS = {
"bgt-",
}
+I686_BRANCH_INSTRUCTIONS = { # passed as branch_instructions in I686_SETTINGS
+ "call",
+ "jmp",
+ "ljmp",
+ "ja",
+ "jae",
+ "jb",
+ "jbe",
+ "jc",
+ "jcxz",
+ "jecxz",
+ "jrcxz",
+ "je",
+ "jg",
+ "jge",
+ "jl",
+ "jle",
+ "jna",
+ "jnae",
+ "jnb",
+ "jnbe",
+ "jnc",
+ "jne",
+ "jng",
+ "jnge",
+ "jnl",
+ "jnle",
+ "jno",
+ "jnp",
+ "jns",
+ "jnz",
+ "jo",
+ "jp",
+ "jpe",
+ "jpo",
+ "js",
+ "jz",
+ "ja", # NOTE(review): every entry from here to the closing brace repeats one already listed above; a set literal ignores duplicates
+ "jae",
+ "jb",
+ "jbe",
+ "jc",
+ "je",
+ "jz",
+ "jg",
+ "jge",
+ "jl",
+ "jle",
+ "jna",
+ "jnae",
+ "jnb",
+ "jnbe",
+ "jnc",
+ "jne",
+ "jng",
+ "jnge",
+ "jnl",
+ "jnle",
+ "jno",
+ "jnp",
+ "jns",
+ "jnz",
+ "jo",
+ "jp",
+ "jpe",
+ "jpo",
+ "js",
+ "jz",
+}
+
MIPS_SETTINGS = ArchSettings(
name="mips",
re_int=re.compile(r"[0-9]+"),
@@ -1713,18 +2018,26 @@ MIPS_SETTINGS = ArchSettings(
re_reg=re.compile(r"\$?\b([astv][0-9]|at|f[astv]?[0-9]+f?|kt?[01]|fp|ra|zero)\b"),
re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"),
re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
- re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)"),
+ re_imm=re.compile(
+ r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi|got|gp_rel|call16)\([^)]*\)"
+ ),
re_reloc=re.compile(r"R_MIPS_"),
arch_flags=["-m", "mips:4300"],
branch_likely_instructions=MIPS_BRANCH_LIKELY_INSTRUCTIONS,
branch_instructions=MIPS_BRANCH_INSTRUCTIONS,
- instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"jal", "j"}),
+ instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal"}),
delay_slot_instructions=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal", "jr", "jalr"}),
proc=AsmProcessorMIPS,
)
MIPSEL_SETTINGS = replace(MIPS_SETTINGS, name="mipsel", big_endian=False)
+MIPSEE_SETTINGS = replace(
+ MIPSEL_SETTINGS, name="mipsee", arch_flags=["-m", "mips:5900"]
+)
+
+MIPS_ARCH_NAMES = {"mips", "mipsel", "mipsee"}
+
ARM32_SETTINGS = ArchSettings(
name="arm32",
re_int=re.compile(r"[0-9]+"),
@@ -1773,23 +2086,56 @@ PPC_SETTINGS = ArchSettings(
name="ppc",
re_int=re.compile(r"[0-9]+"),
re_comment=re.compile(r"(<.*>|//.*$)"),
- re_reg=re.compile(r"\$?\b([rf][0-9]+)\b"),
+ # r1 not included
+ re_reg=re.compile(r"\$?\b([rf](?:[02-9]|[1-9][0-9]+)|f1)\b"),
re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"),
re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
- re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1)|[^@]*@(ha|h|lo)"),
+ re_imm=re.compile(
+ r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1)|[^ \t,]+@(l|ha|h|sda21)"
+ ),
re_reloc=re.compile(r"R_PPC_"),
+ arch_flags=["-m", "powerpc", "-M", "broadway"],
branch_instructions=PPC_BRANCH_INSTRUCTIONS,
instructions_with_address_immediates=PPC_BRANCH_INSTRUCTIONS.union({"bl"}),
proc=AsmProcessorPPC,
)
+I686_SETTINGS = ArchSettings(
+ name="i686",
+ re_int=re.compile(r"[0-9]+"),
+ re_comment=re.compile(r"<.*>"),
+ # Includes:
+ # - (e)a-d(x,l,h)
+ # - (e)s,d,b(i,p)(l)
+ # - cr0-7
+ # - x87 st
+ # - MMX, SSE vector registers
+ # - cursed registers: eal ebl ebh edl edh...
+ re_reg=re.compile(
+ r"\%?\b(e?(([sd]i|[sb]p)l?|[abcd][xhl])|[cdesfg]s|cr[0-7]|x?mm[0-7]|st)\b"
+ ),
+ re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"),
+ re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%ebp|%esi)\))"),
+ re_imm=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)"),
+ re_reloc=re.compile(r"R_386_"),
+ # The x86 architecture has a variable instruction length. The raw bytes of
+ # an instruction as displayed by objdump can line wrap if it's long enough.
+ # This destroys the objdump output processor logic, so we avoid this.
+ arch_flags=["-m", "i386", "--no-show-raw-insn"],
+ branch_instructions=I686_BRANCH_INSTRUCTIONS,
+ instructions_with_address_immediates=I686_BRANCH_INSTRUCTIONS.union({"mov"}),
+ proc=AsmProcessorI686,
+)
+
ARCH_SETTINGS = [
MIPS_SETTINGS,
MIPSEL_SETTINGS,
+ MIPSEE_SETTINGS,
ARM32_SETTINGS,
ARMEL_SETTINGS,
AARCH64_SETTINGS,
PPC_SETTINGS,
+ I686_SETTINGS,
]
@@ -1822,11 +2168,31 @@ def parse_relocated_line(line: str) -> Tuple[str, str, str]:
imm, after = after, ""
else:
imm, after = after[:ind2], after[ind2:]
- if imm == "0x0":
- imm = "0"
return before, imm, after
+def reloc_addend_from_imm(imm: str, before: str, arch: ArchSettings) -> str:
+ """For architectures like MIPS where relocations have addends embedded in
+ the code as immediates, convert such an immediate into an addition/
+ subtraction that can occur just after the symbol."""
+ # TODO this is incorrect for MIPS %lo/%hi which need to be paired up
+ # and combined. In practice, this means we only get symbol offsets within
+ # %lo, while %hi just shows the symbol. Unfortunately, objdump's output
+ # loses relocation order, so we cannot do this without parsing ELF relocs
+ # ourselves...
+ mnemonic = before.split()[0]
+ if mnemonic in arch.instructions_with_address_immediates:
+ addend = int(imm, 16)
+ else:
+ addend = int(imm, 0)
+ if addend == 0:
+ return ""
+ elif addend < 0:
+ return hex(addend)
+ else:
+ return "+" + hex(addend)
+
+
def pad_mnemonic(line: str) -> str:
if "\t" not in line:
return line
@@ -1841,6 +2207,7 @@ class Line:
original: str
normalized_original: str
scorable_line: str
+ symbol: Optional[str] = None
line_num: Optional[int] = None
branch_target: Optional[int] = None
data_pool_addr: Optional[int] = None
@@ -1896,10 +2263,8 @@ def process(dump: str, config: Config) -> List[Line]:
if not re.match(r"^\s+[0-9a-f]+:\s+", row):
# This regex is conservative, and assumes the file path does not contain "weird"
- # characters like colons, tabs, or angle brackets.
- if re.match(
- r"^[^ \t<>:][^\t<>:]*:[0-9]+( \(discriminator [0-9]+\))?$", row
- ):
+ # characters like tabs or angle brackets.
+ if re.match(r"^[^ \t<>][^\t<>]*:[0-9]+( \(discriminator [0-9]+\))?$", row):
source_filename, _, tail = row.rpartition(":")
source_line_num = int(tail.partition(" ")[0])
source_lines.append(row)
@@ -1919,9 +2284,14 @@ def process(dump: str, config: Config) -> List[Line]:
line_num_str = row.split(":")[0]
row = row.rstrip()
tabs = row.split("\t")
- row = "\t".join(tabs[2:])
line_num = eval_line_num(line_num_str.strip())
+ # TODO: use --no-show-raw-insn for all arches
+ if arch.name == "i686":
+ row = "\t".join(tabs[1:])
+ else:
+ row = "\t".join(tabs[2:])
+
if line_num in data_refs:
refs = data_refs[line_num]
ref_str = "; ".join(
@@ -1943,7 +2313,13 @@ def process(dump: str, config: Config) -> List[Line]:
else:
# powerpc-eabi-objdump doesn't use tabs
row_parts = [part.lstrip() for part in row.split(" ", 1)]
+
mnemonic = row_parts[0].strip()
+ args = row_parts[1].strip() if len(row_parts) >= 2 else ""
+
+ next_line = lines[i] if i < len(lines) else None
+ mnemonic, args = processor.pre_process(mnemonic, args, next_line)
+ row = mnemonic + "\t" + args.replace("\t", " ")
addr = ""
if mnemonic in arch.instructions_with_address_immediates:
@@ -1960,14 +2336,28 @@ def process(dump: str, config: Config) -> List[Line]:
# immediates.
original = row
+ symbol = None
while i < len(lines):
reloc_row = lines[i]
if re.search(arch.re_reloc, reloc_row):
- original = processor.process_reloc(reloc_row, original)
+ original, reloc_symbol = processor.process_reloc(reloc_row, original)
+ if reloc_symbol is not None:
+ symbol = reloc_symbol
else:
break
i += 1
+ is_text_relative_j = False
+ if (
+ arch.name in MIPS_ARCH_NAMES
+ and mnemonic == "j"
+ and symbol is not None
+ and symbol.startswith(".text")
+ ):
+ symbol = None
+ original = row
+ is_text_relative_j = True
+
normalized_original = processor.normalize(mnemonic, original)
scorable_line = normalized_original
@@ -1993,8 +2383,16 @@ def process(dump: str, config: Config) -> List[Line]:
row = normalize_imms(row, arch)
branch_target = None
- if mnemonic in arch.branch_instructions:
- branch_target = int(row_parts[1].strip().split(",")[-1], 16)
+ if (
+ mnemonic in arch.branch_instructions or is_text_relative_j
+ ) and symbol is None:
+ x86_longjmp = re.search(r"\*(.*)\(", args)
+ if x86_longjmp:
+ capture = x86_longjmp.group(1)
+ if capture != "":
+ branch_target = int(capture, 16)
+ else:
+ branch_target = int(args.split(",")[-1], 16)
output.append(
Line(
@@ -2003,6 +2401,7 @@ def process(dump: str, config: Config) -> List[Line]:
original=original,
normalized_original=normalized_original,
scorable_line=scorable_line,
+ symbol=symbol,
line_num=line_num,
branch_target=branch_target,
data_pool_addr=data_pool_addr,
@@ -2015,9 +2414,7 @@ def process(dump: str, config: Config) -> List[Line]:
num_instr += 1
source_lines = []
- if config.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
- stop_after_delay_slot = True
- elif stop_after_delay_slot:
+ if config.stop_at_ret and processor.is_end_of_function(mnemonic, args):
break
processor.post_process(output)
@@ -2032,9 +2429,44 @@ def normalize_stack(row: str, arch: ArchSettings) -> str:
return re.sub(arch.re_sprel, "addr(sp)", row)
-def imm_matches_everything(row: str, arch: ArchSettings) -> bool:
- # (this should probably be arch-specific)
- return "(." in row
+def check_for_symbol_mismatch(
+ old_line: Line, new_line: Line, symbol_map: Dict[str, str]
+) -> bool:
+
+ assert old_line.symbol is not None
+ assert new_line.symbol is not None
+
+ if new_line.symbol.startswith("%hi"):
+ return False
+
+ if old_line.symbol not in symbol_map:
+ symbol_map[old_line.symbol] = new_line.symbol
+ return False
+ elif symbol_map[old_line.symbol] == new_line.symbol:
+ return False
+
+ return True
+
+
+def field_matches_any_symbol(field: str, arch: ArchSettings) -> bool:
+ if arch.name == "ppc":
+ if "..." in field:
+ return True
+
+ parts = field.rsplit("@", 1)
+ if len(parts) == 2 and parts[1] in {"l", "h", "ha", "sda21"}:
+ field = parts[0]
+
+ return re.fullmatch((r"^@\d+$"), field) is not None
+
+ if arch.name in MIPS_ARCH_NAMES:
+ return "." in field
+
+ # Example: ".text+0x34"
+ if arch.name == "arm32":
+ return "." in field
+
+ return False
def split_off_address(line: str) -> Tuple[str, str]:
@@ -2058,15 +2490,10 @@ def diff_sequences_difflib(
def diff_sequences(
seq1: List[str], seq2: List[str], algorithm: str
) -> List[Tuple[str, int, int, int, int]]:
- if (
- algorithm != "levenshtein"
- or len(seq1) * len(seq2) > 4 * 10**8
- or len(seq1) + len(seq2) >= 0x110000
- ):
+ if algorithm != "levenshtein":
return diff_sequences_difflib(seq1, seq2)
# The Levenshtein library assumes that we compare strings, not lists. Convert.
- # (Per the check above we know we have fewer than 0x110000 unique elements, so chr() works.)
remapping: Dict[str, str] = {}
def remap(seq: List[str]) -> str:
@@ -2079,8 +2506,16 @@ def diff_sequences(
seq[i] = val
return "".join(seq)
- rem1 = remap(seq1)
- rem2 = remap(seq2)
+ try:
+ rem1 = remap(seq1)
+ rem2 = remap(seq2)
+ except ValueError as e:
+ if len(seq1) + len(seq2) < 0x110000:
+ raise
+ # If there are too many unique elements, chr() doesn't work.
+ # Assume this is the case and fall back to difflib.
+ return diff_sequences_difflib(seq1, seq2)
+
import Levenshtein
ret: List[Tuple[str, int, int, int, int]] = Levenshtein.opcodes(rem1, rem2)
@@ -2113,69 +2548,80 @@ def diff_lines(
return ret
+def diff_sameline(
+ old_line: Line, new_line: Line, config: Config, symbol_map: Dict[str, str]
+) -> Tuple[int, int, bool]:
+
+ old = old_line.scorable_line
+ new = new_line.scorable_line
+ if old == new:
+ return (0, 0, False)
+
+ num_stack_penalties = 0
+ num_regalloc_penalties = 0
+ has_symbol_mismatch = False
+
+ ignore_last_field = False
+ if config.score_stack_differences:
+ oldsp = re.search(config.arch.re_sprel, old)
+ newsp = re.search(config.arch.re_sprel, new)
+ if oldsp and newsp:
+ oldrel = int(oldsp.group(1) or "0", 0)
+ newrel = int(newsp.group(1) or "0", 0)
+ num_stack_penalties += abs(oldrel - newrel)
+ ignore_last_field = True
+
+ # Probably regalloc difference, or signed vs unsigned
+
+ # Compare each field in order
+ new_parts, old_parts = new.split(None, 1), old.split(None, 1)
+ newfields, oldfields = new_parts[1].split(","), old_parts[1].split(",")
+ if ignore_last_field:
+ newfields = newfields[:-1]
+ oldfields = oldfields[:-1]
+ else:
+ # If the last field has a parenthesis suffix, e.g. "0x38(r7)"
+ # we split that part out to make it a separate field
+ # however, we don't split if it has a preceding % macro, e.g. "%lo(.data)"
+ re_paren = re.compile(r"(? int:
# This logic is copied from `scorer.py` from the decomp permuter project
# https://github.com/simonlindholm/decomp-permuter/blob/main/src/scorer.py
- score = 0
+ num_stack_penalties = 0
+ num_regalloc_penalties = 0
+ num_reordering_penalties = 0
+ num_insertion_penalties = 0
+ num_deletion_penalties = 0
deletions = []
insertions = []
- def lo_hi_match(old: str, new: str) -> bool:
- # TODO: Make this arch-independent, like `imm_matches_everything()`
- old_lo = old.find("%lo")
- old_hi = old.find("%hi")
- new_lo = new.find("%lo")
- new_hi = new.find("%hi")
-
- if old_lo != -1 and new_lo != -1:
- old_idx = old_lo
- new_idx = new_lo
- elif old_hi != -1 and new_hi != -1:
- old_idx = old_hi
- new_idx = new_hi
- else:
- return False
-
- if old[:old_idx] != new[:new_idx]:
- return False
-
- old_inner = old[old_idx + 4 : -1]
- new_inner = new[new_idx + 4 : -1]
- return old_inner.startswith(".") or new_inner.startswith(".")
-
- def diff_sameline(old: str, new: str) -> None:
- nonlocal score
- if old == new:
- return
-
- if lo_hi_match(old, new):
- return
-
- ignore_last_field = False
- if config.score_stack_differences:
- oldsp = re.search(config.arch.re_sprel, old)
- newsp = re.search(config.arch.re_sprel, new)
- if oldsp and newsp:
- oldrel = int(oldsp.group(1) or "0", 0)
- newrel = int(newsp.group(1) or "0", 0)
- score += abs(oldrel - newrel) * config.penalty_stackdiff
- ignore_last_field = True
-
- # Probably regalloc difference, or signed vs unsigned
-
- # Compare each field in order
- newfields, oldfields = new.split(","), old.split(",")
- if ignore_last_field:
- newfields = newfields[:-1]
- oldfields = oldfields[:-1]
- for nf, of in zip(newfields, oldfields):
- if nf != of:
- score += config.penalty_regalloc
- # Penalize any extra fields
- score += abs(len(newfields) - len(oldfields)) * config.penalty_regalloc
-
def diff_insert(line: str) -> None:
# Reordering or totally different codegen.
# Defer this until later when we can tell.
@@ -2205,7 +2651,9 @@ def score_diff_lines(
if max_index is not None and index > max_index:
break
if line1 and line2 and line1.mnemonic == line2.mnemonic:
- diff_sameline(line1.scorable_line, line2.scorable_line)
+ sp, rp, _ = diff_sameline(line1, line2, config, symbol_map)
+ num_stack_penalties += sp
+ num_regalloc_penalties += rp
else:
if line1:
diff_delete(line1.scorable_line)
@@ -2218,13 +2666,17 @@ def score_diff_lines(
ins = insertions_co[item]
dels = deletions_co[item]
common = min(ins, dels)
- score += (
- (ins - common) * config.penalty_insertion
- + (dels - common) * config.penalty_deletion
- + config.penalty_reordering * common
- )
+ num_insertion_penalties += ins - common
+ num_deletion_penalties += dels - common
+ num_reordering_penalties += common
- return score
+ return (
+ num_stack_penalties * config.penalty_stackdiff
+ + num_regalloc_penalties * config.penalty_regalloc
+ + num_reordering_penalties * config.penalty_reordering
+ + num_insertion_penalties * config.penalty_insertion
+ + num_deletion_penalties * config.penalty_deletion
+ )
@dataclass(frozen=True)
@@ -2262,6 +2714,7 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
arch = config.arch
fmt = config.formatter
output: List[OutputLine] = []
+ symbol_map: Dict[str, str] = {}
sc1 = symbol_formatter("base-reg", 0)
sc2 = symbol_formatter("my-reg", 0)
@@ -2287,7 +2740,6 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
lines2 = trim_nops(lines2, arch)
diffed_lines = diff_lines(lines1, lines2, config.algorithm)
- max_score = len(lines1) * config.penalty_deletion
line_num_base = -1
line_num_offset = 0
@@ -2372,7 +2824,17 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
if normalize_imms(branchless1, arch) == normalize_imms(
branchless2, arch
):
- if imm_matches_everything(branchless2, arch):
+ (
+ stack_penalties,
+ regalloc_penalties,
+ has_symbol_mismatch,
+ ) = diff_sameline(line1, line2, config, symbol_map)
+
+ if (
+ regalloc_penalties == 0
+ and stack_penalties == 0
+ and not has_symbol_mismatch
+ ):
# ignore differences due to %lo(.rodata + ...) vs symbol
out1 = out1.reformat(BasicFormat.NONE)
out2 = out2.reformat(BasicFormat.NONE)
@@ -2397,8 +2859,19 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
else:
# reg differences and maybe imm as well
out1, out2 = format_fields(arch.re_reg, out1, out2, sc1, sc2)
- line_color1 = line_color2 = sym_color = BasicFormat.REGISTER
- line_prefix = "r"
+ cats = config.reg_categories
+ if cats and any(
+ cats.get(of.group()) != cats.get(nf.group())
+ for (of, nf) in zip(
+ out1.finditer(arch.re_reg), out2.finditer(arch.re_reg)
+ )
+ ):
+ sym_color = BasicFormat.REGISTER_CATEGORY
+ line_prefix = "R"
+ else:
+ sym_color = BasicFormat.REGISTER
+ line_prefix = "r"
+ line_color1 = line_color2 = sym_color
if same_target:
address_imm_fmt = BasicFormat.NONE
@@ -2523,8 +2996,10 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff:
)
)
- score = score_diff_lines(diffed_lines, config)
output = output[config.skip_lines :]
+
+ score = score_diff_lines(diffed_lines, config, symbol_map)
+ max_score = len(lines1) * config.penalty_deletion
return Diff(lines=output, score=score, max_score=max_score)
@@ -2586,24 +3061,12 @@ def compress_matching(
return ret
-def align_diffs(
- old_diff: Diff, new_diff: Diff, config: Config
-) -> Tuple[TableMetadata, List[Tuple[OutputLine, ...]]]:
- meta: TableMetadata
+def align_diffs(old_diff: Diff, new_diff: Diff, config: Config) -> TableData:
+ headers: Tuple[Text, ...]
diff_lines: List[Tuple[OutputLine, ...]]
padding = " " * 7 if config.show_line_numbers else " " * 2
- if config.threeway:
- meta = TableMetadata(
- headers=(
- Text("TARGET"),
- Text(f"{padding}CURRENT ({new_diff.score})"),
- Text(f"{padding}PREVIOUS ({old_diff.score})"),
- ),
- current_score=new_diff.score,
- max_score=new_diff.max_score,
- previous_score=old_diff.score,
- )
+ if config.diff_mode in (DiffMode.THREEWAY_PREV, DiffMode.THREEWAY_BASE):
old_chunks = chunk_diff_lines(old_diff.lines)
new_chunks = chunk_diff_lines(new_diff.lines)
diff_lines = []
@@ -2638,20 +3101,54 @@ def align_diffs(
diff_lines = [
(base, new, old if old != new else empty) for base, new, old in diff_lines
]
- else:
- meta = TableMetadata(
- headers=(
- Text("TARGET"),
- Text(f"{padding}CURRENT ({new_diff.score})"),
- ),
- current_score=new_diff.score,
- max_score=new_diff.max_score,
- previous_score=None,
+ headers = (
+ Text("TARGET"),
+ Text(f"{padding}CURRENT ({new_diff.score})"),
+ Text(f"{padding}PREVIOUS ({old_diff.score})"),
)
+ current_score = new_diff.score
+ max_score = new_diff.max_score
+ previous_score = old_diff.score
+ elif config.diff_mode in (DiffMode.SINGLE, DiffMode.SINGLE_BASE):
+ header = Text("BASE" if config.diff_mode == DiffMode.SINGLE_BASE else "CURRENT")
+ diff_lines = [(line,) for line in new_diff.lines]
+ headers = (header,)
+ # Scoring is disabled for view mode
+ current_score = 0
+ max_score = 0
+ previous_score = None
+ else:
diff_lines = [(line, line) for line in new_diff.lines]
+ headers = (
+ Text("TARGET"),
+ Text(f"{padding}CURRENT ({new_diff.score})"),
+ )
+ current_score = new_diff.score
+ max_score = new_diff.max_score
+ previous_score = None
if config.compress:
diff_lines = compress_matching(diff_lines, config.compress.context)
- return meta, diff_lines
+
+ def diff_line_to_table_line(line: Tuple[OutputLine, ...]) -> TableLine:
+ cells = [
+ (line[0].base or Text(), line[0].line1)
+ ]
+ for ol in line[1:]:
+ cells.append((ol.fmt2, ol.line2))
+
+ return TableLine(
+ key=line[0].key2,
+ is_data_ref=line[0].is_data_ref,
+ cells=tuple(cells),
+ )
+
+ return TableData(
+ headers=headers,
+ current_score=current_score,
+ max_score=max_score,
+ previous_score=previous_score,
+ lines=[diff_line_to_table_line(line) for line in diff_lines],
+ )
def debounced_fs_watch(
@@ -2754,17 +3251,26 @@ class Display:
return (self.emsg, self.emsg)
my_lines = process(self.mydump, self.config)
- diff_output = do_diff(self.base_lines, my_lines, self.config)
+
+ if self.config.diff_mode == DiffMode.SINGLE_BASE:
+ diff_output = do_diff(self.base_lines, self.base_lines, self.config)
+ elif self.config.diff_mode == DiffMode.SINGLE:
+ diff_output = do_diff(my_lines, my_lines, self.config)
+ else:
+ diff_output = do_diff(self.base_lines, my_lines, self.config)
+
last_diff_output = self.last_diff_output or diff_output
- if self.config.threeway != "base" or not self.last_diff_output:
+ if self.config.diff_mode != DiffMode.THREEWAY_BASE or not self.last_diff_output:
self.last_diff_output = diff_output
- meta, diff_lines = align_diffs(last_diff_output, diff_output, self.config)
- output = self.config.formatter.table(meta, diff_lines)
+ data = align_diffs(last_diff_output, diff_output, self.config)
+ output = self.config.formatter.table(data)
+
refresh_key = (
[line.key2 for line in diff_output.lines],
diff_output.score,
)
+
return (output, refresh_key)
def run_less(
@@ -2882,7 +3388,10 @@ def main() -> None:
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
- if config.threeway and not args.watch:
+ if (
+ config.diff_mode in (DiffMode.THREEWAY_BASE, DiffMode.THREEWAY_PREV)
+ and not args.watch
+ ):
fail("Threeway diffing requires -w.")
if args.diff_elf_symbol:
@@ -2910,8 +3419,10 @@ def main() -> None:
if args.base_asm is not None:
with open(args.base_asm) as f:
basedump = f.read()
- else:
+ elif config.diff_mode != DiffMode.SINGLE:
basedump = run_objdump(basecmd, config, project)
+ else:
+ basedump = ""
mydump = run_objdump(mycmd, config, project)
diff --git a/tools/asm-differ/diff_settings.py b/tools/asm-differ/diff_settings.py
index 183b96b75d..19d67d5487 100644
--- a/tools/asm-differ/diff_settings.py
+++ b/tools/asm-differ/diff_settings.py
@@ -5,7 +5,8 @@ def apply(config, args):
config["source_directories"] = ["."]
# config["show_line_numbers_default"] = True
# config["arch"] = "mips"
- # config["map_format"] = "gnu" # gnu or mw
- # config["mw_build_dir"] = "build/" # only needed for mw map format
+ # config["map_format"] = "gnu" # gnu, mw, ms
+ # config["build_dir"] = "build/" # only needed for mw and ms map format
+ # config["expected_dir"] = "expected/" # needed for -o
# config["makeflags"] = []
# config["objdump_executable"] = ""
diff --git a/tools/asm-differ/pyproject.toml b/tools/asm-differ/pyproject.toml
new file mode 100644
index 0000000000..7a112aee55
--- /dev/null
+++ b/tools/asm-differ/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "asm-differ"
+version = "0.1.0"
+description = ""
+authors = ["Simon Lindholm "]
+license = "UNLICENSE"
+readme = "README.md"
+packages = [{ include = "diff.py" }]
+
+[tool.poetry.dependencies]
+python = "^3.7"
+colorama = "^0.4.6"
+ansiwrap = "^0.8.4"
+watchdog = "^2.2.0"
+levenshtein = "^0.20.9"
+cxxfilt = "^0.3.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/tools/fado/.gitrepo b/tools/fado/.gitrepo
index cedec49302..042b904f19 100644
--- a/tools/fado/.gitrepo
+++ b/tools/fado/.gitrepo
@@ -6,7 +6,7 @@
[subrepo]
remote = git@github.com:EllipticEllipsis/fado.git
branch = master
- commit = f7efb10a9a65f27e9ccad7ce270234f20d386ac9
- parent = 90cfafec47fe4b73ad9009e9501e147e86025aa6
+ commit = 8d896ee97d565508755584803c409fc33bb0c953
+ parent = b51c9f4d22d6e7db63700c163418654431a2a61a
method = merge
cmdver = 0.4.3
diff --git a/tools/fado/lib/vc_vector/vc_vector.c b/tools/fado/lib/vc_vector/vc_vector.c
index 3f677c242a..426f1b0cbb 100644
--- a/tools/fado/lib/vc_vector/vc_vector.c
+++ b/tools/fado/lib/vc_vector/vc_vector.c
@@ -112,15 +112,15 @@ bool vc_vector_is_equals(vc_vector* vector1, vc_vector* vector2) {
return memcmp(vector1->data, vector2->data, size_vector1) == 0;
}
-float vc_vector_get_growth_factor() {
+float vc_vector_get_growth_factor(void) {
return GROWTH_FACTOR;
}
-size_t vc_vector_get_default_count_of_elements() {
+size_t vc_vector_get_default_count_of_elements(void) {
return DEFAULT_COUNT_OF_ELEMENTS;
}
-size_t vc_vector_struct_size() {
+size_t vc_vector_struct_size(void) {
return sizeof(vc_vector);
}
diff --git a/tools/fado/lib/vc_vector/vc_vector.h b/tools/fado/lib/vc_vector/vc_vector.h
index 2b1422c1a5..e57f832543 100644
--- a/tools/fado/lib/vc_vector/vc_vector.h
+++ b/tools/fado/lib/vc_vector/vc_vector.h
@@ -24,13 +24,13 @@ void vc_vector_release(vc_vector* vector);
bool vc_vector_is_equals(vc_vector* vector1, vc_vector* vector2);
// Returns constant value of the vector growth factor.
-float vc_vector_get_growth_factor();
+float vc_vector_get_growth_factor(void);
// Returns constant value of the vector default count of elements.
-size_t vc_vector_get_default_count_of_elements();
+size_t vc_vector_get_default_count_of_elements(void);
// Returns constant value of the vector struct size.
-size_t vc_vector_struct_size();
+size_t vc_vector_struct_size(void);
// ----------------------------------------------------------------------------
// Element access
diff --git a/tools/fado/src/main.c b/tools/fado/src/main.c
index c6af79025a..e6b7926d65 100644
--- a/tools/fado/src/main.c
+++ b/tools/fado/src/main.c
@@ -15,7 +15,7 @@
#include "version.inc"
-void PrintVersion() {
+void PrintVersion(void) {
printf("Fado (Fairy-Assisted relocations for Decompiled Overlays), version %s\n", versionNumber);
printf("Copyright (C) 2021 Elliptic Ellipsis\n");
printf("%s\n", credits);
@@ -88,7 +88,7 @@ static size_t posArgCount = ARRAY_COUNT(posArgInfo);
static size_t optCount = ARRAY_COUNT(optInfo);
static struct option longOptions[ARRAY_COUNT(optInfo)];
-void ConstructLongOpts() {
+void ConstructLongOpts(void) {
size_t i;
for (i = 0; i < optCount; i++) {
diff --git a/tools/graphovl/.gitrepo b/tools/graphovl/.gitrepo
index 064b8ee581..ca544b8858 100644
--- a/tools/graphovl/.gitrepo
+++ b/tools/graphovl/.gitrepo
@@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/AngheloAlf/graphovl.git
branch = master
- commit = f5fe93d75bb75ea4bea65f62c43f41f6a1e70679
- parent = 6c5a50ef95f351acc7c4c0455a347a94443adbe1
+ commit = dab4addae0c5db6274ab5daf7780c62c346120a1
+ parent = 5da1ae553569ddb0016d302cfac8c45d9cb22e73
method = merge
cmdver = 0.4.3
diff --git a/tools/graphovl/graphovl.py b/tools/graphovl/graphovl.py
index 28f0a361c0..7cb7af2c51 100755
--- a/tools/graphovl/graphovl.py
+++ b/tools/graphovl/graphovl.py
@@ -18,7 +18,7 @@ except ModuleNotFoundError:
script_dir = os.path.dirname(os.path.realpath(__file__))
config = ConfigParser()
-func_names = None
+func_names = list()
func_definitions = list()
line_numbers_of_functions = list()
@@ -72,6 +72,19 @@ def capture_setupaction_call_arg(content):
transitionList.append(func)
return transitionList
+setaction_regexpr = re.compile(r"_SetAction+\([^\)]*\)(\.[^\)]*\))?;")
+
+def capture_setaction_calls(content):
+ return [x.group() for x in re.finditer(setaction_regexpr, content)]
+
+def capture_setaction_call_arg(content):
+ transitionList = []
+ for x in re.finditer(setaction_regexpr, content):
+ func = x.group().split(",")[2].strip().split(");")[0].strip()
+ if func not in transitionList:
+ transitionList.append(func)
+ return transitionList
+
# Search for the function definition by supplied function name
def definition_by_name(content, name):
for definition in capture_definitions(content):
@@ -206,7 +219,11 @@ def addFunctionTransitionToGraph(dot, index: int, func_name: str, action_transit
fontColor = config.get("colors", "fontcolor")
bubbleColor = config.get("colors", "bubbleColor")
indexStr = str(index)
- funcIndex = str(index_of_func(action_transition))
+ try:
+ funcIndex = str(index_of_func(action_transition))
+ except ValueError:
+ print(f"Warning: function '{action_transition}' called by '{func_name}' was not found. Skiping...", file=sys.stderr)
+ return
dot.node(indexStr, func_name, fontcolor=fontColor, color=bubbleColor)
dot.node(funcIndex, action_transition, fontcolor=fontColor, color=bubbleColor)
@@ -230,7 +247,7 @@ def addCallNamesToGraph(dot, func_names: list, index: int, code_body: str, remov
if call in removeList:
continue
- if setupAction and "_SetupAction" in call:
+ if setupAction and ("_SetupAction" in call or "_SetAction" in call):
continue
seen.add(call)
@@ -342,10 +359,11 @@ def main():
actionIdentifier = "this->actionFunc"
setupAction = func_prefix + "_SetupAction" in func_names
+ setAction = func_prefix + "_SetAction" in func_names
arrayActorFunc = match_obj is not None
rawActorFunc = actionIdentifier in contents
- if not setupAction and not arrayActorFunc and not rawActorFunc:
+ if not setupAction and not setAction and not arrayActorFunc and not rawActorFunc:
print("No actor action-based structure found")
os._exit(1)
@@ -383,6 +401,8 @@ def main():
Create all edges for SetupAction-based actors
"""
transitionList = capture_setupaction_call_arg(code_body)
+ elif setAction:
+ transitionList = capture_setaction_call_arg(code_body)
elif arrayActorFunc:
"""
Create all edges for ActorFunc array-based actors