From 0c3a48ef94a0c29253c5f1997f6fb79efeade261 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Tue, 14 Mar 2023 20:51:47 -0300 Subject: [PATCH] Subrepos update (#1208) * git subrepo pull tools/asm-differ --force subrepo: subdir: "tools/asm-differ" merged: "ae408664a" upstream: origin: "https://github.com/simonlindholm/asm-differ" branch: "main" commit: "ae408664a" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * git subrepo pull (merge) tools/fado --force subrepo: subdir: "tools/fado" merged: "8d896ee97" upstream: origin: "git@github.com:EllipticEllipsis/fado.git" branch: "master" commit: "8d896ee97" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * git subrepo pull tools/graphovl --force subrepo: subdir: "tools/graphovl" merged: "dab4addae" upstream: origin: "https://github.com/AngheloAlf/graphovl.git" branch: "master" commit: "dab4addae" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * git subrepo pull tools/z64compress --force subrepo: subdir: "tools/z64compress" merged: "43035d97f" upstream: origin: "https://github.com/z64me/z64compress.git" branch: "main" commit: "43035d97f" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * git subrepo pull tools/ZAPD --force subrepo: subdir: "tools/ZAPD" merged: "23929ec93" upstream: origin: "https://github.com/zeldaret/ZAPD.git" branch: "master" commit: "23929ec93" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * Revert "git subrepo pull tools/z64compress --force" This reverts commit 2e487b5008c129031ab311a3a7bfd42adeb4916b. --- tools/ZAPD/.gitrepo | 6 +- tools/ZAPD/ZAPD/ZTextureAnimation.cpp | 2 + tools/asm-differ/.gitignore | 2 + tools/asm-differ/.gitrepo | 4 +- tools/asm-differ/.pre-commit-config.yaml | 2 +- tools/asm-differ/README.md | 2 +- tools/asm-differ/diff.py | 991 +++++++++++++++++------ tools/asm-differ/diff_settings.py | 5 +- tools/asm-differ/pyproject.toml | 21 + tools/fado/.gitrepo | 4 +- tools/fado/lib/vc_vector/vc_vector.c | 6 +- tools/fado/lib/vc_vector/vc_vector.h | 6 +- tools/fado/src/main.c | 4 +- tools/graphovl/.gitrepo | 4 +- tools/graphovl/graphovl.py | 28 +- 15 files changed, 822 insertions(+), 265 deletions(-) create mode 100644 tools/asm-differ/pyproject.toml diff --git a/tools/ZAPD/.gitrepo b/tools/ZAPD/.gitrepo index 3144cab206..131ccdc889 100644 --- a/tools/ZAPD/.gitrepo +++ b/tools/ZAPD/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/zeldaret/ZAPD.git branch = master - commit = 5786abbdd2d0fd14907e03575a0bd972c1fe9b28 - parent = e451a103ba5d14af3dd14acfa54a6b4999fd951f + commit = 23929ec9373d28cb298daad4ad7cb468e09c0a46 + parent = 2e487b5008c129031ab311a3a7bfd42adeb4916b method = merge - cmdver = 0.4.5 + cmdver = 0.4.3 diff --git a/tools/ZAPD/ZAPD/ZTextureAnimation.cpp b/tools/ZAPD/ZAPD/ZTextureAnimation.cpp index 698054fa87..0c55b26097 100644 --- a/tools/ZAPD/ZAPD/ZTextureAnimation.cpp +++ b/tools/ZAPD/ZAPD/ZTextureAnimation.cpp @@ -569,6 +569,7 @@ void ZTextureAnimation::DeclareReferences(const std::string& prefix) count = 2; } params = new TextureScrollingParams(parent); + params->type = entry.type; params->ExtractFromBinary(paramsOffset, count); break; @@ -582,6 +583,7 @@ void ZTextureAnimation::DeclareReferences(const std::string& prefix) case TextureAnimationParamsType::TextureCycle: params = new TextureCyclingParams(parent); + params->type = entry.type; params->ExtractFromBinary(paramsOffset); break; diff --git a/tools/asm-differ/.gitignore b/tools/asm-differ/.gitignore index eb176dc56b..a2b216fea6 100644 --- a/tools/asm-differ/.gitignore +++ b/tools/asm-differ/.gitignore @@ -1,2 +1,4 @@ .mypy_cache/ __pycache__/ +.vscode/ +poetry.lock diff --git a/tools/asm-differ/.gitrepo b/tools/asm-differ/.gitrepo index 8d0a6445dc..a23a9c1e4a 100644 --- a/tools/asm-differ/.gitrepo +++ b/tools/asm-differ/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/simonlindholm/asm-differ branch = main - commit = 1236288d1520335c2bfb672078fec65084d7cb5c - parent = 2c5690701a350c7e7c3d6252dff925ad65d59910 + commit = ae408664a89ea4dc70d005d0afc69ac26c938cbb + parent = c833969ea79ba31c3103e25e94ef88098c0287de method = merge cmdver = 0.4.3 diff --git a/tools/asm-differ/.pre-commit-config.yaml b/tools/asm-differ/.pre-commit-config.yaml index ccc4afc0ee..67ba03d124 100644 --- a/tools/asm-differ/.pre-commit-config.yaml +++ b/tools/asm-differ/.pre-commit-config.yaml @@ -1,5 +1,5 @@ repos: - repo: https://github.com/psf/black - rev: 22.1.0 + rev: 22.3.0 hooks: - id: black diff --git a/tools/asm-differ/README.md b/tools/asm-differ/README.md index 205310d394..1d0932c9ba 100644 --- a/tools/asm-differ/README.md +++ b/tools/asm-differ/README.md @@ -7,7 +7,7 @@ Nice differ for assembly code. Currently supports MIPS, PPC, AArch64, and ARM32; ## Dependencies - Python >= 3.6 -- `python3 -m pip install --user colorama watchdog python-Levenshtein` (also `dataclasses` if on 3.6) +- `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` (also `dataclasses` if on 3.6) ## Usage diff --git a/tools/asm-differ/diff.py b/tools/asm-differ/diff.py index 4b5bc892ee..230dbaed70 100755 --- a/tools/asm-differ/diff.py +++ b/tools/asm-differ/diff.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # PYTHON_ARGCOMPLETE_OK import argparse +import enum import sys from typing import ( Any, @@ -28,6 +29,14 @@ def static_assert_unreachable(x: NoReturn) -> NoReturn: raise Exception("Unreachable! " + repr(x)) +class DiffMode(enum.Enum): + SINGLE = "single" + SINGLE_BASE = "single_base" + NORMAL = "normal" + THREEWAY_PREV = "3prev" + THREEWAY_BASE = "3base" + + # ==== COMMAND-LINE ==== if __name__ == "__main__": @@ -201,11 +210,11 @@ if __name__ == "__main__": ) parser.add_argument( "-s", - "--stop-jr-ra", - dest="stop_jrra", + "--stop-at-ret", + dest="stop_at_ret", action="store_true", - help="""Stop disassembling at the first 'jr ra'. Some functions have - multiple return points, so use with care!""", + help="""Stop disassembling at the first return instruction. + Some functions have multiple return points, so use with care!""", ) parser.add_argument( "-i", @@ -228,6 +237,13 @@ if __name__ == "__main__": action="store_false", help="Don't visualize branches/branch targets.", ) + parser.add_argument( + "-R", + "--no-show-rodata-refs", + dest="show_rodata_refs", + action="store_false", + help="Don't show .rodata -> .text references (typically from jump tables).", + ) parser.add_argument( "-S", "--base-shift", @@ -248,21 +264,37 @@ if __name__ == "__main__": help="""Automatically update when source/object files change. Recommended in combination with -m.""", ) + parser.add_argument( + "-0", + "--diff_mode=single_base", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE_BASE, + help="""View the base asm only (not a diff).""", + ) + parser.add_argument( + "-1", + "--diff_mode=single", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE, + help="""View the current asm only (not a diff).""", + ) parser.add_argument( "-3", "--threeway=prev", - dest="threeway", + dest="diff_mode", action="store_const", - const="prev", + const=DiffMode.THREEWAY_PREV, help="""Show a three-way diff between target asm, current asm, and asm prior to -w rebuild. Requires -w.""", ) parser.add_argument( "-b", "--threeway=base", - dest="threeway", + dest="diff_mode", action="store_const", - const="base", + const=DiffMode.THREEWAY_BASE, help="""Show a three-way diff between target asm, current asm, and asm when diff.py was started. Requires -w.""", ) @@ -336,11 +368,9 @@ if __name__ == "__main__": # (We do imports late to optimize auto-complete performance.) import abc -import ast from collections import Counter, defaultdict from dataclasses import asdict, dataclass, field, replace import difflib -import enum import html import itertools import json @@ -357,7 +387,7 @@ import traceback MISSING_PREREQUISITES = ( "Missing prerequisite python module {}. " - "Run `python3 -m pip install --user colorama watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." + "Run `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." ) try: @@ -376,7 +406,8 @@ class ProjectSettings: objdump_flags: List[str] build_command: List[str] map_format: str - mw_build_dir: str + build_dir: str + ms_map_address_offset: int baseimg: Optional[str] myimg: Optional[str] mapfile: Optional[str] @@ -384,6 +415,8 @@ class ProjectSettings: source_extensions: List[str] show_line_numbers_default: bool disassemble_all: bool + reg_categories: Dict[str, int] + expected_dir: str @dataclass @@ -408,17 +441,19 @@ class Config: # Display options formatter: "Formatter" - threeway: Optional[str] + diff_mode: DiffMode base_shift: int skip_lines: int compress: Optional[Compress] + show_rodata_refs: bool show_branches: bool show_line_numbers: bool show_source: bool - stop_jrra: bool + stop_at_ret: bool ignore_large_imms: bool ignore_addr_diffs: bool algorithm: str + reg_categories: Dict[str, int] # Score options score_stack_differences = True @@ -444,10 +479,13 @@ def create_project_settings(settings: Dict[str, Any]) -> ProjectSettings: ), objdump_executable=get_objdump_executable(settings.get("objdump_executable")), objdump_flags=settings.get("objdump_flags", []), + expected_dir=settings.get("expected_dir", "expected/"), map_format=settings.get("map_format", "gnu"), - mw_build_dir=settings.get("mw_build_dir", "build/"), + ms_map_address_offset=settings.get("ms_map_address_offset", 0), + build_dir=settings.get("build_dir", settings.get("mw_build_dir", "build/")), show_line_numbers_default=settings.get("show_line_numbers_default", True), disassemble_all=settings.get("disassemble_all", False), + reg_categories=settings.get("reg_categories", {}), ) @@ -493,19 +531,21 @@ def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config: max_function_size_bytes=args.max_lines * 4, # Display options formatter=formatter, - threeway=args.threeway, + diff_mode=args.diff_mode or DiffMode.NORMAL, base_shift=eval_int( args.base_shift, "Failed to parse --base-shift (-S) argument as an integer." ), skip_lines=args.skip_lines, compress=compress, + show_rodata_refs=args.show_rodata_refs, show_branches=args.show_branches, show_line_numbers=show_line_numbers, show_source=args.show_source or args.source_old_binutils, - stop_jrra=args.stop_jrra, + stop_at_ret=args.stop_at_ret, ignore_large_imms=args.ignore_large_imms, ignore_addr_diffs=args.ignore_addr_diffs, algorithm=args.algorithm, + reg_categories=project.reg_categories, ) @@ -563,6 +603,7 @@ class BasicFormat(enum.Enum): IMMEDIATE = enum.auto() STACK = enum.auto() REGISTER = enum.auto() + REGISTER_CATEGORY = enum.auto() DELAY_SLOT = enum.auto() DIFF_CHANGE = enum.auto() DIFF_ADD = enum.auto() @@ -662,11 +703,19 @@ class Text: @dataclass -class TableMetadata: +class TableLine: + key: Optional[str] + is_data_ref: bool + cells: Tuple[Tuple[Text, Optional["Line"]], ...] + + +@dataclass +class TableData: headers: Tuple[Text, ...] current_score: int max_score: int previous_score: Optional[int] + lines: List[TableLine] class Formatter(abc.ABC): @@ -676,7 +725,7 @@ class Formatter(abc.ABC): ... @abc.abstractmethod - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: + def table(self, data: TableData) -> str: """Format a multi-column table with metadata""" ... @@ -684,8 +733,8 @@ class Formatter(abc.ABC): return "".join(self.apply_format(chunk, f) for chunk, f in text.segments) @staticmethod - def outputline_texts(lines: Tuple["OutputLine", ...]) -> Tuple[Text, ...]: - return tuple([lines[0].base or Text()] + [line.fmt2 for line in lines[1:]]) + def outputline_texts(line: TableLine) -> Tuple[Text, ...]: + return tuple(cell[0] for cell in line.cells) @dataclass @@ -695,8 +744,8 @@ class PlainFormatter(Formatter): def apply_format(self, chunk: str, f: Format) -> str: return chunk - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - rows = [meta.headers] + [self.outputline_texts(ls) for ls in lines] + def table(self, data: TableData) -> str: + rows = [data.headers] + [self.outputline_texts(line) for line in data.lines] return "\n".join( "".join(self.apply(x.ljust(self.column_width)) for x in row) for row in rows ) @@ -709,12 +758,14 @@ class AnsiFormatter(Formatter): STYLE_UNDERLINE = "\x1b[4m" STYLE_NO_UNDERLINE = "\x1b[24m" STYLE_INVERT = "\x1b[7m" + STYLE_RESET = "\x1b[0m" BASIC_ANSI_CODES = { BasicFormat.NONE: "", BasicFormat.IMMEDIATE: Fore.LIGHTBLUE_EX, BasicFormat.STACK: Fore.YELLOW, BasicFormat.REGISTER: Fore.YELLOW, + BasicFormat.REGISTER_CATEGORY: Fore.LIGHTYELLOW_EX, BasicFormat.DELAY_SLOT: Fore.LIGHTBLACK_EX, BasicFormat.DIFF_CHANGE: Fore.LIGHTBLUE_EX, BasicFormat.DIFF_ADD: Fore.GREEN, @@ -761,14 +812,19 @@ class AnsiFormatter(Formatter): static_assert_unreachable(f) return f"{ansi_code}{chunk}{undo_ansi_code}" - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - rows = [(meta.headers, False)] + [ - (self.outputline_texts(line), line[1].is_data_ref) for line in lines + def table(self, data: TableData) -> str: + rows = [(data.headers, False)] + [ + ( + self.outputline_texts(line), + line.is_data_ref, + ) + for line in data.lines ] return "\n".join( "".join( (self.STYLE_INVERT if is_data_ref else "") + self.apply(x.ljust(self.column_width)) + + (self.STYLE_RESET if is_data_ref else "") for x in row ) for (row, is_data_ref) in rows @@ -794,7 +850,7 @@ class HtmlFormatter(Formatter): static_assert_unreachable(f) return f"{chunk}" - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: + def table(self, data: TableData) -> str: def table_row(line: Tuple[Text, ...], is_data_ref: bool, cell_el: str) -> str: tr_attrs = " class='data-ref'" if is_data_ref else "" output_row = f" " @@ -806,12 +862,12 @@ class HtmlFormatter(Formatter): output = "\n" output += " \n" - output += table_row(meta.headers, False, "th") + output += table_row(data.headers, False, "th") output += " \n" output += " \n" output += "".join( - table_row(self.outputline_texts(line), line[1].is_data_ref, "td") - for line in lines + table_row(self.outputline_texts(line), line.is_data_ref, "td") + for line in data.lines ) output += " \n" output += "
\n" @@ -826,7 +882,7 @@ class JsonFormatter(Formatter): # This method is unused by this formatter return NotImplemented - def table(self, meta: TableMetadata, rows: List[Tuple["OutputLine", ...]]) -> str: + def table(self, data: TableData) -> str: def serialize_format(s: str, f: Format) -> Dict[str, Any]: if f == BasicFormat.NONE: return {"text": s} @@ -844,29 +900,25 @@ class JsonFormatter(Formatter): return [] return [serialize_format(s, f) for s, f in text.segments] - is_threeway = len(meta.headers) == 3 - output: Dict[str, Any] = {} output["arch_str"] = self.arch_str output["header"] = { name: serialize(h) - for h, name in zip(meta.headers, ("base", "current", "previous")) + for h, name in zip(data.headers, ("base", "current", "previous")) } - output["current_score"] = meta.current_score - output["max_score"] = meta.max_score - if meta.previous_score is not None: - output["previous_score"] = meta.previous_score + output["current_score"] = data.current_score + output["max_score"] = data.max_score + if data.previous_score is not None: + output["previous_score"] = data.previous_score output_rows: List[Dict[str, Any]] = [] - for row in rows: + for row in data.lines: output_row: Dict[str, Any] = {} - output_row["key"] = row[0].key2 - output_row["is_data_ref"] = row[1].is_data_ref - iters = [ - ("base", row[0].base, row[0].line1), - ("current", row[1].fmt2, row[1].line2), + output_row["key"] = row.key + output_row["is_data_ref"] = row.is_data_ref + iters: List[Tuple[str, Text, Optional[Line]]] = [ + (label, *cell) + for label, cell in zip(("base", "current", "previous"), row.cells) ] - if is_threeway: - iters.append(("previous", row[2].fmt2, row[2].line2)) if all(line is None for _, _, line in iters): # Skip rows that were only for displaying source code continue @@ -935,10 +987,41 @@ def symbol_formatter(group: str, base_index: int) -> FormatFunction: ObjdumpCommand = Tuple[List[str], str, Optional[str]] +# eval_expr adapted from https://stackoverflow.com/a/9558001 + +import ast +import operator as op + +# supported operators +operators: Dict[Type[Union[ast.operator, ast.unaryop]], Any] = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.truediv, + ast.Pow: op.pow, + ast.BitXor: op.xor, + ast.USub: op.neg, +} + + +def eval_expr(expr: str) -> Any: + return eval_(ast.parse(expr, mode="eval").body) + + +def eval_(node: ast.AST) -> Any: + if isinstance(node, ast.Num): # + return node.n + elif isinstance(node, ast.BinOp): # + return operators[type(node.op)](eval_(node.left), eval_(node.right)) + elif isinstance(node, ast.UnaryOp): # e.g., -1 + return operators[type(node.op)](eval_(node.operand)) + else: + raise TypeError(node) + def maybe_eval_int(expr: str) -> Optional[int]: try: - ret = ast.literal_eval(expr) + ret = eval_expr(expr) if not isinstance(ret, int): raise Exception("not an integer") return ret @@ -1056,7 +1139,7 @@ def preprocess_objdump_out( out = out[out.find("\n") + 1 :] out = out.rstrip("\n") - if obj_data: + if obj_data and config.show_rodata_refs: out = ( serialize_rodata_references(parse_elf_rodata_references(obj_data, config)) + out @@ -1065,8 +1148,27 @@ def preprocess_objdump_out( return out +def search_build_objects(objname: str, project: ProjectSettings) -> Optional[str]: + objfiles = [ + os.path.join(dirpath, f) + for dirpath, _, filenames in os.walk(project.build_dir) + for f in filenames + if f == objname + ] + if len(objfiles) > 1: + all_objects = "\n".join(objfiles) + fail( + f"Found multiple objects of the same name {objname} in {project.build_dir}, " + f"cannot determine which to diff against: \n{all_objects}" + ) + if len(objfiles) == 1: + return objfiles[0] + + return None + + def search_map_file( - fn_name: str, project: ProjectSettings, config: Config + fn_name: str, project: ProjectSettings, config: Config, *, for_binary: bool ) -> Tuple[Optional[str], Optional[int]]: if not project.mapfile: fail(f"No map file configured; cannot find function {fn_name}.") @@ -1078,6 +1180,12 @@ def search_map_file( fail(f"Failed to open map file {project.mapfile} for reading.") if project.map_format == "gnu": + if for_binary and "load address" not in contents: + fail( + 'Failed to find "load address" in map file. Maybe you need to add\n' + '"export LANG := C" to your Makefile to avoid localized output?' + ) + lines = contents.split("\n") try: @@ -1093,10 +1201,12 @@ def search_map_file( ram = int(tokens[1], 0) rom = int(tokens[5], 0) ram_to_rom = rom - ram - if line.endswith(" " + fn_name): + if line.endswith(" " + fn_name) or f" {fn_name} = 0x" in line: ram = int(line.split()[0], 0) - if cur_objfile is not None and ram_to_rom is not None: - cands.append((cur_objfile, ram + ram_to_rom)) + if (for_binary and ram_to_rom is not None) or ( + not for_binary and cur_objfile is not None + ): + cands.append((cur_objfile, ram + (ram_to_rom or 0))) last_line = line except Exception as e: traceback.print_exc() @@ -1108,16 +1218,14 @@ def search_map_file( return cands[0] elif project.map_format == "mw": find = re.findall( - re.compile( - # ram elf rom - r" \S+ \S+ (\S+) (\S+) . " - + re.escape(fn_name) - + r"(?: \(entry of " - + re.escape(config.diff_section) - + r"\))? \t" - # object name - + "(\S+)" - ), + # ram elf rom alignment + r" \S+ \S+ (\S+) (\S+) +\S+ " + + re.escape(fn_name) + + r"(?: \(entry of " + + re.escape(config.diff_section) + + r"\))? \t" + # object name + + "(\S+)", contents, ) if len(find) > 1: @@ -1125,27 +1233,56 @@ def search_map_file( if len(find) == 1: rom = int(find[0][1], 16) objname = find[0][2] - # The metrowerks linker map format does not contain the full object path, - # so we must complete it manually. - objfiles = [ - os.path.join(dirpath, f) - for dirpath, _, filenames in os.walk(project.mw_build_dir) - for f in filenames - if f == objname - ] - if len(objfiles) > 1: - all_objects = "\n".join(objfiles) - fail( - f"Found multiple objects of the same name {objname} in {project.mw_build_dir}, " - f"cannot determine which to diff against: \n{all_objects}" - ) - if len(objfiles) == 1: - objfile = objfiles[0] - # TODO Currently the ram-rom conversion only works for diffing ELF - # executables, but it would likely be more convenient to diff DOLs. - # At this time it is recommended to always use -o when running the diff - # script as this mode does not make use of the ram-rom conversion. + objfile = search_build_objects(objname, project) + + # TODO Currently the ram-rom conversion only works for diffing ELF + # executables, but it would likely be more convenient to diff DOLs. + # At this time it is recommended to always use -o when running the diff + # script as this mode does not make use of the ram-rom conversion. + if objfile is not None: return objfile, rom + elif project.map_format == "ms": + load_address_find = re.search( + r"Preferred load address is ([0-9a-f]+)", + contents, + ) + if not load_address_find: + fail(f"Couldn't find module load address in map file.") + load_address = int(load_address_find.group(1), 16) + + diff_segment_find = re.search( + r"([0-9a-f]+):[0-9a-f]+ [0-9a-f]+H " + re.escape(config.diff_section), + contents, + ) + if not diff_segment_find: + fail(f"Couldn't find segment for section in map file.") + diff_segment = diff_segment_find.group(1) + + find = re.findall( + r" (?:" + + re.escape(diff_segment) + + r")\S+\s+(?:" + + re.escape(fn_name) + + r")\s+\S+ ... \S+", + contents, + ) + if len(find) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(find) == 1: + names_find = re.search(r"(\S+) ... (\S+)", find[0]) + assert names_find is not None + fileofs = ( + int(names_find.group(1), 16) + - load_address + + project.ms_map_address_offset + ) + if for_binary: + return None, fileofs + + objname = names_find.group(2) + objfile = search_build_objects(objname, project) + if objfile is not None: + return objfile, fileofs else: fail(f"Linker map format {project.map_format} unrecognised.") return None, None @@ -1161,6 +1298,8 @@ def parse_elf_rodata_references( SHT_SYMTAB = 2 SHT_REL = 9 SHT_RELA = 4 + R_MIPS_32 = 2 + R_MIPS_GPREL32 = 12 is_32bit = e_ident[4] == 1 is_little_endian = e_ident[5] == 1 @@ -1234,7 +1373,7 @@ def parse_elf_rodata_references( # Skip section_name -> section_name references continue sec_name = sec_names[s.sh_info].decode("latin1") - if sec_name != ".rodata": + if sec_name not in (".rodata", ".late_rodata"): continue sec_base = sections[s.sh_info].sh_offset for i in range(0, s.sh_size, s.sh_entsize): @@ -1259,7 +1398,7 @@ def parse_elf_rodata_references( ) if st_shndx == text_section: if s.sh_type == SHT_REL: - if e_machine == 8 and r_type == 2: # R_MIPS_32 + if e_machine == 8 and r_type in (R_MIPS_32, R_MIPS_GPREL32): (r_addend,) = read("I", sec_base + r_offset) else: continue @@ -1325,7 +1464,7 @@ def dump_objfile( objfile = config.objfile if not objfile: - objfile, _ = search_map_file(start, project, config) + objfile, _ = search_map_file(start, project, config, for_binary=False) if not objfile: fail("Not able to find .o file for function.") @@ -1336,8 +1475,8 @@ def dump_objfile( if not os.path.isfile(objfile): fail(f"Not able to find .o file for function: {objfile} is not a file.") - refobjfile = "expected/" + objfile - if not os.path.isfile(refobjfile): + refobjfile = os.path.join(project.expected_dir, objfile) + if config.diff_mode != DiffMode.SINGLE and not os.path.isfile(refobjfile): fail(f'Please ensure an OK .o file exists at "{refobjfile}".') if project.disassemble_all: @@ -1362,7 +1501,7 @@ def dump_binary( run_make(project.myimg, project) start_addr = maybe_eval_int(start) if start_addr is None: - _, start_addr = search_map_file(start, project, config) + _, start_addr = search_map_file(start, project, config, for_binary=True) if start_addr is None: fail("Not able to find function in map file.") if end is not None: @@ -1391,8 +1530,13 @@ class AsmProcessor: def __init__(self, config: Config) -> None: self.config = config - def process_reloc(self, row: str, prev: str) -> str: - return prev + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str] + ) -> Tuple[str, str]: + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + return prev, None def normalize(self, mnemonic: str, row: str) -> str: """This should be called exactly once for each line.""" @@ -1408,28 +1552,24 @@ class AsmProcessor: def post_process(self, lines: List["Line"]) -> None: return + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return False + class AsmProcessorMIPS(AsmProcessor): - def process_reloc(self, row: str, prev: str) -> str: + def __init__(self, config: Config) -> None: + super().__init__(config) + self.seen_jr_ra = False + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: arch = self.config.arch if "R_MIPS_NONE" in row or "R_MIPS_JALR" in row: # GNU as emits no-op relocations immediately after real ones when # assembling with -mabi=64. Return without trying to parse 'imm' as an # integer. - return prev + return prev, None before, imm, after = parse_relocated_line(prev) - repl = row.split()[-1] - if imm != "0": - # MIPS uses relocations with addends embedded in the code as immediates. - # If there is an immediate, show it as part of the relocation. Ideally - # we'd show this addend in both %lo/%hi, but annoyingly objdump's output - # doesn't include enough information to pair up %lo's and %hi's... - # TODO: handle unambiguous cases where all addends for a symbol are the - # same, or show "+???". - mnemonic = prev.split()[0] - if mnemonic in arch.instructions_with_address_immediates: - imm = hex(int(imm, 16)) - repl += ("" if imm.startswith("-") else "+") + imm + repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch) if "R_MIPS_LO16" in row: repl = f"%lo({repl})" elif "R_MIPS_HI16" in row: @@ -1452,20 +1592,57 @@ class AsmProcessorMIPS(AsmProcessor): repl = f"%call16({repl})" else: assert False, f"unknown relocation type '{row}' for line '{prev}'" - return before + repl + after + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + if self.seen_jr_ra: + return True + if mnemonic == "jr" and args == "ra": + self.seen_jr_ra = True + return False class AsmProcessorPPC(AsmProcessor): - def process_reloc(self, row: str, prev: str) -> str: + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str] + ) -> Tuple[str, str]: + + if next_row and "R_PPC_EMB_SDA21" in next_row: + # With sda21 relocs, the linker transforms `r0` into `r2`/`r13`, and + # we may encounter this in either pre-transformed or post-transformed + # versions depending on if the .o file comes from compiler output or + # from disassembly. Normalize, to make sure both forms are treated as + # equivalent. + + args = args.replace("(r2)", "(0)") + args = args.replace("(r13)", "(0)") + args = args.replace(",r2,", ",0,") + args = args.replace(",r13,", ",0,") + + # We want to convert li and lis with an sda21 reloc, + # because the r0 to r2/r13 transformation results in + # turning an li/lis into an addi/addis with r2/r13 arg + # our preprocessing normalizes all versions to addi with a 0 arg + if mnemonic in {"li", "lis"}: + mnemonic = mnemonic.replace("li", "addi") + args_parts = args.split(",") + args = args_parts[0] + ",0," + args_parts[1] + + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: arch = self.config.arch assert any( - r in row for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21"] + r in row + for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21", "R_PPC_REL14"] ), f"unknown relocation type '{row}' for line '{prev}'" before, imm, after = parse_relocated_line(prev) repl = row.split()[-1] if "R_PPC_REL24" in row: # function calls pass + if "R_PPC_REL14" in row: + pass elif "R_PPC_ADDR16_HI" in row: # absolute hi of addr repl = f"{repl}@h" @@ -1483,17 +1660,30 @@ class AsmProcessorPPC(AsmProcessor): if int(repl.split("+")[1], 16) > 0x70000000: repl = repl.split("+")[0] elif "R_PPC_EMB_SDA21" in row: - # small data area - pass - return before + repl + after + # sda21 relocations; r2/r13 --> 0 swaps are performed in pre_process + repl = f"{repl}@sda21" + + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "blr" class AsmProcessorARM32(AsmProcessor): - def process_reloc(self, row: str, prev: str) -> str: + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: arch = self.config.arch + if "R_ARM_V4BX" in row: + # R_ARM_V4BX converts "bx " to "mov pc," for some targets. + # Ignore for now. + return prev, None + if "R_ARM_ABS32" in row and not prev.startswith(".word"): + # Don't crash on R_ARM_ABS32 relocations incorrectly applied to code. + # (We may want to do something more fancy here that actually shows the + # related symbol, but this serves as a stop-gap.) + return prev, None before, imm, after = parse_relocated_line(prev) - repl = row.split()[-1] - return before + repl + after + repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch) + return before + repl + after, repl def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: if self.config.ignore_addr_diffs: @@ -1577,6 +1767,51 @@ class AsmProcessorAArch64(AsmProcessor): return row +class AsmProcessorI686(AsmProcessor): + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + + addr_imm = re.search(r"(? bool: + return mnemonic == "ret" + + @dataclass class ArchSettings: name: str @@ -1701,6 +1936,76 @@ PPC_BRANCH_INSTRUCTIONS = { "bgt-", } +I686_BRANCH_INSTRUCTIONS = { + "call", + "jmp", + "ljmp", + "ja", + "jae", + "jb", + "jbe", + "jc", + "jcxz", + "jecxz", + "jrcxz", + "je", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", + "ja", + "jae", + "jb", + "jbe", + "jc", + "je", + "jz", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", +} + MIPS_SETTINGS = ArchSettings( name="mips", re_int=re.compile(r"[0-9]+"), @@ -1713,18 +2018,26 @@ MIPS_SETTINGS = ArchSettings( re_reg=re.compile(r"\$?\b([astv][0-9]|at|f[astv]?[0-9]+f?|kt?[01]|fp|ra|zero)\b"), re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), - re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi|got|gp_rel|call16)\([^)]*\)" + ), re_reloc=re.compile(r"R_MIPS_"), arch_flags=["-m", "mips:4300"], branch_likely_instructions=MIPS_BRANCH_LIKELY_INSTRUCTIONS, branch_instructions=MIPS_BRANCH_INSTRUCTIONS, - instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"jal", "j"}), + instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal"}), delay_slot_instructions=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal", "jr", "jalr"}), proc=AsmProcessorMIPS, ) MIPSEL_SETTINGS = replace(MIPS_SETTINGS, name="mipsel", big_endian=False) +MIPSEE_SETTINGS = replace( + MIPSEL_SETTINGS, name="mipsee", arch_flags=["-m", "mips:5900"] +) + +MIPS_ARCH_NAMES = {"mips", "mipsel", "mipsee"} + ARM32_SETTINGS = ArchSettings( name="arm32", re_int=re.compile(r"[0-9]+"), @@ -1773,23 +2086,56 @@ PPC_SETTINGS = ArchSettings( name="ppc", re_int=re.compile(r"[0-9]+"), re_comment=re.compile(r"(<.*>|//.*$)"), - re_reg=re.compile(r"\$?\b([rf][0-9]+)\b"), + # r1 not included + re_reg=re.compile(r"\$?\b([rf](?:[02-9]|[1-9][0-9]+)|f1)\b"), re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"), re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), - re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1)|[^@]*@(ha|h|lo)"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1)|[^ \t,]+@(l|ha|h|sda21)" + ), re_reloc=re.compile(r"R_PPC_"), + arch_flags=["-m", "powerpc", "-M", "broadway"], branch_instructions=PPC_BRANCH_INSTRUCTIONS, instructions_with_address_immediates=PPC_BRANCH_INSTRUCTIONS.union({"bl"}), proc=AsmProcessorPPC, ) +I686_SETTINGS = ArchSettings( + name="i686", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"<.*>"), + # Includes: + # - (e)a-d(x,l,h) + # - (e)s,d,b(i,p)(l) + # - cr0-7 + # - x87 st + # - MMX, SSE vector registers + # - cursed registers: eal ebl ebh edl edh... + re_reg=re.compile( + r"\%?\b(e?(([sd]i|[sb]p)l?|[abcd][xhl])|[cdesfg]s|cr[0-7]|x?mm[0-7]|st)\b" + ), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%ebp|%esi)\))"), + re_imm=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)"), + re_reloc=re.compile(r"R_386_"), + # The x86 architecture has a variable instruction length. The raw bytes of + # an instruction as displayed by objdump can line wrap if it's long enough. + # This destroys the objdump output processor logic, so we avoid this. + arch_flags=["-m", "i386", "--no-show-raw-insn"], + branch_instructions=I686_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=I686_BRANCH_INSTRUCTIONS.union({"mov"}), + proc=AsmProcessorI686, +) + ARCH_SETTINGS = [ MIPS_SETTINGS, MIPSEL_SETTINGS, + MIPSEE_SETTINGS, ARM32_SETTINGS, ARMEL_SETTINGS, AARCH64_SETTINGS, PPC_SETTINGS, + I686_SETTINGS, ] @@ -1822,11 +2168,31 @@ def parse_relocated_line(line: str) -> Tuple[str, str, str]: imm, after = after, "" else: imm, after = after[:ind2], after[ind2:] - if imm == "0x0": - imm = "0" return before, imm, after +def reloc_addend_from_imm(imm: str, before: str, arch: ArchSettings) -> str: + """For architectures like MIPS where relocations have addends embedded in + the code as immediates, convert such an immediate into an addition/ + subtraction that can occur just after the symbol.""" + # TODO this is incorrect for MIPS %lo/%hi which need to be paired up + # and combined. In practice, this means we only get symbol offsets within + # %lo, while %hi just shows the symbol. Unfortunately, objdump's output + # loses relocation order, so we cannot do this without parsing ELF relocs + # ourselves... + mnemonic = before.split()[0] + if mnemonic in arch.instructions_with_address_immediates: + addend = int(imm, 16) + else: + addend = int(imm, 0) + if addend == 0: + return "" + elif addend < 0: + return hex(addend) + else: + return "+" + hex(addend) + + def pad_mnemonic(line: str) -> str: if "\t" not in line: return line @@ -1841,6 +2207,7 @@ class Line: original: str normalized_original: str scorable_line: str + symbol: Optional[str] = None line_num: Optional[int] = None branch_target: Optional[int] = None data_pool_addr: Optional[int] = None @@ -1896,10 +2263,8 @@ def process(dump: str, config: Config) -> List[Line]: if not re.match(r"^\s+[0-9a-f]+:\s+", row): # This regex is conservative, and assumes the file path does not contain "weird" - # characters like colons, tabs, or angle brackets. - if re.match( - r"^[^ \t<>:][^\t<>:]*:[0-9]+( \(discriminator [0-9]+\))?$", row - ): + # characters like tabs or angle brackets. + if re.match(r"^[^ \t<>][^\t<>]*:[0-9]+( \(discriminator [0-9]+\))?$", row): source_filename, _, tail = row.rpartition(":") source_line_num = int(tail.partition(" ")[0]) source_lines.append(row) @@ -1919,9 +2284,14 @@ def process(dump: str, config: Config) -> List[Line]: line_num_str = row.split(":")[0] row = row.rstrip() tabs = row.split("\t") - row = "\t".join(tabs[2:]) line_num = eval_line_num(line_num_str.strip()) + # TODO: use --no-show-raw-insn for all arches + if arch.name == "i686": + row = "\t".join(tabs[1:]) + else: + row = "\t".join(tabs[2:]) + if line_num in data_refs: refs = data_refs[line_num] ref_str = "; ".join( @@ -1943,7 +2313,13 @@ def process(dump: str, config: Config) -> List[Line]: else: # powerpc-eabi-objdump doesn't use tabs row_parts = [part.lstrip() for part in row.split(" ", 1)] + mnemonic = row_parts[0].strip() + args = row_parts[1].strip() if len(row_parts) >= 2 else "" + + next_line = lines[i] if i < len(lines) else None + mnemonic, args = processor.pre_process(mnemonic, args, next_line) + row = mnemonic + "\t" + args.replace("\t", " ") addr = "" if mnemonic in arch.instructions_with_address_immediates: @@ -1960,14 +2336,28 @@ def process(dump: str, config: Config) -> List[Line]: # immediates. original = row + symbol = None while i < len(lines): reloc_row = lines[i] if re.search(arch.re_reloc, reloc_row): - original = processor.process_reloc(reloc_row, original) + original, reloc_symbol = processor.process_reloc(reloc_row, original) + if reloc_symbol is not None: + symbol = reloc_symbol else: break i += 1 + is_text_relative_j = False + if ( + arch.name in MIPS_ARCH_NAMES + and mnemonic == "j" + and symbol is not None + and symbol.startswith(".text") + ): + symbol = None + original = row + is_text_relative_j = True + normalized_original = processor.normalize(mnemonic, original) scorable_line = normalized_original @@ -1993,8 +2383,16 @@ def process(dump: str, config: Config) -> List[Line]: row = normalize_imms(row, arch) branch_target = None - if mnemonic in arch.branch_instructions: - branch_target = int(row_parts[1].strip().split(",")[-1], 16) + if ( + mnemonic in arch.branch_instructions or is_text_relative_j + ) and symbol is None: + x86_longjmp = re.search(r"\*(.*)\(", args) + if x86_longjmp: + capture = x86_longjmp.group(1) + if capture != "": + branch_target = int(capture, 16) + else: + branch_target = int(args.split(",")[-1], 16) output.append( Line( @@ -2003,6 +2401,7 @@ def process(dump: str, config: Config) -> List[Line]: original=original, normalized_original=normalized_original, scorable_line=scorable_line, + symbol=symbol, line_num=line_num, branch_target=branch_target, data_pool_addr=data_pool_addr, @@ -2015,9 +2414,7 @@ def process(dump: str, config: Config) -> List[Line]: num_instr += 1 source_lines = [] - if config.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra": - stop_after_delay_slot = True - elif stop_after_delay_slot: + if config.stop_at_ret and processor.is_end_of_function(mnemonic, args): break processor.post_process(output) @@ -2032,9 +2429,44 @@ def normalize_stack(row: str, arch: ArchSettings) -> str: return re.sub(arch.re_sprel, "addr(sp)", row) -def imm_matches_everything(row: str, arch: ArchSettings) -> bool: - # (this should probably be arch-specific) - return "(." in row +def check_for_symbol_mismatch( + old_line: Line, new_line: Line, symbol_map: Dict[str, str] +) -> bool: + + assert old_line.symbol is not None + assert new_line.symbol is not None + + if new_line.symbol.startswith("%hi"): + return False + + if old_line.symbol not in symbol_map: + symbol_map[old_line.symbol] = new_line.symbol + return False + elif symbol_map[old_line.symbol] == new_line.symbol: + return False + + return True + + +def field_matches_any_symbol(field: str, arch: ArchSettings) -> bool: + if arch.name == "ppc": + if "..." in field: + return True + + parts = field.rsplit("@", 1) + if len(parts) == 2 and parts[1] in {"l", "h", "ha", "sda21"}: + field = parts[0] + + return re.fullmatch((r"^@\d+$"), field) is not None + + if arch.name in MIPS_ARCH_NAMES: + return "." in field + + # Example: ".text+0x34" + if arch.name == "arm32": + return "." in field + + return False def split_off_address(line: str) -> Tuple[str, str]: @@ -2058,15 +2490,10 @@ def diff_sequences_difflib( def diff_sequences( seq1: List[str], seq2: List[str], algorithm: str ) -> List[Tuple[str, int, int, int, int]]: - if ( - algorithm != "levenshtein" - or len(seq1) * len(seq2) > 4 * 10**8 - or len(seq1) + len(seq2) >= 0x110000 - ): + if algorithm != "levenshtein": return diff_sequences_difflib(seq1, seq2) # The Levenshtein library assumes that we compare strings, not lists. Convert. - # (Per the check above we know we have fewer than 0x110000 unique elements, so chr() works.) remapping: Dict[str, str] = {} def remap(seq: List[str]) -> str: @@ -2079,8 +2506,16 @@ def diff_sequences( seq[i] = val return "".join(seq) - rem1 = remap(seq1) - rem2 = remap(seq2) + try: + rem1 = remap(seq1) + rem2 = remap(seq2) + except ValueError as e: + if len(seq1) + len(seq2) < 0x110000: + raise + # If there are too many unique elements, chr() doesn't work. + # Assume this is the case and fall back to difflib. + return diff_sequences_difflib(seq1, seq2) + import Levenshtein ret: List[Tuple[str, int, int, int, int]] = Levenshtein.opcodes(rem1, rem2) @@ -2113,69 +2548,80 @@ def diff_lines( return ret +def diff_sameline( + old_line: Line, new_line: Line, config: Config, symbol_map: Dict[str, str] +) -> Tuple[int, int, bool]: + + old = old_line.scorable_line + new = new_line.scorable_line + if old == new: + return (0, 0, False) + + num_stack_penalties = 0 + num_regalloc_penalties = 0 + has_symbol_mismatch = False + + ignore_last_field = False + if config.score_stack_differences: + oldsp = re.search(config.arch.re_sprel, old) + newsp = re.search(config.arch.re_sprel, new) + if oldsp and newsp: + oldrel = int(oldsp.group(1) or "0", 0) + newrel = int(newsp.group(1) or "0", 0) + num_stack_penalties += abs(oldrel - newrel) + ignore_last_field = True + + # Probably regalloc difference, or signed vs unsigned + + # Compare each field in order + new_parts, old_parts = new.split(None, 1), old.split(None, 1) + newfields, oldfields = new_parts[1].split(","), old_parts[1].split(",") + if ignore_last_field: + newfields = newfields[:-1] + oldfields = oldfields[:-1] + else: + # If the last field has a parenthesis suffix, e.g. "0x38(r7)" + # we split that part out to make it a separate field + # however, we don't split if it has a proceeding % macro, e.g. "%lo(.data)" + re_paren = re.compile(r"(? int: # This logic is copied from `scorer.py` from the decomp permuter project # https://github.com/simonlindholm/decomp-permuter/blob/main/src/scorer.py - score = 0 + num_stack_penalties = 0 + num_regalloc_penalties = 0 + num_reordering_penalties = 0 + num_insertion_penalties = 0 + num_deletion_penalties = 0 deletions = [] insertions = [] - def lo_hi_match(old: str, new: str) -> bool: - # TODO: Make this arch-independent, like `imm_matches_everything()` - old_lo = old.find("%lo") - old_hi = old.find("%hi") - new_lo = new.find("%lo") - new_hi = new.find("%hi") - - if old_lo != -1 and new_lo != -1: - old_idx = old_lo - new_idx = new_lo - elif old_hi != -1 and new_hi != -1: - old_idx = old_hi - new_idx = new_hi - else: - return False - - if old[:old_idx] != new[:new_idx]: - return False - - old_inner = old[old_idx + 4 : -1] - new_inner = new[new_idx + 4 : -1] - return old_inner.startswith(".") or new_inner.startswith(".") - - def diff_sameline(old: str, new: str) -> None: - nonlocal score - if old == new: - return - - if lo_hi_match(old, new): - return - - ignore_last_field = False - if config.score_stack_differences: - oldsp = re.search(config.arch.re_sprel, old) - newsp = re.search(config.arch.re_sprel, new) - if oldsp and newsp: - oldrel = int(oldsp.group(1) or "0", 0) - newrel = int(newsp.group(1) or "0", 0) - score += abs(oldrel - newrel) * config.penalty_stackdiff - ignore_last_field = True - - # Probably regalloc difference, or signed vs unsigned - - # Compare each field in order - newfields, oldfields = new.split(","), old.split(",") - if ignore_last_field: - newfields = newfields[:-1] - oldfields = oldfields[:-1] - for nf, of in zip(newfields, oldfields): - if nf != of: - score += config.penalty_regalloc - # Penalize any extra fields - score += abs(len(newfields) - len(oldfields)) * config.penalty_regalloc - def diff_insert(line: str) -> None: # Reordering or totally different codegen. # Defer this until later when we can tell. @@ -2205,7 +2651,9 @@ def score_diff_lines( if max_index is not None and index > max_index: break if line1 and line2 and line1.mnemonic == line2.mnemonic: - diff_sameline(line1.scorable_line, line2.scorable_line) + sp, rp, _ = diff_sameline(line1, line2, config, symbol_map) + num_stack_penalties += sp + num_regalloc_penalties += rp else: if line1: diff_delete(line1.scorable_line) @@ -2218,13 +2666,17 @@ def score_diff_lines( ins = insertions_co[item] dels = deletions_co[item] common = min(ins, dels) - score += ( - (ins - common) * config.penalty_insertion - + (dels - common) * config.penalty_deletion - + config.penalty_reordering * common - ) + num_insertion_penalties += ins - common + num_deletion_penalties += dels - common + num_reordering_penalties += common - return score + return ( + num_stack_penalties * config.penalty_stackdiff + + num_regalloc_penalties * config.penalty_regalloc + + num_reordering_penalties * config.penalty_reordering + + num_insertion_penalties * config.penalty_insertion + + num_deletion_penalties * config.penalty_deletion + ) @dataclass(frozen=True) @@ -2262,6 +2714,7 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: arch = config.arch fmt = config.formatter output: List[OutputLine] = [] + symbol_map: Dict[str, str] = {} sc1 = symbol_formatter("base-reg", 0) sc2 = symbol_formatter("my-reg", 0) @@ -2287,7 +2740,6 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: lines2 = trim_nops(lines2, arch) diffed_lines = diff_lines(lines1, lines2, config.algorithm) - max_score = len(lines1) * config.penalty_deletion line_num_base = -1 line_num_offset = 0 @@ -2372,7 +2824,17 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: if normalize_imms(branchless1, arch) == normalize_imms( branchless2, arch ): - if imm_matches_everything(branchless2, arch): + ( + stack_penalties, + regalloc_penalties, + has_symbol_mismatch, + ) = diff_sameline(line1, line2, config, symbol_map) + + if ( + regalloc_penalties == 0 + and stack_penalties == 0 + and not has_symbol_mismatch + ): # ignore differences due to %lo(.rodata + ...) vs symbol out1 = out1.reformat(BasicFormat.NONE) out2 = out2.reformat(BasicFormat.NONE) @@ -2397,8 +2859,19 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: else: # reg differences and maybe imm as well out1, out2 = format_fields(arch.re_reg, out1, out2, sc1, sc2) - line_color1 = line_color2 = sym_color = BasicFormat.REGISTER - line_prefix = "r" + cats = config.reg_categories + if cats and any( + cats.get(of.group()) != cats.get(nf.group()) + for (of, nf) in zip( + out1.finditer(arch.re_reg), out2.finditer(arch.re_reg) + ) + ): + sym_color = BasicFormat.REGISTER_CATEGORY + line_prefix = "R" + else: + sym_color = BasicFormat.REGISTER + line_prefix = "r" + line_color1 = line_color2 = sym_color if same_target: address_imm_fmt = BasicFormat.NONE @@ -2523,8 +2996,10 @@ def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: ) ) - score = score_diff_lines(diffed_lines, config) output = output[config.skip_lines :] + + score = score_diff_lines(diffed_lines, config, symbol_map) + max_score = len(lines1) * config.penalty_deletion return Diff(lines=output, score=score, max_score=max_score) @@ -2586,24 +3061,12 @@ def compress_matching( return ret -def align_diffs( - old_diff: Diff, new_diff: Diff, config: Config -) -> Tuple[TableMetadata, List[Tuple[OutputLine, ...]]]: - meta: TableMetadata +def align_diffs(old_diff: Diff, new_diff: Diff, config: Config) -> TableData: + headers: Tuple[Text, ...] diff_lines: List[Tuple[OutputLine, ...]] padding = " " * 7 if config.show_line_numbers else " " * 2 - if config.threeway: - meta = TableMetadata( - headers=( - Text("TARGET"), - Text(f"{padding}CURRENT ({new_diff.score})"), - Text(f"{padding}PREVIOUS ({old_diff.score})"), - ), - current_score=new_diff.score, - max_score=new_diff.max_score, - previous_score=old_diff.score, - ) + if config.diff_mode in (DiffMode.THREEWAY_PREV, DiffMode.THREEWAY_BASE): old_chunks = chunk_diff_lines(old_diff.lines) new_chunks = chunk_diff_lines(new_diff.lines) diff_lines = [] @@ -2638,20 +3101,54 @@ def align_diffs( diff_lines = [ (base, new, old if old != new else empty) for base, new, old in diff_lines ] - else: - meta = TableMetadata( - headers=( - Text("TARGET"), - Text(f"{padding}CURRENT ({new_diff.score})"), - ), - current_score=new_diff.score, - max_score=new_diff.max_score, - previous_score=None, + headers = ( + Text("TARGET"), + Text(f"{padding}CURRENT ({new_diff.score})"), + Text(f"{padding}PREVIOUS ({old_diff.score})"), ) + current_score = new_diff.score + max_score = new_diff.max_score + previous_score = old_diff.score + elif config.diff_mode in (DiffMode.SINGLE, DiffMode.SINGLE_BASE): + header = Text("BASE" if config.diff_mode == DiffMode.SINGLE_BASE else "CURRENT") + diff_lines = [(line,) for line in new_diff.lines] + headers = (header,) + # Scoring is disabled for view mode + current_score = 0 + max_score = 0 + previous_score = None + else: diff_lines = [(line, line) for line in new_diff.lines] + headers = ( + Text("TARGET"), + Text(f"{padding}CURRENT ({new_diff.score})"), + ) + current_score = new_diff.score + max_score = new_diff.max_score + previous_score = None if config.compress: diff_lines = compress_matching(diff_lines, config.compress.context) - return meta, diff_lines + + def diff_line_to_table_line(line: Tuple[OutputLine, ...]) -> TableLine: + cells = [ + (line[0].base or Text(), line[0].line1) + ] + for ol in line[1:]: + cells.append((ol.fmt2, ol.line2)) + + return TableLine( + key=line[0].key2, + is_data_ref=line[0].is_data_ref, + cells=tuple(cells), + ) + + return TableData( + headers=headers, + current_score=current_score, + max_score=max_score, + previous_score=previous_score, + lines=[diff_line_to_table_line(line) for line in diff_lines], + ) def debounced_fs_watch( @@ -2754,17 +3251,26 @@ class Display: return (self.emsg, self.emsg) my_lines = process(self.mydump, self.config) - diff_output = do_diff(self.base_lines, my_lines, self.config) + + if self.config.diff_mode == DiffMode.SINGLE_BASE: + diff_output = do_diff(self.base_lines, self.base_lines, self.config) + elif self.config.diff_mode == DiffMode.SINGLE: + diff_output = do_diff(my_lines, my_lines, self.config) + else: + diff_output = do_diff(self.base_lines, my_lines, self.config) + last_diff_output = self.last_diff_output or diff_output - if self.config.threeway != "base" or not self.last_diff_output: + if self.config.diff_mode != DiffMode.THREEWAY_BASE or not self.last_diff_output: self.last_diff_output = diff_output - meta, diff_lines = align_diffs(last_diff_output, diff_output, self.config) - output = self.config.formatter.table(meta, diff_lines) + data = align_diffs(last_diff_output, diff_output, self.config) + output = self.config.formatter.table(data) + refresh_key = ( [line.key2 for line in diff_output.lines], diff_output.score, ) + return (output, refresh_key) def run_less( @@ -2882,7 +3388,10 @@ def main() -> None: except ModuleNotFoundError as e: fail(MISSING_PREREQUISITES.format(e.name)) - if config.threeway and not args.watch: + if ( + config.diff_mode in (DiffMode.THREEWAY_BASE, DiffMode.THREEWAY_PREV) + and not args.watch + ): fail("Threeway diffing requires -w.") if args.diff_elf_symbol: @@ -2910,8 +3419,10 @@ def main() -> None: if args.base_asm is not None: with open(args.base_asm) as f: basedump = f.read() - else: + elif config.diff_mode != DiffMode.SINGLE: basedump = run_objdump(basecmd, config, project) + else: + basedump = "" mydump = run_objdump(mycmd, config, project) diff --git a/tools/asm-differ/diff_settings.py b/tools/asm-differ/diff_settings.py index 183b96b75d..19d67d5487 100644 --- a/tools/asm-differ/diff_settings.py +++ b/tools/asm-differ/diff_settings.py @@ -5,7 +5,8 @@ def apply(config, args): config["source_directories"] = ["."] # config["show_line_numbers_default"] = True # config["arch"] = "mips" - # config["map_format"] = "gnu" # gnu or mw - # config["mw_build_dir"] = "build/" # only needed for mw map format + # config["map_format"] = "gnu" # gnu, mw, ms + # config["build_dir"] = "build/" # only needed for mw and ms map format + # config["expected_dir"] = "expected/" # needed for -o # config["makeflags"] = [] # config["objdump_executable"] = "" diff --git a/tools/asm-differ/pyproject.toml b/tools/asm-differ/pyproject.toml new file mode 100644 index 0000000000..7a112aee55 --- /dev/null +++ b/tools/asm-differ/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "asm-differ" +version = "0.1.0" +description = "" +authors = ["Simon Lindholm "] +license = "UNLICENSE" +readme = "README.md" +packages = [{ include = "diff.py" }] + +[tool.poetry.dependencies] +python = "^3.7" +colorama = "^0.4.6" +ansiwrap = "^0.8.4" +watchdog = "^2.2.0" +levenshtein = "^0.20.9" +cxxfilt = "^0.3.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/tools/fado/.gitrepo b/tools/fado/.gitrepo index cedec49302..042b904f19 100644 --- a/tools/fado/.gitrepo +++ b/tools/fado/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:EllipticEllipsis/fado.git branch = master - commit = f7efb10a9a65f27e9ccad7ce270234f20d386ac9 - parent = 90cfafec47fe4b73ad9009e9501e147e86025aa6 + commit = 8d896ee97d565508755584803c409fc33bb0c953 + parent = b51c9f4d22d6e7db63700c163418654431a2a61a method = merge cmdver = 0.4.3 diff --git a/tools/fado/lib/vc_vector/vc_vector.c b/tools/fado/lib/vc_vector/vc_vector.c index 3f677c242a..426f1b0cbb 100644 --- a/tools/fado/lib/vc_vector/vc_vector.c +++ b/tools/fado/lib/vc_vector/vc_vector.c @@ -112,15 +112,15 @@ bool vc_vector_is_equals(vc_vector* vector1, vc_vector* vector2) { return memcmp(vector1->data, vector2->data, size_vector1) == 0; } -float vc_vector_get_growth_factor() { +float vc_vector_get_growth_factor(void) { return GROWTH_FACTOR; } -size_t vc_vector_get_default_count_of_elements() { +size_t vc_vector_get_default_count_of_elements(void) { return DEFAULT_COUNT_OF_ELEMENTS; } -size_t vc_vector_struct_size() { +size_t vc_vector_struct_size(void) { return sizeof(vc_vector); } diff --git a/tools/fado/lib/vc_vector/vc_vector.h b/tools/fado/lib/vc_vector/vc_vector.h index 2b1422c1a5..e57f832543 100644 --- a/tools/fado/lib/vc_vector/vc_vector.h +++ b/tools/fado/lib/vc_vector/vc_vector.h @@ -24,13 +24,13 @@ void vc_vector_release(vc_vector* vector); bool vc_vector_is_equals(vc_vector* vector1, vc_vector* vector2); // Returns constant value of the vector growth factor. -float vc_vector_get_growth_factor(); +float vc_vector_get_growth_factor(void); // Returns constant value of the vector default count of elements. -size_t vc_vector_get_default_count_of_elements(); +size_t vc_vector_get_default_count_of_elements(void); // Returns constant value of the vector struct size. -size_t vc_vector_struct_size(); +size_t vc_vector_struct_size(void); // ---------------------------------------------------------------------------- // Element access diff --git a/tools/fado/src/main.c b/tools/fado/src/main.c index c6af79025a..e6b7926d65 100644 --- a/tools/fado/src/main.c +++ b/tools/fado/src/main.c @@ -15,7 +15,7 @@ #include "version.inc" -void PrintVersion() { +void PrintVersion(void) { printf("Fado (Fairy-Assisted relocations for Decompiled Overlays), version %s\n", versionNumber); printf("Copyright (C) 2021 Elliptic Ellipsis\n"); printf("%s\n", credits); @@ -88,7 +88,7 @@ static size_t posArgCount = ARRAY_COUNT(posArgInfo); static size_t optCount = ARRAY_COUNT(optInfo); static struct option longOptions[ARRAY_COUNT(optInfo)]; -void ConstructLongOpts() { +void ConstructLongOpts(void) { size_t i; for (i = 0; i < optCount; i++) { diff --git a/tools/graphovl/.gitrepo b/tools/graphovl/.gitrepo index 064b8ee581..ca544b8858 100644 --- a/tools/graphovl/.gitrepo +++ b/tools/graphovl/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/AngheloAlf/graphovl.git branch = master - commit = f5fe93d75bb75ea4bea65f62c43f41f6a1e70679 - parent = 6c5a50ef95f351acc7c4c0455a347a94443adbe1 + commit = dab4addae0c5db6274ab5daf7780c62c346120a1 + parent = 5da1ae553569ddb0016d302cfac8c45d9cb22e73 method = merge cmdver = 0.4.3 diff --git a/tools/graphovl/graphovl.py b/tools/graphovl/graphovl.py index 28f0a361c0..7cb7af2c51 100755 --- a/tools/graphovl/graphovl.py +++ b/tools/graphovl/graphovl.py @@ -18,7 +18,7 @@ except ModuleNotFoundError: script_dir = os.path.dirname(os.path.realpath(__file__)) config = ConfigParser() -func_names = None +func_names = list() func_definitions = list() line_numbers_of_functions = list() @@ -72,6 +72,19 @@ def capture_setupaction_call_arg(content): transitionList.append(func) return transitionList +setaction_regexpr = re.compile(r"_SetAction+\([^\)]*\)(\.[^\)]*\))?;") + +def capture_setaction_calls(content): + return [x.group() for x in re.finditer(setaction_regexpr, content)] + +def capture_setaction_call_arg(content): + transitionList = [] + for x in re.finditer(setaction_regexpr, content): + func = x.group().split(",")[2].strip().split(");")[0].strip() + if func not in transitionList: + transitionList.append(func) + return transitionList + # Search for the function definition by supplied function name def definition_by_name(content, name): for definition in capture_definitions(content): @@ -206,7 +219,11 @@ def addFunctionTransitionToGraph(dot, index: int, func_name: str, action_transit fontColor = config.get("colors", "fontcolor") bubbleColor = config.get("colors", "bubbleColor") indexStr = str(index) - funcIndex = str(index_of_func(action_transition)) + try: + funcIndex = str(index_of_func(action_transition)) + except ValueError: + print(f"Warning: function '{action_transition}' called by '{func_name}' was not found. Skiping...", file=sys.stderr) + return dot.node(indexStr, func_name, fontcolor=fontColor, color=bubbleColor) dot.node(funcIndex, action_transition, fontcolor=fontColor, color=bubbleColor) @@ -230,7 +247,7 @@ def addCallNamesToGraph(dot, func_names: list, index: int, code_body: str, remov if call in removeList: continue - if setupAction and "_SetupAction" in call: + if setupAction and ("_SetupAction" in call or "_SetAction" in call): continue seen.add(call) @@ -342,10 +359,11 @@ def main(): actionIdentifier = "this->actionFunc" setupAction = func_prefix + "_SetupAction" in func_names + setAction = func_prefix + "_SetAction" in func_names arrayActorFunc = match_obj is not None rawActorFunc = actionIdentifier in contents - if not setupAction and not arrayActorFunc and not rawActorFunc: + if not setupAction and not setAction and not arrayActorFunc and not rawActorFunc: print("No actor action-based structure found") os._exit(1) @@ -383,6 +401,8 @@ def main(): Create all edges for SetupAction-based actors """ transitionList = capture_setupaction_call_arg(code_body) + elif setAction: + transitionList = capture_setaction_call_arg(code_body) elif arrayActorFunc: """ Create all edges for ActorFunc array-based actors