mirror of https://github.com/zeldaret/botw.git
tools: Migrate to external repo
This commit is contained in:
parent
802be64d8e
commit
18152c5909
|
@ -40,4 +40,4 @@ perf.data.old
|
|||
.gdb_history
|
||||
|
||||
# Tooling
|
||||
/toolchain/clang/
|
||||
/toolchain/clang-*
|
||||
|
|
|
@ -7,15 +7,12 @@
|
|||
[submodule "agl"]
|
||||
path = lib/agl
|
||||
url = https://github.com/open-ead/agl
|
||||
[submodule "asm-differ"]
|
||||
path = tools/asm-differ
|
||||
url = https://github.com/simonlindholm/asm-differ
|
||||
[submodule "lib/EventFlow"]
|
||||
path = lib/EventFlow
|
||||
url = https://github.com/open-ead/EventFlow
|
||||
[submodule "tools/common"]
|
||||
path = tools/common
|
||||
url = https://github.com/open-ead/nx-decomp-tools
|
||||
[submodule "toolchain/musl"]
|
||||
path = toolchain/musl
|
||||
url = https://github.com/open-ead/botw-lib-musl
|
||||
[submodule "tools/nx-decomp-tools-binaries"]
|
||||
path = tools/nx-decomp-tools-binaries
|
||||
url = https://github.com/open-ead/nx-decomp-tools-binaries
|
||||
|
|
|
@ -4,7 +4,7 @@ To contribute to the project, you will need:
|
|||
|
||||
* A disassembler or a decompiler such as Hex-Rays or Ghidra.
|
||||
* Python 3 and pip for the diff script
|
||||
* These Python modules: `capstone colorama cxxfilt pyelftools` (install them with `pip install ...`)
|
||||
* These Python modules: `capstone colorama cxxfilt pyelftools ansiwrap watchdog python-Levenshtein toml` (install them with `pip install ...`)
|
||||
|
||||
Experience with reverse engineering optimized C++ code is very useful but not necessary if you already know how to decompile C code.
|
||||
|
||||
|
@ -145,7 +145,7 @@ public:
|
|||
5. **Get the mangled name** of your function. For example, if you are decompiling BaseProcMgr::createInstance:
|
||||
|
||||
```
|
||||
$ tools/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance
|
||||
$ tools/common/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance
|
||||
UNLISTED ksys::act::BaseProcMgr::createInstance(sead::Heap*) (_ZN4ksys3act11BaseProcMgr14createInstanceEPN4sead4HeapE)
|
||||
```
|
||||
|
||||
|
@ -206,15 +206,15 @@ This project sometimes uses small hacks to force particular code to be generated
|
|||
* Pass the `--source` flag to show source code interleaved with assembly code.
|
||||
* Add the `--inlines` flag to show inline function calls. This is not enabled by default because it usually produces too much output to be useful.
|
||||
* For more options, see [asm-differ](https://github.com/simonlindholm/asm-differ).
|
||||
* To print progress: `tools/progress.py`
|
||||
* To print progress: `tools/common/progress.py`
|
||||
* Note that progress is only approximate because of inline functions, templating and compiler-generated functions.
|
||||
* To print AI class decompilation status: `tools/ai_progress.py`
|
||||
* Use this to figure out which AI classes have not been decompiled yet.
|
||||
* To dump symbols: `tools/print_decomp_symbols.py`
|
||||
* To dump symbols: `tools/common/print_decomp_symbols.py`
|
||||
* Pass `-a` to list all symbols
|
||||
* Useful for getting the mangled name of a function. For example:
|
||||
|
||||
```
|
||||
$ tools/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance
|
||||
$ tools/common/print_decomp_symbols.py -a | grep BaseProcMgr::createInstance
|
||||
UNLISTED ksys::act::BaseProcMgr::createInstance(sead::Heap*) (_ZN4ksys3act11BaseProcMgr14createInstanceEPN4sead4HeapE)
|
||||
```
|
||||
|
|
|
@ -158,7 +158,7 @@ Additionally, you'll also need:
|
|||
|
||||
2. Run `git submodule update --init --recursive`
|
||||
|
||||
3. Run `cargo install --path tools/viking`
|
||||
3. Run `cargo install --path tools/common/viking`
|
||||
|
||||
Next, you'll need to acquire the **original 1.5.0 or 1.6.0 `main` NSO executable**.
|
||||
|
||||
|
@ -167,7 +167,7 @@ Additionally, you'll also need:
|
|||
* The decompressed 1.5.0 NSO has the following SHA256 hash: `d9fa308d0ee7c0ab081c66d987523385e1afe06f66731bbfa32628438521c106`
|
||||
* If you have a compressed NSO or a 1.6.0 executable, don't worry about this.
|
||||
|
||||
4. Run `tools/setup.py [path to the NSO]`
|
||||
4. Run `tools/common/setup.py [path to the NSO]`
|
||||
* This will:
|
||||
* convert the executable if necessary
|
||||
* set up [Clang 4.0.1](https://releases.llvm.org/download.html#4.0.1) by downloading it from the official LLVM website
|
||||
|
|
|
@ -1 +1 @@
|
|||
tools/diff_settings.py
|
||||
tools/common/diff_settings.py
|
|
@ -1,7 +1,7 @@
|
|||
if (DEFINED ENV{UKING_CLANG})
|
||||
set(UKING_CLANG "$ENV{UKING_CLANG}")
|
||||
else()
|
||||
set(UKING_CLANG "${CMAKE_CURRENT_LIST_DIR}/clang")
|
||||
set(UKING_CLANG "${CMAKE_CURRENT_LIST_DIR}/clang-4.0.1")
|
||||
endif()
|
||||
|
||||
set(NX64_OPT_FLAGS "-O3 -g")
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Set, List
|
||||
|
||||
from util import utils
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("csv_path", help="Path to function CSV to merge")
|
||||
args = parser.parse_args()
|
||||
|
||||
csv_path = Path(args.csv_path)
|
||||
|
||||
known_fn_addrs: Set[int] = {func.addr for func in utils.get_functions()}
|
||||
new_fns: List[utils.FunctionInfo] = []
|
||||
for func in utils.get_functions(csv_path):
|
||||
if func.addr not in known_fn_addrs:
|
||||
new_fns.append(func)
|
||||
|
||||
new_fn_list: List[utils.FunctionInfo] = []
|
||||
new_fn_list.extend(utils.get_functions())
|
||||
new_fn_list.extend(new_fns)
|
||||
new_fn_list.sort(key=lambda func: func.addr)
|
||||
|
||||
# Output the modified function CSV.
|
||||
writer = csv.writer(sys.stdout, lineterminator="\n")
|
||||
for func in new_fn_list:
|
||||
writer.writerow(func.raw_row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1 +0,0 @@
|
|||
Subproject commit 7cd33c4e7f5adadf5892261ad8a522efec51cac8
|
|
@ -0,0 +1 @@
|
|||
Subproject commit f0a952ce32d81018ec9623922cd2726ddd07c423
|
|
@ -0,0 +1,2 @@
|
|||
functions_csv = "data/uking_functions.csv"
|
||||
build_target = "uking"
|
|
@ -1,36 +0,0 @@
|
|||
from pathlib import Path
|
||||
import platform
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def get_tools_bin_dir():
|
||||
path = ROOT / 'tools' / 'nx-decomp-tools-binaries'
|
||||
system = platform.system()
|
||||
if system == "Linux":
|
||||
return str(path) + "/linux/"
|
||||
if system == "Darwin":
|
||||
return str(path) + "/macos/"
|
||||
return ""
|
||||
|
||||
|
||||
def apply(config, args):
|
||||
config['arch'] = 'aarch64'
|
||||
config['baseimg'] = 'data/main.elf'
|
||||
config['myimg'] = 'build/uking'
|
||||
config['source_directories'] = ['src', 'lib']
|
||||
config['objdump_executable'] = get_tools_bin_dir() + 'aarch64-none-elf-objdump'
|
||||
|
||||
for dir in ('build', 'build/nx64-release'):
|
||||
if (Path(dir) / 'build.ninja').is_file():
|
||||
config['make_command'] = ['ninja', '-C', dir]
|
||||
|
||||
|
||||
def map_build_target(make_target: str):
|
||||
if make_target == "build/uking":
|
||||
return "uking"
|
||||
|
||||
# TODO: When support for directly diffing object files is added, this needs to strip
|
||||
# the build/ prefix from the object file targets.
|
||||
return make_target
|
|
@ -1,119 +0,0 @@
|
|||
// Script to load BotW CSV data into Ghidra
|
||||
//@author AlexApps99
|
||||
//@category BotW
|
||||
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.program.model.symbol.SourceType;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.File;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.listing.Function;
|
||||
import ghidra.program.model.listing.FunctionManager;
|
||||
import ghidra.app.cmd.label.DemanglerCmd;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.util.NumericUtilities;
|
||||
import ghidra.app.cmd.label.DemanglerCmd;
|
||||
import ghidra.program.model.listing.FunctionTag;
|
||||
import ghidra.util.exception.DuplicateNameException;
|
||||
import ghidra.program.model.listing.FunctionTagManager;
|
||||
|
||||
public class RenameFunctionsInGhidra extends GhidraScript {
|
||||
private FunctionManager func_mgr;
|
||||
private FunctionTagManager func_tag_mgr;
|
||||
private String ok;
|
||||
private String minor;
|
||||
private String major;
|
||||
private String wip;
|
||||
private String undecompiled;
|
||||
private String lib;
|
||||
|
||||
private FunctionTag getOrMake(String name) {
|
||||
FunctionTag f = func_tag_mgr.getFunctionTag(name);
|
||||
if (f == null) f = func_tag_mgr.createFunctionTag(name, null);
|
||||
return f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
func_mgr = currentProgram.getFunctionManager();
|
||||
func_tag_mgr = func_mgr.getFunctionTagManager();
|
||||
ok = getOrMake("OK").getName();
|
||||
minor = getOrMake("MINOR").getName();
|
||||
major = getOrMake("MAJOR").getName();
|
||||
wip = getOrMake("WIP").getName();
|
||||
undecompiled = getOrMake("UNDECOMPILED").getName();
|
||||
lib = getOrMake("LIBRARY").getName();
|
||||
|
||||
|
||||
File input_csv = askFile("uking_functions.csv", "Go");
|
||||
try (BufferedReader br = new BufferedReader(new FileReader(input_csv))) {
|
||||
// Skip header
|
||||
String line = br.readLine();
|
||||
while ((line = br.readLine()) != null) {
|
||||
String[] pieces = line.split(",", -4); // Don't skip empty last column
|
||||
if (pieces.length != 4) throw new Exception("Invalid CSV row: " + line);
|
||||
|
||||
Address addr = toAddr(pieces[0]);
|
||||
String status = pieces[1];
|
||||
long func_size = func_size = NumericUtilities.parseLong(pieces[2].strip());
|
||||
|
||||
String name = pieces[3].strip();
|
||||
|
||||
Function func = applyFunction(addr, status, name, func_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO the j_ prefix probably breaks demangling
|
||||
private Function applyFunction(Address addr, String status, String name, long func_size) throws Exception {
|
||||
if (name.isEmpty()) name = null;
|
||||
|
||||
Function func = func_mgr.getFunctionAt(addr);
|
||||
AddressSet body = new AddressSet(addr, addr.addNoWrap(func_size - 1));
|
||||
|
||||
|
||||
if (func != null) {
|
||||
// Demangling can break this, hence the try-catch
|
||||
try {
|
||||
if (func.getName() != name) func.setName(name, SourceType.IMPORTED);
|
||||
} catch (DuplicateNameException e) {}
|
||||
if (!func.getBody().hasSameAddresses(body)) {
|
||||
func.setBody(body);
|
||||
}
|
||||
} else {
|
||||
func = func_mgr.createFunction(name, addr, body, SourceType.IMPORTED);
|
||||
}
|
||||
|
||||
if (name != null) {
|
||||
DemanglerCmd cmd = new DemanglerCmd(addr, name);
|
||||
if (!cmd.applyTo(currentProgram, monitor)) {
|
||||
// Something that isn't mangled
|
||||
}
|
||||
}
|
||||
|
||||
func.removeTag(ok);
|
||||
func.removeTag(minor);
|
||||
func.removeTag(major);
|
||||
func.removeTag(wip);
|
||||
func.removeTag(undecompiled);
|
||||
func.removeTag(lib);
|
||||
if (status.equals("O")) {
|
||||
func.addTag(ok);
|
||||
} else if (status.equals("m")) {
|
||||
func.addTag(minor);
|
||||
} else if (status.equals("M")) {
|
||||
func.addTag(major);
|
||||
} else if (status.equals("W")) {
|
||||
func.addTag(wip);
|
||||
} else if (status.equals("L")) {
|
||||
func.addTag(lib);
|
||||
func.addTag(undecompiled);
|
||||
} else {
|
||||
func.addTag(undecompiled);
|
||||
}
|
||||
|
||||
return func;
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
import idaapi
|
||||
|
||||
for i in range(idaapi.get_fchunk_qty()):
|
||||
chunk = idaapi.getn_fchunk(i)
|
||||
if not idaapi.is_func_tail(chunk):
|
||||
continue
|
||||
|
||||
ea = chunk.start_ea
|
||||
print("removing tail 0x%016x" % ea)
|
||||
parent = idaapi.get_func(ea)
|
||||
idaapi.remove_func_tail(parent, ea)
|
||||
idaapi.add_func(ea)
|
|
@ -1,80 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
from colorama import Fore
|
||||
import csv
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
import util.checker
|
||||
import util.elf
|
||||
from util import utils
|
||||
|
||||
|
||||
def read_candidates(path: Path) -> Dict[str, util.elf.Function]:
|
||||
candidates: Dict[str, util.elf.Function] = dict()
|
||||
|
||||
for candidate in path.read_text().splitlines():
|
||||
columns = candidate.split()
|
||||
if len(columns) == 3:
|
||||
candidate = columns[2]
|
||||
|
||||
candidates[candidate] = util.elf.get_fn_from_my_elf(candidate)
|
||||
|
||||
return candidates
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("csv_path",
|
||||
help="Path to a list of functions to identify (in the same format as the main function CSV)")
|
||||
parser.add_argument("candidates_path",
|
||||
help="Path to a list of candidates (names only)")
|
||||
args = parser.parse_args()
|
||||
|
||||
csv_path = Path(args.csv_path)
|
||||
candidates_path = Path(args.candidates_path)
|
||||
|
||||
candidates = read_candidates(candidates_path)
|
||||
|
||||
new_matches: Dict[int, str] = dict()
|
||||
checker = util.checker.FunctionChecker()
|
||||
|
||||
# Given a list L of functions to identify and a small list of candidates C, this tool will attempt to
|
||||
# automatically identify matches by checking each function in L against each function in C.
|
||||
#
|
||||
# This matching algorithm is quite naive (quadratic time complexity if both lists have about the same size)
|
||||
# but this should work well enough for short lists of candidates...
|
||||
for func in utils.get_functions(csv_path):
|
||||
if func.status != utils.FunctionStatus.NotDecompiled:
|
||||
continue
|
||||
|
||||
match_name = ""
|
||||
|
||||
for candidate_name, candidate in candidates.items():
|
||||
if len(candidate.data) != func.size:
|
||||
continue
|
||||
if checker.check(util.elf.get_fn_from_base_elf(func.addr, func.size), candidate):
|
||||
match_name = candidate_name
|
||||
break
|
||||
|
||||
if match_name:
|
||||
new_matches[func.addr] = match_name
|
||||
utils.print_note(
|
||||
f"found new match: {Fore.BLUE}{match_name}{Fore.RESET} ({func.addr | 0x71_00000000:#018x})")
|
||||
# This is no longer a candidate.
|
||||
del candidates[match_name]
|
||||
else:
|
||||
utils.warn(f"no match found for {Fore.BLUE}{func.name}{Fore.RESET} ({func.addr | 0x71_00000000:#018x})")
|
||||
|
||||
# Output the modified function CSV.
|
||||
writer = csv.writer(sys.stdout, lineterminator="\n")
|
||||
for func in utils.get_functions():
|
||||
if func.status == utils.FunctionStatus.NotDecompiled and func.addr in new_matches:
|
||||
func.raw_row[3] = new_matches[func.addr]
|
||||
writer.writerow(func.raw_row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,92 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
from typing import Dict, List
|
||||
import argparse
|
||||
|
||||
import cxxfilt
|
||||
from colorama import Fore
|
||||
|
||||
from util import utils, checker, elf
|
||||
|
||||
|
||||
class Checker(checker.FunctionChecker):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.checking = ""
|
||||
self.invalid_call_descriptions: List[str] = []
|
||||
self.addr_to_symbol = elf.build_addr_to_symbol_table(elf.my_symtab)
|
||||
self._possible_calls: Dict[int, int] = dict()
|
||||
|
||||
def reset(self) -> None:
|
||||
self._possible_calls.clear()
|
||||
|
||||
def get_possible_calls(self) -> Dict[int, int]:
|
||||
return self._possible_calls
|
||||
|
||||
def on_unknown_fn_call(self, orig_addr: int, decomp_addr: int):
|
||||
existing_addr = self._possible_calls.get(orig_addr)
|
||||
if existing_addr is not None and existing_addr != decomp_addr:
|
||||
self.invalid_call_descriptions.append(
|
||||
f"{orig_addr | 0x7100000000:#x} was mapped to {self.addr_to_symbol[existing_addr]} "
|
||||
f"({existing_addr:#x}) "
|
||||
f"but now maps to {self.addr_to_symbol[decomp_addr]} ({decomp_addr:#x})"
|
||||
f" (while checking {self.checking})")
|
||||
return
|
||||
self._possible_calls[orig_addr] = decomp_addr
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser("Identifies matching functions by looking at function calls in matching functions")
|
||||
parser.add_argument("-f", "--fn", help="Functions to analyze", nargs="*")
|
||||
args = parser.parse_args()
|
||||
|
||||
functions_to_analyze = set(args.fn) if args.fn else set()
|
||||
|
||||
functions_by_addr: Dict[int, utils.FunctionInfo] = {fn.addr: fn for fn in utils.get_functions()}
|
||||
fn_checker = Checker()
|
||||
for fn in functions_by_addr.values():
|
||||
if functions_to_analyze and fn.decomp_name not in functions_to_analyze:
|
||||
continue
|
||||
|
||||
if fn.status != utils.FunctionStatus.Matching:
|
||||
continue
|
||||
|
||||
base_fn = elf.get_fn_from_base_elf(fn.addr, fn.size)
|
||||
try:
|
||||
my_fn = elf.get_fn_from_my_elf(fn.decomp_name)
|
||||
except KeyError:
|
||||
utils.warn(f"could not find function {fn.decomp_name}")
|
||||
continue
|
||||
|
||||
fn_checker.checking = fn.decomp_name
|
||||
fn_checker.check(base_fn, my_fn)
|
||||
|
||||
if fn_checker.invalid_call_descriptions:
|
||||
for x in fn_checker.invalid_call_descriptions:
|
||||
utils.print_note(x)
|
||||
utils.fail("invalid calls detected")
|
||||
|
||||
new_matches: Dict[int, str] = dict()
|
||||
calls = fn_checker.get_possible_calls().copy()
|
||||
for base_target, my_target in calls.items():
|
||||
target_info = functions_by_addr.get(base_target)
|
||||
if target_info is None:
|
||||
continue
|
||||
if target_info.status != utils.FunctionStatus.NotDecompiled:
|
||||
continue
|
||||
|
||||
base_fn = elf.get_fn_from_base_elf(target_info.addr, target_info.size)
|
||||
try:
|
||||
name = fn_checker.addr_to_symbol[my_target]
|
||||
my_fn = elf.get_fn_from_my_elf(name)
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
if fn_checker.check(base_fn, my_fn):
|
||||
new_matches[base_target] = name
|
||||
utils.print_note(f"new match: {Fore.BLUE}{cxxfilt.demangle(name)}{Fore.RESET}")
|
||||
|
||||
utils.add_decompiled_functions(new_matches)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,84 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import struct
|
||||
from typing import Dict, Set
|
||||
|
||||
import capstone as cs
|
||||
import cxxfilt
|
||||
from colorama import Fore
|
||||
|
||||
from util import utils, elf
|
||||
|
||||
|
||||
def main() -> None:
|
||||
new_matches: Dict[int, str] = dict()
|
||||
functions_by_addr: Dict[int, utils.FunctionInfo] = {fn.addr: fn for fn in utils.get_functions()}
|
||||
|
||||
md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM)
|
||||
md.detail = True
|
||||
decomp_addr_to_symbol = elf.build_addr_to_symbol_table(elf.my_symtab)
|
||||
decomp_glob_data_table = elf.build_glob_data_table(elf.my_elf)
|
||||
|
||||
processed: Set[int] = set()
|
||||
for fn in functions_by_addr.values():
|
||||
if fn.status != utils.FunctionStatus.Matching:
|
||||
continue
|
||||
|
||||
if fn.size != 0x5C or (not fn.decomp_name.endswith("8getRuntimeTypeInfoEv") and not fn.name.endswith("rtti2")):
|
||||
continue
|
||||
|
||||
base_fn = elf.get_fn_from_base_elf(fn.addr, fn.size)
|
||||
try:
|
||||
my_fn = elf.get_fn_from_my_elf(fn.decomp_name)
|
||||
except KeyError:
|
||||
utils.warn(f"could not find function {fn.decomp_name}")
|
||||
continue
|
||||
|
||||
assert len(base_fn.data) == len(my_fn.data)
|
||||
|
||||
vtable_ptr1 = 0
|
||||
vtable_ptr2 = 0
|
||||
for j, (i1, i2) in enumerate(zip(md.disasm(base_fn.data, base_fn.addr), md.disasm(my_fn.data, my_fn.addr))):
|
||||
assert i1.mnemonic == i2.mnemonic
|
||||
if j == 10:
|
||||
assert i1.mnemonic == "adrp"
|
||||
assert i1.operands[0].reg == i2.operands[0].reg
|
||||
vtable_ptr1 = i1.operands[1].imm
|
||||
vtable_ptr2 = i2.operands[1].imm
|
||||
elif j == 11:
|
||||
assert i1.mnemonic == "ldr"
|
||||
assert i1.operands[0].reg == i2.operands[0].reg
|
||||
assert i1.operands[1].value.mem.base == i2.operands[1].value.mem.base
|
||||
vtable_ptr1 += i1.operands[1].value.mem.disp
|
||||
vtable_ptr2 += i2.operands[1].value.mem.disp
|
||||
break
|
||||
|
||||
assert vtable_ptr1 != 0 and vtable_ptr2 != 0
|
||||
if vtable_ptr1 in processed:
|
||||
continue
|
||||
processed.add(vtable_ptr1)
|
||||
ptr1, = struct.unpack("<Q", elf.read_from_elf(elf.base_elf, vtable_ptr1, 8))
|
||||
ptr2 = decomp_glob_data_table[vtable_ptr2]
|
||||
|
||||
vtable1 = elf.get_vtable_fns_from_base_elf(ptr1 + 0x10, num_entries=1)
|
||||
vtable2 = elf.unpack_vtable_fns(elf.read_from_elf(elf.my_elf, addr=ptr2 + 0x10, size=8), num_entries=1)
|
||||
|
||||
if functions_by_addr[vtable1[0]].status == utils.FunctionStatus.Matching:
|
||||
continue
|
||||
|
||||
decomp_derive_fn_addr = vtable2[0]
|
||||
if decomp_derive_fn_addr == 0:
|
||||
decomp_derive_fn_addr = decomp_glob_data_table.get(ptr2 + 0x10, 0)
|
||||
if decomp_derive_fn_addr == 0:
|
||||
raise RuntimeError(f"Derive virtual function pointer is null "
|
||||
f"(fn: {fn.decomp_name}, decomp vtable at {ptr2:#x})")
|
||||
|
||||
name = decomp_addr_to_symbol[decomp_derive_fn_addr]
|
||||
new_matches[vtable1[0]] = name
|
||||
utils.print_note(f"new match: {Fore.BLUE}{cxxfilt.demangle(name)}{Fore.RESET} (from {fn.decomp_name})")
|
||||
|
||||
# overwrite the original names because they are likely to be incorrect
|
||||
utils.add_decompiled_functions(new_matches, new_orig_names=new_matches)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1 +0,0 @@
|
|||
Subproject commit 011c369bc4bead403d650dd1d1eeb69c04eb19c7
|
|
@ -1,58 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from colorama import Fore, Style
|
||||
import diff_settings
|
||||
import subprocess
|
||||
from util import utils
|
||||
|
||||
parser = argparse.ArgumentParser(description="Prints build/uking.elf symbols")
|
||||
parser.add_argument("--print-undefined", "-u",
|
||||
help="Print symbols that are undefined", action="store_true")
|
||||
parser.add_argument("--print-c2-d2", "-c",
|
||||
help="Print C2/D2 (base object constructor/destructor) symbols", action="store_true")
|
||||
parser.add_argument("--hide-unknown", "-H",
|
||||
help="Hide symbols that are not present in the original game", action="store_true")
|
||||
parser.add_argument("--all", "-a", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
listed_decomp_symbols = {info.decomp_name for info in utils.get_functions()}
|
||||
original_symbols = {info.name for info in utils.get_functions()}
|
||||
|
||||
config: dict = dict()
|
||||
diff_settings.apply(config, {})
|
||||
myimg: str = config["myimg"]
|
||||
|
||||
entries = [x.strip().split() for x in subprocess.check_output(["nm", "-n", myimg], universal_newlines=True).split("\n")]
|
||||
|
||||
for entry in entries:
|
||||
if len(entry) == 3:
|
||||
addr = int(entry[0], 16)
|
||||
symbol_type: str = entry[1]
|
||||
name = entry[2]
|
||||
|
||||
if (symbol_type == "t" or symbol_type == "T" or symbol_type == "W") and (
|
||||
args.all or name not in listed_decomp_symbols):
|
||||
c1_name = name.replace("C2", "C1")
|
||||
is_c2_ctor = "C2" in name and c1_name in listed_decomp_symbols and utils.are_demangled_names_equal(
|
||||
c1_name, name)
|
||||
|
||||
d1_name = name.replace("D2", "D1")
|
||||
is_d2_dtor = "D2" in name and d1_name in listed_decomp_symbols and utils.are_demangled_names_equal(
|
||||
d1_name, name)
|
||||
|
||||
if args.print_c2_d2 or not (is_c2_ctor or is_d2_dtor):
|
||||
color = Fore.YELLOW
|
||||
if name in original_symbols:
|
||||
color = Fore.RED
|
||||
elif args.hide_unknown:
|
||||
continue
|
||||
if is_c2_ctor or is_d2_dtor:
|
||||
color += Style.DIM
|
||||
print(f"{color}UNLISTED {Fore.RESET} {utils.format_symbol_name(name)}")
|
||||
|
||||
elif len(entry) == 2:
|
||||
symbol_type = entry[0]
|
||||
name = entry[1]
|
||||
|
||||
if symbol_type.upper() == "U" and args.print_undefined:
|
||||
print(f"{Fore.CYAN}UNDEFINED{Style.RESET_ALL} {utils.format_symbol_name(name)}")
|
|
@ -1,101 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from colorama import Back, Fore, Style
|
||||
from util import utils
|
||||
from util.utils import FunctionStatus
|
||||
import typing as tp
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--csv", "-c", action="store_true",
|
||||
help="Print a CSV line rather than a human-readable progress report")
|
||||
parser.add_argument("--print-nm", "-n", action="store_true",
|
||||
help="Print non-matching functions with major issues")
|
||||
parser.add_argument("--print-eq", "-e", action="store_true",
|
||||
help="Print non-matching functions with minor issues")
|
||||
parser.add_argument("--print-ok", "-m", action="store_true",
|
||||
help="Print matching functions")
|
||||
args = parser.parse_args()
|
||||
|
||||
code_size_total = 0
|
||||
num_total = 0
|
||||
code_size: tp.DefaultDict[FunctionStatus, int] = defaultdict(int)
|
||||
counts: tp.DefaultDict[FunctionStatus, int] = defaultdict(int)
|
||||
|
||||
for info in utils.get_functions():
|
||||
code_size_total += info.size
|
||||
num_total += 1
|
||||
|
||||
if not info.decomp_name:
|
||||
continue
|
||||
|
||||
counts[info.status] += 1
|
||||
code_size[info.status] += info.size
|
||||
|
||||
if not args.csv:
|
||||
if info.status == FunctionStatus.NonMatching:
|
||||
if args.print_nm:
|
||||
print(f"{Fore.RED}NM{Fore.RESET} {utils.format_symbol_name(info.decomp_name)}")
|
||||
elif info.status == FunctionStatus.Equivalent:
|
||||
if args.print_eq:
|
||||
print(f"{Fore.YELLOW}EQ{Fore.RESET} {utils.format_symbol_name(info.decomp_name)}")
|
||||
elif info.status == FunctionStatus.Matching:
|
||||
if args.print_ok:
|
||||
print(f"{Fore.GREEN}OK{Fore.RESET} {utils.format_symbol_name(info.decomp_name)}")
|
||||
elif info.status == FunctionStatus.Wip:
|
||||
print(
|
||||
f"{Back.RED}{Style.BRIGHT}{Fore.WHITE} WIP {Style.RESET_ALL} {utils.format_symbol_name(info.decomp_name)}{Style.RESET_ALL}")
|
||||
|
||||
|
||||
def format_progress(label: str, num: int, size: int):
|
||||
percentage = round(100 * num / num_total, 3)
|
||||
size_percentage = round(100 * size / code_size_total, 3)
|
||||
return f"{num:>7d} {label}{Fore.RESET} ({percentage}% | size: {size_percentage}%)"
|
||||
|
||||
|
||||
def format_progress_for_status(label: str, status: FunctionStatus):
|
||||
return format_progress(label, counts[status], code_size[status])
|
||||
|
||||
|
||||
if args.csv:
|
||||
import git
|
||||
|
||||
version = 1
|
||||
git_object = git.Repo().head.object
|
||||
timestamp = str(git_object.committed_date)
|
||||
git_hash = git_object.hexsha
|
||||
|
||||
fields = [
|
||||
str(version),
|
||||
timestamp,
|
||||
git_hash,
|
||||
|
||||
str(num_total),
|
||||
str(code_size_total),
|
||||
|
||||
str(counts[FunctionStatus.Matching]),
|
||||
str(code_size[FunctionStatus.Matching]),
|
||||
|
||||
str(counts[FunctionStatus.Equivalent]),
|
||||
str(code_size[FunctionStatus.Equivalent]),
|
||||
|
||||
str(counts[FunctionStatus.NonMatching]),
|
||||
str(code_size[FunctionStatus.NonMatching]),
|
||||
]
|
||||
print(",".join(fields))
|
||||
|
||||
else:
|
||||
print()
|
||||
|
||||
print(f"{num_total:>7d} functions (size: ~{code_size_total} bytes)")
|
||||
|
||||
count_decompiled = counts[FunctionStatus.Matching] + counts[FunctionStatus.Equivalent] + counts[
|
||||
FunctionStatus.NonMatching]
|
||||
code_size_decompiled = code_size[FunctionStatus.Matching] + code_size[FunctionStatus.Equivalent] + code_size[
|
||||
FunctionStatus.NonMatching]
|
||||
|
||||
print(format_progress(f"{Fore.CYAN}decompiled", count_decompiled, code_size_decompiled))
|
||||
print(format_progress_for_status(f"{Fore.GREEN}matching", FunctionStatus.Matching))
|
||||
print(format_progress_for_status(f"{Fore.YELLOW}non-matching (minor issues)", FunctionStatus.Equivalent))
|
||||
print(format_progress_for_status(f"{Fore.RED}non-matching (major issues)", FunctionStatus.NonMatching))
|
||||
print()
|
|
@ -1,18 +0,0 @@
|
|||
# Renames functions in an IDA database to match the function names
|
||||
# in the decompiled source code.
|
||||
|
||||
import csv
|
||||
import idc
|
||||
import os
|
||||
|
||||
csv_path = os.path.join(os.path.dirname(__file__), "../data/uking_functions.csv")
|
||||
|
||||
with open(csv_path, "r") as f:
|
||||
reader = csv.reader(f)
|
||||
# Skip headers
|
||||
next(reader)
|
||||
for fn in reader:
|
||||
addr = int(fn[0], 16)
|
||||
name = fn[3]
|
||||
if name and not name.startswith(("sub_", "nullsub_", "j_")):
|
||||
idc.set_name(addr, name)
|
139
tools/setup.py
139
tools/setup.py
|
@ -9,36 +9,7 @@ import sys
|
|||
import tarfile
|
||||
import tempfile
|
||||
import urllib.request
|
||||
|
||||
ROOT = Path(__file__).parent.parent
|
||||
|
||||
|
||||
def fail(error: str):
|
||||
print(">>> " + error)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _get_tool_binary_path():
|
||||
base = ROOT / "tools" / "nx-decomp-tools-binaries"
|
||||
system = platform.system()
|
||||
if system == "Linux":
|
||||
return str(base / "linux") + "/"
|
||||
if system == "Darwin":
|
||||
return str(base / "macos") + "/"
|
||||
return ""
|
||||
|
||||
|
||||
def _convert_nso_to_elf(nso_path: Path):
|
||||
print(">>>> converting NSO to ELF...")
|
||||
binpath = _get_tool_binary_path()
|
||||
subprocess.check_call([binpath + "nx2elf", str(nso_path)])
|
||||
|
||||
|
||||
def _decompress_nso(nso_path: Path, dest_path: Path):
|
||||
print(">>>> decompressing NSO...")
|
||||
binpath = _get_tool_binary_path()
|
||||
subprocess.check_call([binpath + "hactool", "-tnso",
|
||||
"--uncompressed=" + str(dest_path), str(nso_path)])
|
||||
from common import setup_common as setup
|
||||
|
||||
|
||||
def _download_v160_to_v150_patch(dest: Path):
|
||||
|
@ -46,14 +17,6 @@ def _download_v160_to_v150_patch(dest: Path):
|
|||
urllib.request.urlretrieve("https://s.botw.link/v150_downgrade/v160_to_v150.patch", dest)
|
||||
|
||||
|
||||
def _apply_xdelta3_patch(input: Path, patch: Path, dest: Path):
|
||||
print(">>>> applying patch...")
|
||||
try:
|
||||
subprocess.check_call(["xdelta3", "-d", "-s", str(input), str(patch), str(dest)])
|
||||
except FileNotFoundError:
|
||||
fail("error: install xdelta3 and try again")
|
||||
|
||||
|
||||
def prepare_executable(original_nso: Path):
|
||||
COMPRESSED_V150_HASH = "898dc199301f7c419be5144bb5cb27e2fc346e22b27345ba3fb40c0060c2baf8"
|
||||
UNCOMPRESSED_V150_HASH = "d9fa308d0ee7c0ab081c66d987523385e1afe06f66731bbfa32628438521c106"
|
||||
|
@ -62,26 +25,24 @@ def prepare_executable(original_nso: Path):
|
|||
|
||||
# The uncompressed v1.5.0 main NSO.
|
||||
TARGET_HASH = UNCOMPRESSED_V150_HASH
|
||||
TARGET_PATH = ROOT / "data" / "main.nso"
|
||||
TARGET_ELF_PATH = ROOT / "data" / "main.elf"
|
||||
|
||||
if TARGET_PATH.is_file() and hashlib.sha256(TARGET_PATH.read_bytes()).hexdigest() == TARGET_HASH and TARGET_ELF_PATH.is_file():
|
||||
if setup.TARGET_PATH.is_file() and hashlib.sha256(setup.TARGET_PATH.read_bytes()).hexdigest() == TARGET_HASH and setup.TARGET_ELF_PATH.is_file():
|
||||
print(">>> NSO is already set up")
|
||||
return
|
||||
|
||||
if not original_nso.is_file():
|
||||
fail(f"{original_nso} is not a file")
|
||||
setup.fail(f"{original_nso} is not a file")
|
||||
|
||||
nso_data = original_nso.read_bytes()
|
||||
nso_hash = hashlib.sha256(nso_data).hexdigest()
|
||||
|
||||
if nso_hash == UNCOMPRESSED_V150_HASH:
|
||||
print(">>> found uncompressed 1.5.0 NSO")
|
||||
TARGET_PATH.write_bytes(nso_data)
|
||||
setup.TARGET_PATH.write_bytes(nso_data)
|
||||
|
||||
elif nso_hash == COMPRESSED_V150_HASH:
|
||||
print(">>> found compressed 1.5.0 NSO")
|
||||
_decompress_nso(original_nso, TARGET_PATH)
|
||||
setup._decompress_nso(original_nso, setup.TARGET_PATH)
|
||||
|
||||
elif nso_hash == UNCOMPRESSED_V160_HASH:
|
||||
print(">>> found uncompressed 1.6.0 NSO")
|
||||
|
@ -89,7 +50,7 @@ def prepare_executable(original_nso: Path):
|
|||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
patch_path = Path(tmpdir) / "patch"
|
||||
_download_v160_to_v150_patch(patch_path)
|
||||
_apply_xdelta3_patch(original_nso, patch_path, TARGET_PATH)
|
||||
setup._apply_xdelta3_patch(original_nso, patch_path, setup.TARGET_PATH)
|
||||
|
||||
elif nso_hash == COMPRESSED_V160_HASH:
|
||||
print(">>> found compressed 1.6.0 NSO")
|
||||
|
@ -98,86 +59,22 @@ def prepare_executable(original_nso: Path):
|
|||
patch_path = Path(tmpdir) / "patch"
|
||||
decompressed_nso_path = Path(tmpdir) / "v160.nso"
|
||||
|
||||
_decompress_nso(original_nso, decompressed_nso_path)
|
||||
setup._decompress_nso(original_nso, decompressed_nso_path)
|
||||
_download_v160_to_v150_patch(patch_path)
|
||||
_apply_xdelta3_patch(decompressed_nso_path, patch_path, TARGET_PATH)
|
||||
setup._apply_xdelta3_patch(decompressed_nso_path, patch_path, setup.TARGET_PATH)
|
||||
|
||||
else:
|
||||
fail(f"unknown executable: {nso_hash}")
|
||||
setup.fail(f"unknown executable: {nso_hash}")
|
||||
|
||||
if not TARGET_PATH.is_file():
|
||||
fail("internal error while preparing executable (missing NSO); please report")
|
||||
if hashlib.sha256(TARGET_PATH.read_bytes()).hexdigest() != TARGET_HASH:
|
||||
fail("internal error while preparing executable (wrong NSO hash); please report")
|
||||
if not setup.TARGET_PATH.is_file():
|
||||
setup.fail("internal error while preparing executable (missing NSO); please report")
|
||||
if hashlib.sha256(setup.TARGET_PATH.read_bytes()).hexdigest() != TARGET_HASH:
|
||||
setup.fail("internal error while preparing executable (wrong NSO hash); please report")
|
||||
|
||||
_convert_nso_to_elf(TARGET_PATH)
|
||||
|
||||
if not TARGET_ELF_PATH.is_file():
|
||||
fail("internal error while preparing executable (missing ELF); please report")
|
||||
|
||||
|
||||
def set_up_compiler():
|
||||
compiler_dir = ROOT / "toolchain" / "clang"
|
||||
if compiler_dir.is_dir():
|
||||
print(">>> clang is already set up: nothing to do")
|
||||
return
|
||||
|
||||
system = platform.system()
|
||||
machine = platform.machine()
|
||||
|
||||
builds = {
|
||||
# Linux
|
||||
("Linux", "x86_64"): {
|
||||
"url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-linux-gnu-Fedora-25.tar.xz",
|
||||
"dir_name": "clang+llvm-4.0.1-x86_64-linux-gnu-Fedora-25",
|
||||
},
|
||||
("Linux", "aarch64"): {
|
||||
"url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-aarch64-linux-gnu.tar.xz",
|
||||
"dir_name": "clang+llvm-4.0.1-aarch64-linux-gnu",
|
||||
},
|
||||
|
||||
# macOS
|
||||
("Darwin", "x86_64"): {
|
||||
"url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-apple-darwin.tar.xz",
|
||||
"dir_name": "clang+llvm-4.0.1-x86_64-apple-darwin",
|
||||
},
|
||||
("Darwin", "aarch64"): {
|
||||
"url": "https://releases.llvm.org/4.0.1/clang+llvm-4.0.1-x86_64-apple-darwin.tar.xz",
|
||||
"dir_name": "clang+llvm-4.0.1-x86_64-apple-darwin",
|
||||
},
|
||||
}
|
||||
|
||||
build_info = builds.get((system, machine))
|
||||
if build_info is None:
|
||||
fail(
|
||||
f"unknown platform: {platform.platform()} (please report if you are on Linux and macOS)")
|
||||
|
||||
url: str = build_info["url"]
|
||||
dir_name: str = build_info["dir_name"]
|
||||
|
||||
print(f">>> downloading Clang from {url}...")
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = tmpdir + "/" + url.split("/")[-1]
|
||||
urllib.request.urlretrieve(url, path)
|
||||
|
||||
print(f">>> extracting Clang...")
|
||||
with tarfile.open(path) as f:
|
||||
f.extractall(compiler_dir.parent)
|
||||
(compiler_dir.parent / dir_name).rename(compiler_dir)
|
||||
|
||||
print(">>> successfully set up Clang")
|
||||
|
||||
|
||||
def create_build_dir():
|
||||
build_dir = ROOT / "build"
|
||||
if build_dir.is_dir():
|
||||
print(">>> build directory already exists: nothing to do")
|
||||
return
|
||||
|
||||
subprocess.check_call(
|
||||
"cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_TOOLCHAIN_FILE=toolchain/ToolchainNX64.cmake -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -B build/".split(" "))
|
||||
print(">>> created build directory")
|
||||
setup._convert_nso_to_elf(setup.TARGET_PATH)
|
||||
|
||||
if not setup.TARGET_ELF_PATH.is_file():
|
||||
setup.fail("internal error while preparing executable (missing ELF); please report")
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
|
@ -187,8 +84,8 @@ def main():
|
|||
args = parser.parse_args()
|
||||
|
||||
prepare_executable(args.original_nso)
|
||||
set_up_compiler()
|
||||
create_build_dir()
|
||||
setup.set_up_compiler("4.0.1")
|
||||
setup.create_build_dir()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -1,80 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import struct
|
||||
from typing import Optional
|
||||
|
||||
import cxxfilt
|
||||
from colorama import Fore, Style
|
||||
|
||||
import util.elf
|
||||
from util import utils
|
||||
|
||||
|
||||
def find_vtable(symtab, class_name: str) -> Optional[str]:
|
||||
name_offset = len("vtable for ")
|
||||
for sym in util.elf.iter_symbols(symtab):
|
||||
if not sym.name.startswith("_ZTV"):
|
||||
continue
|
||||
if cxxfilt.demangle(sym.name)[name_offset:] == class_name:
|
||||
return sym.name
|
||||
return None
|
||||
|
||||
|
||||
def bold(s) -> str:
|
||||
return Style.BRIGHT + str(s) + Style.NORMAL
|
||||
|
||||
|
||||
def dump_table(name: str) -> None:
|
||||
try:
|
||||
symbols = util.elf.build_addr_to_symbol_table(util.elf.my_symtab)
|
||||
decomp_symbols = {fn.decomp_name for fn in utils.get_functions() if fn.decomp_name}
|
||||
|
||||
offset, size = util.elf.get_symbol_file_offset_and_size(util.elf.my_elf, util.elf.my_symtab, name)
|
||||
util.elf.my_elf.stream.seek(offset)
|
||||
vtable_bytes = util.elf.my_elf.stream.read(size)
|
||||
|
||||
if not vtable_bytes:
|
||||
utils.fail(
|
||||
"empty vtable; has the key function been implemented? (https://lld.llvm.org/missingkeyfunction.html)")
|
||||
|
||||
print(f"{Fore.WHITE}{Style.BRIGHT}{cxxfilt.demangle(name)}{Style.RESET_ALL}")
|
||||
print(f"{Fore.YELLOW}{Style.BRIGHT}vtable @ 0x0{Style.RESET_ALL}")
|
||||
|
||||
assert size % 8 == 0
|
||||
for i in range(size // 8):
|
||||
word: int = struct.unpack_from("<Q", vtable_bytes, 8 * i)[0]
|
||||
name = symbols.get(word, None)
|
||||
if word == 0:
|
||||
pass
|
||||
elif name is not None:
|
||||
demangled_name: str = cxxfilt.demangle(name)
|
||||
color = Fore.GREEN if name in decomp_symbols else Fore.BLUE
|
||||
print(f"{color}{bold(demangled_name)}{Style.RESET_ALL}")
|
||||
print(f" {name}")
|
||||
elif word & (1 << 63):
|
||||
offset = -struct.unpack_from("<q", vtable_bytes, 8 * i)[0]
|
||||
print()
|
||||
print(f"{Fore.YELLOW}{Style.BRIGHT}vtable @ {offset:#x}{Style.RESET_ALL}")
|
||||
else:
|
||||
print(f"{Fore.RED}unknown data: {word:016x}{Style.RESET_ALL}")
|
||||
|
||||
except KeyError:
|
||||
utils.fail("could not find symbol")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("symbol_name", help="Name of the vtable symbol (_ZTV...) or class name")
|
||||
args = parser.parse_args()
|
||||
|
||||
symbol_name: str = args.symbol_name
|
||||
|
||||
if not symbol_name.startswith("_ZTV"):
|
||||
symbol_name = find_vtable(util.elf.my_symtab, args.symbol_name)
|
||||
|
||||
dump_table(symbol_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,31 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from colorama import Back, Fore, Style
|
||||
import sys
|
||||
|
||||
mapping = {
|
||||
"agl::utl::Parameter$uint$": "agl::utl::Parameter<u32>",
|
||||
"agl::utl::Parameter$int$": "agl::utl::Parameter<int>",
|
||||
"agl::utl::Parameter$s32$": "agl::utl::Parameter<int>",
|
||||
"agl::utl::Parameter$float$": "agl::utl::Parameter<float>",
|
||||
"agl::utl::Parameter$f32$": "agl::utl::Parameter<float>",
|
||||
"agl::utl::Parameter$bool$": "agl::utl::Parameter<bool>",
|
||||
"agl::utl::Parameter$sead::SafeString$": "agl::utl::Parameter<sead::SafeString>",
|
||||
"agl::utl::Parameter$sead::Vector3f$": "agl::utl::Parameter<sead::Vector3f>",
|
||||
"agl::utl::Parameter$sead::FixedSafeString20$": "agl::utl::Parameter<sead::FixedSafeString<32>>",
|
||||
"agl::utl::Parameter$sead::FixedSafeString40$": "agl::utl::Parameter<sead::FixedSafeString<64>>",
|
||||
"agl::utl::Parameter$sead::FixedSafeString100$": "agl::utl::Parameter<sead::FixedSafeString<256>>",
|
||||
"agl::utl::Parameter$sead::Color4f$": "agl::utl::Parameter<sead::Color4f>",
|
||||
"agl::utl::Parameter_String32": "agl::utl::Parameter<sead::FixedSafeString<32>>",
|
||||
"agl::utl::Parameter_String64": "agl::utl::Parameter<sead::FixedSafeString<64>>",
|
||||
"agl::utl::Parameter_String256": "agl::utl::Parameter<sead::FixedSafeString<256>>",
|
||||
}
|
||||
|
||||
lines = list(sys.stdin)
|
||||
|
||||
sys.stderr.write(Back.BLUE + Fore.WHITE + Style.BRIGHT + "=" * 30 + " output " + "=" * 30 + Style.RESET_ALL + "\n")
|
||||
|
||||
for line in lines:
|
||||
for from_type, to_type in mapping.items():
|
||||
line = line.replace(from_type, to_type)
|
||||
sys.stdout.write(line)
|
|
@ -1,223 +0,0 @@
|
|||
import struct
|
||||
from collections import defaultdict
|
||||
from typing import Set, DefaultDict, Dict, Optional, Tuple
|
||||
|
||||
import capstone as cs
|
||||
|
||||
from util import dsym, elf, utils
|
||||
|
||||
_store_instructions = ("str", "strb", "strh", "stur", "sturb", "sturh")
|
||||
|
||||
|
||||
class FunctionChecker:
|
||||
def __init__(self, log_mismatch_cause: bool = False):
|
||||
self.md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM)
|
||||
self.md.detail = True
|
||||
self.my_symtab = elf.build_name_to_symbol_table(elf.my_symtab)
|
||||
self.dsymtab = dsym.DataSymbolContainer()
|
||||
self.decompiled_fns: Dict[int, str] = dict()
|
||||
|
||||
self._log_mismatch_cause = log_mismatch_cause
|
||||
self._mismatch_addr1 = -1
|
||||
self._mismatch_addr2 = -1
|
||||
self._mismatch_cause = ""
|
||||
self._base_got_section = elf.base_elf.get_section_by_name(".got")
|
||||
self._decomp_glob_data_table = elf.build_glob_data_table(elf.my_elf)
|
||||
self._got_data_symbol_check_cache: Dict[Tuple[int, int], bool] = dict()
|
||||
|
||||
self.load_data_for_project()
|
||||
|
||||
def _reset_mismatch(self) -> None:
|
||||
self._mismatch_addr1 = -1
|
||||
self._mismatch_addr2 = -1
|
||||
self._mismatch_cause = ""
|
||||
|
||||
def get_data_symtab(self) -> dsym.DataSymbolContainer:
|
||||
return self.dsymtab
|
||||
|
||||
def get_mismatch(self) -> (int, int, str):
|
||||
return self._mismatch_addr1, self._mismatch_addr2, self._mismatch_cause
|
||||
|
||||
def load_data_for_project(self) -> None:
|
||||
self.decompiled_fns = {func.addr: func.decomp_name for func in utils.get_functions() if func.decomp_name}
|
||||
self.get_data_symtab().load_from_csv(utils.get_repo_root() / "data" / "data_symbols.csv")
|
||||
|
||||
def check(self, base_fn: elf.Function, my_fn: elf.Function) -> bool:
|
||||
self._reset_mismatch()
|
||||
gprs1: DefaultDict[int, int] = defaultdict(int)
|
||||
gprs2: DefaultDict[int, int] = defaultdict(int)
|
||||
adrp_pair_registers: Set[int] = set()
|
||||
|
||||
size = len(base_fn)
|
||||
if len(base_fn) != len(my_fn):
|
||||
if self._log_mismatch_cause:
|
||||
self._set_mismatch_cause(None, None, "different function length")
|
||||
return False
|
||||
|
||||
def forget_modified_registers(insn):
|
||||
_, regs_write = insn.regs_access()
|
||||
for reg in regs_write:
|
||||
adrp_pair_registers.discard(reg)
|
||||
|
||||
for i1, i2 in zip(self.md.disasm(base_fn.data, base_fn.addr), self.md.disasm(my_fn.data, my_fn.addr)):
|
||||
if i1.bytes == i2.bytes:
|
||||
if i1.mnemonic == 'adrp':
|
||||
gprs1[i1.operands[0].reg] = i1.operands[1].imm
|
||||
gprs2[i2.operands[0].reg] = i2.operands[1].imm
|
||||
adrp_pair_registers.add(i1.operands[0].reg)
|
||||
elif i1.mnemonic == 'b':
|
||||
branch_target = i1.operands[0].imm
|
||||
if not (base_fn.addr <= branch_target < base_fn.addr + size):
|
||||
if not self._check_function_call(i1, i2, branch_target, i2.operands[0].imm):
|
||||
return False
|
||||
else:
|
||||
forget_modified_registers(i1)
|
||||
continue
|
||||
|
||||
if i1.mnemonic != i2.mnemonic:
|
||||
if self._log_mismatch_cause:
|
||||
self._set_mismatch_cause(i1, i2, "mnemonics are different")
|
||||
return False
|
||||
|
||||
# Ignore some address differences until a fully matching executable can be generated.
|
||||
|
||||
if i1.mnemonic == 'bl':
|
||||
if not self._check_function_call(i1, i2, i1.operands[0].imm, i2.operands[0].imm):
|
||||
return False
|
||||
continue
|
||||
|
||||
if i1.mnemonic == 'b':
|
||||
branch_target = i1.operands[0].imm
|
||||
# If we are branching outside the function, this is likely a tail call.
|
||||
# Treat this as a function call.
|
||||
if not (base_fn.addr <= branch_target < base_fn.addr + size):
|
||||
if not self._check_function_call(i1, i2, branch_target, i2.operands[0].imm):
|
||||
return False
|
||||
continue
|
||||
# Otherwise, it's a mismatch.
|
||||
return False
|
||||
|
||||
if i1.mnemonic == 'adrp':
|
||||
if i1.operands[0].reg != i2.operands[0].reg:
|
||||
return False
|
||||
reg = i1.operands[0].reg
|
||||
|
||||
gprs1[reg] = i1.operands[1].imm
|
||||
gprs2[reg] = i2.operands[1].imm
|
||||
|
||||
adrp_pair_registers.add(reg)
|
||||
continue
|
||||
|
||||
if i1.mnemonic == 'ldp' or i1.mnemonic == 'ldpsw' or i1.mnemonic == 'stp':
|
||||
if i1.operands[0].reg != i2.operands[0].reg:
|
||||
return False
|
||||
if i1.operands[1].reg != i2.operands[1].reg:
|
||||
return False
|
||||
if i1.operands[2].value.mem.base != i2.operands[2].value.mem.base:
|
||||
return False
|
||||
reg = i1.operands[2].value.mem.base
|
||||
if reg not in adrp_pair_registers:
|
||||
return False
|
||||
|
||||
gprs1[reg] += i1.operands[2].value.mem.disp
|
||||
gprs2[reg] += i2.operands[2].value.mem.disp
|
||||
if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]):
|
||||
return False
|
||||
|
||||
forget_modified_registers(i1)
|
||||
continue
|
||||
|
||||
if i1.mnemonic.startswith('ld') or i1.mnemonic in _store_instructions:
|
||||
if i1.operands[0].reg != i2.operands[0].reg:
|
||||
return False
|
||||
if i1.operands[1].value.mem.base != i2.operands[1].value.mem.base:
|
||||
return False
|
||||
reg = i1.operands[1].value.mem.base
|
||||
if reg not in adrp_pair_registers:
|
||||
return False
|
||||
|
||||
gprs1[reg] += i1.operands[1].value.mem.disp
|
||||
gprs2[reg] += i2.operands[1].value.mem.disp
|
||||
if not self._check_data_symbol_load(i1, i2, gprs1[reg], gprs2[reg]):
|
||||
return False
|
||||
|
||||
forget_modified_registers(i1)
|
||||
continue
|
||||
|
||||
if i1.mnemonic == 'add':
|
||||
if i1.operands[0].reg != i2.operands[0].reg:
|
||||
return False
|
||||
if i1.operands[1].reg != i2.operands[1].reg:
|
||||
return False
|
||||
reg = i1.operands[1].reg
|
||||
if reg not in adrp_pair_registers:
|
||||
return False
|
||||
|
||||
gprs1[reg] += i1.operands[2].imm
|
||||
gprs2[reg] += i2.operands[2].imm
|
||||
if not self._check_data_symbol(i1, i2, gprs1[reg], gprs2[reg]):
|
||||
return False
|
||||
|
||||
forget_modified_registers(i1)
|
||||
continue
|
||||
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _set_mismatch_cause(self, i1: Optional[any], i2: Optional[any], description: str) -> None:
|
||||
self._mismatch_addr1 = i1.address if i1 else -1
|
||||
self._mismatch_addr2 = i2.address if i2 else -1
|
||||
self._mismatch_cause = description
|
||||
|
||||
def _check_data_symbol(self, i1, i2, orig_addr: int, decomp_addr: int) -> bool:
|
||||
symbol = self.dsymtab.get_symbol(orig_addr)
|
||||
if symbol is None:
|
||||
return True
|
||||
|
||||
decomp_symbol = self.my_symtab[symbol.name]
|
||||
if decomp_symbol.addr == decomp_addr:
|
||||
return True
|
||||
|
||||
if self._log_mismatch_cause:
|
||||
self._set_mismatch_cause(i1, i2, f"data symbol mismatch: {symbol.name} (original address: {orig_addr:#x}, "
|
||||
f"expected: {decomp_symbol.addr:#x}, "
|
||||
f"actual: {decomp_addr:#x})")
|
||||
|
||||
return False
|
||||
|
||||
def _check_data_symbol_load(self, i1, i2, orig_addr: int, decomp_addr: int) -> bool:
|
||||
cached_result = self._got_data_symbol_check_cache.get((orig_addr, decomp_addr), None)
|
||||
if cached_result is not None:
|
||||
return cached_result
|
||||
|
||||
if not elf.is_in_section(self._base_got_section, orig_addr, 8):
|
||||
return True
|
||||
|
||||
ptr1, = struct.unpack("<Q", elf.read_from_elf(elf.base_elf, orig_addr, 8))
|
||||
if self.dsymtab.get_symbol(ptr1) is None:
|
||||
return True
|
||||
|
||||
ptr2 = self._decomp_glob_data_table[decomp_addr]
|
||||
|
||||
result = self._check_data_symbol(i1, i2, ptr1, ptr2)
|
||||
self._got_data_symbol_check_cache[(orig_addr, decomp_addr)] = result
|
||||
return result
|
||||
|
||||
def _check_function_call(self, i1, i2, orig_addr: int, decomp_addr: int) -> bool:
|
||||
name = self.decompiled_fns.get(orig_addr, None)
|
||||
if name is None:
|
||||
self.on_unknown_fn_call(orig_addr, decomp_addr)
|
||||
return True
|
||||
|
||||
decomp_symbol = self.my_symtab[name]
|
||||
if decomp_symbol.addr == decomp_addr:
|
||||
return True
|
||||
|
||||
if self._log_mismatch_cause:
|
||||
self._set_mismatch_cause(i1, i2, f"function call mismatch: {name}")
|
||||
|
||||
return False
|
||||
|
||||
def on_unknown_fn_call(self, orig_addr: int, decomp_addr: int) -> None:
|
||||
pass
|
|
@ -1,62 +0,0 @@
|
|||
import csv
|
||||
from pathlib import Path
|
||||
import typing as tp
|
||||
|
||||
import util.elf
|
||||
|
||||
|
||||
class DataSymbol(tp.NamedTuple):
|
||||
addr: int # without the 0x7100000000 base
|
||||
name: str
|
||||
size: int
|
||||
|
||||
|
||||
_IDA_BASE = 0x7100000000
|
||||
|
||||
|
||||
class DataSymbolContainer:
|
||||
def __init__(self) -> None:
|
||||
self.symbols: tp.List[DataSymbol] = []
|
||||
|
||||
def load_from_csv(self, path: Path):
|
||||
symtab = util.elf.build_name_to_symbol_table(util.elf.my_symtab)
|
||||
|
||||
with path.open("r") as f:
|
||||
for i, line in enumerate(csv.reader(f)):
|
||||
if len(line) != 2:
|
||||
raise RuntimeError(f"Invalid line format at line {i}")
|
||||
|
||||
addr = int(line[0], 16) - _IDA_BASE
|
||||
name = line[1]
|
||||
if name not in symtab:
|
||||
continue
|
||||
size = symtab[name].size
|
||||
|
||||
self.symbols.append(DataSymbol(addr, name, size))
|
||||
|
||||
# Sort the list, just in case the entries were not sorted in the CSV.
|
||||
self.symbols.sort(key=lambda sym: sym.addr)
|
||||
|
||||
def get_symbol(self, addr: int) -> tp.Optional[DataSymbol]:
|
||||
"""If addr is part of a known data symbol, this function returns the corresponding symbol."""
|
||||
|
||||
# Perform a binary search on self.symbols.
|
||||
a = 0
|
||||
b = len(self.symbols) - 1
|
||||
while a <= b:
|
||||
m = (a + b) // 2
|
||||
|
||||
symbol: DataSymbol = self.symbols[m]
|
||||
addr_begin = symbol.addr
|
||||
addr_end = addr_begin + symbol.size
|
||||
|
||||
if addr_begin <= addr < addr_end:
|
||||
return symbol
|
||||
if addr <= addr_begin:
|
||||
b = m - 1
|
||||
elif addr >= addr_end:
|
||||
a = m + 1
|
||||
else:
|
||||
return None
|
||||
|
||||
return None
|
|
@ -1,169 +0,0 @@
|
|||
import io
|
||||
import struct
|
||||
from typing import Any, Dict, NamedTuple, Tuple
|
||||
|
||||
from elftools.elf.elffile import ELFFile
|
||||
from elftools.elf.relocation import RelocationSection
|
||||
from elftools.elf.sections import Section
|
||||
|
||||
import diff_settings
|
||||
from util import utils
|
||||
|
||||
_config: Dict[str, Any] = {}
|
||||
diff_settings.apply(_config, {})
|
||||
|
||||
_root = utils.get_repo_root()
|
||||
|
||||
base_elf_data = io.BytesIO((_root / _config["baseimg"]).read_bytes())
|
||||
my_elf_data = io.BytesIO((_root / _config["myimg"]).read_bytes())
|
||||
|
||||
base_elf = ELFFile(base_elf_data)
|
||||
my_elf = ELFFile(my_elf_data)
|
||||
my_symtab = my_elf.get_section_by_name(".symtab")
|
||||
if not my_symtab:
|
||||
utils.fail(f'{_config["myimg"]} has no symbol table')
|
||||
|
||||
|
||||
class Symbol(NamedTuple):
|
||||
addr: int
|
||||
name: str
|
||||
size: int
|
||||
|
||||
|
||||
class Function(NamedTuple):
|
||||
data: bytes
|
||||
addr: int
|
||||
|
||||
|
||||
_ElfSymFormat = struct.Struct("<IBBHQQ")
|
||||
|
||||
|
||||
class _ElfSym(NamedTuple):
|
||||
st_name: int
|
||||
info: int
|
||||
other: int
|
||||
shndx: int
|
||||
st_value: int
|
||||
st_size: int
|
||||
|
||||
@staticmethod
|
||||
def parse(d: bytes):
|
||||
return _ElfSym._make(_ElfSymFormat.unpack(d))
|
||||
|
||||
|
||||
def get_file_offset(elf, addr: int) -> int:
|
||||
for seg in elf.iter_segments():
|
||||
if seg.header["p_type"] != "PT_LOAD":
|
||||
continue
|
||||
if seg["p_vaddr"] <= addr < seg["p_vaddr"] + seg["p_filesz"]:
|
||||
return addr - seg["p_vaddr"] + seg["p_offset"]
|
||||
raise KeyError(f"No segment found for {addr:#x}")
|
||||
|
||||
|
||||
def is_in_section(section: Section, addr: int, size: int) -> bool:
|
||||
begin = section["sh_addr"]
|
||||
end = begin + section["sh_size"]
|
||||
return begin <= addr < end and begin <= addr + size < end
|
||||
|
||||
|
||||
_TableCache = dict()
|
||||
|
||||
|
||||
def make_table_cached(symtab):
|
||||
table = _TableCache.get(id(symtab))
|
||||
if table is None:
|
||||
table = build_name_to_symbol_table(symtab)
|
||||
_TableCache[id(symtab)] = table
|
||||
return table
|
||||
|
||||
|
||||
def get_symbol(symtab, name: str) -> Symbol:
|
||||
table = make_table_cached(symtab)
|
||||
return table[name]
|
||||
|
||||
|
||||
def get_symbol_file_offset_and_size(elf, table, name: str) -> (int, int):
|
||||
sym = get_symbol(table, name)
|
||||
return get_file_offset(elf, sym.addr), sym.size
|
||||
|
||||
|
||||
def iter_symbols(symtab):
|
||||
offset = symtab["sh_offset"]
|
||||
entsize = symtab["sh_entsize"]
|
||||
for i in range(symtab.num_symbols()):
|
||||
symtab.stream.seek(offset + i * entsize)
|
||||
entry = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size))
|
||||
name = symtab.stringtable.get_string(entry.st_name)
|
||||
yield Symbol(entry.st_value, name, entry.st_size)
|
||||
|
||||
|
||||
def build_addr_to_symbol_table(symtab) -> Dict[int, str]:
|
||||
table = dict()
|
||||
for sym in iter_symbols(symtab):
|
||||
addr = sym.addr
|
||||
existing_value = table.get(addr, None)
|
||||
if existing_value is None or not existing_value.startswith("_Z"):
|
||||
table[addr] = sym.name
|
||||
return table
|
||||
|
||||
|
||||
def build_name_to_symbol_table(symtab) -> Dict[str, Symbol]:
|
||||
return {sym.name: sym for sym in iter_symbols(symtab)}
|
||||
|
||||
|
||||
def read_from_elf(elf: ELFFile, addr: int, size: int) -> bytes:
|
||||
addr &= ~0x7100000000
|
||||
offset: int = get_file_offset(elf, addr)
|
||||
elf.stream.seek(offset)
|
||||
return elf.stream.read(size)
|
||||
|
||||
|
||||
def get_fn_from_base_elf(addr: int, size: int) -> Function:
|
||||
return Function(read_from_elf(base_elf, addr, size), addr)
|
||||
|
||||
|
||||
def get_fn_from_my_elf(name: str) -> Function:
|
||||
sym = get_symbol(my_symtab, name)
|
||||
return Function(read_from_elf(my_elf, sym.addr, sym.size), sym.addr)
|
||||
|
||||
|
||||
R_AARCH64_GLOB_DAT = 1025
|
||||
R_AARCH64_RELATIVE = 1027
|
||||
|
||||
|
||||
def build_glob_data_table(elf: ELFFile) -> Dict[int, int]:
|
||||
table: Dict[int, int] = dict()
|
||||
section = elf.get_section_by_name(".rela.dyn")
|
||||
assert isinstance(section, RelocationSection)
|
||||
|
||||
symtab = elf.get_section(section["sh_link"])
|
||||
offset = symtab["sh_offset"]
|
||||
entsize = symtab["sh_entsize"]
|
||||
|
||||
for reloc in section.iter_relocations():
|
||||
symtab.stream.seek(offset + reloc["r_info_sym"] * entsize)
|
||||
sym_value = _ElfSym.parse(symtab.stream.read(_ElfSymFormat.size)).st_value
|
||||
info_type = reloc["r_info_type"]
|
||||
if info_type == R_AARCH64_GLOB_DAT:
|
||||
table[reloc["r_offset"]] = sym_value + reloc["r_addend"]
|
||||
elif info_type == R_AARCH64_RELATIVE:
|
||||
# FIXME: this should be Delta(S) + A
|
||||
table[reloc["r_offset"]] = sym_value + reloc["r_addend"]
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def unpack_vtable_fns(vtable_bytes: bytes, num_entries: int) -> Tuple[int, ...]:
|
||||
return struct.unpack(f"<{num_entries}Q", vtable_bytes[:num_entries * 8])
|
||||
|
||||
|
||||
def get_vtable_fns_from_base_elf(vtable_addr: int, num_entries: int) -> Tuple[int, ...]:
|
||||
vtable_bytes = read_from_elf(base_elf, vtable_addr, num_entries * 8)
|
||||
return unpack_vtable_fns(vtable_bytes, num_entries)
|
||||
|
||||
|
||||
def get_vtable_fns_from_my_elf(vtable_name: str, num_entries: int) -> Tuple[int, ...]:
|
||||
offset, size = get_symbol_file_offset_and_size(my_elf, my_symtab, vtable_name)
|
||||
my_elf.stream.seek(offset + 0x10)
|
||||
vtable_bytes = my_elf.stream.read(size - 0x10)
|
||||
return unpack_vtable_fns(vtable_bytes, num_entries)
|
|
@ -1,61 +0,0 @@
|
|||
from collections import defaultdict
|
||||
|
||||
_Visiting = 0
|
||||
_Visited = 1
|
||||
|
||||
|
||||
class Graph:
|
||||
def __init__(self):
|
||||
self.nodes = defaultdict(set)
|
||||
|
||||
def add_edge(self, a, b):
|
||||
self.nodes[a].add(b)
|
||||
|
||||
def find_connected_components(self):
|
||||
nodes = defaultdict(list)
|
||||
for u in self.nodes:
|
||||
for v in self.nodes[u]:
|
||||
nodes[u].append(v)
|
||||
nodes[v].append(u)
|
||||
cc = []
|
||||
visited = set()
|
||||
|
||||
def dfs(start):
|
||||
result = []
|
||||
to_visit = [start]
|
||||
while to_visit:
|
||||
x = to_visit.pop()
|
||||
result.append(x)
|
||||
visited.add(x)
|
||||
for y in nodes[x]:
|
||||
if y not in visited:
|
||||
to_visit.append(y)
|
||||
return result
|
||||
|
||||
for u in nodes.keys():
|
||||
if u in visited:
|
||||
continue
|
||||
cc.append(dfs(u))
|
||||
return cc
|
||||
|
||||
def topological_sort(self) -> list:
|
||||
result = []
|
||||
statuses = dict()
|
||||
|
||||
def dfs(node):
|
||||
if statuses.get(node) == _Visiting:
|
||||
raise RuntimeError("Graph is not acyclic")
|
||||
if statuses.get(node) == _Visited:
|
||||
return
|
||||
|
||||
statuses[node] = _Visiting
|
||||
for y in self.nodes.get(node, set()):
|
||||
dfs(y)
|
||||
|
||||
statuses[node] = _Visited
|
||||
result.insert(0, node)
|
||||
|
||||
for x in self.nodes:
|
||||
dfs(x)
|
||||
|
||||
return result
|
|
@ -1,129 +0,0 @@
|
|||
import io
|
||||
|
||||
from colorama import Fore, Style
|
||||
import csv
|
||||
import warnings
|
||||
import enum
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import typing as tp
|
||||
|
||||
try:
|
||||
import cxxfilt
|
||||
except:
|
||||
# cxxfilt cannot be used on Windows.
|
||||
warnings.warn("cxxfilt could not be imported; demangling functions will fail")
|
||||
|
||||
|
||||
class FunctionStatus(enum.Enum):
|
||||
Matching = 0
|
||||
Equivalent = 1 # semantically equivalent but not perfectly matching
|
||||
NonMatching = 2
|
||||
Wip = 3
|
||||
NotDecompiled = 4
|
||||
|
||||
|
||||
class FunctionInfo(tp.NamedTuple):
|
||||
addr: int # without the 0x7100000000 base
|
||||
name: str
|
||||
size: int
|
||||
decomp_name: str
|
||||
library: bool
|
||||
status: FunctionStatus
|
||||
raw_row: tp.List[str]
|
||||
|
||||
|
||||
_markers = {
|
||||
"O": FunctionStatus.Matching,
|
||||
"m": FunctionStatus.Equivalent,
|
||||
"M": FunctionStatus.NonMatching,
|
||||
"W": FunctionStatus.Wip,
|
||||
"U": FunctionStatus.NotDecompiled,
|
||||
"L": FunctionStatus.NotDecompiled,
|
||||
}
|
||||
|
||||
|
||||
def parse_function_csv_entry(row) -> FunctionInfo:
|
||||
ea, stat, size, name = row
|
||||
status = _markers.get(stat, FunctionStatus.NotDecompiled)
|
||||
decomp_name = ""
|
||||
|
||||
if status != FunctionStatus.NotDecompiled:
|
||||
decomp_name = name
|
||||
|
||||
addr = int(ea, 16) - 0x7100000000
|
||||
return FunctionInfo(addr, name, int(size), decomp_name, stat == "L", status, row)
|
||||
|
||||
|
||||
def get_functions_csv_path() -> Path:
|
||||
return get_repo_root() / "data" / "uking_functions.csv"
|
||||
|
||||
|
||||
def get_functions(path: tp.Optional[Path] = None) -> tp.Iterable[FunctionInfo]:
|
||||
if path is None:
|
||||
path = get_functions_csv_path()
|
||||
with path.open() as f:
|
||||
reader = csv.reader(f)
|
||||
# Skip headers
|
||||
next(reader)
|
||||
for row in reader:
|
||||
try:
|
||||
entry = parse_function_csv_entry(row)
|
||||
# excluded library function
|
||||
if entry.library:
|
||||
continue
|
||||
yield entry
|
||||
except ValueError as e:
|
||||
raise Exception(f"Failed to parse line {reader.line_num}") from e
|
||||
|
||||
|
||||
def add_decompiled_functions(new_matches: tp.Dict[int, str],
|
||||
new_orig_names: tp.Optional[tp.Dict[int, str]] = None) -> None:
|
||||
buffer = io.StringIO()
|
||||
writer = csv.writer(buffer, lineterminator="\n")
|
||||
for func in get_functions():
|
||||
if new_orig_names is not None and func.status == FunctionStatus.NotDecompiled and func.addr in new_orig_names:
|
||||
func.raw_row[3] = new_orig_names[func.addr]
|
||||
if func.status == FunctionStatus.NotDecompiled and func.addr in new_matches:
|
||||
func.raw_row[3] = new_matches[func.addr]
|
||||
writer.writerow(func.raw_row)
|
||||
get_functions_csv_path().write_text(buffer.getvalue())
|
||||
|
||||
|
||||
def format_symbol_name(name: str) -> str:
|
||||
try:
|
||||
return f"{cxxfilt.demangle(name)} {Style.DIM}({name}){Style.RESET_ALL}"
|
||||
except:
|
||||
return name
|
||||
|
||||
|
||||
def format_symbol_name_for_msg(name: str) -> str:
|
||||
try:
|
||||
return f"{Fore.BLUE}{cxxfilt.demangle(name)}{Fore.RESET} {Style.DIM}({name}){Style.RESET_ALL}{Style.BRIGHT}"
|
||||
except:
|
||||
return name
|
||||
|
||||
|
||||
def are_demangled_names_equal(name1: str, name2: str):
|
||||
return cxxfilt.demangle(name1) == cxxfilt.demangle(name2)
|
||||
|
||||
|
||||
def print_note(msg: str, prefix: str = ""):
|
||||
sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.CYAN}note:{Fore.RESET} {msg}{Style.RESET_ALL}\n")
|
||||
|
||||
|
||||
def warn(msg: str, prefix: str = ""):
|
||||
sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.MAGENTA}warning:{Fore.RESET} {msg}{Style.RESET_ALL}\n")
|
||||
|
||||
|
||||
def print_error(msg: str, prefix: str = ""):
|
||||
sys.stderr.write(f"{Style.BRIGHT}{prefix}{Fore.RED}error:{Fore.RESET} {msg}{Style.RESET_ALL}\n")
|
||||
|
||||
|
||||
def fail(msg: str, prefix: str = ""):
|
||||
print_error(msg, prefix)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def get_repo_root() -> Path:
|
||||
return Path(__file__).parent.parent.parent
|
|
@ -1 +0,0 @@
|
|||
/target
|
|
@ -1,506 +0,0 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "capstone"
|
||||
version = "0.9.0"
|
||||
source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07"
|
||||
dependencies = [
|
||||
"capstone-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "capstone-sys"
|
||||
version = "0.13.0"
|
||||
source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"lazy_static",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea47428dc9d2237f3c6bc134472edfd63ebba0af932e783506dcfd66f10d18a"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"lazy_static",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"csv-core",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "goblin"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b1800b95efee8ad4ef04517d4d69f8e209e763b1668f1179aeeedd0e454da55"
|
||||
dependencies = [
|
||||
"log",
|
||||
"plain",
|
||||
"scroll",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
|
||||
|
||||
[[package]]
|
||||
name = "lazy-init"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23517540be87a91d06324e6bf6286ba8214171123ee8862ae9a5e7d938d71815"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.98"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
|
||||
|
||||
[[package]]
|
||||
name = "libmimalloc-sys"
|
||||
version = "0.1.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d1b8479c593dba88c2741fc50b92e13dbabbbe0bd504d979f244ccc1a5b1c01"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mimalloc"
|
||||
version = "0.1.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb74897ce508e6c49156fd1476fc5922cbc6e75183c65e399c765a09122e5130"
|
||||
dependencies = [
|
||||
"libmimalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "owning_ref"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"crossbeam-deque",
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"lazy_static",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "scroll"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec"
|
||||
dependencies = [
|
||||
"scroll_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scroll_derive"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aaaae8f38bb311444cfb7f1979af0bc9240d95795f75f9ceddf6a59b79ceffa0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.126"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
|
||||
|
||||
[[package]]
|
||||
name = "smawk"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043"
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.74"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"
|
||||
dependencies = [
|
||||
"smawk",
|
||||
"unicode-linebreak",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-linebreak"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a52dcaab0c48d931f7cc8ef826fa51690a08e1ea55117ef26f89864f532383f"
|
||||
dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
|
||||
[[package]]
|
||||
name = "viking"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"capstone",
|
||||
"colored",
|
||||
"cpp_demangle",
|
||||
"csv",
|
||||
"goblin",
|
||||
"itertools",
|
||||
"lazy-init",
|
||||
"memmap",
|
||||
"mimalloc",
|
||||
"owning_ref",
|
||||
"rayon",
|
||||
"rustc-hash",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
|
@ -1,28 +0,0 @@
|
|||
[package]
|
||||
name = "viking"
|
||||
version = "1.0.0"
|
||||
edition = "2018"
|
||||
|
||||
[profile.release]
|
||||
debug = 1
|
||||
lto = "thin"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
capstone = { git = "https://github.com/leoetlino/capstone-rs" }
|
||||
colored = "2"
|
||||
cpp_demangle = "0.3.3"
|
||||
csv = "1.1"
|
||||
goblin = "0.4"
|
||||
itertools = "0.10.1"
|
||||
lazy-init = "0.5.0"
|
||||
memmap = "0.6.1"
|
||||
mimalloc = { version = "*", default-features = false }
|
||||
owning_ref = "0.4.1"
|
||||
rayon = "1.5.1"
|
||||
rustc-hash = "1.1.0"
|
||||
textwrap = "0.14.2"
|
||||
|
||||
[[bin]]
|
||||
name = "botw-check"
|
||||
path = "src/tools/check.rs"
|
|
@ -1,21 +0,0 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2021 leoetlino
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -1,89 +0,0 @@
|
|||
use anyhow::{bail, Result};
|
||||
use capstone as cs;
|
||||
use cs::arch::arm64::{Arm64Insn, Arm64OpMem, Arm64Operand, Arm64OperandType};
|
||||
use cs::{arch::ArchOperand, RegId};
|
||||
|
||||
pub fn translate_cs_error<T>(err: cs::Error) -> Result<T> {
|
||||
bail!("capstone error: {}", err)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn map_two<'a, T, R, F: FnMut(&'a T) -> R>(x: &'a T, y: &'a T, mut f: F) -> (R, R) {
|
||||
(f(x), f(y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn map_pair<'a, T, R, F: FnMut(&'a T) -> R>(pair: &'a (T, T), f: F) -> (R, R) {
|
||||
map_two(&pair.0, &pair.1, f)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn try_map_two<'a, T, R, F: FnMut(&'a T) -> Result<R, cs::Error>>(
|
||||
x: &'a T,
|
||||
y: &'a T,
|
||||
mut f: F,
|
||||
) -> Result<(R, R)> {
|
||||
Ok((
|
||||
f(x).or_else(translate_cs_error)?,
|
||||
f(y).or_else(translate_cs_error)?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Checks if `id` is in [start, end] (inclusive range).
|
||||
#[inline]
|
||||
pub fn is_id_in_range(start: Arm64Insn, end: Arm64Insn, id: Arm64Insn) -> bool {
|
||||
let range = (start as u32)..=(end as u32);
|
||||
range.contains(&(id as u32))
|
||||
}
|
||||
|
||||
/// Used to make accessing arch-specific data less cumbersome.
|
||||
pub trait CsArchOperandUtil {
|
||||
fn arm64(&self) -> &Arm64Operand;
|
||||
}
|
||||
|
||||
impl CsArchOperandUtil for ArchOperand {
|
||||
fn arm64(&self) -> &Arm64Operand {
|
||||
match self {
|
||||
Self::Arm64Operand(x) => x,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Used to make accessing arch-specific data less cumbersome.
|
||||
pub trait CsArm64OperandTypeUtil {
|
||||
fn reg(&self) -> RegId;
|
||||
fn imm(&self) -> i64;
|
||||
fn try_mem(&self) -> Option<Arm64OpMem>;
|
||||
fn mem(&self) -> Arm64OpMem;
|
||||
}
|
||||
|
||||
impl CsArm64OperandTypeUtil for Arm64OperandType {
|
||||
fn reg(&self) -> RegId {
|
||||
match self {
|
||||
Self::Reg(x) => *x,
|
||||
_ => panic!("expected Reg, got {:#?}", &self),
|
||||
}
|
||||
}
|
||||
|
||||
fn imm(&self) -> i64 {
|
||||
match self {
|
||||
Self::Imm(x) => *x,
|
||||
_ => panic!("expected Imm, got {:#?}", &self),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_mem(&self) -> Option<Arm64OpMem> {
|
||||
match self {
|
||||
Self::Mem(x) => Some(*x),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn mem(&self) -> Arm64OpMem {
|
||||
match self {
|
||||
Self::Mem(x) => *x,
|
||||
_ => panic!("expected Mem, got {:#?}", &self),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,518 +0,0 @@
|
|||
use anyhow::{ensure, Result};
|
||||
use capstone as cs;
|
||||
use cs::arch::arm64::{Arm64Insn, Arm64Operand, Arm64OperandType};
|
||||
use itertools::zip;
|
||||
use lazy_init::Lazy;
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::{capstone_utils::*, elf, functions, repo, ui};
|
||||
|
||||
struct DataSymbol {
|
||||
/// Address of the symbol in the original executable.
|
||||
pub addr: u64,
|
||||
/// Name of the symbol in our source code.
|
||||
pub name: String,
|
||||
/// Size of the symbol in our source code (according to ELF info).
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
/// Keeps track of known data symbols so that data loads can be validated.
|
||||
#[derive(Default)]
|
||||
struct KnownDataSymbolMap {
|
||||
/// Symbols. Must be sorted by address.
|
||||
symbols: Vec<DataSymbol>,
|
||||
}
|
||||
|
||||
impl KnownDataSymbolMap {
|
||||
fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
fn load(&mut self, csv_path: &Path, decomp_symtab: &elf::SymbolTableByName) -> Result<()> {
|
||||
let mut reader = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.quoting(false)
|
||||
.from_path(csv_path)?;
|
||||
for (line, maybe_record) in reader.records().enumerate() {
|
||||
let record = &maybe_record?;
|
||||
ensure!(
|
||||
record.len() == 2,
|
||||
"invalid number of fields on line {}",
|
||||
line
|
||||
);
|
||||
|
||||
let addr = functions::parse_address(&record[0])?;
|
||||
let name = &record[1];
|
||||
|
||||
let symbol = decomp_symtab.get(name);
|
||||
// Ignore missing symbols.
|
||||
if symbol.is_none() {
|
||||
continue;
|
||||
}
|
||||
let symbol = symbol.unwrap();
|
||||
|
||||
self.symbols.push(DataSymbol {
|
||||
addr,
|
||||
name: name.to_string(),
|
||||
size: symbol.st_size,
|
||||
});
|
||||
}
|
||||
self.symbols.sort_by_key(|sym| sym.addr);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// If addr is part of a known data symbol, this function returns the corresponding symbol.
|
||||
fn get_symbol(&self, addr: u64) -> Option<&DataSymbol> {
|
||||
// Perform a binary search since `symbols` is sorted.
|
||||
let mut a: isize = 0;
|
||||
let mut b: isize = self.symbols.len() as isize - 1;
|
||||
while a <= b {
|
||||
let m = a + (b - a) / 2;
|
||||
|
||||
let mid_symbol = &self.symbols[m as usize];
|
||||
let mid_addr_begin = mid_symbol.addr;
|
||||
let mid_addr_end = mid_addr_begin + mid_symbol.size as u64;
|
||||
|
||||
if mid_addr_begin <= addr && addr < mid_addr_end {
|
||||
return Some(mid_symbol);
|
||||
}
|
||||
if addr <= mid_addr_begin {
|
||||
b = m - 1;
|
||||
} else if addr >= mid_addr_end {
|
||||
a = m + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn get_data_symbol_csv_path() -> Result<PathBuf> {
|
||||
let mut path = repo::get_repo_root()?;
|
||||
path.push("data");
|
||||
path.push("data_symbols.csv");
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReferenceDiff {
|
||||
pub referenced_symbol: u64,
|
||||
pub expected_ref_in_decomp: u64,
|
||||
pub actual_ref_in_decomp: u64,
|
||||
|
||||
pub expected_symbol_name: String,
|
||||
pub actual_symbol_name: String,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ReferenceDiff {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"wrong reference to {ref} {ref_name}\n\
|
||||
--> decomp source code is referencing {actual} {actual_name}\n\
|
||||
--> expected to see {expected} to match original code",
|
||||
ref=ui::format_address(self.referenced_symbol),
|
||||
ref_name=ui::format_symbol_name(&self.expected_symbol_name),
|
||||
expected=ui::format_address(self.expected_ref_in_decomp),
|
||||
actual=ui::format_address(self.actual_ref_in_decomp),
|
||||
actual_name=ui::format_symbol_name(&self.actual_symbol_name),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum MismatchCause {
|
||||
FunctionSize,
|
||||
Register,
|
||||
Mnemonic,
|
||||
BranchTarget,
|
||||
FunctionCall(ReferenceDiff),
|
||||
DataReference(ReferenceDiff),
|
||||
Immediate,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for MismatchCause {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match &self {
|
||||
Self::FunctionSize => write!(f, "wrong function size"),
|
||||
Self::Register => write!(f, "wrong register"),
|
||||
Self::Mnemonic => write!(f, "wrong mnemonic"),
|
||||
Self::BranchTarget => write!(f, "wrong branch target"),
|
||||
Self::FunctionCall(diff) => write!(f, "wrong function call\n{}", diff),
|
||||
Self::DataReference(diff) => write!(f, "wrong data reference\n{}", diff),
|
||||
Self::Immediate => write!(f, "wrong immediate"),
|
||||
Self::Unknown => write!(f, "unknown reason; check diff.py"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Mismatch {
|
||||
pub addr_orig: u64,
|
||||
pub addr_decomp: u64,
|
||||
pub cause: MismatchCause,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Mismatch {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"mismatch at {}: {}",
|
||||
ui::format_address(self.addr_orig),
|
||||
self.cause,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf> {
|
||||
decomp_elf: &'decomp_elf elf::OwnedElf,
|
||||
decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>,
|
||||
decomp_glob_data_table: elf::GlobDataTable,
|
||||
|
||||
// Optional, only initialized when a mismatch is detected.
|
||||
decomp_addr_to_name_map: Lazy<elf::AddrToNameMap<'decomp_elf>>,
|
||||
|
||||
known_data_symbols: KnownDataSymbolMap,
|
||||
known_functions: FxHashMap<u64, &'functions functions::Info>,
|
||||
|
||||
orig_elf: &'orig_elf elf::OwnedElf,
|
||||
orig_got_section: &'orig_elf goblin::elf::SectionHeader,
|
||||
}
|
||||
|
||||
impl<'a, 'functions, 'orig_elf, 'decomp_elf>
|
||||
FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf>
|
||||
{
|
||||
pub fn new(
|
||||
orig_elf: &'orig_elf elf::OwnedElf,
|
||||
decomp_elf: &'decomp_elf elf::OwnedElf,
|
||||
decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>,
|
||||
decomp_glob_data_table: elf::GlobDataTable,
|
||||
functions: &'functions [functions::Info],
|
||||
) -> Result<Self> {
|
||||
let mut known_data_symbols = KnownDataSymbolMap::new();
|
||||
known_data_symbols.load(get_data_symbol_csv_path()?.as_path(), &decomp_symtab)?;
|
||||
|
||||
let known_functions = functions::make_known_function_map(functions);
|
||||
let orig_got_section = elf::find_section(orig_elf, ".got")?;
|
||||
|
||||
Ok(FunctionChecker {
|
||||
decomp_elf,
|
||||
decomp_symtab,
|
||||
decomp_glob_data_table,
|
||||
decomp_addr_to_name_map: Lazy::new(),
|
||||
|
||||
known_data_symbols,
|
||||
known_functions,
|
||||
|
||||
orig_elf,
|
||||
orig_got_section,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn check(
|
||||
&self,
|
||||
cs: &mut cs::Capstone,
|
||||
orig_fn: &elf::Function,
|
||||
decomp_fn: &elf::Function,
|
||||
) -> Result<Option<Mismatch>> {
|
||||
// Keep track of registers that are used with ADRP so that we can check global data
|
||||
// references even when data is not placed at the same addresses
|
||||
// as in the original executable.
|
||||
#[derive(Default)]
|
||||
struct State {
|
||||
gprs1: HashMap<cs::RegId, u64>,
|
||||
gprs2: HashMap<cs::RegId, u64>,
|
||||
adrp_pair_registers: HashSet<cs::RegId>,
|
||||
}
|
||||
|
||||
impl State {
|
||||
fn forget_modified_registers(&mut self, detail: &cs::InsnDetail) {
|
||||
for reg in detail.regs_write() {
|
||||
self.adrp_pair_registers.remove(®);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut state = State::default();
|
||||
|
||||
if orig_fn.code.len() != decomp_fn.code.len() {
|
||||
return Ok(Some(Mismatch {
|
||||
addr_orig: orig_fn.addr,
|
||||
addr_decomp: decomp_fn.addr,
|
||||
cause: MismatchCause::FunctionSize,
|
||||
}));
|
||||
}
|
||||
|
||||
let mut instructions = try_map_two(&orig_fn, &decomp_fn, |func| {
|
||||
cs.disasm_iter(func.code, func.addr)
|
||||
})?;
|
||||
|
||||
// Check every pair of instructions.
|
||||
while let (Some(i1), Some(i2)) = (instructions.0.next(), instructions.1.next()) {
|
||||
let ids = map_two(&i1, &i2, |i| i.id().0);
|
||||
let detail = try_map_two(&i1, &i2, |insn| cs.insn_detail(&insn))?;
|
||||
let arch_detail = map_pair(&detail, |d| d.arch_detail());
|
||||
let ops = map_pair(&arch_detail, |a| a.arm64().unwrap().operands_ref());
|
||||
|
||||
if ids.0 != ids.1 {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Mnemonic);
|
||||
}
|
||||
|
||||
let id = ids.0;
|
||||
|
||||
match id.into() {
|
||||
// Branches or function calls.
|
||||
Arm64Insn::ARM64_INS_B | Arm64Insn::ARM64_INS_BL => {
|
||||
let target =
|
||||
map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.imm() as u64);
|
||||
|
||||
// If we are branching outside the function, this is likely a tail call.
|
||||
// Treat it as a function call.
|
||||
if !orig_fn.get_addr_range().contains(&target.0) {
|
||||
if let Some(mismatch_cause) = self.check_function_call(target.0, target.1) {
|
||||
return Self::make_mismatch(&i1, &i2, mismatch_cause);
|
||||
}
|
||||
} else {
|
||||
// Otherwise, it's a simple branch, and both targets must match.
|
||||
if i1.bytes() != i2.bytes() {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::BranchTarget);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Catch ADRP + (ADD/load/store) instruction pairs.
|
||||
Arm64Insn::ARM64_INS_ADRP => {
|
||||
let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg());
|
||||
let imm =
|
||||
map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.imm() as u64);
|
||||
|
||||
if reg.0 != reg.1 {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
|
||||
}
|
||||
|
||||
state.gprs1.insert(reg.0, imm.0);
|
||||
state.gprs2.insert(reg.1, imm.1);
|
||||
state.adrp_pair_registers.insert(reg.0);
|
||||
}
|
||||
|
||||
// Catch ADRP + ADD instruction pairs.
|
||||
Arm64Insn::ARM64_INS_ADD => {
|
||||
let mut diff_ok = false;
|
||||
|
||||
if ops.0.len() == 3 {
|
||||
let dest_reg =
|
||||
map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg());
|
||||
let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.reg());
|
||||
|
||||
if let Arm64OperandType::Imm(_) = Arm64Operand::from(&ops.0[2]).op_type {
|
||||
let imm =
|
||||
map_pair(&ops, |ops| Arm64Operand::from(&ops[2]).op_type.imm());
|
||||
|
||||
if dest_reg.0 != dest_reg.1 || reg.0 != reg.1 {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
|
||||
}
|
||||
|
||||
// Is this an ADRP pair we can check?
|
||||
if state.adrp_pair_registers.contains(®.0) {
|
||||
let orig_addr = state.gprs1[®.0] + imm.0 as u64;
|
||||
let decomp_addr = state.gprs2[®.1] + imm.1 as u64;
|
||||
|
||||
if let Some(mismatch_cause) =
|
||||
self.check_data_symbol(orig_addr, decomp_addr)
|
||||
{
|
||||
return Self::make_mismatch(&i1, &i2, mismatch_cause);
|
||||
}
|
||||
|
||||
// If the data symbol reference matches, allow the instructions to be different.
|
||||
diff_ok = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !diff_ok && i1.bytes() != i2.bytes() {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
|
||||
}
|
||||
|
||||
state.forget_modified_registers(&detail.0);
|
||||
}
|
||||
|
||||
// Loads and stores (single or paired).
|
||||
id if is_id_in_range(Arm64Insn::ARM64_INS_LD1, Arm64Insn::ARM64_INS_LDXRH, id)
|
||||
|| is_id_in_range(Arm64Insn::ARM64_INS_ST1, Arm64Insn::ARM64_INS_STXR, id) =>
|
||||
{
|
||||
let mut diff_ok = false;
|
||||
|
||||
// Check all operands for mismatches, except the Arm64OpMem which will be checked later.
|
||||
let mut mem = (None, None);
|
||||
for (op1, op2) in zip(ops.0, ops.1) {
|
||||
let op1 = Arm64Operand::from(op1);
|
||||
let op2 = Arm64Operand::from(op2);
|
||||
if let Some(mem1) = op1.op_type.try_mem() {
|
||||
if let Some(mem2) = op2.op_type.try_mem() {
|
||||
ensure!(
|
||||
mem.0.is_none() && mem.1.is_none(),
|
||||
"found more than one OpMem"
|
||||
);
|
||||
mem.0 = Some(mem1);
|
||||
mem.1 = Some(mem2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if op1 != op2 {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
|
||||
}
|
||||
}
|
||||
|
||||
ensure!(mem.0.is_some() && mem.1.is_some(), "didn't find an OpMem");
|
||||
|
||||
let mem = (mem.0.unwrap(), mem.1.unwrap());
|
||||
|
||||
if mem.0.base() != mem.1.base() {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
|
||||
}
|
||||
|
||||
let reg = mem.0.base();
|
||||
|
||||
// Is this an ADRP pair we can check?
|
||||
if state.adrp_pair_registers.contains(®) {
|
||||
let orig_addr_ptr = (state.gprs1[®] as i64 + mem.0.disp() as i64) as u64;
|
||||
let decomp_addr_ptr =
|
||||
(state.gprs2[®] as i64 + mem.1.disp() as i64) as u64;
|
||||
|
||||
if let Some(mismatch_cause) =
|
||||
self.check_data_symbol_ptr(orig_addr_ptr, decomp_addr_ptr)
|
||||
{
|
||||
return Self::make_mismatch(&i1, &i2, mismatch_cause);
|
||||
}
|
||||
|
||||
// If the data symbol reference matches, allow the instructions to be different.
|
||||
diff_ok = true;
|
||||
}
|
||||
|
||||
if !diff_ok && i1.bytes() != i2.bytes() {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
|
||||
}
|
||||
|
||||
state.forget_modified_registers(&detail.0);
|
||||
}
|
||||
|
||||
// Anything else.
|
||||
_ => {
|
||||
if i1.bytes() != i2.bytes() {
|
||||
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
|
||||
}
|
||||
|
||||
state.forget_modified_registers(&detail.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns None on success and a MismatchCause on failure.
///
/// Verifies that a call to `orig_addr` in the original binary corresponds to a
/// call to the matching symbol in the decompiled binary.
fn check_function_call(&self, orig_addr: u64, decomp_addr: u64) -> Option<MismatchCause> {
    // Unknown target functions cannot be checked; treat them as matching.
    let info = *self.known_functions.get(&orig_addr)?;
    let name = info.name.as_str();
    // If the decomp ELF does not define the symbol, there is nothing to compare.
    let decomp_symbol = self.decomp_symtab.get(name)?;
    let expected = decomp_symbol.st_value;

    if decomp_addr == expected {
        None
    } else {
        // Wrong callee: report what was referenced instead of the expected symbol.
        let actual_symbol_name = self.translate_decomp_addr_to_name(decomp_addr);

        Some(MismatchCause::FunctionCall(ReferenceDiff {
            referenced_symbol: orig_addr,
            expected_ref_in_decomp: expected,
            actual_ref_in_decomp: decomp_addr,
            expected_symbol_name: name.to_string(),
            actual_symbol_name: actual_symbol_name.unwrap_or("unknown").to_string(),
        }))
    }
}
|
||||
|
||||
/// Returns None on success and a MismatchCause on failure.
///
/// Compares a data reference: `decomp_addr` must equal the address of `symbol`
/// in the decompiled ELF.
fn check_data_symbol_ex(
    &self,
    orig_addr: u64,
    decomp_addr: u64,
    symbol: &DataSymbol,
) -> Option<MismatchCause> {
    // Symbol absent from the decomp ELF: nothing to compare against.
    let decomp_symbol = self.decomp_symtab.get(symbol.name.as_str())?;
    let expected = decomp_symbol.st_value;

    if decomp_addr == expected {
        None
    } else {
        // Wrong data reference: name whatever was actually referenced, if known.
        let actual_symbol_name = self.translate_decomp_addr_to_name(decomp_addr);

        Some(MismatchCause::DataReference(ReferenceDiff {
            referenced_symbol: orig_addr,
            expected_ref_in_decomp: expected,
            actual_ref_in_decomp: decomp_addr,
            expected_symbol_name: symbol.name.to_string(),
            actual_symbol_name: actual_symbol_name.unwrap_or("unknown").to_string(),
        }))
    }
}
|
||||
|
||||
/// Returns None on success and a MismatchCause on failure.
|
||||
fn check_data_symbol(&self, orig_addr: u64, decomp_addr: u64) -> Option<MismatchCause> {
|
||||
let symbol = self.known_data_symbols.get_symbol(orig_addr)?;
|
||||
self.check_data_symbol_ex(orig_addr, decomp_addr, symbol)
|
||||
}
|
||||
|
||||
/// Returns None on success and a MismatchCause on failure.
/// Unlike check_data_symbol, this function takes the addresses of *pointers to* possible data symbols,
/// not the symbols themselves.
fn check_data_symbol_ptr(
    &self,
    orig_addr_ptr: u64,
    decomp_addr_ptr: u64,
) -> Option<MismatchCause> {
    // Only pointers inside the original binary's GOT are checked (8 bytes = one pointer).
    if !elf::is_in_section(&self.orig_got_section, orig_addr_ptr, 8) {
        return None;
    }

    // Dereference the original pointer by reading 8 little-endian bytes from the file.
    let orig_offset = elf::get_offset_in_file(&self.orig_elf, orig_addr_ptr).ok()? as u64;
    let orig_addr = u64::from_le_bytes(
        elf::get_elf_bytes(&self.orig_elf, orig_offset, 8)
            .ok()?
            .try_into()
            .ok()?,
    );

    // The pointed-to address must be a known data symbol, and the decomp-side
    // pointer must appear in the relocation-derived global data table.
    let data_symbol = self.known_data_symbols.get_symbol(orig_addr)?;
    let decomp_addr = *self.decomp_glob_data_table.get(&decomp_addr_ptr)?;
    self.check_data_symbol_ex(orig_addr, decomp_addr, &data_symbol)
}
|
||||
|
||||
fn make_mismatch(
|
||||
i1: &cs::Insn,
|
||||
i2: &cs::Insn,
|
||||
cause: MismatchCause,
|
||||
) -> Result<Option<Mismatch>> {
|
||||
Ok(Some(Mismatch {
|
||||
addr_orig: i1.address(),
|
||||
addr_decomp: i2.address(),
|
||||
cause,
|
||||
}))
|
||||
}
|
||||
|
||||
// Cold + never-inlined: this is only reached on the mismatch (error-reporting)
// path, so keep it out of the hot comparison loop.
#[cold]
#[inline(never)]
/// Maps an address in the decompiled ELF to a symbol name, if one exists.
///
/// The address-to-name map is built lazily on first use and cached; if building
/// it fails, an empty map is cached and all lookups return None.
fn translate_decomp_addr_to_name(&self, decomp_addr: u64) -> Option<&'decomp_elf str> {
    let map = self.decomp_addr_to_name_map.get_or_create(|| {
        let map = elf::make_addr_to_name_map(&self.decomp_elf).ok();
        map.unwrap_or_default()
    });
    map.get(&decomp_addr).copied()
}
|
||||
}
|
|
@ -1,311 +0,0 @@
|
|||
use std::{collections::HashMap, ffi::CStr, fs::File, ops::Range, path::Path};
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use goblin::{
|
||||
container,
|
||||
elf::{
|
||||
dynamic, reloc, section_header, sym, Dynamic, Elf, ProgramHeader, RelocSection,
|
||||
SectionHeader, Sym, Symtab,
|
||||
},
|
||||
elf64::program_header::PT_LOAD,
|
||||
strtab::Strtab,
|
||||
};
|
||||
use memmap::{Mmap, MmapOptions};
|
||||
use owning_ref::OwningHandle;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::repo;
|
||||
|
||||
/// A parsed ELF that owns its backing file and memory map (self-referential via OwningHandle).
pub type OwnedElf = OwningHandle<Box<(Box<File>, Mmap)>, Box<Elf<'static>>>;
/// Symbol table keyed by symbol name.
pub type SymbolTableByName<'a> = HashMap<&'a str, goblin::elf::Sym>;
/// Symbol table keyed by symbol address (st_value).
pub type SymbolTableByAddr = FxHashMap<u64, goblin::elf::Sym>;
/// Maps a symbol address to its name.
pub type AddrToNameMap<'a> = FxHashMap<u64, &'a str>;
/// Maps a relocation offset to the resolved target address (see build_glob_data_table).
pub type GlobDataTable = FxHashMap<u64, u64>;
|
||||
|
||||
/// A function extracted from an executable: its address plus a borrowed view of its code bytes.
pub struct Function<'a> {
    /// The virtual address of the function in its containing executable.
    /// *Note*: does not contain the IDA base (0x7100000000).
    pub addr: u64,
    /// The bytes that make up the code for this function.
    pub code: &'a [u8],
}

impl<'a> Function<'a> {
    /// Half-open virtual address range [addr, addr + code.len()) covered by this function.
    #[inline]
    pub fn get_addr_range(&self) -> Range<u64> {
        self.addr..(self.addr + self.code.len() as u64)
    }
}
|
||||
|
||||
#[inline]
/// Parsing context used for all goblin calls in this module.
fn make_goblin_ctx() -> container::Ctx {
    // 64-bit, little endian (goblin names the 64-bit container `Container::Big`).
    container::Ctx::new(container::Container::Big, container::Endian::Little)
}
|
||||
|
||||
/// A stripped down version of `goblin::elf::Elf::parse`, parsing only the sections that we need.
///
/// *Warning*: In particular, `strtab`, `dynstrtab`, `soname` and `libraries` are **not** parsed.
fn parse_elf_faster(bytes: &[u8]) -> Result<Elf> {
    // Start from a lazily-parsed Elf and fill in only the pieces we use.
    let header = Elf::parse_header(bytes)?;
    let mut elf = Elf::lazy_parse(header)?;
    let ctx = make_goblin_ctx();

    elf.program_headers =
        ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;

    elf.section_headers =
        SectionHeader::parse(bytes, header.e_shoff as usize, header.e_shnum as usize, ctx)?;

    // Parses the string table at the given section index, or returns an empty
    // one when the index is out of range (mirrors goblin's own behavior).
    let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
        if section_idx >= section_headers.len() {
            Ok(Strtab::default())
        } else {
            let shdr = &section_headers[section_idx];
            shdr.check_size(bytes.len())?;
            Strtab::parse(bytes, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
        }
    };

    // Section-header string table (needed to look up section names).
    let strtab_idx = header.e_shstrndx as usize;
    elf.shdr_strtab = get_strtab(&elf.section_headers, strtab_idx)?;

    // Static symbol table (SHT_SYMTAB). If several exist, the last one wins.
    for shdr in &elf.section_headers {
        if shdr.sh_type as u32 == section_header::SHT_SYMTAB {
            let size = shdr.sh_entsize;
            // Guard against a zero entry size to avoid dividing by zero.
            let count = if size == 0 { 0 } else { shdr.sh_size / size };
            elf.syms = Symtab::parse(bytes, shdr.sh_offset as usize, count as usize, ctx)?;
        }
    }

    elf.dynamic = Dynamic::parse(bytes, &elf.program_headers, ctx)?;
    if let Some(ref dynamic) = elf.dynamic {
        let dyn_info = &dynamic.info;
        // parse the dynamic relocations
        elf.dynrelas = RelocSection::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?;
        elf.dynrels = RelocSection::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?;
        let is_rela = dyn_info.pltrel as u64 == dynamic::DT_RELA;
        elf.pltrelocs =
            RelocSection::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?;
    }

    Ok(elf)
}
|
||||
|
||||
/// Opens and memory-maps the ELF at `path`, returning a self-contained handle
/// that owns the file, the mapping, and the parsed `Elf` borrowing from it.
pub fn load_elf(path: &Path) -> Result<OwnedElf> {
    let file = Box::new(File::open(path)?);
    // SAFETY: mapping a file we just opened; the map stays valid while `file`
    // is alive, and both are stored together in the OwningHandle below.
    // NOTE(review): the mapping assumes the file is not truncated/modified
    // concurrently — standard caveat for memory-mapped I/O.
    let mmap = unsafe { MmapOptions::new().map(&file)? };

    // SAFETY: OwningHandle guarantees the (file, mmap) pair outlives the parsed
    // Elf, so borrowing the mmap bytes through the raw pointer is sound.
    OwningHandle::try_new(Box::new((file, mmap)), |pair| unsafe {
        let elf = parse_elf_faster(&(*pair).1).with_context(|| "failed to load ELF")?;
        Ok(Box::new(elf))
    })
}
|
||||
|
||||
pub fn load_orig_elf() -> Result<OwnedElf> {
|
||||
let mut path = repo::get_repo_root()?;
|
||||
path.push("data");
|
||||
path.push("main.elf");
|
||||
load_elf(path.as_path())
|
||||
}
|
||||
|
||||
pub fn load_decomp_elf() -> Result<OwnedElf> {
|
||||
let mut path = repo::get_repo_root()?;
|
||||
path.push("build");
|
||||
path.push("uking");
|
||||
load_elf(path.as_path())
|
||||
}
|
||||
|
||||
/// Borrowed view of the string table that backs an ELF's static symbol table.
struct SymbolStringTable<'elf> {
    // Raw bytes of the string table section (NUL-terminated strings).
    bytes: &'elf [u8],
}

impl<'elf> SymbolStringTable<'elf> {
    /// Locates the string table linked from the SHT_SYMTAB section of `elf`.
    pub fn from_elf(elf: &'elf OwnedElf) -> Result<Self> {
        let bytes = &*elf.as_owner().1;
        for shdr in &elf.section_headers {
            if shdr.sh_type as u32 == section_header::SHT_SYMTAB {
                // sh_link of a symtab section is the index of its string table.
                let table_hdr = elf
                    .section_headers
                    .get(shdr.sh_link as usize)
                    .ok_or_else(|| anyhow!("symbol string table index out of bounds"))?;

                table_hdr.check_size(bytes.len())?;

                let start = table_hdr.sh_offset as usize;
                let end = start + table_hdr.sh_size as usize;
                return Ok(SymbolStringTable {
                    bytes: &bytes[start..end],
                });
            }
        }
        bail!("couldn't find symbol string table")
    }

    /// Returns the NUL-terminated string starting at `offset` within the table.
    ///
    /// SAFETY-related notes: relies on the table containing valid UTF-8 and a
    /// terminating NUL before the end of the section — both are assumed, not
    /// checked (standard for well-formed ELF string tables; TODO confirm for
    /// untrusted inputs).
    pub fn get_string(&self, offset: usize) -> &'elf str {
        unsafe {
            std::str::from_utf8_unchecked(
                CStr::from_ptr(self.bytes[offset..self.bytes.len()].as_ptr() as *const i8)
                    .to_bytes(),
            )
        }
    }
}
|
||||
|
||||
fn filter_out_useless_syms(sym: &Sym) -> bool {
|
||||
matches!(
|
||||
sym.st_type(),
|
||||
sym::STT_OBJECT | sym::STT_FUNC | sym::STT_COMMON | sym::STT_TLS
|
||||
)
|
||||
}
|
||||
|
||||
pub fn make_symbol_map_by_name(elf: &OwnedElf) -> Result<SymbolTableByName> {
|
||||
let mut map = SymbolTableByName::with_capacity_and_hasher(
|
||||
elf.syms.iter().filter(filter_out_useless_syms).count(),
|
||||
Default::default(),
|
||||
);
|
||||
|
||||
let strtab = SymbolStringTable::from_elf(&elf)?;
|
||||
|
||||
for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
|
||||
map.entry(strtab.get_string(symbol.st_name))
|
||||
.or_insert(symbol);
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
pub fn make_symbol_map_by_addr(elf: &OwnedElf) -> SymbolTableByAddr {
|
||||
let mut map = SymbolTableByAddr::with_capacity_and_hasher(
|
||||
elf.syms.iter().filter(filter_out_useless_syms).count(),
|
||||
Default::default(),
|
||||
);
|
||||
for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
|
||||
map.entry(symbol.st_value).or_insert(symbol);
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
pub fn make_addr_to_name_map(elf: &OwnedElf) -> Result<AddrToNameMap> {
|
||||
let mut map = AddrToNameMap::with_capacity_and_hasher(
|
||||
elf.syms.iter().filter(filter_out_useless_syms).count(),
|
||||
Default::default(),
|
||||
);
|
||||
|
||||
let strtab = SymbolStringTable::from_elf(&elf)?;
|
||||
|
||||
for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
|
||||
map.entry(symbol.st_value)
|
||||
.or_insert_with(|| strtab.get_string(symbol.st_name));
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
fn parse_symtab<'a>(elf: &'a OwnedElf, shdr: &'a SectionHeader) -> Result<Symtab<'a>> {
|
||||
let bytes = &elf.as_owner().1;
|
||||
let size = shdr.sh_entsize;
|
||||
let count = if size == 0 { 0 } else { shdr.sh_size / size };
|
||||
|
||||
let syms = Symtab::parse(
|
||||
bytes,
|
||||
shdr.sh_offset as usize,
|
||||
count as usize,
|
||||
make_goblin_ctx(),
|
||||
)?;
|
||||
Ok(syms)
|
||||
}
|
||||
|
||||
pub fn find_section<'a>(elf: &'a OwnedElf, name: &str) -> Result<&'a SectionHeader> {
|
||||
elf.section_headers
|
||||
.iter()
|
||||
.find(|&header| &elf.shdr_strtab[header.sh_name] == name)
|
||||
.ok_or_else(|| anyhow!("failed to find {} section", name))
|
||||
}
|
||||
|
||||
pub fn get_linked_section<'a>(
|
||||
elf: &'a OwnedElf,
|
||||
shdr: &'a SectionHeader,
|
||||
) -> Result<&'a SectionHeader> {
|
||||
elf.section_headers
|
||||
.get(shdr.sh_link as usize)
|
||||
.ok_or_else(|| anyhow!("could not get linked section"))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_in_section(section: &SectionHeader, addr: u64, size: u64) -> bool {
|
||||
let begin = section.sh_addr;
|
||||
let end = begin + section.sh_size;
|
||||
(begin..end).contains(&addr) && (begin..=end).contains(&(addr + size))
|
||||
}
|
||||
|
||||
/// Builds a table mapping relocation offsets (pointer locations) to resolved
/// target addresses, from the `.rela.dyn` relocations of `elf`.
///
/// Only R_AARCH64_GLOB_DAT and R_AARCH64_RELATIVE relocations are recorded.
pub fn build_glob_data_table(elf: &OwnedElf) -> Result<GlobDataTable> {
    let section = &elf.dynrelas;
    let section_hdr = find_section(elf, ".rela.dyn")?;
    // The corresponding symbol table.
    let symtab = parse_symtab(elf, get_linked_section(elf, &section_hdr)?)?;

    let mut table = GlobDataTable::with_capacity_and_hasher(section.len(), Default::default());

    for reloc in section.iter() {
        // S: the value of the symbol the relocation refers to.
        let symbol_value: u64 = symtab
            .get(reloc.r_sym)
            .ok_or_else(|| anyhow!("invalid symbol index"))?
            .st_value;

        match reloc.r_type {
            // GLOB_DAT: target is S + A (addend).
            reloc::R_AARCH64_GLOB_DAT => {
                table.insert(
                    reloc.r_offset,
                    (symbol_value as i64 + reloc.r_addend.unwrap()) as u64,
                );
            }
            reloc::R_AARCH64_RELATIVE => {
                // FIXME: this should be Delta(S) + A.
                table.insert(
                    reloc.r_offset,
                    (symbol_value as i64 + reloc.r_addend.unwrap()) as u64,
                );
            }
            // Other relocation kinds are irrelevant for data-reference checking.
            _ => (),
        }
    }

    Ok(table)
}
|
||||
|
||||
pub fn get_offset_in_file(elf: &OwnedElf, addr: u64) -> Result<usize> {
|
||||
let addr = addr as usize;
|
||||
for segment in elf.program_headers.iter() {
|
||||
if segment.p_type != PT_LOAD {
|
||||
continue;
|
||||
}
|
||||
|
||||
if segment.vm_range().contains(&addr) {
|
||||
return Ok(segment.file_range().start + addr - segment.vm_range().start);
|
||||
}
|
||||
}
|
||||
bail!("{:#x} doesn't belong to any segment", addr)
|
||||
}
|
||||
|
||||
pub fn get_elf_bytes(elf: &OwnedElf, addr: u64, size: u64) -> Result<&[u8]> {
|
||||
let offset = get_offset_in_file(&elf, addr)?;
|
||||
let size = size as usize;
|
||||
Ok(&elf.as_owner().1[offset..(offset + size)])
|
||||
}
|
||||
|
||||
pub fn get_function(elf: &OwnedElf, addr: u64, size: u64) -> Result<Function> {
|
||||
Ok(Function {
|
||||
addr,
|
||||
code: get_elf_bytes(&elf, addr, size)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_function_by_name<'a>(
|
||||
elf: &'a OwnedElf,
|
||||
symbols: &SymbolTableByName,
|
||||
name: &str,
|
||||
) -> Result<Function<'a>> {
|
||||
let symbol = symbols
|
||||
.get(&name)
|
||||
.ok_or_else(|| anyhow!("unknown function: {}", name))?;
|
||||
get_function(&elf, symbol.st_value, symbol.st_size)
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
use crate::repo;
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Decompilation status of a function, as recorded in the function CSV
/// (see parse_function_csv_entry for the one-letter codes).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Status {
    // Verified identical to the original ("O" in the CSV).
    Matching,
    // Minor mismatch ("m").
    NonMatchingMinor,
    // Major mismatch ("M").
    NonMatchingMajor,
    // Not yet decompiled ("U").
    NotDecompiled,
    // Work in progress ("W").
    Wip,
    // Library function, not meant to be decompiled ("L").
    Library,
}
|
||||
|
||||
impl Status {
|
||||
pub fn description(&self) -> &'static str {
|
||||
match &self {
|
||||
Status::Matching => "matching",
|
||||
Status::NonMatchingMinor => "non-matching (minor)",
|
||||
Status::NonMatchingMajor => "non-matching (major)",
|
||||
Status::NotDecompiled => "not decompiled",
|
||||
Status::Wip => "WIP",
|
||||
Status::Library => "library function",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One row of the function CSV: a function in the original executable and its
/// decompilation status.
#[derive(Clone, Debug)]
pub struct Info {
    // Virtual address without the IDA base (see ADDRESS_BASE).
    pub addr: u64,
    // Function size in bytes.
    pub size: u32,
    // Mangled name of the decompiled function; may be empty if unnamed.
    pub name: String,
    pub status: Status,
}
|
||||
|
||||
impl Info {
|
||||
pub fn is_decompiled(&self) -> bool {
|
||||
!matches!(self.status, Status::NotDecompiled | Status::Library)
|
||||
}
|
||||
}
|
||||
|
||||
/// Expected header row of the function list CSV.
pub const CSV_HEADER: &[&str] = &["Address", "Quality", "Size", "Name"];
/// IDA image base: CSV addresses include it, in-memory `Info::addr` does not.
pub const ADDRESS_BASE: u64 = 0x71_0000_0000;
|
||||
|
||||
fn parse_base_16(value: &str) -> Result<u64> {
|
||||
if let Some(stripped) = value.strip_prefix("0x") {
|
||||
Ok(u64::from_str_radix(stripped, 16)?)
|
||||
} else {
|
||||
Ok(u64::from_str_radix(value, 16)?)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_address(value: &str) -> Result<u64> {
|
||||
Ok(parse_base_16(value)? - ADDRESS_BASE)
|
||||
}
|
||||
|
||||
/// Parses one CSV record (Address, Quality, Size, Name) into an `Info`.
fn parse_function_csv_entry(record: &csv::StringRecord) -> Result<Info> {
    ensure!(record.len() == 4, "invalid record");

    let addr = parse_address(&record[0])?;
    // Only the first character of the quality column is significant.
    let status_code = record[1].chars().next();
    let size = record[2].parse::<u32>()?;
    let decomp_name = record[3].to_string();

    let status = match status_code {
        Some('m') => Status::NonMatchingMinor,
        Some('M') => Status::NonMatchingMajor,
        Some('O') => Status::Matching,
        Some('U') => Status::NotDecompiled,
        Some('W') => Status::Wip,
        Some('L') => Status::Library,
        Some(code) => bail!("unexpected status code: {}", code),
        None => bail!("missing status code"),
    };

    Ok(Info {
        addr,
        size,
        name: decomp_name,
        status,
    })
}
|
||||
|
||||
pub fn get_functions_csv_path() -> Result<PathBuf> {
|
||||
let mut path = repo::get_repo_root()?;
|
||||
path.push("data");
|
||||
path.push("uking_functions.csv");
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
/// Returns a Vec of all functions that are listed in the specified CSV.
///
/// Validates the header row, then parses the remaining records; fails if any
/// decompiled (O/M/m/W) entry has an empty name or if any name appears twice.
pub fn get_functions_for_path(csv_path: &Path) -> Result<Vec<Info>> {
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .quoting(false)
        .from_path(csv_path)?;

    // We build the result array manually without using csv iterators for performance reasons.
    let mut result = Vec::with_capacity(110_000);
    let mut record = csv::StringRecord::new();
    let mut line_number = 1;
    // Count of non-empty names, used to presize the duplicate-detection set.
    let mut num_names = 0;
    if reader.read_record(&mut record)? {
        // Verify that the CSV has the correct format.
        ensure!(record.len() == 4, "invalid record; expected 4 fields");
        ensure!(record == *CSV_HEADER,
            "wrong CSV format; this program only works with the new function list format (added in commit 1d4c815fbae3)"
        );
        line_number += 1;
    }

    while reader.read_record(&mut record)? {
        let entry = parse_function_csv_entry(&record)
            .with_context(|| format!("failed to parse CSV record at line {}", line_number))?;

        if !entry.name.is_empty() {
            num_names += 1;
        }

        result.push(entry);
        line_number += 1;
    }

    // Check for duplicate names in the CSV.
    let mut known_names = HashSet::with_capacity(num_names);
    let mut duplicates = Vec::new();
    for entry in &result {
        // A decompiled entry with no name cannot be matched against a symbol.
        if entry.is_decompiled() && entry.name.is_empty() {
            bail!(
                "function at {:016x} is marked as O/M/m but has an empty name",
                entry.addr | ADDRESS_BASE
            );
        }

        // `insert` returns false when the name was already present.
        if !entry.name.is_empty() && !known_names.insert(&entry.name) {
            duplicates.push(&entry.name);
        }
    }
    if !duplicates.is_empty() {
        bail!("found duplicates: {:#?}", duplicates);
    }

    Ok(result)
}
|
||||
|
||||
/// Writes `functions` back to a CSV at `csv_path`, in the same format that
/// `get_functions_for_path` reads (header row included).
pub fn write_functions_to_path(csv_path: &Path, functions: &[Info]) -> Result<()> {
    let mut writer = csv::Writer::from_path(csv_path)?;
    writer.write_record(CSV_HEADER)?;

    for function in functions {
        // Addresses are written with the IDA base added back in.
        let addr = format!("0x{:016x}", function.addr | ADDRESS_BASE);
        // One-letter status codes; must mirror parse_function_csv_entry.
        let status = match function.status {
            Status::Matching => "O",
            Status::NonMatchingMinor => "m",
            Status::NonMatchingMajor => "M",
            Status::NotDecompiled => "U",
            Status::Wip => "W",
            Status::Library => "L",
        }
        .to_string();
        // Zero-padded size keeps the column width stable for diffs.
        let size = format!("{:06}", function.size);
        let name = function.name.clone();
        writer.write_record(&[addr, status, size, name])?;
    }

    Ok(())
}
|
||||
|
||||
/// Returns a Vec of all known functions in the executable.
|
||||
pub fn get_functions() -> Result<Vec<Info>> {
|
||||
get_functions_for_path(get_functions_csv_path()?.as_path())
|
||||
}
|
||||
|
||||
pub fn write_functions(functions: &[Info]) -> Result<()> {
|
||||
write_functions_to_path(get_functions_csv_path()?.as_path(), functions)
|
||||
}
|
||||
|
||||
/// Builds an address → Info map for all named functions.
///
/// Functions with an empty name are skipped, since they cannot be matched
/// against decompiled symbols.
pub fn make_known_function_map(functions: &[Info]) -> FxHashMap<u64, &Info> {
    functions
        .iter()
        .filter(|function| !function.name.is_empty())
        .map(|function| (function.addr, function))
        .collect()
}
|
||||
|
||||
/// Demangle a C++ symbol.
|
||||
pub fn demangle_str(name: &str) -> Result<String> {
|
||||
if !name.starts_with("_Z") {
|
||||
bail!("not an external mangled name");
|
||||
}
|
||||
|
||||
let symbol = cpp_demangle::Symbol::new(name)?;
|
||||
let options = cpp_demangle::DemangleOptions::new();
|
||||
Ok(symbol.demangle(&options)?)
|
||||
}
|
||||
|
||||
pub fn find_function_fuzzy<'a>(functions: &'a [Info], name: &str) -> Option<&'a Info> {
|
||||
functions
|
||||
.par_iter()
|
||||
.find_first(|function| function.name == name)
|
||||
.or_else(|| {
|
||||
// Comparing the demangled names is more expensive than a simple string comparison,
|
||||
// so only do this as a last resort.
|
||||
functions.par_iter().find_first(|function| {
|
||||
demangle_str(&function.name)
|
||||
.unwrap_or_else(|_| "".to_string())
|
||||
.contains(name)
|
||||
})
|
||||
})
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
/// Capstone error-translation helpers.
pub mod capstone_utils;
/// The FunctionChecker: instruction-level comparison of original vs decomp.
pub mod checks;
/// ELF loading, symbol tables and byte access.
pub mod elf;
/// Function list CSV parsing/writing and status tracking.
pub mod functions;
/// Repository root / tools path discovery.
pub mod repo;
/// Terminal output helpers.
pub mod ui;
|
|
@ -1,24 +0,0 @@
|
|||
use anyhow::{bail, Result};
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub fn get_repo_root() -> Result<PathBuf> {
|
||||
let current_dir = std::env::current_dir()?;
|
||||
let mut dir = current_dir.as_path();
|
||||
|
||||
loop {
|
||||
if ["data", "src"].iter().all(|name| dir.join(name).is_dir()) {
|
||||
return Ok(dir.to_path_buf());
|
||||
}
|
||||
|
||||
match dir.parent() {
|
||||
None => {
|
||||
bail!("failed to find repo root -- run this program inside the repo");
|
||||
}
|
||||
Some(parent) => dir = parent,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_tools_path() -> Result<PathBuf> {
|
||||
Ok(get_repo_root()?.join("tools"))
|
||||
}
|
|
@ -1,315 +0,0 @@
|
|||
use anyhow::bail;
|
||||
use anyhow::ensure;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use capstone as cs;
|
||||
use capstone::arch::BuildsCapstone;
|
||||
use colored::*;
|
||||
use itertools::Itertools;
|
||||
use rayon::prelude::*;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use viking::checks::FunctionChecker;
|
||||
use viking::elf;
|
||||
use viking::functions;
|
||||
use viking::functions::Status;
|
||||
use viking::repo;
|
||||
use viking::ui;
|
||||
|
||||
use mimalloc::MiMalloc;
|
||||
|
||||
// Use mimalloc as the process-wide allocator for this allocation-heavy tool.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
|
||||
|
||||
/// Returns false if the program should exit with a failure code at the end.
///
/// Checks one function against its decompiled counterpart and prints any
/// warnings/errors; how a mismatch is treated depends on the recorded status.
fn check_function(
    checker: &FunctionChecker,
    mut cs: &mut capstone::Capstone,
    orig_elf: &elf::OwnedElf,
    decomp_elf: &elf::OwnedElf,
    decomp_symtab: &elf::SymbolTableByName,
    function: &functions::Info,
) -> Result<bool> {
    let name = function.name.as_str();
    let decomp_fn = elf::get_function_by_name(&decomp_elf, &decomp_symtab, &name);

    match function.status {
        // Not decompiled and not present in the decomp ELF: nothing to check.
        Status::NotDecompiled if decomp_fn.is_err() => return Ok(true),
        // Library functions are never checked.
        Status::Library => return Ok(true),
        _ => (),
    }

    // The function is supposed to exist in the decomp ELF but couldn't be
    // fetched: warn and keep going (not a hard failure).
    if decomp_fn.is_err() {
        let error = decomp_fn.err().unwrap();
        ui::print_warning(&format!(
            "couldn't check {}: {}",
            ui::format_symbol_name(name),
            error.to_string().dimmed(),
        ));
        return Ok(true);
    }

    let decomp_fn = decomp_fn.unwrap();

    // Deferred: the original bytes are only needed if we actually compare.
    let get_orig_fn = || {
        elf::get_function(&orig_elf, function.addr, function.size as u64).with_context(|| {
            format!(
                "failed to get function {} ({}) from the original executable",
                name,
                ui::format_address(function.addr),
            )
        })
    };

    match function.status {
        // Marked matching: any mismatch is an error.
        Status::Matching => {
            let orig_fn = get_orig_fn()?;

            let result = checker
                .check(&mut cs, &orig_fn, &decomp_fn)
                .with_context(|| format!("checking {}", name))?;

            if let Some(mismatch) = result {
                // Lock stderr once so the error and its detail stay contiguous
                // under parallel checking.
                let stderr = std::io::stderr();
                let mut lock = stderr.lock();
                ui::print_error_ex(
                    &mut lock,
                    &format!(
                        "function {} is marked as matching but does not match",
                        ui::format_symbol_name(name),
                    ),
                );
                ui::print_detail_ex(&mut lock, &format!("{}", mismatch));
                return Ok(false);
            }
        }

        // Marked non-matching (any flavor): a *match* is worth a note, since
        // the CSV status should then be upgraded.
        Status::NotDecompiled
        | Status::NonMatchingMinor
        | Status::NonMatchingMajor
        | Status::Wip => {
            let orig_fn = get_orig_fn()?;

            let result = checker
                .check(&mut cs, &orig_fn, &decomp_fn)
                .with_context(|| format!("checking {}", name))?;

            if result.is_none() {
                ui::print_note(&format!(
                    "function {} is marked as {} but matches",
                    ui::format_symbol_name(name),
                    function.status.description(),
                ));
            }
        }

        // Handled by the early return above.
        Status::Library => unreachable!(),
    };

    Ok(true)
}
|
||||
|
||||
// Cold + never-inlined: constructed once per thread (see CAPSTONE below), so
// keep the setup code out of hot paths.
#[cold]
#[inline(never)]
/// Builds an AArch64 Capstone disassembler with detail mode enabled.
fn make_cs() -> Result<cs::Capstone> {
    cs::Capstone::new()
        .arm64()
        .mode(cs::arch::arm64::ArchMode::Arm)
        .detail(true)
        .build()
        .or_else(viking::capstone_utils::translate_cs_error)
}
|
||||
|
||||
// One Capstone instance per worker thread: Capstone handles are not shared
// across threads, and reusing them avoids re-initialization per function.
thread_local! {
    static CAPSTONE: RefCell<cs::Capstone> = RefCell::new(make_cs().unwrap());
}
|
||||
|
||||
/// Checks every listed function in parallel; returns Err if any function that
/// is marked matching fails to match.
fn check_all(
    functions: &[functions::Info],
    checker: &FunctionChecker,
    orig_elf: &elf::OwnedElf,
    decomp_elf: &elf::OwnedElf,
    decomp_symtab: &elf::SymbolTableByName,
) -> Result<()> {
    // Set by any worker that finds a hard mismatch; checked after the join.
    let failed = AtomicBool::new(false);

    functions.par_iter().try_for_each(|function| {
        CAPSTONE.with(|cs| -> Result<()> {
            // Per-thread Capstone instance (see the thread_local above).
            let mut cs = cs.borrow_mut();
            let ok = check_function(
                &checker,
                &mut cs,
                &orig_elf,
                &decomp_elf,
                &decomp_symtab,
                function,
            )?;
            if !ok {
                failed.store(true, std::sync::atomic::Ordering::Relaxed);
            }

            Ok(())
        })
    })?;

    if failed.load(std::sync::atomic::Ordering::Relaxed) {
        bail!("found at least one error");
    } else {
        Ok(())
    }
}
|
||||
|
||||
fn get_function_to_check_from_args(args: &[String]) -> Result<String> {
|
||||
let mut maybe_fn_to_check: Vec<String> = args
|
||||
.iter()
|
||||
.filter(|s| !s.starts_with("-"))
|
||||
.map(|s| s.clone())
|
||||
.collect();
|
||||
|
||||
ensure!(
|
||||
maybe_fn_to_check.len() == 1,
|
||||
"expected only one function name (one argument that isn't prefixed with '-')"
|
||||
);
|
||||
|
||||
Ok(maybe_fn_to_check.remove(0))
|
||||
}
|
||||
|
||||
/// Checks a single function named on the command line, optionally shows an
/// asm-differ diff, and updates the function's CSV status when it changed.
fn check_single(
    functions: &[functions::Info],
    checker: &FunctionChecker,
    orig_elf: &elf::OwnedElf,
    decomp_elf: &elf::OwnedElf,
    decomp_symtab: &elf::SymbolTableByName,
    args: &Vec<String>,
) -> Result<()> {
    let fn_to_check = get_function_to_check_from_args(&args)?;
    // Fuzzy lookup: exact name first, then demangled-name substring match.
    let function = functions::find_function_fuzzy(&functions, &fn_to_check)
        .with_context(|| format!("unknown function: {}", ui::format_symbol_name(&fn_to_check)))?;
    let name = function.name.as_str();

    eprintln!("{}", ui::format_symbol_name(name).bold());

    if matches!(function.status, Status::Library) {
        bail!("L functions should not be decompiled");
    }

    let decomp_fn =
        elf::get_function_by_name(&decomp_elf, &decomp_symtab, &name).with_context(|| {
            format!(
                "failed to get decomp function: {}",
                ui::format_symbol_name(name)
            )
        })?;

    let orig_fn = elf::get_function(&orig_elf, function.addr, function.size as u64)?;

    let maybe_mismatch = checker
        .check(&mut make_cs()?, &orig_fn, &decomp_fn)
        .with_context(|| format!("checking {}", name))?;

    // --always-diff forces the diff even when the function matches.
    let mut should_show_diff = args
        .iter()
        .find(|s| s.as_str() == "--always-diff")
        .is_some();

    if let Some(mismatch) = &maybe_mismatch {
        eprintln!("{}\n{}", "mismatch".red().bold(), &mismatch);
        should_show_diff = true;
    } else {
        eprintln!("{}", "OK".green().bold());
    }

    if should_show_diff {
        // Forward the remaining CLI args to asm-differ, minus the ones we consumed.
        let diff_args = args
            .iter()
            .filter(|s| s.as_str() != &fn_to_check && s.as_str() != "--always-diff");

        std::process::Command::new(repo::get_tools_path()?.join("asm-differ").join("diff.py"))
            .arg("-I")
            .arg("-e")
            .arg(name)
            .arg(format!("0x{:016x}", function.addr))
            .arg(format!("0x{:016x}", function.addr + function.size as u64))
            .args(diff_args)
            .status()?;
    }

    // A match upgrades the entry to Matching; a mismatch downgrades it to WIP.
    let new_status = match maybe_mismatch {
        None => Status::Matching,
        Some(_) => Status::Wip,
    };

    // Update the function status if needed.
    if function.status != new_status {
        ui::print_note(&format!(
            "changing status from {:?} to {:?}",
            function.status, new_status
        ));

        // Rewrite the whole CSV with only this entry's status changed.
        let mut new_functions = functions.iter().cloned().collect_vec();
        new_functions
            .iter_mut()
            .find(|info| info.addr == function.addr)
            .unwrap()
            .status = new_status;
        functions::write_functions(&new_functions)?;
    }

    Ok(())
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args: Vec<String> = std::env::args().skip(1).collect();
|
||||
|
||||
let orig_elf = elf::load_orig_elf().with_context(|| "failed to load original ELF")?;
|
||||
let decomp_elf = elf::load_decomp_elf().with_context(|| "failed to load decomp ELF")?;
|
||||
|
||||
// Load these in parallel.
|
||||
let mut decomp_symtab = None;
|
||||
let mut decomp_glob_data_table = None;
|
||||
let mut functions = None;
|
||||
|
||||
rayon::scope(|s| {
|
||||
s.spawn(|_| decomp_symtab = Some(elf::make_symbol_map_by_name(&decomp_elf)));
|
||||
s.spawn(|_| decomp_glob_data_table = Some(elf::build_glob_data_table(&decomp_elf)));
|
||||
s.spawn(|_| functions = Some(functions::get_functions()));
|
||||
});
|
||||
|
||||
let decomp_symtab = decomp_symtab
|
||||
.unwrap()
|
||||
.with_context(|| "failed to make symbol map")?;
|
||||
|
||||
let decomp_glob_data_table = decomp_glob_data_table
|
||||
.unwrap()
|
||||
.with_context(|| "failed to make global data table")?;
|
||||
|
||||
let functions = functions
|
||||
.unwrap()
|
||||
.with_context(|| "failed to load function CSV")?;
|
||||
|
||||
let checker = FunctionChecker::new(
|
||||
&orig_elf,
|
||||
&decomp_elf,
|
||||
&decomp_symtab,
|
||||
decomp_glob_data_table,
|
||||
&functions,
|
||||
)
|
||||
.with_context(|| "failed to construct FunctionChecker")?;
|
||||
|
||||
if args.len() >= 1 {
|
||||
// Single function mode.
|
||||
check_single(
|
||||
&functions,
|
||||
&checker,
|
||||
&orig_elf,
|
||||
&decomp_elf,
|
||||
&decomp_symtab,
|
||||
&args,
|
||||
)?;
|
||||
} else {
|
||||
// Normal check mode.
|
||||
check_all(&functions, &checker, &orig_elf, &decomp_elf, &decomp_symtab)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
use colored::*;
|
||||
use std::io::StderrLock;
|
||||
use std::io::Write;
|
||||
use textwrap::indent;
|
||||
|
||||
use crate::functions;
|
||||
|
||||
pub fn print_note(msg: &str) {
|
||||
eprintln!("{}{}{}", "note".bold().cyan(), ": ".bold(), msg.bold())
|
||||
}
|
||||
|
||||
pub fn print_warning(msg: &str) {
|
||||
eprintln!("{}{}{}", "warning".bold().yellow(), ": ".bold(), msg.bold())
|
||||
}
|
||||
|
||||
pub fn print_error(msg: &str) {
|
||||
let stderr = std::io::stderr();
|
||||
let mut lock = stderr.lock();
|
||||
print_error_ex(&mut lock, msg);
|
||||
}
|
||||
|
||||
pub fn print_error_ex(lock: &mut StderrLock, msg: &str) {
|
||||
writeln!(
|
||||
lock,
|
||||
"{}{}{}",
|
||||
"error".bold().red(),
|
||||
": ".bold(),
|
||||
msg.bold()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn format_symbol_name(name: &str) -> String {
|
||||
functions::demangle_str(name).map_or(name.blue().to_string(), |demangled| {
|
||||
format!("{} ({})", demangled.blue(), name.blue().dimmed(),)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn format_address(addr: u64) -> String {
|
||||
format!("{:#x}", addr).green().to_string()
|
||||
}
|
||||
|
||||
pub fn print_detail(msg: &str) {
|
||||
let stderr = std::io::stderr();
|
||||
let mut lock = stderr.lock();
|
||||
print_detail_ex(&mut lock, msg);
|
||||
}
|
||||
|
||||
pub fn print_detail_ex(lock: &mut StderrLock, msg: &str) {
|
||||
writeln!(
|
||||
lock,
|
||||
"{}\n",
|
||||
indent(
|
||||
&msg.clear().to_string(),
|
||||
&" │ ".bold().dimmed().to_string()
|
||||
)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
|
@ -21,14 +21,14 @@ FILE_SYNC_PY=../file-sync.py
|
|||
WORKING_FILES=../file-list.txt
|
||||
#Path to uking_functions.csv
|
||||
UKING_FUNCTIONS=data/uking_functions.csv
|
||||
#Path to temporarily put the output of tools/print_decomp_symbols.py
|
||||
#Path to temporarily put the output of tools/common/print_decomp_symbols.py
|
||||
SYMBOL_OUT=build/symbols.txt
|
||||
#Path to diff.py
|
||||
DIFF_PY=./diff.py
|
||||
#Path to print_decomp_symbols.py
|
||||
PRINT_DECOMP_SYMBOLS_PY=tools/print_decomp_symbols.py
|
||||
PRINT_DECOMP_SYMBOLS_PY=tools/common/print_decomp_symbols.py
|
||||
#Path to check.py
|
||||
CHECK_PY=tools/check.py
|
||||
CHECK_PY=tools/common/check.py
|
||||
#clang-format
|
||||
CLANG_FORMAT=clang-format
|
||||
|
||||
|
@ -231,7 +231,7 @@ check|c )
|
|||
echo " [-f] Search only, do not update function list"
|
||||
echo "d|diff <function> Diff function"
|
||||
echo " [-v] verbose, show source when diffing"
|
||||
echo "c|check Format source code and run tools/check.py"
|
||||
echo "c|check Format source code and run tools/common/check.py"
|
||||
if [[ ${FILE_SYNC} == "ON" ]]
|
||||
then
|
||||
echo " [-w] sync formatted code"
|
||||
|
|
Loading…
Reference in New Issue