Merge pull request #51 from leoetlino/optimized-checker

tools: Add a new, optimized check tool
This commit is contained in:
Léo Lam 2021-07-31 12:23:40 +02:00 committed by GitHub
commit 71fc0f35e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
83 changed files with 1883 additions and 45 deletions

View File

@ -1 +0,0 @@
в▓Ч

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,29 +0,0 @@
#!/usr/bin/env python3
import argparse
import util.elf
from util import utils
def dump_fn(name: str) -> None:
    """Write the bytes of the named function from our ELF to ``expected/<name>.bin``.

    Reports a failure through ``utils.fail`` when a ``KeyError`` is raised
    while looking the function up or writing it out.
    """
    dump_dir = utils.get_repo_root() / "expected"
    try:
        function = util.elf.get_fn_from_my_elf(name)
        target = dump_dir / f"{name}.bin"
        # The dump directory may not exist yet on a fresh checkout.
        target.parent.mkdir(exist_ok=True)
        target.write_bytes(function.data)
    except KeyError:
        utils.fail("could not find function")
def main() -> None:
    """Parse the command line and dump the requested function."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("function_name", help="Name of the function to dump")
    parsed = arg_parser.parse_args()
    dump_fn(parsed.function_name)
if __name__ == "__main__":
main()

View File

@ -2,8 +2,6 @@
import argparse
from collections import defaultdict
from colorama import Back, Fore, Style
import enum
from pathlib import Path
from util import utils
from util.utils import FunctionStatus
import typing as tp
@ -17,8 +15,6 @@ parser.add_argument("--print-eq", "-e", action="store_true",
help="Print non-matching functions with minor issues")
parser.add_argument("--print-ok", "-m", action="store_true",
help="Print matching functions")
parser.add_argument("--hide-nonmatchings-with-dumps", "-H", help="Hide non-matching functions that have expected "
"output dumps", action="store_true")
args = parser.parse_args()
code_size_total = 0
@ -26,15 +22,6 @@ num_total = 0
code_size: tp.DefaultDict[FunctionStatus, int] = defaultdict(int)
counts: tp.DefaultDict[FunctionStatus, int] = defaultdict(int)
nonmatching_fns_with_dump = {p.stem for p in (Path(__file__).parent.parent / "expected").glob("*.bin")}
def should_hide_nonmatching(name: str) -> bool:
if not args.hide_nonmatchings_with_dumps:
return False
return name in nonmatching_fns_with_dump
for info in utils.get_functions():
code_size_total += info.size
num_total += 1
@ -47,10 +34,10 @@ for info in utils.get_functions():
if not args.csv:
if info.status == FunctionStatus.NonMatching:
if args.print_nm and not should_hide_nonmatching(info.decomp_name):
if args.print_nm:
print(f"{Fore.RED}NM{Fore.RESET} {utils.format_symbol_name(info.decomp_name)}")
elif info.status == FunctionStatus.Equivalent:
if args.print_eq and not should_hide_nonmatching(info.decomp_name):
if args.print_eq:
print(f"{Fore.YELLOW}EQ{Fore.RESET} {utils.format_symbol_name(info.decomp_name)}")
elif info.status == FunctionStatus.Matching:
if args.print_ok:

1
tools/viking/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

506
tools/viking/Cargo.lock generated Normal file
View File

@ -0,0 +1,506 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bstr"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279"
dependencies = [
"lazy_static",
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "capstone"
version = "0.9.0"
source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07"
dependencies = [
"capstone-sys",
"libc",
]
[[package]]
name = "capstone-sys"
version = "0.13.0"
source = "git+https://github.com/leoetlino/capstone-rs#1f962210b1e2ff418cf6c1bcb6c6785427662a07"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "cc"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "colored"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
dependencies = [
"atty",
"lazy_static",
"winapi",
]
[[package]]
name = "cpp_demangle"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea47428dc9d2237f3c6bc134472edfd63ebba0af932e783506dcfd66f10d18a"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
dependencies = [
"cfg-if",
"crossbeam-utils",
"lazy_static",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "goblin"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b1800b95efee8ad4ef04517d4d69f8e209e763b1668f1179aeeedd0e454da55"
dependencies = [
"log",
"plain",
"scroll",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "lazy-init"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23517540be87a91d06324e6bf6286ba8214171123ee8862ae9a5e7d938d71815"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
[[package]]
name = "libmimalloc-sys"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1b8479c593dba88c2741fc50b92e13dbabbbe0bd504d979f244ccc1a5b1c01"
dependencies = [
"cc",
]
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "memmap"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "memoffset"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
dependencies = [
"autocfg",
]
[[package]]
name = "mimalloc"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb74897ce508e6c49156fd1476fc5922cbc6e75183c65e399c765a09122e5130"
dependencies = [
"libmimalloc-sys",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "owning_ref"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scroll"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec"
dependencies = [
"scroll_derive",
]
[[package]]
name = "scroll_derive"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaaae8f38bb311444cfb7f1979af0bc9240d95795f75f9ceddf6a59b79ceffa0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde"
version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
[[package]]
name = "smawk"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "syn"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "textwrap"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"
dependencies = [
"smawk",
"unicode-linebreak",
"unicode-width",
]
[[package]]
name = "unicode-linebreak"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a52dcaab0c48d931f7cc8ef826fa51690a08e1ea55117ef26f89864f532383f"
dependencies = [
"regex",
]
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "viking"
version = "1.0.0"
dependencies = [
"anyhow",
"capstone",
"colored",
"cpp_demangle",
"csv",
"goblin",
"itertools",
"lazy-init",
"memmap",
"mimalloc",
"owning_ref",
"rayon",
"rustc-hash",
"textwrap",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

28
tools/viking/Cargo.toml Normal file
View File

@ -0,0 +1,28 @@
[package]
name = "viking"
version = "1.0.0"
edition = "2018"
[profile.release]
debug = 1
lto = "thin"
[dependencies]
anyhow = "1.0"
capstone = { git = "https://github.com/leoetlino/capstone-rs" }
colored = "2"
cpp_demangle = "0.3.3"
csv = "1.1"
goblin = "0.4"
itertools = "0.10.1"
lazy-init = "0.5.0"
memmap = "0.6.1"
mimalloc = { version = "*", default-features = false }
owning_ref = "0.4.1"
rayon = "1.5.1"
rustc-hash = "1.1.0"
textwrap = "0.14.2"
[[bin]]
name = "botw-check"
path = "src/tools/check.rs"

21
tools/viking/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 leoetlino
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,89 @@
use anyhow::{bail, Result};
use capstone as cs;
use cs::arch::arm64::{Arm64Insn, Arm64OpMem, Arm64Operand, Arm64OperandType};
use cs::{arch::ArchOperand, RegId};
pub fn translate_cs_error<T>(err: cs::Error) -> Result<T> {
bail!("capstone error: {}", err)
}
/// Applies `f` to `x` first and then to `y`, returning both results as a pair.
#[inline]
pub fn map_two<'a, T, R, F>(x: &'a T, y: &'a T, mut f: F) -> (R, R)
where
    F: FnMut(&'a T) -> R,
{
    let first = f(x);
    let second = f(y);
    (first, second)
}
/// Applies `f` to both elements of `pair` (first element first) and returns the results.
#[inline]
pub fn map_pair<'a, T, R, F>(pair: &'a (T, T), f: F) -> (R, R)
where
    F: FnMut(&'a T) -> R,
{
    let (first, second) = pair;
    map_two(first, second, f)
}
#[inline]
pub fn try_map_two<'a, T, R, F: FnMut(&'a T) -> Result<R, cs::Error>>(
x: &'a T,
y: &'a T,
mut f: F,
) -> Result<(R, R)> {
Ok((
f(x).or_else(translate_cs_error)?,
f(y).or_else(translate_cs_error)?,
))
}
/// Returns whether the numeric value of `id` lies between those of `start` and `end`,
/// both bounds included.
#[inline]
pub fn is_id_in_range(start: Arm64Insn, end: Arm64Insn, id: Arm64Insn) -> bool {
    let lowest = start as u32;
    let highest = end as u32;
    let value = id as u32;
    lowest <= value && value <= highest
}
/// Shortcut for reaching the ARM64 payload of a generic capstone operand.
pub trait CsArchOperandUtil {
    /// Returns the ARM64 operand stored in `self`.
    fn arm64(&self) -> &Arm64Operand;
}

impl CsArchOperandUtil for ArchOperand {
    /// Hits `unreachable!()` if the operand belongs to any other architecture.
    fn arm64(&self) -> &Arm64Operand {
        if let Self::Arm64Operand(operand) = self {
            operand
        } else {
            unreachable!()
        }
    }
}
/// Typed accessors for the payload of an ARM64 operand type.
///
/// `reg`, `imm` and `mem` panic when the operand is of a different kind;
/// `try_mem` is the non-panicking alternative for memory operands.
pub trait CsArm64OperandTypeUtil {
    /// Returns the register id of a `Reg` operand.
    fn reg(&self) -> RegId;
    /// Returns the value of an `Imm` operand.
    fn imm(&self) -> i64;
    /// Returns the memory operand, or `None` if this is not a `Mem` operand.
    fn try_mem(&self) -> Option<Arm64OpMem>;
    /// Returns the memory operand of a `Mem` operand.
    fn mem(&self) -> Arm64OpMem;
}

impl CsArm64OperandTypeUtil for Arm64OperandType {
    fn reg(&self) -> RegId {
        if let Self::Reg(reg) = self {
            *reg
        } else {
            panic!("expected Reg, got {:#?}", &self)
        }
    }

    fn imm(&self) -> i64 {
        if let Self::Imm(value) = self {
            *value
        } else {
            panic!("expected Imm, got {:#?}", &self)
        }
    }

    fn try_mem(&self) -> Option<Arm64OpMem> {
        if let Self::Mem(mem) = self {
            Some(*mem)
        } else {
            None
        }
    }

    fn mem(&self) -> Arm64OpMem {
        self.try_mem()
            .unwrap_or_else(|| panic!("expected Mem, got {:#?}", &self))
    }
}

518
tools/viking/src/checks.rs Normal file
View File

@ -0,0 +1,518 @@
use anyhow::{ensure, Result};
use capstone as cs;
use cs::arch::arm64::{Arm64Insn, Arm64Operand, Arm64OperandType};
use itertools::zip;
use lazy_init::Lazy;
use rustc_hash::FxHashMap;
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::path::{Path, PathBuf};
use crate::{capstone_utils::*, elf, functions, repo, ui};
/// A data symbol that pairs an address in the original executable with the
/// name and size the symbol has in our own build.
struct DataSymbol {
/// Address of the symbol in the original executable.
pub addr: u64,
/// Name of the symbol in our source code.
pub name: String,
/// Size of the symbol in our source code (according to ELF info).
pub size: u64,
}
/// Keeps track of known data symbols so that data loads can be validated.
#[derive(Default)]
struct KnownDataSymbolMap {
    /// Symbols. Must be sorted by address.
    symbols: Vec<DataSymbol>,
}

impl KnownDataSymbolMap {
    /// Creates an empty map.
    fn new() -> Self {
        Default::default()
    }

    /// Loads `address,name` records from the CSV file at `csv_path`.
    ///
    /// Records whose name is not present in `decomp_symtab` are skipped. The size of
    /// each kept symbol is taken from the matching entry of `decomp_symtab`. The symbol
    /// list is re-sorted by address before returning.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read or parsed, if a record does not have exactly
    /// two fields, or if an address cannot be parsed.
    fn load(&mut self, csv_path: &Path, decomp_symtab: &elf::SymbolTableByName) -> Result<()> {
        let mut reader = csv::ReaderBuilder::new()
            .has_headers(false)
            .quoting(false)
            .from_path(csv_path)?;

        for (index, maybe_record) in reader.records().enumerate() {
            let record = &maybe_record?;
            ensure!(
                record.len() == 2,
                "invalid number of fields on line {}",
                // Report the real (1-based) line in the file; fall back to the
                // 1-based record number if the reader did not attach a position.
                record
                    .position()
                    .map_or(index as u64 + 1, |position| position.line())
            );

            let addr = functions::parse_address(&record[0])?;
            let name = &record[1];

            // Ignore missing symbols.
            if let Some(symbol) = decomp_symtab.get(name) {
                self.symbols.push(DataSymbol {
                    addr,
                    name: name.to_string(),
                    size: symbol.st_size,
                });
            }
        }

        self.symbols.sort_by_key(|sym| sym.addr);
        Ok(())
    }

    /// If addr is part of a known data symbol, this function returns the corresponding symbol.
    ///
    /// A symbol covers the half-open range `[addr, addr + size)`, so a zero-sized symbol
    /// never matches. The search relies on `symbols` being sorted by address.
    fn get_symbol(&self, addr: u64) -> Option<&DataSymbol> {
        self.symbols
            .binary_search_by(|symbol| {
                // Saturate so that a symbol reaching the end of the address space
                // cannot overflow the end computation.
                let end = symbol.addr.saturating_add(symbol.size);
                if addr < symbol.addr {
                    std::cmp::Ordering::Greater
                } else if addr >= end {
                    // Also taken for a zero-sized symbol located exactly at `addr`:
                    // only symbols further right can still contain `addr`.
                    std::cmp::Ordering::Less
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .ok()
            .and_then(|index| self.symbols.get(index))
    }
}
/// Returns the path `data/data_symbols.csv` below the repository root.
///
/// Fails only if the repository root cannot be determined.
fn get_data_symbol_csv_path() -> Result<PathBuf> {
    let repo_root = repo::get_repo_root()?;
    Ok(repo_root.join("data").join("data_symbols.csv"))
}
/// Details about a function call or data reference that points at the wrong symbol.
#[derive(Debug)]
pub struct ReferenceDiff {
/// Address that the original code references.
pub referenced_symbol: u64,
/// Address the decomp code should reference in order to match.
pub expected_ref_in_decomp: u64,
/// Address the decomp code references instead.
pub actual_ref_in_decomp: u64,
/// Name of the symbol that should have been referenced.
pub expected_symbol_name: String,
/// Name of the symbol that is referenced instead.
pub actual_symbol_name: String,
}
/// Renders a three-line explanation: the referenced symbol, what the decomp
/// references, and the address that was expected.
impl std::fmt::Display for ReferenceDiff {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"wrong reference to {ref} {ref_name}\n\
--> decomp source code is referencing {actual} {actual_name}\n\
--> expected to see {expected} to match original code",
ref=ui::format_address(self.referenced_symbol),
ref_name=ui::format_symbol_name(&self.expected_symbol_name),
expected=ui::format_address(self.expected_ref_in_decomp),
actual=ui::format_address(self.actual_ref_in_decomp),
actual_name=ui::format_symbol_name(&self.actual_symbol_name),
)
}
}
/// Reason why a pair of instructions (or a whole function) was judged not to match.
#[derive(Debug)]
pub enum MismatchCause {
    FunctionSize,
    Register,
    Mnemonic,
    BranchTarget,
    FunctionCall(ReferenceDiff),
    DataReference(ReferenceDiff),
    Immediate,
    Unknown,
}

impl std::fmt::Display for MismatchCause {
    /// Writes a short description; reference mismatches append the diff on a new line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Causes that carry extra details.
            Self::FunctionCall(diff) => write!(f, "wrong function call\n{}", diff),
            Self::DataReference(diff) => write!(f, "wrong data reference\n{}", diff),
            // Causes that are fully described by a fixed message.
            Self::FunctionSize => f.write_str("wrong function size"),
            Self::Register => f.write_str("wrong register"),
            Self::Mnemonic => f.write_str("wrong mnemonic"),
            Self::BranchTarget => f.write_str("wrong branch target"),
            Self::Immediate => f.write_str("wrong immediate"),
            Self::Unknown => f.write_str("unknown reason; check diff.py"),
        }
    }
}
/// A mismatch together with the addresses at which it was detected.
#[derive(Debug)]
pub struct Mismatch {
    /// Address in the original executable.
    pub addr_orig: u64,
    /// Address in the decomp executable.
    pub addr_decomp: u64,
    /// What exactly differs.
    pub cause: MismatchCause,
}

impl std::fmt::Display for Mismatch {
    /// Only the original address is printed; the decomp address is not part of the message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let location = ui::format_address(self.addr_orig);
        write!(f, "mismatch at {}: {}", location, self.cause)
    }
}
/// Compares functions of the original executable against their decomp counterparts.
pub struct FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf> {
/// ELF built from the decomp source code.
decomp_elf: &'decomp_elf elf::OwnedElf,
/// Decomp symbols by name; used to find where a symbol is expected to be in the decomp ELF.
decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>,
/// Looked up with the address of a pointer in the decomp ELF to get the address it refers to.
decomp_glob_data_table: elf::GlobDataTable,
// Optional, only initialized when a mismatch is detected.
decomp_addr_to_name_map: Lazy<elf::AddrToNameMap<'decomp_elf>>,
/// Data symbols loaded from the data symbol CSV.
known_data_symbols: KnownDataSymbolMap,
/// Known functions, looked up by their address in the original executable.
known_functions: FxHashMap<u64, &'functions functions::Info>,
/// The original executable.
orig_elf: &'orig_elf elf::OwnedElf,
/// The `.got` section of the original executable.
orig_got_section: &'orig_elf goblin::elf::SectionHeader,
}
impl<'a, 'functions, 'orig_elf, 'decomp_elf>
FunctionChecker<'a, 'functions, 'orig_elf, 'decomp_elf>
{
pub fn new(
orig_elf: &'orig_elf elf::OwnedElf,
decomp_elf: &'decomp_elf elf::OwnedElf,
decomp_symtab: &'a elf::SymbolTableByName<'decomp_elf>,
decomp_glob_data_table: elf::GlobDataTable,
functions: &'functions [functions::Info],
) -> Result<Self> {
let mut known_data_symbols = KnownDataSymbolMap::new();
known_data_symbols.load(get_data_symbol_csv_path()?.as_path(), &decomp_symtab)?;
let known_functions = functions::make_known_function_map(functions);
let orig_got_section = elf::find_section(orig_elf, ".got")?;
Ok(FunctionChecker {
decomp_elf,
decomp_symtab,
decomp_glob_data_table,
decomp_addr_to_name_map: Lazy::new(),
known_data_symbols,
known_functions,
orig_elf,
orig_got_section,
})
}
/// Compares `orig_fn` against `decomp_fn` instruction by instruction.
///
/// Returns `Ok(None)` if no mismatch was found and `Ok(Some(mismatch))` describing the
/// first mismatch otherwise. Returns `Err` if capstone fails to disassemble or provide
/// instruction details, or if a load/store does not have exactly one memory operand.
///
/// Instructions are not required to be byte-identical when they are a branch out of the
/// function, or an ADD/load/store that completes an ADRP pair; those are validated by
/// symbol instead.
pub fn check(
&self,
cs: &mut cs::Capstone,
orig_fn: &elf::Function,
decomp_fn: &elf::Function,
) -> Result<Option<Mismatch>> {
// Keep track of registers that are used with ADRP so that we can check global data
// references even when data is not placed at the same addresses
// as in the original executable.
#[derive(Default)]
struct State {
// ADRP immediates seen in the original function, by destination register.
gprs1: HashMap<cs::RegId, u64>,
// ADRP immediates seen in the decomp function, by destination register.
gprs2: HashMap<cs::RegId, u64>,
// Registers that were set by an ADRP and have not been forgotten since.
adrp_pair_registers: HashSet<cs::RegId>,
}
impl State {
// Stops tracking every register that the instruction writes to.
fn forget_modified_registers(&mut self, detail: &cs::InsnDetail) {
for reg in detail.regs_write() {
self.adrp_pair_registers.remove(&reg);
}
}
}
let mut state = State::default();
// A size difference is reported right away, before anything is disassembled.
if orig_fn.code.len() != decomp_fn.code.len() {
return Ok(Some(Mismatch {
addr_orig: orig_fn.addr,
addr_decomp: decomp_fn.addr,
cause: MismatchCause::FunctionSize,
}));
}
let mut instructions = try_map_two(&orig_fn, &decomp_fn, |func| {
cs.disasm_iter(func.code, func.addr)
})?;
// Check every pair of instructions.
while let (Some(i1), Some(i2)) = (instructions.0.next(), instructions.1.next()) {
let ids = map_two(&i1, &i2, |i| i.id().0);
let detail = try_map_two(&i1, &i2, |insn| cs.insn_detail(&insn))?;
let arch_detail = map_pair(&detail, |d| d.arch_detail());
let ops = map_pair(&arch_detail, |a| a.arm64().unwrap().operands_ref());
if ids.0 != ids.1 {
return Self::make_mismatch(&i1, &i2, MismatchCause::Mnemonic);
}
// Both instruction ids are equal from here on.
let id = ids.0;
match id.into() {
// Branches or function calls.
Arm64Insn::ARM64_INS_B | Arm64Insn::ARM64_INS_BL => {
let target =
map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.imm() as u64);
// If we are branching outside the function, this is likely a tail call.
// Treat it as a function call.
if !orig_fn.get_addr_range().contains(&target.0) {
if let Some(mismatch_cause) = self.check_function_call(target.0, target.1) {
return Self::make_mismatch(&i1, &i2, mismatch_cause);
}
} else {
// Otherwise, it's a simple branch, and both targets must match.
if i1.bytes() != i2.bytes() {
return Self::make_mismatch(&i1, &i2, MismatchCause::BranchTarget);
}
}
}
// Catch ADRP + (ADD/load/store) instruction pairs.
Arm64Insn::ARM64_INS_ADRP => {
let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg());
let imm =
map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.imm() as u64);
if reg.0 != reg.1 {
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
}
// Remember both immediates; they are compared once the second
// instruction of the pair is seen.
state.gprs1.insert(reg.0, imm.0);
state.gprs2.insert(reg.1, imm.1);
state.adrp_pair_registers.insert(reg.0);
}
// Catch ADRP + ADD instruction pairs.
Arm64Insn::ARM64_INS_ADD => {
let mut diff_ok = false;
if ops.0.len() == 3 {
let dest_reg =
map_pair(&ops, |ops| Arm64Operand::from(&ops[0]).op_type.reg());
let reg = map_pair(&ops, |ops| Arm64Operand::from(&ops[1]).op_type.reg());
if let Arm64OperandType::Imm(_) = Arm64Operand::from(&ops.0[2]).op_type {
let imm =
map_pair(&ops, |ops| Arm64Operand::from(&ops[2]).op_type.imm());
if dest_reg.0 != dest_reg.1 || reg.0 != reg.1 {
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
}
// Is this an ADRP pair we can check?
if state.adrp_pair_registers.contains(&reg.0) {
let orig_addr = state.gprs1[&reg.0] + imm.0 as u64;
let decomp_addr = state.gprs2[&reg.1] + imm.1 as u64;
if let Some(mismatch_cause) =
self.check_data_symbol(orig_addr, decomp_addr)
{
return Self::make_mismatch(&i1, &i2, mismatch_cause);
}
// If the data symbol reference matches, allow the instructions to be different.
diff_ok = true;
}
}
}
if !diff_ok && i1.bytes() != i2.bytes() {
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
}
state.forget_modified_registers(&detail.0);
}
// Loads and stores (single or paired).
id if is_id_in_range(Arm64Insn::ARM64_INS_LD1, Arm64Insn::ARM64_INS_LDXRH, id)
|| is_id_in_range(Arm64Insn::ARM64_INS_ST1, Arm64Insn::ARM64_INS_STXR, id) =>
{
let mut diff_ok = false;
// Check all operands for mismatches, except the Arm64OpMem which will be checked later.
let mut mem = (None, None);
for (op1, op2) in zip(ops.0, ops.1) {
let op1 = Arm64Operand::from(op1);
let op2 = Arm64Operand::from(op2);
if let Some(mem1) = op1.op_type.try_mem() {
if let Some(mem2) = op2.op_type.try_mem() {
ensure!(
mem.0.is_none() && mem.1.is_none(),
"found more than one OpMem"
);
mem.0 = Some(mem1);
mem.1 = Some(mem2);
continue;
}
}
if op1 != op2 {
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
}
}
ensure!(mem.0.is_some() && mem.1.is_some(), "didn't find an OpMem");
let mem = (mem.0.unwrap(), mem.1.unwrap());
if mem.0.base() != mem.1.base() {
return Self::make_mismatch(&i1, &i2, MismatchCause::Register);
}
let reg = mem.0.base();
// Is this an ADRP pair we can check?
if state.adrp_pair_registers.contains(&reg) {
// The displacement is added as a signed value.
let orig_addr_ptr = (state.gprs1[&reg] as i64 + mem.0.disp() as i64) as u64;
let decomp_addr_ptr =
(state.gprs2[&reg] as i64 + mem.1.disp() as i64) as u64;
if let Some(mismatch_cause) =
self.check_data_symbol_ptr(orig_addr_ptr, decomp_addr_ptr)
{
return Self::make_mismatch(&i1, &i2, mismatch_cause);
}
// If the data symbol reference matches, allow the instructions to be different.
diff_ok = true;
}
if !diff_ok && i1.bytes() != i2.bytes() {
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
}
state.forget_modified_registers(&detail.0);
}
// Anything else.
_ => {
if i1.bytes() != i2.bytes() {
return Self::make_mismatch(&i1, &i2, MismatchCause::Unknown);
}
state.forget_modified_registers(&detail.0);
}
}
}
Ok(None)
}
/// Validates that a call made at `decomp_addr` targets the decomp counterpart of the
/// function located at `orig_addr` in the original executable.
///
/// Returns `None` when the call is fine, and also when the callee is not a known
/// function or has no symbol in the decomp ELF. Returns the mismatch cause otherwise.
fn check_function_call(&self, orig_addr: u64, decomp_addr: u64) -> Option<MismatchCause> {
    let name = self.known_functions.get(&orig_addr)?.name.as_str();
    let expected = self.decomp_symtab.get(name)?.st_value;
    if decomp_addr == expected {
        return None;
    }

    // Only resolved on the failure path; the lookup table is built lazily.
    let actual_name = self
        .translate_decomp_addr_to_name(decomp_addr)
        .unwrap_or("unknown");
    let diff = ReferenceDiff {
        referenced_symbol: orig_addr,
        expected_ref_in_decomp: expected,
        actual_ref_in_decomp: decomp_addr,
        expected_symbol_name: name.to_string(),
        actual_symbol_name: actual_name.to_string(),
    };
    Some(MismatchCause::FunctionCall(diff))
}
/// Validates that `decomp_addr` is the address of `symbol` in the decomp ELF.
///
/// Returns `None` when the addresses agree or when `symbol` has no entry in the decomp
/// symbol table. Returns a `DataReference` mismatch otherwise.
fn check_data_symbol_ex(
    &self,
    orig_addr: u64,
    decomp_addr: u64,
    symbol: &DataSymbol,
) -> Option<MismatchCause> {
    let expected = self.decomp_symtab.get(symbol.name.as_str())?.st_value;
    if decomp_addr == expected {
        return None;
    }

    // Only resolved on the failure path; the lookup table is built lazily.
    let actual_name = self
        .translate_decomp_addr_to_name(decomp_addr)
        .unwrap_or("unknown");
    let diff = ReferenceDiff {
        referenced_symbol: orig_addr,
        expected_ref_in_decomp: expected,
        actual_ref_in_decomp: decomp_addr,
        expected_symbol_name: symbol.name.to_string(),
        actual_symbol_name: actual_name.to_string(),
    };
    Some(MismatchCause::DataReference(diff))
}
/// Validates a direct data reference.
///
/// Returns `None` when `orig_addr` does not belong to a known data symbol or when the
/// reference is correct, and the mismatch cause otherwise.
fn check_data_symbol(&self, orig_addr: u64, decomp_addr: u64) -> Option<MismatchCause> {
    self.known_data_symbols
        .get_symbol(orig_addr)
        .and_then(|symbol| self.check_data_symbol_ex(orig_addr, decomp_addr, symbol))
}
/// Validates a data reference that goes through a pointer.
///
/// Both arguments are addresses of *pointers to* possible data symbols rather than
/// addresses of the symbols themselves. Returns `None` on success and whenever the
/// reference cannot be checked: the original pointer is outside the `.got` section,
/// its bytes cannot be read, it does not point into a known data symbol, or the decomp
/// pointer has no entry in the global data table. Returns the mismatch cause otherwise.
fn check_data_symbol_ptr(
    &self,
    orig_addr_ptr: u64,
    decomp_addr_ptr: u64,
) -> Option<MismatchCause> {
    let points_into_got = elf::is_in_section(&self.orig_got_section, orig_addr_ptr, 8);
    if !points_into_got {
        return None;
    }

    // Read the 8-byte little-endian pointer stored in the original executable.
    let file_offset = elf::get_offset_in_file(&self.orig_elf, orig_addr_ptr).ok()? as u64;
    let pointer_bytes: [u8; 8] = elf::get_elf_bytes(&self.orig_elf, file_offset, 8)
        .ok()?
        .try_into()
        .ok()?;
    let orig_addr = u64::from_le_bytes(pointer_bytes);

    let data_symbol = self.known_data_symbols.get_symbol(orig_addr)?;
    let decomp_addr = *self.decomp_glob_data_table.get(&decomp_addr_ptr)?;
    self.check_data_symbol_ex(orig_addr, decomp_addr, data_symbol)
}
/// Builds the "mismatch found" result for a pair of instructions: a `Mismatch` carrying the
/// address of `i1` (original), the address of `i2` (decomp) and the given `cause`.
fn make_mismatch(
    i1: &cs::Insn,
    i2: &cs::Insn,
    cause: MismatchCause,
) -> Result<Option<Mismatch>> {
    let mismatch = Mismatch {
        addr_orig: i1.address(),
        addr_decomp: i2.address(),
        cause,
    };
    Ok(Some(mismatch))
}
/// Returns the name of the decomp symbol located at `decomp_addr`, if there is one.
///
/// The address-to-name map is created on first use; if it cannot be built, an empty map is
/// used instead, so every lookup yields `None`.
#[cold]
#[inline(never)]
fn translate_decomp_addr_to_name(&self, decomp_addr: u64) -> Option<&'decomp_elf str> {
    let names = self
        .decomp_addr_to_name_map
        .get_or_create(|| elf::make_addr_to_name_map(&self.decomp_elf).unwrap_or_default());
    names.get(&decomp_addr).copied()
}
}

311
tools/viking/src/elf.rs Normal file
View File

@ -0,0 +1,311 @@
use std::{collections::HashMap, ffi::CStr, fs::File, ops::Range, path::Path};
use anyhow::{anyhow, bail, Context, Result};
use goblin::{
container,
elf::{
dynamic, reloc, section_header, sym, Dynamic, Elf, ProgramHeader, RelocSection,
SectionHeader, Sym, Symtab,
},
elf64::program_header::PT_LOAD,
strtab::Strtab,
};
use memmap::{Mmap, MmapOptions};
use owning_ref::OwningHandle;
use rustc_hash::FxHashMap;
use crate::repo;
/// A parsed ELF bundled with the open `File` and the `Mmap` it was parsed from (see `load_elf`).
pub type OwnedElf = OwningHandle<Box<(Box<File>, Mmap)>, Box<Elf<'static>>>;
/// Symbols keyed by their name.
pub type SymbolTableByName<'a> = HashMap<&'a str, goblin::elf::Sym>;
/// Symbols keyed by their `st_value`.
pub type SymbolTableByAddr = FxHashMap<u64, goblin::elf::Sym>;
/// Symbol names keyed by the symbol's `st_value`.
pub type AddrToNameMap<'a> = FxHashMap<u64, &'a str>;
/// Maps a relocation's `r_offset` to the value computed for it by `build_glob_data_table`.
pub type GlobDataTable = FxHashMap<u64, u64>;
/// A function inside an executable: where it lives and the machine code it consists of.
pub struct Function<'a> {
    /// The virtual address of the function in its containing executable.
    /// *Note*: does not contain the IDA base (0x7100000000).
    pub addr: u64,
    /// The bytes that make up the code for this function.
    pub code: &'a [u8],
}

impl<'a> Function<'a> {
    /// Returns the half-open range of virtual addresses covered by the function's code,
    /// i.e. `addr..addr + code.len()`.
    #[inline]
    pub fn get_addr_range(&self) -> Range<u64> {
        let start = self.addr;
        let end = start + self.code.len() as u64;
        start..end
    }
}
/// Returns the goblin parsing context used for every ELF we load: 64-bit, little endian.
#[inline]
fn make_goblin_ctx() -> container::Ctx {
    let width = container::Container::Big; // 64-bit
    let byte_order = container::Endian::Little;
    container::Ctx::new(width, byte_order)
}
/// A stripped down version of `goblin::elf::Elf::parse`, parsing only the sections that we need.
///
/// Parsed: program headers, section headers, the section header string table, the symbol
/// table and the dynamic section with its relocations.
///
/// *Warning*: In particular, `strtab`, `dynstrtab`, `soname` and `libraries` are **not** parsed.
fn parse_elf_faster(bytes: &[u8]) -> Result<Elf> {
    let header = Elf::parse_header(bytes)?;
    let mut elf = Elf::lazy_parse(header)?;
    let ctx = make_goblin_ctx();

    // Program and section header tables.
    elf.program_headers =
        ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;
    elf.section_headers =
        SectionHeader::parse(bytes, header.e_shoff as usize, header.e_shnum as usize, ctx)?;

    // Section name string table. It is left empty when the index in the header is out of range.
    elf.shdr_strtab = match elf.section_headers.get(header.e_shstrndx as usize) {
        None => Strtab::default(),
        Some(shdr) => {
            shdr.check_size(bytes.len())?;
            Strtab::parse(bytes, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)?
        }
    };

    // Symbol table. Every SHT_SYMTAB section is parsed; the last one wins.
    let symtab_headers = elf
        .section_headers
        .iter()
        .filter(|shdr| shdr.sh_type as u32 == section_header::SHT_SYMTAB);
    for shdr in symtab_headers {
        // An entry size of zero means that there are no entries to parse.
        let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
        elf.syms = Symtab::parse(bytes, shdr.sh_offset as usize, count as usize, ctx)?;
    }

    // Dynamic section and the dynamic relocations it describes.
    elf.dynamic = Dynamic::parse(bytes, &elf.program_headers, ctx)?;
    if let Some(dynamic_section) = elf.dynamic.as_ref() {
        let info = &dynamic_section.info;
        elf.dynrelas = RelocSection::parse(bytes, info.rela, info.relasz, true, ctx)?;
        elf.dynrels = RelocSection::parse(bytes, info.rel, info.relsz, false, ctx)?;
        let plt_uses_rela = info.pltrel as u64 == dynamic::DT_RELA;
        elf.pltrelocs =
            RelocSection::parse(bytes, info.jmprel, info.pltrelsz, plt_uses_rela, ctx)?;
    }

    Ok(elf)
}
/// Opens the ELF file at `path`, memory-maps it and parses it with `parse_elf_faster`.
///
/// The returned `OwnedElf` bundles the parsed `Elf` with the `File` and the `Mmap` whose
/// bytes it was parsed from.
///
/// # Errors
///
/// Fails if the file cannot be opened or mapped, or if parsing fails (the latter is
/// reported with the context "failed to load ELF").
pub fn load_elf(path: &Path) -> Result<OwnedElf> {
let file = Box::new(File::open(path)?);
// NOTE(review): presumably this relies on the file not being modified while it is mapped -- confirm.
let mmap = unsafe { MmapOptions::new().map(&file)? };
// The (file, mmap) pair becomes the owner; the closure is given a pointer to it, which has
// to be dereferenced in an `unsafe` block to reach the mapped bytes.
OwningHandle::try_new(Box::new((file, mmap)), |pair| unsafe {
let elf = parse_elf_faster(&(*pair).1).with_context(|| "failed to load ELF")?;
Ok(Box::new(elf))
})
}
/// Loads the original executable, which is expected at `data/main.elf` inside the repo.
pub fn load_orig_elf() -> Result<OwnedElf> {
    let path = repo::get_repo_root()?.join("data").join("main.elf");
    load_elf(&path)
}
/// Loads the decompiled executable, which is expected at `build/uking` inside the repo.
pub fn load_decomp_elf() -> Result<OwnedElf> {
    let path = repo::get_repo_root()?.join("build").join("uking");
    load_elf(&path)
}
/// The raw bytes of the string table that holds the names of the symbols in an ELF's
/// symbol table.
struct SymbolStringTable<'elf> {
    /// Contents of the string table section linked to the symbol table.
    bytes: &'elf [u8],
}

impl<'elf> SymbolStringTable<'elf> {
    /// Locates the string table linked (via `sh_link`) to the first `SHT_SYMTAB` section of `elf`.
    ///
    /// # Errors
    ///
    /// Fails if the ELF has no symbol table section, or if the linked section index or the
    /// linked section's file range is out of bounds.
    pub fn from_elf(elf: &'elf OwnedElf) -> Result<Self> {
        let bytes: &'elf [u8] = &*elf.as_owner().1;

        let symtab_hdr = elf
            .section_headers
            .iter()
            .find(|shdr| shdr.sh_type as u32 == section_header::SHT_SYMTAB)
            .ok_or_else(|| anyhow!("couldn't find symbol string table"))?;
        let table_hdr = elf
            .section_headers
            .get(symtab_hdr.sh_link as usize)
            .ok_or_else(|| anyhow!("symbol string table index out of bounds"))?;
        table_hdr.check_size(bytes.len())?;

        let start = table_hdr.sh_offset as usize;
        let end = start + table_hdr.sh_size as usize;
        let table = bytes
            .get(start..end)
            .ok_or_else(|| anyhow!("symbol string table is out of bounds"))?;
        Ok(SymbolStringTable { bytes: table })
    }

    /// Returns the NUL-terminated string that starts at `offset` in the table.
    ///
    /// The search for the terminator never leaves the table and the bytes are validated as
    /// UTF-8. An empty string is returned if `offset` is out of bounds, if the string is not
    /// terminated within the table, or if it is not valid UTF-8.
    pub fn get_string(&self, offset: usize) -> &'elf str {
        let table: &'elf [u8] = self.bytes;
        let tail = table.get(offset..).unwrap_or_default();
        tail.iter()
            .position(|&byte| byte == 0)
            // `tail[..=nul]` ends with the first NUL and has no interior NUL.
            .and_then(|nul| CStr::from_bytes_with_nul(&tail[..=nul]).ok())
            .and_then(|name| name.to_str().ok())
            .unwrap_or("")
    }
}
/// Returns true for the symbols that are worth keeping in our lookup tables:
/// objects, functions, common symbols and TLS symbols.
fn filter_out_useless_syms(sym: &Sym) -> bool {
    match sym.st_type() {
        sym::STT_OBJECT | sym::STT_FUNC | sym::STT_COMMON | sym::STT_TLS => true,
        _ => false,
    }
}
/// Builds a name -> symbol map from the ELF's symbol table.
///
/// Only symbols accepted by `filter_out_useless_syms` are included. When several symbols
/// share a name, the first one in the table is kept.
pub fn make_symbol_map_by_name(elf: &OwnedElf) -> Result<SymbolTableByName> {
    let strtab = SymbolStringTable::from_elf(elf)?;
    let wanted = elf.syms.iter().filter(filter_out_useless_syms).count();
    let mut by_name = SymbolTableByName::with_capacity_and_hasher(wanted, Default::default());
    for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
        let name = strtab.get_string(symbol.st_name);
        by_name.entry(name).or_insert(symbol);
    }
    Ok(by_name)
}
/// Builds an address (`st_value`) -> symbol map from the ELF's symbol table.
///
/// Only symbols accepted by `filter_out_useless_syms` are included. When several symbols
/// share an address, the first one in the table is kept.
pub fn make_symbol_map_by_addr(elf: &OwnedElf) -> SymbolTableByAddr {
    let wanted = elf.syms.iter().filter(filter_out_useless_syms).count();
    let mut by_addr = SymbolTableByAddr::with_capacity_and_hasher(wanted, Default::default());
    for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
        by_addr.entry(symbol.st_value).or_insert(symbol);
    }
    by_addr
}
/// Builds an address (`st_value`) -> symbol name map from the ELF's symbol table.
///
/// Only symbols accepted by `filter_out_useless_syms` are included. When several symbols
/// share an address, the name of the first one in the table is kept.
pub fn make_addr_to_name_map(elf: &OwnedElf) -> Result<AddrToNameMap> {
    let strtab = SymbolStringTable::from_elf(elf)?;
    let wanted = elf.syms.iter().filter(filter_out_useless_syms).count();
    let mut names = AddrToNameMap::with_capacity_and_hasher(wanted, Default::default());
    for symbol in elf.syms.iter().filter(filter_out_useless_syms) {
        // The name is only looked up when the address has not been seen yet.
        names
            .entry(symbol.st_value)
            .or_insert_with(|| strtab.get_string(symbol.st_name));
    }
    Ok(names)
}
/// Parses the symbol table stored in the section described by `shdr`.
///
/// The number of entries is `sh_size / sh_entsize`, or zero when the entry size is zero.
fn parse_symtab<'a>(elf: &'a OwnedElf, shdr: &'a SectionHeader) -> Result<Symtab<'a>> {
    let bytes = &elf.as_owner().1;
    let count = shdr.sh_size.checked_div(shdr.sh_entsize).unwrap_or(0);
    let offset = shdr.sh_offset as usize;
    Ok(Symtab::parse(
        bytes,
        offset,
        count as usize,
        make_goblin_ctx(),
    )?)
}
/// Returns the header of the first section called `name`, or an error if there is none.
pub fn find_section<'a>(elf: &'a OwnedElf, name: &str) -> Result<&'a SectionHeader> {
    for header in elf.section_headers.iter() {
        if &elf.shdr_strtab[header.sh_name] == name {
            return Ok(header);
        }
    }
    Err(anyhow!("failed to find {} section", name))
}
/// Returns the header of the section that `shdr` links to through its `sh_link` field,
/// or an error if that index is out of range.
pub fn get_linked_section<'a>(
    elf: &'a OwnedElf,
    shdr: &'a SectionHeader,
) -> Result<&'a SectionHeader> {
    match elf.section_headers.get(shdr.sh_link as usize) {
        Some(linked) => Ok(linked),
        None => Err(anyhow!("could not get linked section")),
    }
}
/// Returns whether the `size` bytes starting at virtual address `addr` lie entirely inside
/// `section`.
///
/// `addr` itself must be inside the section (so a range starting at the section's end is
/// rejected even when `size` is zero), and the end of the range must not go past the end of
/// the section. Ranges or section bounds that would overflow a `u64` are treated as not
/// contained instead of overflowing.
#[inline]
pub fn is_in_section(section: &SectionHeader, addr: u64, size: u64) -> bool {
    let begin = section.sh_addr;
    match (begin.checked_add(section.sh_size), addr.checked_add(size)) {
        (Some(end), Some(range_end)) => begin <= addr && addr < end && range_end <= end,
        _ => false,
    }
}
/// Builds a table that maps the offset of each `R_AARCH64_GLOB_DAT` and
/// `R_AARCH64_RELATIVE` relocation in `.rela.dyn` to the value computed for it
/// (symbol value plus addend).
///
/// # Errors
///
/// Fails if the `.rela.dyn` section or its linked symbol table cannot be found or parsed,
/// if a relocation references an invalid symbol index, or if a handled relocation has no
/// addend.
pub fn build_glob_data_table(elf: &OwnedElf) -> Result<GlobDataTable> {
    let section = &elf.dynrelas;
    let section_hdr = find_section(elf, ".rela.dyn")?;
    // The corresponding symbol table.
    let symtab = parse_symtab(elf, get_linked_section(elf, section_hdr)?)?;

    let mut table = GlobDataTable::with_capacity_and_hasher(section.len(), Default::default());
    for reloc in section.iter() {
        let symbol_value: u64 = symtab
            .get(reloc.r_sym)
            .ok_or_else(|| anyhow!("invalid symbol index"))?
            .st_value;
        match reloc.r_type {
            // FIXME: for R_AARCH64_RELATIVE this should be Delta(S) + A.
            reloc::R_AARCH64_GLOB_DAT | reloc::R_AARCH64_RELATIVE => {
                let addend = reloc.r_addend.ok_or_else(|| {
                    anyhow!("relocation at {:#x} has no addend", reloc.r_offset)
                })?;
                // Two's complement wrapping addition: identical to the signed sum, but it
                // cannot trip an overflow check on malformed input.
                table.insert(reloc.r_offset, symbol_value.wrapping_add(addend as u64));
            }
            _ => (),
        }
    }
    Ok(table)
}
/// Translates the virtual address `addr` to an offset in the ELF file.
///
/// The first `PT_LOAD` segment whose virtual address range contains `addr` is used.
/// An error is returned when no such segment exists.
pub fn get_offset_in_file(elf: &OwnedElf, addr: u64) -> Result<usize> {
    let addr = addr as usize;
    let containing_segment = elf
        .program_headers
        .iter()
        .filter(|segment| segment.p_type == PT_LOAD)
        .find(|segment| segment.vm_range().contains(&addr));
    match containing_segment {
        Some(segment) => Ok(segment.file_range().start + addr - segment.vm_range().start),
        None => bail!("{:#x} doesn't belong to any segment", addr),
    }
}
/// Returns the `size` bytes of the ELF file that back the virtual address `addr`.
///
/// `addr` is a virtual address; the translation to a file offset is done here.
///
/// # Errors
///
/// Fails if `addr` does not belong to any loadable segment, or if the requested range does
/// not fit inside the file (instead of panicking on an out-of-bounds slice).
pub fn get_elf_bytes(elf: &OwnedElf, addr: u64, size: u64) -> Result<&[u8]> {
    let offset = get_offset_in_file(elf, addr)?;
    let bytes = offset
        .checked_add(size as usize)
        .and_then(|end| elf.as_owner().1.get(offset..end))
        .ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "{:#x} bytes at {:#x} extend past the end of the ELF file",
                    size, addr
                ),
            )
        })?;
    Ok(bytes)
}
/// Returns the function that occupies `size` bytes at virtual address `addr`.
pub fn get_function(elf: &OwnedElf, addr: u64, size: u64) -> Result<Function> {
    let code = get_elf_bytes(elf, addr, size)?;
    Ok(Function { addr, code })
}
/// Looks up `name` in `symbols` and returns the function described by that symbol
/// (its `st_value` is used as the address and its `st_size` as the size).
///
/// Fails with "unknown function" when there is no symbol with that name.
pub fn get_function_by_name<'a>(
    elf: &'a OwnedElf,
    symbols: &SymbolTableByName,
    name: &str,
) -> Result<Function<'a>> {
    match symbols.get(name) {
        Some(symbol) => get_function(elf, symbol.st_value, symbol.st_size),
        None => Err(anyhow!("unknown function: {}", name)),
    }
}

View File

@ -0,0 +1,142 @@
use crate::repo;
use anyhow::{bail, ensure, Context, Result};
use rustc_hash::FxHashMap;
use std::path::{Path, PathBuf};
/// The decompilation status of a function, as recorded in the function list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Status code `O`.
    Matching,
    /// Status code `m`.
    NonMatchingMinor,
    /// Status code `M`.
    NonMatchingMajor,
    /// Status code `U`.
    NotDecompiled,
    /// Status code `L`.
    Library,
}

/// One entry of the function list.
#[derive(Debug, Clone)]
pub struct Info {
    /// Address of the function, as returned by `parse_address` (i.e. without the address base).
    pub addr: u64,
    /// Size of the function in bytes.
    pub size: u32,
    /// Name of the function in the decompiled executable.
    pub name: String,
    /// Decompilation status of the function.
    pub status: Status,
}

impl Info {
    /// Returns whether the function has been decompiled, i.e. whether its status is anything
    /// other than `NotDecompiled` or `Library`.
    pub fn is_decompiled(&self) -> bool {
        !matches!(self.status, Status::NotDecompiled | Status::Library)
    }
}
/// Base address that `parse_address` subtracts from the addresses it parses.
pub const ADDRESS_BASE: u64 = 0x71_0000_0000;
/// Parses a hexadecimal number, with or without a leading `0x`.
fn parse_base_16(value: &str) -> Result<u64> {
    let digits = value.strip_prefix("0x").unwrap_or(value);
    Ok(u64::from_str_radix(digits, 16)?)
}
/// Parses a hexadecimal address (with or without a leading `0x`) and returns it relative
/// to `ADDRESS_BASE`.
///
/// # Errors
///
/// Fails if `value` is not a valid hexadecimal number, or if the address is lower than
/// `ADDRESS_BASE` (instead of underflowing).
pub fn parse_address(value: &str) -> Result<u64> {
    let absolute = parse_base_16(value)?;
    match absolute.checked_sub(ADDRESS_BASE) {
        Some(relative) => Ok(relative),
        None => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "address {:#x} is below the address base {:#x}",
                absolute, ADDRESS_BASE
            ),
        )
        .into()),
    }
}
/// Parses one record of the function list.
///
/// A record has exactly four fields: address, status code, size and name. Only the first
/// character of the status field is looked at.
fn parse_function_csv_entry(record: &csv::StringRecord) -> Result<Info> {
    ensure!(record.len() == 4, "invalid record");

    let addr = parse_address(&record[0])?;
    let size: u32 = record[2].parse()?;
    let status = match record[1].chars().next() {
        None => bail!("missing status code"),
        Some('O') => Status::Matching,
        Some('m') => Status::NonMatchingMinor,
        Some('M') => Status::NonMatchingMajor,
        Some('U') => Status::NotDecompiled,
        Some('L') => Status::Library,
        Some(code) => bail!("unexpected status code: {}", code),
    };

    Ok(Info {
        addr,
        size,
        name: record[3].to_string(),
        status,
    })
}
/// Returns the path to the function list: `data/uking_functions.csv` inside the repo.
pub fn get_functions_csv_path() -> Result<PathBuf> {
    let repo_root = repo::get_repo_root()?;
    Ok(repo_root.join("data").join("uking_functions.csv"))
}
/// Returns a Vec of all functions that are listed in the specified CSV.
///
/// The first record must be the header `Address,Quality,Size,Name`; every following record
/// is parsed with `parse_function_csv_entry`. Parse errors mention the offending line number.
pub fn get_functions_for_path(csv_path: &Path) -> Result<Vec<Info>> {
    const EXPECTED_HEADER: [&str; 4] = ["Address", "Quality", "Size", "Name"];

    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .quoting(false)
        .from_path(csv_path)?;

    // We build the result array manually without using csv iterators for performance reasons.
    let mut functions = Vec::with_capacity(110_000);
    let mut record = csv::StringRecord::new();
    let mut line_number = 1;

    if reader.read_record(&mut record)? {
        // Verify that the CSV has the correct format.
        ensure!(record.len() == 4, "invalid record; expected 4 fields");
        let header_matches = record.iter().eq(EXPECTED_HEADER.iter().copied());
        ensure!(
            header_matches,
            "wrong CSV format; this program only works with the new function list format (added in commit 1d4c815fbae3)"
        );
        line_number += 1;
    }

    while reader.read_record(&mut record)? {
        let info = parse_function_csv_entry(&record)
            .with_context(|| format!("failed to parse CSV record at line {}", line_number))?;
        functions.push(info);
        line_number += 1;
    }

    Ok(functions)
}
/// Returns a Vec of all known functions in the executable, read from the repo's function list.
pub fn get_functions() -> Result<Vec<Info>> {
    let csv_path = get_functions_csv_path()?;
    get_functions_for_path(&csv_path)
}
/// Builds an address -> function map that contains only the decompiled functions.
///
/// If several decompiled functions share an address, the last one in `functions` wins.
pub fn make_known_function_map(functions: &[Info]) -> FxHashMap<u64, &Info> {
    let mut known_functions: FxHashMap<u64, &Info> =
        FxHashMap::with_capacity_and_hasher(functions.len(), Default::default());
    known_functions.extend(
        functions
            .iter()
            .filter(|function| function.is_decompiled())
            .map(|function| (function.addr, function)),
    );
    known_functions
}
/// Demangle a C++ symbol.
///
/// Names that do not start with `_Z` are rejected without attempting to demangle them.
pub fn demangle_str(name: &str) -> Result<String> {
    let looks_mangled = name.starts_with("_Z");
    if !looks_mangled {
        bail!("not an external mangled name");
    }

    let symbol = cpp_demangle::Symbol::new(name)?;
    let demangled = symbol.demangle(&cpp_demangle::DemangleOptions::new())?;
    Ok(demangled)
}
/// Returns the path to the `expected` directory inside the repo.
pub fn get_expected_dir_path() -> Result<PathBuf> {
    let repo_root = repo::get_repo_root()?;
    Ok(repo_root.join("expected"))
}

6
tools/viking/src/lib.rs Normal file
View File

@ -0,0 +1,6 @@
pub mod capstone_utils;
pub mod checks;
pub mod elf;
pub mod functions;
pub mod repo;
pub mod ui;

20
tools/viking/src/repo.rs Normal file
View File

@ -0,0 +1,20 @@
use anyhow::{bail, Result};
use std::path::PathBuf;
/// Finds the root of the repository.
///
/// Starting at the current working directory and walking up through its parents, the
/// first directory that contains both a `data` and a `src` directory is returned.
/// An error is returned when no such directory exists.
pub fn get_repo_root() -> Result<PathBuf> {
    let current_dir = std::env::current_dir()?;
    for candidate in current_dir.ancestors() {
        let has_data = candidate.join("data").is_dir();
        if has_data && candidate.join("src").is_dir() {
            return Ok(candidate.to_path_buf());
        }
    }
    bail!("failed to find repo root -- run this program inside the repo")
}

View File

@ -0,0 +1,180 @@
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use capstone as cs;
use capstone::arch::BuildsCapstone;
use colored::*;
use rayon::prelude::*;
use std::cell::RefCell;
use std::sync::atomic::AtomicBool;
use viking::checks::FunctionChecker;
use viking::elf;
use viking::functions;
use viking::functions::Status;
use viking::ui;
use mimalloc::MiMalloc;
// Use mimalloc as the allocator for the whole program.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
/// Checks a single function of the function list against the decompiled executable.
///
/// Functions that are not decompiled are skipped, and so are functions that cannot be found
/// in the decompiled executable (after printing a warning). A function marked as matching
/// that does not match is reported as an error; a function marked as non-matching that
/// does match only produces a note.
///
/// Returns false if the program should exit with a failure code at the end.
fn check_function(
    checker: &FunctionChecker,
    mut cs: &mut capstone::Capstone,
    orig_elf: &elf::OwnedElf,
    decomp_elf: &elf::OwnedElf,
    decomp_symtab: &elf::SymbolTableByName,
    function: &functions::Info,
) -> Result<bool> {
    if !function.is_decompiled() {
        return Ok(true);
    }

    let name = function.name.as_str();
    let decomp_fn = match elf::get_function_by_name(decomp_elf, decomp_symtab, name) {
        Ok(decomp_fn) => decomp_fn,
        Err(error) => {
            ui::print_warning(&format!(
                "couldn't check {}: {}",
                ui::format_symbol_name(name),
                error.to_string().dimmed(),
            ));
            return Ok(true);
        }
    };

    let expected_to_match = match function.status {
        Status::Matching => true,
        Status::NonMatchingMinor | Status::NonMatchingMajor => false,
        // Ruled out by the `is_decompiled` check above.
        Status::NotDecompiled | Status::Library => unreachable!(),
    };

    let orig_fn = elf::get_function(orig_elf, function.addr, u64::from(function.size))
        .with_context(|| {
            format!(
                "failed to get function {} ({}) from the original executable",
                name,
                ui::format_address(function.addr),
            )
        })?;
    let result = checker
        .check(&mut cs, &orig_fn, &decomp_fn)
        .with_context(|| format!("checking {}", name))?;

    match (expected_to_match, result) {
        (true, Some(mismatch)) => {
            // Hold the lock so that the error and its detail are printed together.
            let stderr = std::io::stderr();
            let mut lock = stderr.lock();
            ui::print_error_ex(
                &mut lock,
                &format!(
                    "function {} is marked as matching but does not match",
                    ui::format_symbol_name(name),
                ),
            );
            ui::print_detail_ex(&mut lock, &mismatch.to_string());
            Ok(false)
        }
        (false, None) => {
            ui::print_note(&format!(
                "function {} is marked as non-matching but matches",
                ui::format_symbol_name(name),
            ));
            Ok(true)
        }
        _ => Ok(true),
    }
}
/// Creates a Capstone disassembler for AArch64 with instruction details enabled.
#[cold]
#[inline(never)]
fn make_cs() -> Result<cs::Capstone> {
    let built = cs::Capstone::new()
        .arm64()
        .mode(cs::arch::arm64::ArchMode::Arm)
        .detail(true)
        .build();
    built.or_else(viking::capstone_utils::translate_cs_error)
}
// One Capstone instance per thread, created with `make_cs` the first time a thread uses it.
// Panics (via `unwrap`) if the instance cannot be created.
thread_local! {
static CAPSTONE: RefCell<cs::Capstone> = RefCell::new(make_cs().unwrap());
}
/// Loads both executables and the function list, then checks every listed function in
/// parallel. Returns an error if loading fails or if at least one function failed its check.
fn main() -> Result<()> {
    use std::sync::atomic::Ordering;

    let orig_elf = elf::load_orig_elf().context("failed to load original ELF")?;
    let decomp_elf = elf::load_decomp_elf().context("failed to load decomp ELF")?;

    // Load these in parallel.
    let mut symtab_result = None;
    let mut glob_data_table_result = None;
    let mut functions_result = None;
    rayon::scope(|s| {
        s.spawn(|_| symtab_result = Some(elf::make_symbol_map_by_name(&decomp_elf)));
        s.spawn(|_| glob_data_table_result = Some(elf::build_glob_data_table(&decomp_elf)));
        s.spawn(|_| functions_result = Some(functions::get_functions()));
    });

    // Every task has run once the scope returns, so the options are all filled in.
    let decomp_symtab = symtab_result
        .unwrap()
        .context("failed to make symbol map")?;
    let decomp_glob_data_table = glob_data_table_result
        .unwrap()
        .context("failed to make global data table")?;
    let functions = functions_result
        .unwrap()
        .context("failed to load function CSV")?;

    let checker = FunctionChecker::new(
        &orig_elf,
        &decomp_elf,
        &decomp_symtab,
        decomp_glob_data_table,
        &functions,
    )
    .context("failed to construct FunctionChecker")?;

    let found_error = AtomicBool::new(false);
    functions.par_iter().try_for_each(|function| {
        CAPSTONE.with(|capstone| -> Result<()> {
            let mut capstone = capstone.borrow_mut();
            let passed = check_function(
                &checker,
                &mut capstone,
                &orig_elf,
                &decomp_elf,
                &decomp_symtab,
                function,
            )?;
            if !passed {
                found_error.store(true, Ordering::Relaxed);
            }
            Ok(())
        })
    })?;

    if found_error.load(Ordering::Relaxed) {
        bail!("found at least one error");
    }
    Ok(())
}

59
tools/viking/src/ui.rs Normal file
View File

@ -0,0 +1,59 @@
use colored::*;
use std::io::StderrLock;
use std::io::Write;
use textwrap::indent;
use crate::functions;
/// Prints `msg` to stderr as a note: a bold cyan `note` label followed by the bold message.
pub fn print_note(msg: &str) {
    let label = "note".bold().cyan();
    let separator = ": ".bold();
    eprintln!("{}{}{}", label, separator, msg.bold());
}
/// Prints `msg` to stderr as a warning: a bold yellow `warning` label followed by the bold
/// message.
pub fn print_warning(msg: &str) {
    let label = "warning".bold().yellow();
    let separator = ": ".bold();
    eprintln!("{}{}{}", label, separator, msg.bold());
}
/// Prints `msg` to stderr as an error. Locks stderr and delegates to `print_error_ex`.
pub fn print_error(msg: &str) {
    print_error_ex(&mut std::io::stderr().lock(), msg);
}
pub fn print_error_ex(lock: &mut StderrLock, msg: &str) {
writeln!(
lock,
"{}{}{}",
"error".bold().red(),
": ".bold(),
msg.bold()
)
.unwrap();
}
/// Formats a symbol name for display.
///
/// If `name` can be demangled, the result is the demangled name in blue followed by the
/// mangled name (dimmed) in parentheses. Otherwise it is just `name` in blue.
pub fn format_symbol_name(name: &str) -> String {
    match functions::demangle_str(name) {
        Ok(demangled) => format!("{} ({})", demangled.blue(), name.blue().dimmed(),),
        // The fallback string is only built when demangling fails.
        Err(_) => name.blue().to_string(),
    }
}
/// Formats an address for display: `0x`-prefixed hexadecimal, in green.
pub fn format_address(addr: u64) -> String {
    let hex = format!("{:#x}", addr);
    hex.green().to_string()
}
/// Prints a detail message to stderr. Locks stderr and delegates to `print_detail_ex`.
pub fn print_detail(msg: &str) {
    print_detail_ex(&mut std::io::stderr().lock(), msg);
}
/// Writes a detail message to the locked stderr, followed by an empty line.
///
/// The message is written without styling and every line is prefixed with the (bold,
/// dimmed) indentation prefix. Panics if writing fails.
pub fn print_detail_ex(lock: &mut StderrLock, msg: &str) {
    let plain_message = msg.clear().to_string();
    let prefix = "".bold().dimmed().to_string();
    let indented = indent(&plain_message, &prefix);
    writeln!(lock, "{}\n", indented).unwrap();
}