papermario/tools/splat/split.py

348 lines
11 KiB
Python
Executable File

#! /usr/bin/env python3
import hashlib
from typing import Dict, List, Union, Set, Any
import argparse
import pylibyaml
import yaml
import pickle
from colorama import Style, Fore
from segtypes.segment import Segment
from segtypes.linker_entry import LinkerWriter, to_cname
from util import log
from util import options
from util import symbols
from util import palettes
VERSION = "0.8.1.0"
parser = argparse.ArgumentParser(
description="Split a rom given a rom, a config, and output directory"
)
parser.add_argument("config", help="path to a compatible config .yaml file", nargs="+")
parser.add_argument("--target", help="path to a file to split (.z64 rom)")
parser.add_argument("--basedir", help="a directory in which to extract the rom")
parser.add_argument("--modes", nargs="+", default="all")
parser.add_argument("--verbose", action="store_true", help="Enable debug logging")
parser.add_argument(
"--use-cache", action="store_true", help="Only split changed segments in config"
)
linker_writer: LinkerWriter
config: Dict[str, Any]
def fmt_size(size):
if size > 1000000:
return str(size // 1000000) + " MB"
elif size > 1000:
return str(size // 1000) + " KB"
else:
return str(size) + " B"
def initialize_segments(config_segments: Union[dict, list]) -> List[Segment]:
seen_segment_names: Set[str] = set()
ret = []
for i, seg_yaml in enumerate(config_segments):
# rompos marker
if isinstance(seg_yaml, list) and len(seg_yaml) == 1:
continue
seg_type = Segment.parse_segment_type(seg_yaml)
segment_class = Segment.get_class_for_type(seg_type)
this_start = Segment.parse_segment_start(seg_yaml)
next_start = Segment.parse_segment_start(config_segments[i + 1])
segment: Segment = Segment.from_yaml(
segment_class, seg_yaml, this_start, next_start
)
if segment.require_unique_name:
if segment.name in seen_segment_names:
log.error(f"segment name '{segment.name}' is not unique")
seen_segment_names.add(segment.name)
ret.append(segment)
return ret
def get_segment_symbols(segment, all_segments):
seg_syms = {}
other_syms = {}
for symbol in symbols.all_symbols:
if symbols.is_symbol_isolated(symbol, all_segments) and not symbol.rom:
if segment.contains_vram(symbol.vram_start):
if symbol.vram_start not in seg_syms:
seg_syms[symbol.vram_start] = []
seg_syms[symbol.vram_start].append(symbol)
else:
if symbol.vram_start not in other_syms:
other_syms[symbol.vram_start] = []
other_syms[symbol.vram_start].append(symbol)
else:
if symbol.rom and segment.contains_rom(symbol.rom):
if symbol.vram_start not in seg_syms:
seg_syms[symbol.vram_start] = []
seg_syms[symbol.vram_start].append(symbol)
else:
if symbol.vram_start not in other_syms:
other_syms[symbol.vram_start] = []
other_syms[symbol.vram_start].append(symbol)
return seg_syms, other_syms
def do_statistics(seg_sizes, rom_bytes, seg_split, seg_cached):
unk_size = seg_sizes.get("unk", 0)
rest_size = 0
total_size = len(rom_bytes)
for typ in seg_sizes:
if typ != "unk":
rest_size += seg_sizes[typ]
known_ratio = rest_size / total_size
unk_ratio = unk_size / total_size
log.write(f"Split {fmt_size(rest_size)} ({known_ratio:.2%}) in defined segments")
for typ in seg_sizes:
if typ != "unk":
tmp_size = seg_sizes[typ]
tmp_ratio = tmp_size / total_size
log.write(
f"{typ:>20}: {fmt_size(tmp_size):>8} ({tmp_ratio:.2%}) {Fore.GREEN}{seg_split[typ]} split{Style.RESET_ALL}, {Style.DIM}{seg_cached[typ]} cached"
)
log.write(
f"{'unknown':>20}: {fmt_size(unk_size):>8} ({unk_ratio:.2%}) from unknown bin files"
)
def merge_configs(main_config, additional_config):
# Merge rules are simple
# For each key in the dictionary
# - If list then append to list
# - If a dictionary then repeat merge on sub dictionary entries
# - Else assume string or number and replace entry
for curkey in additional_config:
if curkey not in main_config:
main_config[curkey] = additional_config[curkey]
elif type(main_config[curkey]) != type(additional_config[curkey]):
log.error(f"Type for key {curkey} in configs does not match")
else:
# keys exist and match, see if a list to append
if type(main_config[curkey]) == list:
main_config[curkey] += additional_config[curkey]
elif type(main_config[curkey]) == dict:
# need to merge sub areas
main_config[curkey] = merge_configs(
main_config[curkey], additional_config[curkey]
)
else:
# not a list or dictionary, must be a number or string, overwrite
main_config[curkey] = additional_config[curkey]
return main_config
def main(config_path, base_dir, target_path, modes, verbose, use_cache=True):
global config
log.write(f"splat {VERSION}")
# Load config
config = {}
for entry in config_path:
with open(entry) as f:
additional_config = yaml.load(f.read(), Loader=yaml.SafeLoader)
config = merge_configs(config, additional_config)
options.initialize(config, config_path, base_dir, target_path)
options.set("modes", modes)
if verbose:
options.set("verbose", True)
with options.get_target_path().open("rb") as f2:
rom_bytes = f2.read()
if "sha1" in config:
sha1 = hashlib.sha1(rom_bytes).hexdigest()
e_sha1 = config["sha1"].lower()
if e_sha1 != sha1:
log.error(f"sha1 mismatch: expected {e_sha1}, was {sha1}")
# Create main output dir
options.get_base_path().mkdir(parents=True, exist_ok=True)
processed_segments: List[Segment] = []
seg_sizes: Dict[str, int] = {}
seg_split: Dict[str, int] = {}
seg_cached: Dict[str, int] = {}
# Load cache
if use_cache:
try:
with options.get_cache_path().open("rb") as f3:
cache = pickle.load(f3)
if verbose:
log.write(f"Loaded cache ({len(cache.keys())} items)")
except Exception:
cache = {}
else:
cache = {}
# invalidate entire cache if options change
if use_cache and cache.get("__options__") != config.get("options"):
if verbose:
log.write("Options changed, invalidating cache")
cache = {
"__options__": config.get("options"),
}
# Initialize segments
all_segments = initialize_segments(config["segments"])
# Load and process symbols
if options.mode_active("code"):
log.write("Loading and processing symbols")
symbols.initialize(all_segments)
# Resolve raster/palette siblings
if options.mode_active("img"):
palettes.initialize(all_segments)
# Scan
log.write("Starting scan")
for segment in all_segments:
typ = segment.type
if segment.type == "bin" and segment.is_name_default():
typ = "unk"
if typ not in seg_sizes:
seg_sizes[typ] = 0
seg_split[typ] = 0
seg_cached[typ] = 0
seg_sizes[typ] += 0 if segment.size is None else segment.size
if segment.should_scan():
# Check cache but don't write anything
if use_cache:
if segment.cache() == cache.get(segment.unique_id()):
continue
if segment.needs_symbols:
segment_symbols, other_symbols = get_segment_symbols(
segment, all_segments
)
segment.given_seg_symbols = segment_symbols
segment.given_ext_symbols = other_symbols
segment.did_run = True
segment.scan(rom_bytes)
processed_segments.append(segment)
seg_split[typ] += 1
log.dot(status=segment.status())
# Split
log.write("Starting split")
for segment in all_segments:
if use_cache:
cached = segment.cache()
if cached == cache.get(segment.unique_id()):
# Cache hit
seg_cached[typ] += 1
continue
else:
# Cache miss; split
cache[segment.unique_id()] = cached
if segment.should_split():
segment.split(rom_bytes)
log.dot(status=segment.status())
if options.mode_active("ld"):
global linker_writer
linker_writer = LinkerWriter()
for segment in all_segments:
linker_writer.add(segment)
linker_writer.save_linker_script()
linker_writer.save_symbol_header()
# write elf_sections.txt - this only lists the generated sections in the elf, not subsections
# that the elf combines into one section
if options.get_create_elf_section_list_auto():
section_list = ""
for segment in all_segments:
section_list += "." + to_cname(segment.name) + "\n"
with open(options.get_elf_section_list_path(), "w", newline="\n") as f:
f.write(section_list)
# Write undefined_funcs_auto.txt
if options.get_create_undefined_funcs_auto():
to_write = [
s
for s in symbols.all_symbols
if s.referenced and not s.defined and not s.dead and s.type == "func"
]
if len(to_write) > 0:
with open(options.get_undefined_funcs_auto_path(), "w", newline="\n") as f:
for symbol in to_write:
f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# write undefined_syms_auto.txt
if options.get_create_undefined_syms_auto():
to_write = [
s
for s in symbols.all_symbols
if s.referenced and not s.defined and not s.dead and not s.type == "func"
]
if len(to_write) > 0:
with open(options.get_undefined_syms_auto_path(), "w", newline="\n") as f:
for symbol in to_write:
f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# print warnings during split
for segment in all_segments:
if len(segment.warnings) > 0:
log.write(
f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:"
)
for warn in segment.warnings:
log.write("warning: " + warn, status="warn")
log.write("") # empty line
# Statistics
do_statistics(seg_sizes, rom_bytes, seg_split, seg_cached)
# Save cache
if cache != {} and use_cache:
if verbose:
log.write("Writing cache")
with open(options.get_cache_path(), "wb") as f4:
pickle.dump(cache, f4)
if __name__ == "__main__":
args = parser.parse_args()
main(
args.config, args.basedir, args.target, args.modes, args.verbose, args.use_cache
)