mm/tools/extract_yars.py

136 lines
3.6 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2023 ZeldaRET
# SPDX-License-Identifier: MIT
# yar (Yaz0 ARchive) decompressor
#
# This program decompresses every raw yar binary file listed in
# `tools/filelists/{version}/archives.csv` to a corresponding
# `extracted/{version}/baserom/{file}.unarchive` raw file.
#
# It works by decompressing every Yaz0 block and appending them one by one into
# a new raw binary file so it can be processed normally by other tools.
from __future__ import annotations
import argparse
import crunch64
import dataclasses
from pathlib import Path
import struct
from version import version_config
# Whether to print an XML description of each extracted archive to stdout.
# Defaults to off; main() overwrites it with the value of the `--xml` flag.
PRINT_XML = False
@dataclasses.dataclass
class ArchiveMeta:
    """Byte range of one compressed entry inside a yar archive.

    The archive bytes in `[start, end)` are the slice that gets handed to the
    Yaz0 decompressor.
    """

    # Offset of the entry's first byte, measured from the start of the archive.
    start: int
    # Offset one past the entry's last byte (used as an exclusive slice bound).
    end: int
def readFileAsBytearray(filepath: Path) -> bytearray:
    """Load the whole file at `filepath` into memory as a mutable byte buffer."""
    return bytearray(filepath.read_bytes())
def readBytesAsWord(array_of_bytes: bytearray, offset: int) -> int:
    """Return the big-endian unsigned 32-bit integer stored at `offset`.

    Raises `struct.error` when fewer than four bytes are available at `offset`.
    """
    (word,) = struct.unpack_from(">I", array_of_bytes, offset)
    return word
def getOffsetsList(archiveBytes: bytearray) -> list[ArchiveMeta]:
    """Build the byte range of every compressed entry listed in the archive header.

    The header is read as consecutive big-endian 32-bit words. Word 0 is the
    offset at which the first entry begins; every later word is an offset
    relative to that point. Each entry spans from the previous word's value
    (0 for the first entry) up to its own word's value.

    Returns one `ArchiveMeta` per entry, in header order, with `start`/`end`
    expressed as absolute offsets into `archiveBytes`.
    """
    dataStart = readBytesAsWord(archiveBytes, 0)

    # The first entry begins exactly at `dataStart`, so word 1 alone fixes its end.
    relativeEnd = readBytesAsWord(archiveBytes, 4)
    ranges: list[ArchiveMeta] = [ArchiveMeta(dataStart, dataStart + relativeEnd)]

    # Walk the remaining header words (those located before `dataStart`); each
    # entry starts where the previous one ended.
    for tableOffset in range(8, dataStart, 4):
        relativeStart = relativeEnd
        relativeEnd = readBytesAsWord(archiveBytes, tableOffset)
        ranges.append(ArchiveMeta(dataStart + relativeStart, dataStart + relativeEnd))

    return ranges
def extractArchive(archivePath: Path, outPath: Path):
    """Decompress every Yaz0 entry of `archivePath` and concatenate them into `outPath`.

    An archive whose first header word is zero is treated as empty: nothing is
    printed and `outPath` is not created. When the module-level `PRINT_XML`
    flag is set, an XML listing with one `<Blob>` per decompressed entry (name,
    size and offset within the output file) is printed to stdout.
    """
    rawArchive = readFileAsBytearray(archivePath)

    isEmpty = readBytesAsWord(rawArchive, 0) == 0
    if isEmpty:
        # Empty file, ignore it
        return

    print(f"Extracting '{archivePath}' -> '{outPath}'")

    entries = getOffsetsList(rawArchive)
    emitXml = PRINT_XML

    if emitXml:
        print("<Root>")
        print(f' <File Name="{outPath.stem}">')

    # Running size of the output file, i.e. where the next blob will land.
    writtenSoFar = 0
    with outPath.open("wb") as out:
        for entry in entries:
            compressed = rawArchive[entry.start : entry.end]
            blob = crunch64.yaz0.decompress(compressed)
            blobSize = len(blob)
            out.write(blob)

            if emitXml:
                print(
                    f' <Blob Name="{archivePath.stem}_Blob_{writtenSoFar:06X}" Size="0x{blobSize:04X}" Offset="0x{writtenSoFar:X}" />'
                )

            writtenSoFar += blobSize

    if emitXml:
        print(" </File>")
        print("</Root>")
def main():
    """Parse the command line and extract every archive of the selected version.

    Each archive name listed in the version config is resolved inside the
    given segments directory and extracted next to itself, with `.unarchive`
    appended to its path. The `--xml` flag is stored in the module-level
    `PRINT_XML` before extraction starts.
    """
    global PRINT_XML

    parser = argparse.ArgumentParser(description="MM archives extractor")
    parser.add_argument(
        "baserom_segments_dir",
        type=Path,
        help="Directory of uncompressed ROM segments",
    )
    parser.add_argument("-v", "--version", help="version to process", default="n64-us")
    parser.add_argument("--xml", help="Generate xml to stdout", action="store_true")
    args = parser.parse_args()

    segmentsDir: Path = args.baserom_segments_dir
    selectedVersion: str = args.version
    PRINT_XML = args.xml

    config = version_config.load_version_config(selectedVersion)
    for name in config.archives:
        source = segmentsDir / name
        destination = Path(f"{source}.unarchive")
        extractArchive(source, destination)
# Run the extractor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()