tp/tools/libdol2asm/linker_map.py

200 lines
6.7 KiB
Python

import os
import re
import rich
from dataclasses import dataclass, field
from typing import Optional, List, Tuple
from collections import defaultdict
from pathlib import Path
from . import settings
from .context import Context
@dataclass
class Symbol:
    """A single symbol entry taken from the linker map."""

    # Address of the symbol. parse() stores it relative to the start of the
    # owning section when the module id is 0, otherwise as read from the map.
    addr: int
    # Size of the symbol, read from the map as a hexadecimal number.
    size: int
    # parse() always creates symbols with a padding of 0.
    padding: int
    # Symbol name as it appears in the map.
    name: str
    # Library the symbol comes from, if the map line names one.
    lib: Optional[str] = None
    # Object file (translation unit) the symbol comes from.
    obj: Optional[str] = None
    is_function: bool = False
    # Free-form description of where the symbol was discovered; parse() fills
    # it with a "linker_map/'<path>'/<ADDR>" string.
    source: Optional[str] = None
    # NOTE(review): presumably filled in by the disassembler later on; this
    # module never assigns it.
    access: Optional["disassembler.Access"] = None
    first_padding: int = 0

    @property
    def start(self) -> int:
        """Return the first address covered by the symbol (alias of ``addr``)."""
        return self.addr

    @property
    def end(self) -> int:
        """Return the address one past the symbol's last byte."""
        return self.addr + self.size
@dataclass
class Section:
    """Section from the linker map.

    ``start``/``end`` describe the half-open address range
    ``[addr, addr + size)`` covered by the section.
    """

    name: str
    addr: int
    size: int
    # Half-open (start, end) address ranges that is_addr_code() treats as code.
    cs: List[Tuple[int, int]] = field(default_factory=list)
    # Symbols that parse() found for this section in the map file.
    symbols: List["Symbol"] = field(default_factory=list)
    objects: List[str] = field(default_factory=list)
    # Position of the section in the address-sorted list of executable sections.
    index: int = 0
    # Raw section contents; kept out of repr() because it can be large.
    data: Optional[bytes] = field(default=None, repr=False)
    # The two attributes below are assigned by parse() after construction.
    # They are declared here so every Section instance has them, but are
    # excluded from repr() and comparisons so that existing behaviour of
    # those operations is unchanged.
    first_padding: int = field(default=0, repr=False, compare=False)
    alignment: Optional[int] = field(default=None, repr=False, compare=False)

    @property
    def start(self):
        """Return the first address of the section (alias of ``addr``)."""
        return self.addr

    @property
    def end(self):
        """Return the address one past the last byte of the section."""
        return self.addr + self.size

    def is_addr_code(self, addr):
        """Return True if *addr* lies inside any of the code segments in ``cs``.

        Each segment is treated as half-open: its first element is included,
        its second element is excluded.
        """
        return any(segment[0] <= addr < segment[1] for segment in self.cs)
# Section names in the order in which they are expected to follow each other.
# parse() consults this list to guess the name of an unnamed section: it looks
# up the name of the section located directly before it and takes the next
# entry.
SECTION_NAME_ORDER = [
    ".text", ".ctors", ".dtors",
    ".rodata", ".data", ".bss",
]
def parse(context: Context, module_id: int, linker_map_path: Path, executable_sections, base_folder=True):
    """Read the linker map file to get symbols and section names.

    Args:
        context: Only used to emit a warning (``context.warning``) when a
            section name has to be guessed.
        module_id: Id of the module the map belongs to. For module ``0``
            symbol addresses are converted to offsets relative to the start
            of their section; for every other module they are stored as read.
        linker_map_path: Path of the linker map text file.
        executable_sections: Mutable list of section objects. Each object must
            provide ``name``, ``size``, ``start``, ``code_segments``,
            ``first_padding``, ``offset_padding``, ``data`` and ``alignment``.
            The list is sorted in place by ``start`` and unnamed sections get
            their ``name`` assigned.
        base_folder: When true, object files that do not belong to a library
            are prefixed with the matching folder from ``settings.FOLDERS``.

    Returns:
        A tuple ``(sections, addrs)``. ``sections`` maps a section name to its
        ``Section`` (including the symbols found in the map) and ``addrs`` maps
        a section name to a ``{symbol address: Symbol}`` dictionary. ``addrs``
        only has entries for sections that contain at least one symbol.

    Raises:
        AssertionError: A non-empty section is still unnamed after matching
            against the map (the fallback is marked TODO in the code).
        KeyError: The map lists symbols for a section (module 0) or under a
            section name that is not among the non-empty executable sections.
        ValueError: A map line with 6 or 7 columns has non-hexadecimal text in
            its size or address column.
    """
    sections = dict()

    # read linker map line-by-line
    with linker_map_path.open('r') as file:
        map_lines = file.readlines()

    # group linker map lines by what section they are in; everything that
    # follows a "<name> section layout" line belongs to <name> until the next
    # such line is found
    groups = defaultdict(list)
    section_name = None
    for text_line in map_lines:
        if "section layout" in text_line:
            section_name = text_line.split(" ")[0]
            groups[section_name] = []
        elif section_name:
            groups[section_name].append(text_line)

    # determine what section names are already known
    already_known_names = {
        section.name for section in executable_sections if section.name}

    # get all the linker map sections which are not already known
    map_names = [name for name in groups if name not in already_known_names]

    # the non-empty sections that still need a name (every truthy name is in
    # `already_known_names`, so these are the sections without a name)
    unnamed_sections = [
        section for section in executable_sections
        if section.name not in already_known_names and section.size > 0]

    # try to match the section names with those from the linker map, i.e.,
    # assign the section a name from the linker map.
    skip = 0
    for i, section in enumerate(unnamed_sections):
        index = i + skip
        if index >= len(map_names):
            continue
        candidate = map_names[index]
        if len(groups[candidate]) == 0 and candidate != ".bss":
            # the map section has no lines at all: use the next map name
            # instead and keep the two lists aligned from here on
            section.name = map_names[index + 1]
            skip += 1
        else:
            section.name = candidate

    # sort and create linker map sections
    executable_sections.sort(key=lambda x: x.start)
    for i, section in enumerate(executable_sections):
        if not section.size > 0:
            continue

        if not section.name:
            # Fallback: derive the name from the previous section. The path is
            # not finished yet and is deliberately stopped by the final assert
            # (note that asserts are removed when Python runs with -O).
            assert i > 1
            last_section = executable_sections[i - 1]
            name_index = SECTION_NAME_ORDER.index(last_section.name)
            section.name = SECTION_NAME_ORDER[name_index + 1]
            context.warning(f"section name could not be determine using the map file, using the next name instead '{section.name}'")
            assert False # TODO

        map_section = Section(
            section.name, section.start, section.size, section.code_segments)
        map_section.first_padding = section.first_padding + section.offset_padding
        map_section.data = section.data
        map_section.index = i
        map_section.alignment = section.alignment
        sections[section.name] = map_section

    # for each section, go through all the lines and try to find symbols
    entry_of_pattern = re.compile(r'( \(entry of [^)]*\))')
    for section_name, section_lines in groups.items():
        for text_line in section_lines:
            # drop the " (entry of ...)" annotation so the column count is stable
            line = entry_of_pattern.sub("", text_line)
            columns = [part.strip() for part in line.strip().split(" ")]
            data = [column for column in columns if len(column) > 0]

            # not symbols
            if len(data) < 6 or len(data) > 7:
                continue

            lib = None
            obj = None
            name = None
            size = int(data[1], base=16)
            addr = int(data[2], base=16)
            if len(data) == 6:
                # 6 columns: name and object file
                name = data[4].split("\\")[-1]
                obj = data[5]
            elif len(data) == 7:
                # 7 columns: name, library and object file
                name = data[4]
                lib = data[5].split("\\")[-1]
                obj = data[6]

            # group libraries together (e.g. JSystem)
            if lib:
                for k, v in settings.LIBRARY_LUT:
                    if lib.startswith(k):
                        lib = v + lib
                        break

            if obj:
                # move translation units into files (e.g. d_a_XXX -> d/a/d_a_XXX)
                if base_folder and not lib:
                    for k, v in settings.FOLDERS:
                        if obj.startswith(k):
                            obj = v + obj
                            break

                obj = obj.replace("\\", "/")

                # if the object file is a path only use the last 3 parts.
                # choice was arbitrary, but it looks OK
                obj = "/".join(obj.split("/")[-3:])

            # if we're the main.dol, then convert the address to relative address
            if module_id == 0:
                addr -= sections[section_name].addr

            # add the symbol
            symbol = Symbol(addr, size, 0, name, lib, obj)
            symbol.source = f"linker_map/'{linker_map_path}'/{addr:08X}"
            sections[section_name].symbols.append(symbol)

    # calculate a dictionary of addresses used by each section, this will later
    # be used to remove access labels generated by the analyze.py module
    addrs = defaultdict(dict)
    for section_name, section in sections.items():
        for symbol in section.symbols:
            addrs[section_name][symbol.addr] = symbol

    return sections, addrs