tp/tools/libdol2asm/linker_map.py

import os
import re
import rich

from dataclasses import dataclass, field
from typing import Optional, List, Tuple
from collections import defaultdict
from pathlib import Path

from . import settings
from .context import Context

@dataclass
class Symbol:
    """Symbol from the linker map.

    A symbol occupies the half-open address range ``[start, end)``, where
    ``start`` is ``addr`` and ``end`` is ``addr + size``.
    """
    # address of the symbol (parse() stores a section-relative address
    # for module 0 and the raw linker map address otherwise)
    addr: int
    # size of the symbol in bytes, as read from the linker map
    size: int
    # NOTE(review): presumably the number of padding bytes after the symbol;
    # parse() always passes 0 -- confirm against the other users
    padding: int
    # symbol name as written in the linker map
    name: str
    # library the symbol comes from, if the linker map line names one
    lib: Optional[str] = None
    # object file (translation unit) the symbol comes from
    obj: Optional[str] = None
    is_function: bool = False
    # free-form description of where this symbol was discovered
    source: Optional[str] = None
    # not assigned anywhere in this module; filled in by other code
    access: Optional["disassembler.Access"] = None
    first_padding: int = 0

    @property
    def start(self):
        """First address covered by the symbol (same as ``addr``)."""
        return self.addr

    @property
    def end(self):
        """Address one past the last byte of the symbol."""
        return self.addr + self.size


@dataclass
class Section:
    """Section from the linker map.

    A section covers the half-open address range ``[start, end)``, where
    ``start`` is ``addr`` and ``end`` is ``addr + size``.
    """
    name: str
    addr: int
    size: int
    # code segments as (begin, end) pairs; ``end`` is exclusive
    cs: List[Tuple[int, int]] = field(default_factory=list)
    # symbols that parse() found for this section in the linker map
    symbols: List[Symbol] = field(default_factory=list)
    objects: List[str] = field(default_factory=list)
    # position of the section in the address-sorted executable section list
    index: int = 0
    # raw section contents; excluded from repr()
    data: Optional[bytes] = field(default=None, repr=False)
    # The two fields below are assigned by parse() after construction. They
    # are declared here so that they are real dataclass fields instead of
    # attributes that only exist on instances created by parse().
    first_padding: int = 0
    # NOTE(review): copied verbatim from the executable section by parse();
    # presumably an integer alignment -- confirm
    alignment: Optional[int] = None

    @property
    def start(self):
        """First address covered by the section (same as ``addr``)."""
        return self.addr

    @property
    def end(self):
        """Address one past the last byte of the section."""
        return self.addr + self.size

    def is_addr_code(self, addr):
        """Return True if ``addr`` lies inside one of the code segments."""
        return any(segment[0] <= addr < segment[1] for segment in self.cs)


# Ordered list of section names. parse() uses it as a fallback: a section
# whose name could not be taken from the linker map is given the name that
# follows the previous section's name in this list.
SECTION_NAME_ORDER = [
    ".text",
    ".ctors",
    ".dtors",
    ".rodata",
    ".data",
    ".bss",
]


def parse(context: Context, module_id: int, linker_map_path: Path, executable_sections, base_folder=True):
    """Read the linker map file to get symbols and section names.

    The map file is split into groups of lines, one group per
    "<name> section layout" header. Executable sections that have no name
    yet are given names from those headers, and every line that looks like
    a symbol entry is turned into a ``Symbol`` of the matching section.

    Args:
        context: only used to report a warning through ``context.warning``.
        module_id: when 0, symbol addresses are stored relative to the
            address of their section; otherwise they are kept as written
            in the map file.
        linker_map_path: path of the linker map to read.
        executable_sections: list of section objects. The attributes
            ``name``, ``size``, ``start``, ``code_segments``,
            ``first_padding``, ``offset_padding``, ``data`` and
            ``alignment`` are read. The list is sorted in place by
            ``start`` and missing ``name`` attributes are filled in.
        base_folder: when true, object names of symbols without a library
            are prefixed using ``settings.FOLDERS``.

    Returns:
        A tuple ``(sections, addrs)``. ``sections`` maps a section name to
        its ``Section``; ``addrs`` maps a section name to a dictionary from
        symbol address to ``Symbol``.

    Raises:
        KeyError: if the map file lists a symbol under a section header
            that has no executable section of non-zero size.
        AssertionError: if a section name had to be guessed (see the TODO
            in the body).
    """

    # group linker map lines by what section they are in; lines in front of
    # the first "section layout" header are ignored
    groups = defaultdict(list)
    current_group = None
    with linker_map_path.open('r') as file:
        for text_line in file:
            if "section layout" in text_line:
                current_group = text_line.split(" ")[0]
                groups[current_group] = []
            elif current_group:
                groups[current_group].append(text_line)

    # determine what section names are already known
    already_known_names = {
        section.name for section in executable_sections if section.name}

    # get all the linker map sections which are not already known
    map_names = [name for name in groups if name not in already_known_names]

    # sections which have a size but no known name yet
    unnamed_sections = [
        section for section in executable_sections
        if section.name not in already_known_names and section.size > 0]

    # try to match the section names with those from the linker map, i.e.,
    # assign the section a name from the linker map.
    skip = 0
    for i, section in enumerate(unnamed_sections):
        index = i + skip
        if index >= len(map_names):
            continue

        candidate = map_names[index]
        if groups[candidate] or candidate == ".bss":
            section.name = candidate
        elif index + 1 < len(map_names):
            # the map section is empty (and not .bss): use the one after it
            section.name = map_names[index + 1]
            skip += 1
        # else: the empty map section was the last one. The section stays
        # unnamed and is handled by the fallback below instead of failing
        # here with an IndexError.

    # sort and create linker map sections
    sections = dict()
    executable_sections.sort(key=lambda x: x.start)
    for i, section in enumerate(executable_sections):
        if section.size <= 0:
            continue

        if not section.name:
            # NOTE(review): only executable_sections[i - 1] is read, yet
            # i == 1 is rejected as well -- confirm that this is intended
            assert i > 1
            last_section = executable_sections[i - 1]
            name_index = SECTION_NAME_ORDER.index(last_section.name)
            section.name = SECTION_NAME_ORDER[name_index + 1]
            context.warning(f"section name could not be determine using the map file, using the next name instead '{section.name}'")
            assert False  # TODO

        map_section = Section(
            section.name, section.start, section.size, section.code_segments)
        map_section.first_padding = section.first_padding + section.offset_padding
        map_section.data = section.data
        map_section.index = i
        map_section.alignment = section.alignment
        sections[section.name] = map_section

    # for each section, go through all the lines and try to find symbols
    entry_of_pattern = re.compile(r'( \(entry of [^)]*\))')
    for group_name, group_lines in groups.items():
        for text_line in group_lines:
            line = entry_of_pattern.sub("", text_line)
            data = [x.strip() for x in line.strip().split(" ")]
            data = [x for x in data if len(x) > 0]

            # symbol lines have 6 fields, or 7 when a library is present
            if len(data) not in (6, 7):
                continue

            size = int(data[1], base=16)
            addr = int(data[2], base=16)
            lib = None
            if len(data) == 6:
                name = data[4].split("\\")[-1]
                obj = data[5]
            else:
                name = data[4]
                lib = data[5].split("\\")[-1]
                obj = data[6]

            # group libraries together (e.g. JSystem)
            if lib:
                for prefix, folder in settings.LIBRARY_LUT:
                    if lib.startswith(prefix):
                        lib = folder + lib
                        break

            if obj:
                # move translation units into files (e.g. d_a_XXX -> d/a/d_a_XXX)
                if base_folder and not lib:
                    for prefix, folder in settings.FOLDERS:
                        if obj.startswith(prefix):
                            obj = folder + obj
                            break
                obj = obj.replace("\\", "/")
                if "/" in obj:
                    # if the object file is a path only use the last 3 parts.
                    # choice was arbitrary, but it looks OK
                    obj = "/".join(obj.split("/")[-3:])

            # if we're the main.dol, then convert the address to relative address
            if module_id == 0:
                addr -= sections[group_name].addr

            # add the symbol
            symbol = Symbol(addr, size, 0, name, lib, obj)
            symbol.source = f"linker_map/'{linker_map_path}'/{addr:08X}"
            sections[group_name].symbols.append(symbol)

    # calculate a dictionary of addresses used by each section, this will later
    # be used to remove access labels generated by the analyze.py module
    addrs = defaultdict(dict)
    for name, map_section in sections.items():
        for symbol in map_section.symbols:
            addrs[name][symbol.addr] = symbol

    return sections, addrs