"""
Demangle symbol names generated by the Metrowerks CodeWarrior compilers.

Based on the demangle script from the splitter script.
"""

import re
from typing import List, Optional, Tuple
from dataclasses import dataclass, field
from pathlib import Path

# Captures the run of lowercase letters that follows a leading "__".
operator_func_re = re.compile(r'^__([a-z]+)')
# Matches "__" directly followed by a digit, "F" or "Q".
find_double_underscore = re.compile(r'__[0-9FQ]')

# Single-character type codes and the type names they are rendered as.
types = {
    'i': 'int',
    'l': 'long',
    's': 'short',
    'c': 'char',
    'f': 'f32',
    'd': 'f64',
    'v': 'void',
    'x': 'long long',
    'b': 'bool',
    'e': '...',
}

# Bit-width suffix used when a signed/unsigned integer type is rendered in
# its short form ("u8", "s32", ...).
short_type_names = {
    'char': '8',
    'short': '16',
    'long': '32',
    'long long': '64',
}

# Operator codes (the text captured by ``operator_func_re``) and the
# operator names they are rendered as.
special_funcs = {
    'eq': 'operator==',
    'as': 'operator=',
    'ne': 'operator!=',
    'dv': 'operator/',
    'pl': 'operator+',
    'mi': 'operator-',
    'ml': 'operator*',
    'adv': 'operator/=',
    'apl': 'operator+=',
    'ami': 'operator-=',
    'amu': 'operator*=',
    'lt': 'operator<',
    'gt': 'operator>',
    'cl': 'operator()',
    'dla': 'operator delete[]',
    'nwa': 'operator new[]',
    'dl': 'operator delete',
    'nw': 'operator new',
}


def is_builtin_type(t):
    """Return True when *t* is one of the rendered names in ``types``."""
    builtin_names = types.values()
    return t in builtin_names


@dataclass
class ClassName:
    """One name component, optionally carrying template arguments."""

    name: str
    template_types: List["Param"] = field(default_factory=list)

    def to_str(self, without_template: bool = False) -> str:
        """Render the name, appending ``<args>`` unless suppressed or empty."""
        if without_template or not self.template_types:
            return self.name
        rendered_args = ", ".join(arg.to_str() for arg in self.template_types)
        return f"{self.name}<{rendered_args}>"


@dataclass
class QualifiedName:
    """A sequence of ``ClassName`` parts rendered joined by ``::``."""

    parts: List[ClassName] = field(default_factory=list)

    def to_str(self) -> str:
        """Render every part and join them with ``::``."""
        return "::".join(part.to_str() for part in self.parts)

    def is_simple(self) -> bool:
        """Return True for exactly one part that has no template arguments."""
        if len(self.parts) != 1:
            return False
        return not self.parts[0].template_types

    @property
    def has_class(self):
        """True when the name consists of more than one part."""
        return len(self.parts) > 1

    @property
    def name(self):
        """The rendered name; same as ``to_str()``."""
        return self.to_str()

    @property
    def last(self):
        """The final part."""
        return self.parts[-1]

    @property
    def first(self):
        """The leading part."""
        return self.parts[0]


@dataclass
class Param:
    """A type described by a name plus qualifier flags."""

    name: QualifiedName = None
    pointer_lvl: int = 0
    is_const: bool = False
    is_ref: bool = False
    is_unsigned: bool = False
    is_signed: bool = False

    def to_str(self) -> str:
        """Render the type.

        A simple name found in ``short_type_names`` that is flagged signed or
        unsigned is rendered in short form (``u``/``s`` plus the bit width);
        anything else is rendered as ``[unsigned ]<name>``.
        """
        pieces = []
        if self.is_const:
            pieces.append('const ')

        use_short_form = (
            self.name
            and self.name.is_simple()
            and (self.name.name in short_type_names)
            and (self.is_signed or self.is_unsigned)
        )
        if use_short_form:
            pieces.append('u' if self.is_unsigned else 's')
            pieces.append(short_type_names[self.name.name])
        else:
            if self.is_unsigned:
                pieces.append('unsigned ')
            if self.name:
                pieces.append(self.name.to_str())

        pieces.append('*' * self.pointer_lvl)
        if self.is_ref:
            pieces.append('&')
        return ''.join(pieces)


@dataclass
class ReferenceParam:
    """Wraps a type and renders it followed by ``&``."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Render the wrapped type with a trailing ``&``."""
        return self.base_type.to_str() + "&"


@dataclass
class ConstParam:
    """Wraps a type and renders it followed by `` const``."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Render the wrapped type with a trailing `` const``."""
        return self.base_type.to_str() + " const"


@dataclass
class PointerParam:
    """Wraps a type and renders it followed by ``*``."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Render the wrapped type with a trailing ``*``."""
        return self.base_type.to_str() + "*"


@dataclass
class FuncParam:
    """A function type: return type, declarator, parameters, optional class."""

    ret_type: Optional[Param] = None
    inner_type: Optional[Param] = None
    params: List[Param] = field(default_factory=list)
    class_name: QualifiedName = None

    def to_str(self) -> str:
        """Render as ``<ret> (<Class::><inner>)(<params>)``.

        A missing return type is rendered as ``void``.
        """
        if self.ret_type is None:
            return_text = 'void'
        else:
            return_text = self.ret_type.to_str()

        owner_prefix = f"{self.class_name.to_str()}::" if self.class_name else ""
        declarator = self.inner_type.to_str()
        arguments = ', '.join(param.to_str() for param in self.params)
        return f'{return_text} ({owner_prefix}{declarator})({arguments})'


@dataclass
class ArrayParam:
    """An array type: element type, declarator and one size per dimension."""

    base_type: Optional[Param] = None
    inner_type: Optional[Param] = None
    sizes: List[int] = field(default_factory=list)

    def to_str(self) -> str:
        """Render as ``<base> (<inner>)[n0][n1]...``."""
        element_text = self.base_type.to_str()
        declarator = self.inner_type.to_str()
        dimensions = ''.join(f'[{size}]' for size in self.sizes)
        return f"{element_text} ({declarator}){dimensions}"


@dataclass
class IntegerParam:
    """An integer value used as a template argument."""

    value: int

    def to_str(self) -> str:
        """Render the stored value as text."""
        return format(self.value)


class ParseError(Exception):
    """Raised when a mangled name cannot be parsed."""
class ParseCtx:
    """Parser state for demangling a single CodeWarrior symbol name.

    Call ``demangle()`` for functions or ``demangle_variable()`` for
    variables, then ``to_str()`` to render the result.  Malformed input is
    reported with ``ParseError``.

    Attributes set by a successful run:
        class_name: ``QualifiedName`` of the owning class, or ``None``.
        func_name: ``QualifiedName`` of the function/variable itself.
        full_name: ``class_name`` parts followed by ``func_name`` parts.
        demangled: list of parsed parameter types (functions only).
        is_const: True when the class name was followed by ``C``.
        special_func_name: the operator/ctor/dtor code, when one was found.
    """

    def __init__(self, mangled: str):
        self.mangled = mangled
        self.index = 0
        self.demangled = []
        self.cur_type = None
        self.class_name = None
        self.is_const = False
        self.func_name = None
        self.special_func_name = None
        self.not_function = False
        # Defined up front so the attribute exists even when demangling
        # returns early because the symbol could not be split.
        self.full_name = None

    # ------------------------------------------------------------------
    # entry points
    # ------------------------------------------------------------------

    def demangle_variable(self):
        """Demangle a variable symbol of the form ``name__<class>``.

        Does nothing when the symbol contains no usable ``__`` separator.
        """
        if not self._split_symbol():
            return
        self.not_function = True
        self.demangle_first_class_variable()
        self._finish_names()

    def demangle(self):
        """Demangle a function symbol of the form ``name__<class>F<params>``.

        Does nothing when the symbol contains no usable ``__`` separator.

        Raises:
            ParseError: if the part after the separator is malformed.
        """
        # this split is still not accurate, but good enough for most cases
        if not self._split_symbol():
            return

        if self.func_name.startswith('__'):
            match = operator_func_re.match(self.func_name)
            if match:
                code = match.group(1)
                if code in special_funcs:
                    self.special_func_name = code
                    self.func_name = special_funcs[code]
                elif code == 'ct':
                    self.special_func_name = code
                    self.func_name = '.ctor'
                elif code == 'dt':
                    self.special_func_name = code
                    self.func_name = '.dtor'

        self.demangle_first_class()
        while self.index < len(self.mangled):
            self.demangled.append(self.demangle_next_type())

        if self.func_name in ('.ctor', '.dtor'):
            # Constructors/destructors are named after their class, so one
            # must have been parsed.
            if self.class_name is None:
                raise ParseError(
                    'constructor/destructor without a class name')
            owner = self.class_name.last.to_str(without_template=True)
            self.func_name = owner if self.func_name == '.ctor' else '~' + owner

        self._finish_names()

    def to_str(self) -> str:
        """Render the demangled symbol; ``''`` when nothing was demangled."""
        if self.func_name is None:
            return ''
        if self.not_function:
            if self.class_name is None:
                return self.func_name.to_str()
            return self.class_name.to_str() + '::' + self.func_name.to_str()

        signature = (
            self.func_name.to_str()
            + '('
            + ', '.join([x.to_str() for x in self.demangled])
            + ')'
        )
        if self.class_name is None:
            return signature
        return (
            self.class_name.to_str()
            + '::'
            + signature
            + (' const' if self.is_const else '')
        )

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _split_symbol(self) -> bool:
        """Split ``self.mangled`` at the last ``find_double_underscore`` match.

        On success ``func_name`` receives the text before the separator and
        ``mangled`` the text after the two underscores.  Returns False (and
        changes nothing) when there is no match or the last match is at
        position 0.
        """
        last_match = None
        for last_match in find_double_underscore.finditer(self.mangled):
            pass
        if last_match is None or last_match.start() == 0:
            return False
        split_pos = last_match.start()
        self.func_name = self.mangled[:split_pos]
        self.mangled = self.mangled[split_pos + 2:]
        return True

    def _finish_names(self) -> None:
        """Turn the textual ``func_name`` into a ``QualifiedName`` and build
        ``full_name`` from the class parts plus the function parts."""
        self.func_name = QualifiedName([
            self.demangle_str_to_class_name(x)
            for x in self.func_name.split("::")
        ])
        parts = []
        if self.class_name:
            parts += self.class_name.parts
        parts += self.func_name.parts
        self.full_name = QualifiedName(parts)

    def _peek(self) -> str:
        """Like ``peek_next_char`` but returns ``''`` at end of input, so the
        result can always be used with ``str`` methods."""
        if self.index >= len(self.mangled):
            return ''
        return self.mangled[self.index]

    def _expect(self, expected: str, message: str) -> None:
        """Consume *expected* or raise ``ParseError(message)``."""
        if self._peek() != expected:
            raise ParseError(message)
        self.index += 1

    def _read_template_integer(self) -> Optional["IntegerParam"]:
        """Try to read an optionally negative integer template argument.

        The digits only count as an integer when they are directly followed
        by ``>`` or ``,``; otherwise (for example a length-prefixed class
        name) the position is restored and ``None`` is returned.
        """
        start = self.index
        negative = self._peek() == '-'
        if negative:
            self.index += 1
        if self._peek().isdecimal():
            value = self.read_next_int()
            if self._peek() in ('>', ','):
                return IntegerParam(-value if negative else value)
        self.index = start
        return None

    def _demangle_member_function_pointer(self, cur_type, prev_types):
        """Parse ``M<class>F<params>_<ret>`` (pointer to member function).

        The leading ``M`` has not been consumed yet when this is called.

        Example: the symbol
          execCommand__12JASSeqParserFP8JASTrackM12JASSeqParserFPCvPvP8JASTrackPUl_lUlPUl
        is what the compiler produces for
          void execCommand(JASTrack*, s32 (JASSeqParser::*)(JASTrack*, u32*), u32, u32*);
        The ``PCvPv`` (``void const*, void*``) right after the ``F`` is added
        by the compiler for a reason that is not understood; writing those
        two parameters explicitly in the source would yield
        ``...FPCvPvPCvPvP8JASTrack...`` instead.  The prefix is therefore
        required and skipped rather than reported as parameters.
        """
        self.index += 1
        if self._peek() == 'Q':
            self.index += 1
            class_name = self.demangle_qualified_name()
        else:
            class_name = QualifiedName([self.demangle_class()])

        self._expect('F', "expected character 'F' after class name")
        # TODO: not very nice
        for expected in 'PCvPv':
            self._expect(
                expected, f"expected character '{expected}' after class name")

        prev_types.append('P')
        func = self.demangle_function(
            self.apply_prev_types(cur_type, prev_types))
        func.class_name = class_name
        return func

    # ------------------------------------------------------------------
    # grammar
    # ------------------------------------------------------------------

    def demangle_first_class(self):
        """Parse the optional class name, optional ``C`` and mandatory ``F``
        that follow the ``__`` separator of a function symbol.

        Raises:
            ParseError: if the ``F`` marker is missing.
        """
        next_char = self._peek()
        if next_char.isdecimal():
            self.class_name = QualifiedName([self.demangle_class()])
            missing_f = 'next char should be F! (decimal)'
        elif next_char == 'Q':
            self.index += 1
            self.class_name = self.demangle_qualified_name()
            missing_f = 'next char should be F! (Q)'
        else:
            self._expect('F', 'next char should be F!')
            return

        if self._peek() == 'C':
            self.is_const = True
            self.index += 1
        self._expect('F', missing_f)

    def demangle_first_class_variable(self):
        """Parse the optional class name that follows the ``__`` separator of
        a variable symbol; ``class_name`` stays ``None`` when there is none."""
        next_char = self._peek()
        if next_char.isdecimal():
            self.class_name = QualifiedName([self.demangle_class()])
        elif next_char == 'Q':
            self.index += 1
            self.class_name = self.demangle_qualified_name()

    def apply_prev_types(self, cur_type, types):
        """Wrap *cur_type* in the modifiers listed in *types*.

        *types* holds the modifier characters (``C``, ``R``, ``P``) in the
        order they were read; they are applied in reverse order.

        Raises:
            ParseError: on a modifier character other than C, R or P.
        """
        for type_char in reversed(types):
            if type_char == 'C':
                cur_type = ConstParam(cur_type)
            elif type_char == 'R':
                cur_type = ReferenceParam(cur_type)
            elif type_char == 'P':
                cur_type = PointerParam(cur_type)
            else:
                raise ParseError(f'unknown type modifier {type_char}')
        return cur_type

    def demangle_next_type(self):
        """Parse one type at the current position and return its model.

        Returns a ``Param`` (possibly wrapped in ``ConstParam`` /
        ``ReferenceParam`` / ``PointerParam``), a ``FuncParam`` or an
        ``ArrayParam``.

        Raises:
            ParseError: on an unexpected character or end of input.
        """
        prev_types = []
        cur_type = Param()
        while True:
            cur_char = self._peek()
            if not cur_char:
                raise ParseError(
                    'unexpected end of mangled name while parsing a type')

            if cur_char.isdecimal():
                cur_type.name = QualifiedName([self.demangle_class()])
                return self.apply_prev_types(cur_type, prev_types)
            elif cur_char in types:
                cur_type.name = QualifiedName([self.demangle_prim_type()])
                return self.apply_prev_types(cur_type, prev_types)
            elif cur_char == 'U':
                cur_type.is_unsigned = True
                self.index += 1
            elif cur_char == 'S':
                cur_type.is_signed = True
                self.index += 1
            elif cur_char in ('C', 'P', 'R'):
                prev_types.append(cur_char)
                self.index += 1
            elif cur_char == 'F':
                self.index += 1
                return self.demangle_function(
                    self.apply_prev_types(cur_type, prev_types))
            elif cur_char == 'Q':
                self.index += 1
                cur_type.name = self.demangle_qualified_name()
                return self.apply_prev_types(cur_type, prev_types)
            elif cur_char == 'A':
                # The 'A' itself is consumed by demangle_array.
                return self.demangle_array(
                    self.apply_prev_types(cur_type, prev_types))
            elif cur_char == 'M':
                return self._demangle_member_function_pointer(
                    cur_type, prev_types)
            else:
                raise ParseError(f'unexpected character {cur_char}')

    def demangle_array(self, parent_type) -> "ArrayParam":
        """Parse ``A<n>_`` (repeated once per dimension) and the element type.

        *parent_type* becomes the ``inner_type`` (declarator) of the result.

        Raises:
            ParseError: if a size is missing or not followed by ``_``.
        """
        sizes = []
        while self._peek() == 'A':
            self.index += 1
            sizes.append(self.read_next_int())
            self._expect('_', "Need to have '_' after Array size!")

        element_type = self.demangle_next_type()
        array = ArrayParam()
        array.base_type = element_type
        array.inner_type = parent_type
        array.sizes = sizes
        return array

    def demangle_function(self, parent_type) -> "FuncParam":
        """Parse parameter types up to ``_`` and then the return type.

        The leading ``F`` must already be consumed.  *parent_type* becomes
        the ``inner_type`` (declarator) of the result.
        """
        func_param = FuncParam()
        func_param.inner_type = parent_type
        while True:
            if self._peek() == '_':
                self.index += 1
                func_param.ret_type = self.demangle_next_type()
                return func_param
            func_param.params.append(self.demangle_next_type())

    def demangle_qualified_name(self) -> "QualifiedName":
        """Parse ``<count><class>...`` (the ``Q`` is already consumed).

        The part count is a single character.

        Raises:
            ParseError: if the count is missing or not a digit.
        """
        count_char = self._peek()
        if not count_char.isdecimal():
            raise ParseError("expected part count after 'Q'")
        self.index += 1
        parts = [self.demangle_class() for _ in range(int(count_char))]
        return QualifiedName(parts)

    def read_next_int(self) -> int:
        """Consume a run of decimal digits and return its value.

        Raises:
            ParseError: if there is no digit at the current position.
        """
        start = self.index
        end = start
        while end < len(self.mangled) and self.mangled[end].isdecimal():
            end += 1
        if end == start:
            raise ParseError('expected a decimal number')
        self.index = end
        return int(self.mangled[start:end])

    def demangle_template_args(self):
        """Parse ``<arg,arg,...>`` and return the list of arguments.

        Each argument is either an ``IntegerParam`` (optionally negative) or
        whatever ``demangle_next_type`` returns.

        Raises:
            ParseError: if the list is not opened by ``<`` or an argument is
                not followed by ``>`` or ``,``.
        """
        if self._peek() != '<':
            raise ParseError("expected character '<'")
        self.index += 1

        args = []
        while True:
            arg = self._read_template_integer()
            if arg is None:
                arg = self.demangle_next_type()
            args.append(arg)

            separator = self._peek()
            if separator == '>':
                self.index += 1
                return args
            if separator != ',':
                raise ParseError("expected character '>' or ','")
            self.index += 1

    def demangle_template(self, name) -> Tuple[str, List["Param"]]:
        """Split ``prefix<args>`` into ``(prefix, parsed_args)``.

        Names that contain no ``<`` or do not end with ``>`` are returned
        unchanged with an empty argument list.
        """
        if "<" not in name or not name.endswith(">"):
            return name, []
        open_pos = name.find("<")
        # The arguments are parsed by an independent context so this
        # context's position is untouched.
        nested = ParseCtx(name[open_pos:])
        return name[:open_pos], nested.demangle_template_args()

    def demangle_str_to_class_name(self, text) -> "ClassName":
        """Build a ``ClassName`` from *text*, parsing any template suffix."""
        return ClassName(*self.demangle_template(text))

    def demangle_class(self) -> "ClassName":
        """Parse a length-prefixed class name (``<len><name>``).

        Raises:
            ParseError: if the current character is not a digit.
        """
        if not self._peek().isdecimal():
            raise ParseError('class mangling must start with number')
        class_len = self.read_next_int()
        full_class_name = self.mangled[self.index:][:class_len]
        class_name, argument_types = self.demangle_template(full_class_name)
        self.index += class_len
        return ClassName(class_name, argument_types)

    def demangle_prim_type(self) -> "ClassName":
        """Consume one type code and return its name from ``types``."""
        return ClassName(types[self.consume_next_char()], [])

    def consume_next_char(self) -> str:
        """Return the current character and advance past it.

        Raises:
            IndexError: at end of input.
        """
        next_char = self.mangled[self.index]
        self.index += 1
        return next_char

    def peek_next_char(self) -> str:
        """Return the current character without advancing, or ``None`` at
        end of input."""
        if self.index >= len(self.mangled):
            return None
        return self.mangled[self.index]