tp/tools/libdemangle/demangle.py

548 lines
18 KiB
Python

"""
Library for demangling names generated by Metrowerks CodeWarrior compilers.
Based on the demangle script from the splitter script.
"""
import re
from typing import List, Optional, Tuple
from dataclasses import dataclass, field
from pathlib import Path
# Captures the lowercase code that follows a leading "__" in a function name
# (for example "ct" in "__ct").
operator_func_re = re.compile(r'^__([a-z]+)')

# Finds "__" followed by a digit, 'F' or 'Q'; the last such hit is used as the
# boundary between the symbol name and the rest of the mangled string.
find_double_underscore = re.compile(r'__[0-9FQ]')

# Single-letter mangled codes -> primitive type names.
types = {
    'i': 'int',
    'l': 'long',
    's': 'short',
    'c': 'char',
    'f': 'f32',
    'd': 'f64',
    'v': 'void',
    'x': 'long long',
    'b': 'bool',
    'e': '...',
}

# Integer type name -> bit-width suffix used for the short "u8"/"s16" spelling.
short_type_names = {
    'char': '8',
    'short': '16',
    'long': '32',
    'long long': '64',
}

# Operator codes (the part after the leading "__") -> C++ operator spelling.
special_funcs = {
    # comparison and assignment
    'eq': 'operator==',
    'as': 'operator=',
    'ne': 'operator!=',
    # arithmetic
    'dv': 'operator/',
    'pl': 'operator+',
    'mi': 'operator-',
    'ml': 'operator*',
    # compound assignment
    'adv': 'operator/=',
    'apl': 'operator+=',
    'ami': 'operator-=',
    'amu': 'operator*=',
    # relational and call
    'lt': 'operator<',
    'gt': 'operator>',
    'cl': 'operator()',
    # allocation
    'dla': 'operator delete[]',
    'nwa': 'operator new[]',
    'dl': 'operator delete',
    'nw': 'operator new',
}
def is_builtin_type(t):
    """Return True when ``t`` is one of the primitive type names in ``types``."""
    builtin_names = types.values()
    return t in builtin_names
@dataclass
class ClassName:
    """One name component, optionally carrying template arguments."""

    name: str
    template_types: List["Param"] = field(default_factory=list)

    def to_str(self, without_template: bool = False) -> str:
        """Render the name, appending ``<arg, ...>`` unless suppressed.

        The template suffix is omitted when there are no template arguments
        or when ``without_template`` is true.
        """
        if without_template or not self.template_types:
            return self.name
        rendered_args = ", ".join(arg.to_str() for arg in self.template_types)
        return f"{self.name}<{rendered_args}>"
@dataclass
class QualifiedName:
    """A name made of ``ClassName`` parts that are joined with ``::``."""

    parts: List[ClassName] = field(default_factory=list)

    def to_str(self) -> str:
        """Render every part and join them with ``::``."""
        rendered = (part.to_str() for part in self.parts)
        return "::".join(rendered)

    def is_simple(self) -> bool:
        """Return True for a single part that has no template arguments."""
        if len(self.parts) != 1:
            return False
        return not self.parts[0].template_types

    @property
    def has_class(self):
        # More than one part means something precedes the final component.
        return len(self.parts) >= 2

    @property
    def name(self):
        # Alias for the fully rendered string.
        return self.to_str()

    @property
    def last(self):
        # Final component; raises IndexError when ``parts`` is empty.
        return self.parts[-1]

    @property
    def first(self):
        # Leading component; raises IndexError when ``parts`` is empty.
        return self.parts[0]
@dataclass
class Param:
    """A named type together with its const/sign/pointer/reference flags."""

    name: Optional[QualifiedName] = None
    pointer_lvl: int = 0
    is_const: bool = False
    is_ref: bool = False
    is_unsigned: bool = False
    is_signed: bool = False

    def to_str(self) -> str:
        """Render the type as text.

        Integer types listed in ``short_type_names`` that carry an explicit
        sign flag use the short spelling (``u8``, ``s32``, ...); anything else
        is rendered as an optional ``unsigned `` prefix plus the type name.
        ``pointer_lvl`` asterisks and an optional ``&`` are appended.
        """
        pieces = []
        if self.is_const:
            pieces.append('const ')

        has_sign_flag = self.is_signed or self.is_unsigned
        use_short_form = (
            self.name
            and self.name.is_simple()
            and (self.name.name in short_type_names)
            and has_sign_flag
        )
        if use_short_form:
            pieces.append('u' if self.is_unsigned else 's')
            pieces.append(short_type_names[self.name.name])
        else:
            if self.is_unsigned:
                pieces.append('unsigned ')
            if self.name:
                pieces.append(self.name.to_str())

        pieces.append('*' * self.pointer_lvl)
        if self.is_ref:
            pieces.append('&')
        return ''.join(pieces)
@dataclass
class ReferenceParam:
    """Wraps another type and renders it as a reference."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Return the wrapped type's text followed by ``&``."""
        return "{}&".format(self.base_type.to_str())
@dataclass
class ConstParam:
    """Wraps another type and renders it as const-qualified."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Return the wrapped type's text followed by `` const``."""
        return "{} const".format(self.base_type.to_str())
@dataclass
class PointerParam:
    """Wraps another type and renders it as a pointer."""

    base_type: Optional[Param] = None

    def to_str(self) -> str:
        """Return the wrapped type's text followed by ``*``."""
        return "{}*".format(self.base_type.to_str())
@dataclass
class FuncParam:
    """A function type: return type, declarator, parameters, optional class."""

    ret_type: Optional[Param] = None
    inner_type: Optional[Param] = None
    params: List[Param] = field(default_factory=list)
    class_name: Optional[QualifiedName] = None

    def to_str(self) -> str:
        """Render as ``<ret> (<Class::><inner>)(<params>)``.

        A missing return type is rendered as ``void``; the ``Class::`` prefix
        is only emitted when ``class_name`` is set.
        """
        return_text = 'void' if self.ret_type is None else self.ret_type.to_str()
        scope = f"{self.class_name.to_str()}::" if self.class_name else ""
        declarator = self.inner_type.to_str()
        arguments = ', '.join(param.to_str() for param in self.params)
        return f'{return_text} ({scope}{declarator})({arguments})'
@dataclass
class ArrayParam:
    """An array type: element type, declarator and one size per dimension."""

    base_type: Optional[Param] = None
    inner_type: Optional[Param] = None
    sizes: List[int] = field(default_factory=list)

    def to_str(self) -> str:
        """Render as ``<element> (<inner>)[n][m]...``."""
        element = self.base_type.to_str()
        declarator = self.inner_type.to_str()
        dimensions = ''.join(f'[{size}]' for size in self.sizes)
        return element + " " + f"({declarator})" + dimensions
@dataclass
class IntegerParam:
    """An integer literal used as a template argument."""

    value: int

    def to_str(self) -> str:
        """Return the decimal text of ``value``."""
        return str(self.value)
class ParseError(Exception):
    """Raised by the parser when the mangled text has an unexpected shape."""
class ParseCtx:
def __init__(self, mangled: str):
self.mangled = mangled
self.index = 0
self.demangled = []
self.cur_type = None
self.class_name = None
self.is_const = False
self.func_name = None
self.special_func_name = None
self.not_function = False
def demangle_variable(self):
    """Parse ``self.mangled`` as a variable symbol (name plus class qualifier).

    The text is split at the last ``__`` that is followed by a digit, 'F'
    or 'Q'. Nothing happens when there is no such separator or when it sits
    at the very start. Otherwise ``not_function``, ``func_name``,
    ``class_name`` (if a qualifier follows) and ``full_name`` are filled in.
    """
    hits = list(find_double_underscore.finditer(self.mangled))
    if not hits:
        return
    split_pos = hits[-1].start()
    if split_pos <= 0:
        return

    self.not_function = True
    self.func_name = self.mangled[:split_pos]
    # Skip the name and the two underscores of the separator.
    self.mangled = self.mangled[split_pos + 2:]
    self.demangle_first_class_variable()

    name_parts = []
    for piece in self.func_name.split("::"):
        name_parts.append(self.demangle_str_to_class_name(piece))
    self.func_name = QualifiedName(name_parts)

    scope_parts = list(self.class_name.parts) if self.class_name else []
    self.full_name = QualifiedName(scope_parts + self.func_name.parts)
def demangle(self):
    """Parse ``self.mangled`` as a function symbol.

    Splits the text at the last ``__`` followed by a digit, 'F' or 'Q',
    translates special names (operators, constructor, destructor), parses
    the class qualifier and then every parameter type. The results are
    stored in ``func_name``, ``class_name``, ``is_const``, ``demangled`` and
    ``full_name``. Returns without changing anything when no usable
    separator is found.
    """
    # this split is still not accurate, but good enough for most cases
    last_hit = None
    for last_hit in find_double_underscore.finditer(self.mangled):
        pass
    if last_hit is None:
        return
    split_pos = last_hit.start()
    if split_pos <= 0:
        return

    self.func_name = self.mangled[:split_pos]
    self.mangled = self.mangled[split_pos + 2:]

    if self.func_name.startswith('__'):
        op_match = operator_func_re.match(self.func_name)
        if op_match:
            op_code = op_match.group(1)
            if op_code in special_funcs:
                self.special_func_name = op_code
                self.func_name = special_funcs[op_code]
            elif op_code == 'ct':
                self.special_func_name = op_code
                self.func_name = '.ctor'
            elif op_code == 'dt':
                self.special_func_name = op_code
                self.func_name = '.dtor'

    self.demangle_first_class()
    while self.index < len(self.mangled):
        self.demangled.append(self.demangle_next_type())

    # Constructors/destructors take their name from the innermost class,
    # without its template arguments.
    if self.func_name == '.ctor':
        self.func_name = self.class_name.last.to_str(without_template=True)
    if self.func_name == '.dtor':
        bare_class = self.class_name.last.to_str(without_template=True)
        self.func_name = '~' + bare_class

    name_parts = [
        self.demangle_str_to_class_name(piece)
        for piece in self.func_name.split("::")
    ]
    self.func_name = QualifiedName(name_parts)

    scope_parts = list(self.class_name.parts) if self.class_name else []
    self.full_name = QualifiedName(scope_parts + self.func_name.parts)
def demangle_first_class(self):
    """Parse the optional class qualifier and the 'F' that must follow it.

    A leading digit introduces a single class name and a leading 'Q' a
    qualified name; either may be followed by 'C', which sets ``is_const``.
    In every case the next character has to be 'F'.

    Raises:
        ParseError: when the expected 'F' is missing.
    """
    lead = self.peek_next_char()
    if lead.isdecimal():
        self.class_name = QualifiedName([self.demangle_class()])
        missing_f_message = 'next char should be F! (decimal)'
    elif lead == 'Q':
        self.index += 1
        self.class_name = self.demangle_qualified_name()
        missing_f_message = 'next char should be F! (Q)'
    else:
        # No class qualifier at all.
        if self.consume_next_char() != 'F':
            raise ParseError('next char should be F!')
        return

    if self.peek_next_char() == 'C':
        self.is_const = True
        self.index += 1
    if self.consume_next_char() != 'F':
        raise ParseError(missing_f_message)
def demangle_first_class_variable(self):
    """Parse the class qualifier of a variable symbol, if one is present.

    A leading digit introduces a single class name, a leading 'Q' a
    qualified name; any other character leaves ``class_name`` untouched.
    """
    lead = self.peek_next_char()
    if lead.isdecimal():
        self.class_name = QualifiedName([self.demangle_class()])
        return
    if lead == 'Q':
        self.index += 1
        self.class_name = self.demangle_qualified_name()
def apply_prev_types(self, cur_type, types):
    """Wrap ``cur_type`` in the modifiers collected while parsing a type.

    Args:
        cur_type: the type object to wrap.
        types: sequence of modifier characters ('C', 'R', 'P') in the order
            they were read; they are applied last-read first.

    Returns:
        ``cur_type`` wrapped in one ConstParam/ReferenceParam/PointerParam
        per modifier.
    """
    wrappers = {
        'C': ConstParam,
        'R': ReferenceParam,
        'P': PointerParam,
    }
    for marker in reversed(types):
        wrapper = wrappers.get(marker)
        if wrapper is None:
            # Only the three markers above are ever collected.
            assert False
        else:
            cur_type = wrapper(cur_type)
    return cur_type
def demangle_next_type(self) -> str:
    """Parse one type starting at the cursor and return its type object.

    Prefix characters are consumed until a terminal form is reached:
    'U'/'S' set the sign flags, 'C'/'P'/'R' queue const/pointer/reference
    wrappers, a digit starts a class name, a key of ``types`` is a primitive,
    'Q' a qualified name, 'F' a function type, 'A' an array type and 'M' a
    pointer to member function.

    NOTE(review): the return annotation says ``str`` but every return path
    yields a parameter object (Param or one of its wrappers).

    Raises:
        ParseError: on an unexpected character.
    """
    modifiers = []
    result = Param()
    while True:
        cur_char = self.peek_next_char()

        # Terminal forms: a class name or a primitive type.
        if cur_char.isdecimal():
            result.name = QualifiedName([self.demangle_class()])
            return self.apply_prev_types(result, modifiers)
        if cur_char in types:
            result.name = QualifiedName([self.demangle_prim_type()])
            return self.apply_prev_types(result, modifiers)

        # Prefixes: record them and keep reading.
        if cur_char == 'U':
            result.is_unsigned = True
            self.index += 1
            continue
        if cur_char == 'S':
            result.is_signed = True
            self.index += 1
            continue
        if cur_char in ('C', 'P', 'R'):
            modifiers.append(cur_char)
            self.index += 1
            continue

        if cur_char == 'F':
            self.index += 1
            return self.demangle_function(
                self.apply_prev_types(result, modifiers))

        if cur_char == 'Q':
            self.index += 1
            result.name = self.demangle_qualified_name()
            return self.apply_prev_types(result, modifiers)

        if cur_char == 'A':
            # if cur_type.pointer_lvl < 1 and not cur_type.is_ref:
            #     raise ParseError("pointer level for array is wrong!")
            # The 'A' itself is consumed by demangle_array().
            return self.demangle_array(
                self.apply_prev_types(result, modifiers))

        if cur_char == 'M':
            self.index += 1
            if self.peek_next_char() == 'Q':
                self.index += 1
                owner = self.demangle_qualified_name()
            else:
                owner = QualifiedName([self.demangle_class()])
            if self.peek_next_char() != 'F':
                raise ParseError(
                    "expected character 'F' after class name")
            self.index += 1

            # The parameter list of a pointer to member function appears to
            # start with an implicit "PCvPv" (void const*, void*) pair.
            # For example the symbol
            #   execCommand__12JASSeqParserFP8JASTrackM12JASSeqParserFPCvPvP8JASTrackPUl_lUlPUl
            # would naively be demangled to
            #   void execCommand(JASTrack* , s32 (JASSeqParser::*)(void const*, void*, JASTrack*, u32*), u32, u32*);
            # but Metrowerks mangles that declaration to
            #   execCommand__12JASSeqParserFP8JASTrackM12JASSeqParserFPCvPvPCvPvP8JASTrackPUl_lUlPUl
            # i.e. with a second PCvPv pair in the middle. The pair seems to
            # be added by the compiler (reason unknown), so it is dropped
            # here. The resulting declaration
            #   void execCommand(JASTrack* , s32 (JASSeqParser::*)(JASTrack*, u32*), u32, u32*);
            # compiles back to the original mangled name.
            #
            # TODO: not very nice
            for expected in 'PCvPv':
                if self.peek_next_char() != expected:
                    raise ParseError(
                        f"expected character '{expected}' after class name")
                self.index += 1

            modifiers.append('P')
            func = self.demangle_function(
                self.apply_prev_types(result, modifiers))
            func.class_name = owner
            return func

        raise ParseError(f'unexpected character {cur_char}')
def demangle_array(self, parent_type) -> ArrayParam:
    """Parse one or more ``A<size>_`` groups followed by the element type.

    Args:
        parent_type: the type object built from the prefixes read before
            the array marker; stored as the array's ``inner_type``.

    Returns:
        An ArrayParam holding the element type, ``parent_type`` and the
        sizes in the order they were read.

    Raises:
        ParseError: when a size is not followed by '_'.
    """
    dimensions = []
    while self.peek_next_char() == 'A':
        self.index += 1
        dimensions.append(self.read_next_int())
        if self.consume_next_char() != '_':
            raise ParseError("Need to have '_' after Array size!")
    element_type = self.demangle_next_type()
    return ArrayParam(
        base_type=element_type,
        inner_type=parent_type,
        sizes=dimensions,
    )
def demangle_function(self, parent_type) -> FuncParam:
    """Parse a function type: parameter types, '_', then the return type.

    Args:
        parent_type: the type object built from the prefixes read before
            the function marker; stored as the function's ``inner_type``.

    Returns:
        A FuncParam with ``params`` and ``ret_type`` filled in.
    """
    func_param = FuncParam(inner_type=parent_type)
    # Everything up to the '_' separator is a parameter type.
    while self.peek_next_char() != '_':
        func_param.params.append(self.demangle_next_type())
    self.index += 1
    func_param.ret_type = self.demangle_next_type()
    return func_param
def demangle_qualified_name(self) -> QualifiedName:
    """Parse a one-character part count followed by that many class names.

    The caller is expected to have consumed the introducing 'Q' already.
    """
    part_count = int(self.consume_next_char())
    return QualifiedName([self.demangle_class() for _ in range(part_count)])
def read_next_int(self) -> int:
class_len_str = ''
cur_char = self.peek_next_char()
while cur_char.isdecimal():
class_len_str += cur_char
self.index += 1
cur_char = self.peek_next_char()
return int(class_len_str)
def demangle_template_args(self):
    """Parse a ``<arg,arg,...>`` template argument list at the cursor.

    Each argument is either an integer literal (optionally negative) that
    is directly followed by ',' or '>', or a type parsed with
    demangle_next_type().

    Returns:
        A list with one IntegerParam or type object per argument.

    Raises:
        ParseError: when the list does not start with '<' or an argument is
            not followed by ',' or '>'.
    """
    if self.peek_next_char() != '<':
        raise ParseError("expected character '<'")
    self.index += 1

    args = []
    while True:
        last_index = self.index
        next_type = None
        if self.peek_next_char().isdecimal() or self.peek_next_char() == '-':
            is_negative = self.peek_next_char() == '-'
            if is_negative:
                self.index += 1
            integer = self.read_next_int()
            if self.peek_next_char() in ('>', ','):
                # Keep the sign that was read in front of the digits.
                next_type = IntegerParam(-integer if is_negative else integer)
            else:
                # The digits were the length prefix of a class name, not a
                # literal: rewind and parse the argument as a type.
                self.index = last_index
        if next_type is None:
            next_type = self.demangle_next_type()
        args.append(next_type)

        if self.peek_next_char() == '>':
            self.index += 1
            break
        if self.peek_next_char() != ',':
            raise ParseError("expected character '>' or ','")
        self.index += 1
    return args
def demangle_template(self, name) -> Tuple[str, List[Param]]:
if not "<" in name or not name.endswith(">"):
return name, []
index = name.find("<")
prefix = name[0:index]
inner = name[index:]
ctx = ParseCtx(inner)
types = ctx.demangle_template_args()
type_str = ", ".join([x.to_str() for x in types])
return prefix, types
def demangle_str_to_class_name(self, text) -> ClassName:
    """Build a ClassName from ``text``, parsing any ``<...>`` template suffix."""
    base_name, template_args = self.demangle_template(text)
    return ClassName(base_name, template_args)
def demangle_class(self) -> ClassName:
    """Parse a length-prefixed class name at the cursor.

    The decimal length is read first, then that many characters are taken
    as the name and split into base name and template arguments.

    Raises:
        ParseError: when the cursor is not on a digit.
    """
    if not self.peek_next_char().isdecimal():
        raise ParseError('class mangling must start with number')
    class_len = self.read_next_int()
    name_end = self.index + class_len
    full_class_name = self.mangled[self.index:name_end]
    class_name, argument_types = self.demangle_template(full_class_name)
    # Advance only after the name has been parsed successfully.
    self.index = name_end
    return ClassName(class_name, argument_types)
def demangle_prim_type(self) -> ClassName:
    """Consume one character and return its primitive type as a ClassName.

    Raises KeyError when the character is not a key of ``types``.
    """
    code = self.consume_next_char()
    return ClassName(types[code], [])
def consume_next_char(self) -> str:
next_char = self.mangled[self.index]
self.index += 1
return next_char
def peek_next_char(self) -> str:
if self.index >= len(self.mangled):
return None
return self.mangled[self.index]
def to_str(self) -> str:
if self.not_function:
return self.class_name.to_str() + '::' + self.func_name.to_str()
if self.func_name is None:
return ''
elif self.class_name is None:
return self.func_name.to_str() + '(' + ', '.join([x.to_str() for x in self.demangled]) + ')'
else:
return self.class_name.to_str() + '::' + self.func_name.to_str() + '(' + ', '.join([x.to_str() for x in self.demangled]) + ')' + (' const' if self.is_const else '')