# tp/tools/splitter/demangle.py
#
# (source-viewer header: 311 lines, 9.6 KiB, Python)

from typing import List, Optional
from pathlib import Path
from dataclasses import dataclass, field
import re
# Matches a name that begins with '__' followed by lowercase letters and
# captures those letters; the captured text is the key looked up when a
# function name denotes an operator, constructor or destructor.
operator_func_re = re.compile(r'^__([a-z]+)')
# Single-character codes for primitive types, mapped to the spelling that is
# emitted in the demangled signature.
types = {
    'i': 'int',
    'l': 'long',
    's': 'short',
    'c': 'char',
    'f': 'f32',
    'd': 'f64',
    'v': 'void',
    'x': 'long long',
    'b': 'bool',
    'e': 'varargs...',
}
# Integer types that are printed as a sign prefix ('u' or 's') followed by
# the bit width given here, e.g. 'char' -> 's8' / 'u8'.
short_type_names = {
    'char': '8',
    'short': '16',
    'long': '32',
    'long long': '64',
}
# Codes that follow a leading '__' in a mangled function name, mapped to the
# operator they stand for. Constructor ('ct') and destructor ('dt') codes are
# not listed here; they are resolved separately because they need the class
# name.
special_funcs = {
    'eq': 'operator==',
    'as': 'operator=',
    'ne': 'operator!=',
    'dv': 'operator/',
    'pl': 'operator+',
    'mi': 'operator-',
    'ml': 'operator*',
    'adv': 'operator/=',
    'apl': 'operator+=',
    'ami': 'operator-=',
    'amu': 'operator*=',
    'lt': 'operator<',
    'gt': 'operator>',
    'cl': 'operator()',
    'dla': 'operator delete[]',
    'nwa': 'operator new[]',
    'dl': 'operator delete',
    'nw': 'operator new',
}
@dataclass
class Param:
    """One demangled parameter type together with its qualifiers."""

    name: str = ''
    pointer_lvl: int = 0
    is_const: bool = False
    is_ref: bool = False
    is_unsigned: bool = False
    is_signed: bool = False

    def to_str(self) -> str:
        """Render the type as text.

        Layout: optional 'const ', then the type name, then one '*' per
        pointer level, then '&' for a reference. Names listed in
        ``short_type_names`` are written as a sign prefix ('u' when unsigned,
        's' otherwise) plus the bit width; every other name is written
        verbatim, preceded by 'unsigned ' when flagged unsigned.
        """
        pieces = []
        if self.is_const:
            pieces.append('const ')

        bit_width = short_type_names.get(self.name)
        if bit_width is None:
            if self.is_unsigned:
                pieces.append('unsigned ')
            pieces.append(self.name)
        else:
            sign_prefix = 'u' if self.is_unsigned else 's'
            pieces.append(sign_prefix + bit_width)

        pieces.append('*' * self.pointer_lvl)
        if self.is_ref:
            pieces.append('&')
        return ''.join(pieces)
@dataclass
class FuncParam:
    """A function-pointer parameter: return type plus parameter type list."""

    ret_type: Optional[str] = None
    params: List[str] = field(default_factory=list)

    def to_str(self) -> str:
        """Render as ``<ret> (*)(<p1>, <p2>, ...)``; a missing return type is 'void'."""
        return_type = 'void' if self.ret_type is None else self.ret_type
        arguments = ', '.join(self.params)
        return f'{return_type} (*)({arguments})'
class ParseError(Exception):
    """Raised when a mangled name does not follow the expected grammar."""
class ParseCtx:
    """Cursor-based parser that demangles one mangled function symbol.

    Usage: construct with the mangled symbol, call :meth:`demangle`, then
    read the result with :meth:`to_str`.

    Attributes:
        mangled: text being parsed; after the name/signature split in
            :meth:`demangle` it holds only the part after the ``__``.
        index: position of the next unread character in ``mangled``.
        demangled: rendered parameter types, in order.
        class_name: owning class (``A::B`` form) or ``None`` for free functions.
        is_const: ``True`` when the member function is const-qualified.
        func_name: demangled function name, or ``None`` if the symbol could
            not be split into name and signature.
    """

    def __init__(self, mangled: str):
        self.mangled = mangled
        self.index = 0
        self.demangled = []
        self.cur_type = None
        self.class_name = None
        self.is_const = False
        self.func_name = None

    def demangle(self):
        """Parse ``self.mangled`` and fill in name, class and parameter list.

        Returns silently (leaving ``func_name`` as ``None``) when the symbol
        contains no ``F`` marker or no ``__`` separator before it.

        Raises:
            ParseError: the signature part is malformed or truncated.
        """
        # this split is still not accurate, but good enough for most cases
        last_f = self.mangled.rfind('F')
        if last_f == -1:
            return
        split_pos = self.mangled.rfind('__', 0, last_f)
        if split_pos == -1 or split_pos == 0:
            return
        self.func_name = self.mangled[:split_pos]
        self.mangled = self.mangled[split_pos + 2:]

        if self.func_name.startswith('__'):
            match = operator_func_re.match(self.func_name)
            if match:
                special_func_name = match.group(1)
                if special_func_name in special_funcs:
                    self.func_name = special_funcs[special_func_name]
                elif special_func_name == 'ct':
                    # placeholder, replaced once the class name is known
                    self.func_name = '.ctor'
                elif special_func_name == 'dt':
                    self.func_name = '.dtor'

        self.demangle_first_class()
        while self.index < len(self.mangled):
            self.demangled.append(self.demangle_next_type())

        if self.func_name == '.ctor':
            self.func_name = self.class_name
        if self.func_name == '.dtor':
            self.func_name = '~' + self.class_name

    def demangle_first_class(self):
        """Parse the optional owning class, optional ``C`` (const) and the ``F`` marker.

        Raises:
            ParseError: the ``F`` marker is missing or the input ends early.
        """
        first = self.peek_next_char()
        if first is not None and first.isdecimal():
            self.class_name = self.demangle_class()
            self._skip_const_marker()
        elif first == 'Q':
            self.index += 1
            self.class_name = self.demangle_qualified_name()
            self._skip_const_marker()
        # The check is an explicit raise rather than an ``assert``: the
        # consume has a side effect that must still happen under ``python -O``.
        self._expect_char('F')

    def demangle_next_type(self) -> str:
        """Parse one parameter type, including its qualifiers, and render it.

        Raises:
            ParseError: unknown type character, an array without a preceding
                pointer/reference, or the input ends inside a type.
        """
        cur_type = Param()
        while True:
            cur_char = self.peek_next_char()
            if cur_char is None:
                raise ParseError('unexpected end of mangled name while parsing a type')
            if cur_char.isdecimal():
                cur_type.name = self.demangle_class()
                return cur_type.to_str()
            elif cur_char in types:
                cur_type.name = self.demangle_prim_type()
                return cur_type.to_str()
            elif cur_char == 'U':
                cur_type.is_unsigned = True
                self.index += 1
            elif cur_char == 'S':
                cur_type.is_signed = True
                self.index += 1
            elif cur_char == 'C':
                cur_type.is_const = True
                self.index += 1
            elif cur_char == 'P':
                cur_type.pointer_lvl += 1
                self.index += 1
            elif cur_char == 'R':
                cur_type.is_ref = True
                self.index += 1
            elif cur_char == 'F':
                self.index += 1
                func = self.demangle_function()
                return func.to_str()
            elif cur_char == 'Q':
                self.index += 1
                # Keep the qualifiers collected so far (const, '*', '&', ...)
                # instead of returning the bare qualified name.
                cur_type.name = self.demangle_qualified_name()
                return cur_type.to_str()
            elif cur_char == 'A':
                if cur_type.pointer_lvl < 1 and not cur_type.is_ref:
                    raise ParseError("pointer level for array is wrong!")
                # decrease pointer level by one, cause one is already handled in the array demangle
                if not cur_type.is_ref:
                    cur_type.pointer_lvl -= 1
                cur_type.name = self.demangle_array()
                return cur_type.to_str()
            else:
                raise ParseError(f'unexpected character {cur_char}')

    def demangle_array(self) -> str:
        """Parse one or more ``A<size>_`` prefixes followed by the element type."""
        sizes = []
        while self.peek_next_char() == 'A':
            self.index += 1
            sizes.append(self.read_next_int())
            if self.consume_next_char() != '_':
                raise ParseError("Need to have '_' after Array size!")
        array_type = self.demangle_next_type()
        return f'{array_type} []' + ''.join(f'[{i}]' for i in sizes)

    def demangle_function(self) -> 'FuncParam':
        """Parse a function type: parameter types up to ``_``, then the return type."""
        func_param = FuncParam()
        while True:
            cur_char = self.peek_next_char()
            if cur_char == '_':
                self.index += 1
                func_param.ret_type = self.demangle_next_type()
                return func_param
            func_param.params.append(self.demangle_next_type())

    def demangle_qualified_name(self) -> str:
        """Parse ``<count><class>...`` (the ``Q`` already consumed) into ``A::B``.

        Raises:
            ParseError: the part count is not a single decimal digit.
        """
        count_char = self.consume_next_char()
        if not count_char.isdecimal():
            raise ParseError(f'unexpected character {count_char}')
        parts = [self.demangle_class() for _ in range(int(count_char))]
        return '::'.join(parts)

    def read_next_int(self) -> int:
        """Consume a run of decimal digits and return its value.

        Raises:
            ParseError: no digit is present, or the digits run to the end of
                the input (a number is always followed by more text).
        """
        start = self.index
        while True:
            cur_char = self.peek_next_char()
            if cur_char is None:
                raise ParseError('unexpected end of mangled name while reading a number')
            if not cur_char.isdecimal():
                break
            self.index += 1
        digits = self.mangled[start:self.index]
        if not digits:
            raise ParseError('expected a decimal number')
        return int(digits)

    def demangle_class(self) -> str:
        """Parse a length-prefixed class name; ``M`` chains a nested name with ``::``."""
        next_char = self.peek_next_char()
        if next_char is None or not next_char.isdecimal():
            raise ParseError('class mangling must start with number')
        class_len = self.read_next_int()
        class_name = self.mangled[self.index:self.index + class_len]
        self.index += class_len
        if self.peek_next_char() == 'M':
            self.index += 1
            class_name += '::' + self.demangle_class()
        return class_name

    def demangle_prim_type(self) -> str:
        """Consume one primitive type code and return its spelling from ``types``."""
        return types[self.consume_next_char()]

    def consume_next_char(self) -> str:
        """Return the next character and advance past it.

        Raises:
            ParseError: there is no character left to consume.
        """
        if self.index >= len(self.mangled):
            raise ParseError('unexpected end of mangled name')
        next_char = self.mangled[self.index]
        self.index += 1
        return next_char

    def peek_next_char(self) -> Optional[str]:
        """Return the next character without advancing, or ``None`` at end of input."""
        if self.index >= len(self.mangled):
            return None
        return self.mangled[self.index]

    def to_str(self) -> str:
        """Render the demangled signature; '' when nothing was demangled."""
        if self.func_name is None:
            return ''
        signature = self.func_name + '(' + ', '.join(self.demangled) + ')'
        if self.class_name is None:
            return signature
        const_suffix = ' const' if self.is_const else ''
        return self.class_name + '::' + signature + const_suffix

    def _skip_const_marker(self) -> None:
        """Consume a ``C`` after the class name and record const-ness."""
        if self.peek_next_char() == 'C':
            self.is_const = True
            self.index += 1

    def _expect_char(self, expected: str) -> None:
        """Consume one character and raise ParseError unless it equals *expected*."""
        if self.consume_next_char() != expected:
            raise ParseError(f'next char should be {expected}!')
def demangle(s):
    """Demangle the symbol *s* and return its readable signature.

    The result is an empty string when the symbol cannot be split into a
    function name and a signature. Errors raised while parsing the signature
    propagate to the caller.
    """
    ctx = ParseCtx(s)
    ctx.demangle()
    return ctx.to_str()
def parse_framework_map(path: Path) -> dict:
    """Read a linker map file and return ``{address: symbol_name}``.

    Only lines that begin with a space are treated as symbol entries. The
    address is taken from columns 18-25 and the symbol name is the text from
    column 30 up to the next space. Reading stops at the first line that
    starts with ``.ctors``.

    Args:
        path: location of the map file.

    Returns:
        Mapping from the address column (as text) to the symbol name; a later
        entry with the same address replaces an earlier one.
    """
    address_funcname = {}
    with path.open() as f:
        # Iterate the file object directly: map files can be large and
        # readlines() would load every line into memory just to loop once.
        for line in f:
            if line.startswith('.ctors'):
                break
            if not line.startswith(' '):
                continue
            funcname = line[30:].split(' ', 1)[0]
            address = line[18:26]
            address_funcname[address] = funcname
    return address_funcname
# def try_demangle_all():
# with open('frameworkF.map') as f:
# for line in f.readlines():
# if line.startswith('.ctors'):
# return
# if not line.startswith(' '):
# continue
# line = line[30:]
# line_spl = line.split(' ',1)[0]
# try:
# d = demangle(line_spl)
# if d:
# print(d)
# # except NotImplementedError:
# # pass
# except Exception as e:
# # print(f'could not demangle {line_spl}: {repr(e)}')
# # raise e
# pass
# try_demangle_all()