# tp/tools/splitter/asm_parser.py
#
# 160 lines
# 3.4 KiB
# Python

from dataclasses import dataclass
from parsy import string, regex, seq, generate, line_info
from typing import Optional, List, Union, Protocol
class Emittable(Protocol):
    """Structural type for any element that can render itself as text."""

    def emit(self) -> str:
        """Return the textual form of this element."""
        ...
@dataclass
class BlockComment:
    """A ``/* ... */`` comment.

    ``text`` holds everything between the delimiters, exactly as written.
    """

    text: str

    def emit(self) -> str:
        """Return the comment with its ``/*`` and ``*/`` delimiters restored."""
        inner = self.text
        return f'/*{inner}*/'
@dataclass
class TrailingComment:
    """A ``#`` comment that runs to the end of the line.

    ``text`` is the comment body without the leading ``#``.
    """

    text: str

    def emit(self) -> str:
        """Return the comment as ``# `` followed by the stored text."""
        remark = self.text
        return f'# {remark}'
@dataclass
class Include:
    """An ``.include`` directive naming another file.

    ``file`` is the path as it appears between the double quotes.
    """

    file: str

    def emit(self) -> str:
        """Return the directive with the file name wrapped in double quotes."""
        target = self.file
        return f'.include "{target}"'
@dataclass
class Section:
    """A section-switching directive.

    ``name`` is stored without its leading dot (``emit`` adds it back).
    ``flags`` is the optional quoted flag string; ``None`` means the
    directive carries no flags at all.
    """

    name: str
    flags: Optional[str]

    def emit(self) -> str:
        """Return ``.section .<name>``, plus ``, "<flags>"`` when flags are set."""
        head = f'.section .{self.name}'
        # Only None suppresses the flags; an empty string still emits `, ""`.
        if self.flags is None:
            return head
        return head + f', "{self.flags}"'
@dataclass
class Global:
    """A ``.global`` directive exporting one symbol."""

    symbol: str

    def emit(self) -> str:
        """Return ``.global`` followed by the symbol name."""
        exported = self.symbol
        return f'.global {exported}'
@dataclass
class Label:
    """A label definition; ``symbol`` is the name without the trailing colon."""

    symbol: str

    def emit(self) -> str:
        """Return the symbol name followed by ``:``."""
        name = self.symbol
        return f'{name}:'
@dataclass
class Instruction:
    """A machine instruction: an opcode plus zero or more operand strings."""

    opcode: str
    operands: List[str]

    def emit(self) -> str:
        """Return the opcode, followed by the operands when there are any.

        Operands are separated from the opcode by one space and from each
        other by ``', '``. An instruction without operands emits the bare
        opcode with no trailing whitespace.
        """
        # Truthiness test instead of len(...) > 0 (PEP 8 idiom for sequences).
        if not self.operands:
            return self.opcode
        return self.opcode + ' ' + ', '.join(self.operands)
@dataclass
class Line:
    """Everything parsed from one source line.

    ``index`` is the row the ``line`` parser read from parsy's ``line_info``
    (zero-based according to parsy's documentation).
    ``content`` lists every element found on the line, in source order.
    ``body`` is the directive, label or instruction on the line, or ``None``
    when the line holds only comments or is empty; when present it is also
    an element of ``content``.
    """

    index: int
    content: List[
        Union[
            BlockComment, TrailingComment, Instruction, Global, Section, Include, Label
        ]
    ]
    body: Optional[Union[Global, Section, Include, Label, Instruction]]

    def emit(self) -> str:
        """Return the emitted elements of ``content`` joined by single spaces."""
        return ' '.join(element.emit() for element in self.content)
# --- Whitespace primitives ---------------------------------------------------

# One or more spaces/tabs: mandatory horizontal whitespace (never a newline).
space = regex(r'[ \t]+')
# A single LF or CRLF line terminator.
line_ending = regex('(\n)|(\r\n)').desc('newline')
# Zero or more spaces/tabs: optional horizontal whitespace.
pad = regex(r'[ \t]*')

# --- Comments ----------------------------------------------------------------

# `/* ... */` comment; the delimiters are discarded and the body becomes a
# BlockComment. The body pattern admits only word characters and whitespace,
# so a body containing punctuation is not accepted. `\s` also matches
# newlines, so the body may span several lines.
block_comment = (
    string('/*') >> regex(r'[\w\s]*').map(BlockComment) << string('*/')
).desc('block comment')
# `#` comment running to the end of the line. Blanks directly after the `#`
# are skipped and not stored; TrailingComment.emit writes `# ` before the text.
trailing_comment = (
    string('#') >> pad >> regex(r'[^\n\r]*').map(TrailingComment)
).desc('trailing comment')

# --- Symbols and strings -----------------------------------------------------

# Symbol name: first character is a letter, `.`, `_` or `$`; later characters
# may additionally be digits or `?`.
symbolname = regex(r'[a-zA-Z._$][a-zA-Z0-9._$?]*')
# A symbol immediately followed by `:` (no whitespace in between).
label = (symbolname.map(Label) << string(':')).desc('label')
# Double-quoted string with no escape handling: the content is any run of
# characters other than `"`, and the result excludes the quotes.
delimited_string = (string('"') >> regex(r'[^"]*') << string('"')).desc(
    'double-quote delimited string'
)

# --- Directives --------------------------------------------------------------
# The directive_* parsers match the directive name WITHOUT its leading dot;
# the dot is consumed once by `directive` below.

# `include "file"`
directive_include = string('include') >> space >> delimited_string.map(Include)
# `section .name` with an optional `, "flags"` part. The section name is
# limited to lowercase letters. Blanks are optional before the comma, but at
# least one space/tab is required after it. The two named results are passed
# to Section as keyword arguments; `flags` is None when the part is absent.
directive_section = seq(
    name=string('section')
    >> space
    >> string('.')
    >> regex(r'[a-z]+'),
    flags=(pad >> string(',') >> space >> delimited_string).optional(),
).combine_dict(Section)
# `global symbol`
directive_global = string('global') >> space >> symbolname.map(Global)
# Any supported directive: a `.` followed by the first alternative that
# matches, tried in the order listed. The bare `.text` / `.data` shorthands
# are represented as flag-less Section objects. Each of those Section
# instances is constructed once, here, and handed to `.result(...)`, so all
# matches of a shorthand refer to that same object.
directive = (
    string('.')
    >> (
        directive_include
        | directive_section
        | directive_global
        | string('text').result(Section('text', flags=None))
        | string('data').result(Section('data', flags=None))
    )
).desc('directive')

# --- Instructions ------------------------------------------------------------

# Opcode: lowercase letters, digits and underscores, with an optional trailing
# `.`.
# NOTE(review): the regex already yields a str, so `.concat()` looks redundant
# here - confirm before removing.
opcode = regex(r'[a-z_0-9]+\.?').concat().desc('opcode')
# One operand: a run of characters other than comma, `#` and whitespace.
# NOTE(review): this pattern also matches `/*`, so a block comment written
# after an opcode and a space would presumably be read as operand text
# rather than as a comment - verify against real input.
operand = regex(r'[^,#\s]+')
# Operands separated by commas; blanks are allowed after each comma but not
# before it.
operands = operand.sep_by(string(',') << pad)
@generate
def instruction():
    """Parse an opcode and, if whitespace follows it, its operand list.

    Operands are only attempted when at least one space or tab follows the
    opcode; otherwise the instruction gets an empty operand list.

    Returns:
        An ``Instruction`` built from the opcode text and the operand strings.
    """
    mnemonic = yield opcode
    gap = yield space.optional()
    # `space` yields a non-empty string on success and None when absent.
    args = (yield operands) if gap else []
    return Instruction(mnemonic, args)
@generate
def line():
    """Parse the contents of a single source line (without its terminator).

    Elements are read in this order, every one of them optional:
    leading block comments, one body (directive, label or instruction, tried
    in that order), further block comments, and a trailing ``#`` comment.
    Horizontal whitespace between elements is skipped.

    Returns:
        A ``Line`` holding the row read from ``line_info``, all parsed
        elements in source order, and the body element (or ``None``).
    """
    row, _ = yield line_info

    parts = yield (pad >> block_comment << pad).many()

    body = yield (directive | label | instruction).optional() << pad
    if body is not None:
        parts.append(body)

    parts.extend((yield (pad >> block_comment).many()))

    tail = yield (pad >> trailing_comment).optional()
    if tail is not None:
        parts.append(tail)

    return Line(row, parts, body)
# Whole-input parser: a sequence of lines separated by line terminators.
# Every part of `line` is optional, so an empty line still produces a Line
# object (with empty content and no body).
asm = line.sep_by(line_ending)