mm/tools/buildtools/schc.py

1036 lines
46 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2023-2024 ZeldaRET
# SPDX-License-Identifier: MIT
#
# Schedule scripting language compiler
#
# For a reference about the language see https://github.com/zeldaret/mm/blob/main/docs/schedule_scripting_language.md
#
#
# Version history:
#
# 1.0.0:
#
# * Initial release
#
# TODO: Check for repeated labels
# TODO: think on a catchy name for the schedule language and the compiler
# TODO: Warning/Error for control flows that do not led to a return. Maybe consider inserting return_none on those cases
# TODO: consider adding optimization passes/options
# TODO: consider adding and/or operators
from __future__ import annotations
# Short for "schedule compiler"
__prog_name__ = "schc"
__version__ = "1.0.0"
import colorama
colorama.init()
import argparse
import dataclasses
import enum
from pathlib import Path
import re
from typing import NoReturn
import sys
class SchcError(Exception):
    """Base exception for internal schc compiler errors (e.g. misuse of TokenIterator)."""
    pass
def eprint(*args, **kwargs):
    """print(), but to stderr so diagnostics don't mix with generated output."""
    print(*args, file=sys.stderr, **kwargs)
# Extra-diagnostics switch for debugPrint().
# NOTE(review): presumably toggled by a `-d` CLI flag (an error message below
# mentions "pass `-d`"), but the argparse setup is not visible in this chunk — confirm.
DEBUG = False
def debugPrint(*args, **kwargs):
    """eprint() that only emits output when the global DEBUG flag is set."""
    if not DEBUG:
        return
    eprint(*args, **kwargs)
def fatalError(message: str, filename: str, lineNumber: int, columnNumber: int, **kwargs) -> NoReturn:
    """Report an unrecoverable compiler error and terminate with exit status 1.

    Extra keyword arguments are dumped as debugging context when DEBUG is on.
    """
    # Print the filename/linenumber in a format that some IDEs can follow by ctrl-click on them.
    # Bug fix: this previously printed a hard-coded "(unknown)" and ignored the
    # `filename` parameter, defeating the clickable-location feature.
    eprint(f"{colorama.Style.BRIGHT}{filename}:{lineNumber}:{columnNumber}{colorama.Style.RESET_ALL}: {colorama.Style.BRIGHT}{colorama.Fore.RED}Error{colorama.Style.RESET_ALL}: {message}")
    if DEBUG:
        # Get the info from the caller, so the trace points at the real origin of the halt
        frame = sys._getframe().f_back
        if frame is not None:
            funcName = frame.f_code.co_name
            debugPrint(f" Halted from: {funcName} at {frame.f_code.co_filename}:{frame.f_lineno}")
        for key, value in kwargs.items():
            debugPrint(f" {key}: {value}")
    # TODO: Add fun error messages
    sys.exit(1)
def warning(message: str, filename: str, lineNumber: int, columnNumber: int, **kwargs) -> None:
    """Report a non-fatal diagnostic; compilation continues.

    Extra keyword arguments are dumped as debugging context when DEBUG is on.
    """
    # Print the filename/linenumber in a format that some IDEs can follow by ctrl-click on them.
    # Bug fix: this previously printed a hard-coded "(unknown)" and ignored the
    # `filename` parameter, defeating the clickable-location feature.
    eprint(f"{colorama.Style.BRIGHT}{filename}:{lineNumber}:{columnNumber}{colorama.Style.RESET_ALL}: {colorama.Style.BRIGHT}{colorama.Fore.MAGENTA}Warning{colorama.Style.RESET_ALL}: {message}")
    if DEBUG:
        # Get the info from the caller, so the trace points at the real origin of the warning
        frame = sys._getframe().f_back
        if frame is not None:
            funcName = frame.f_code.co_name
            debugPrint(f" Warning triggered in: {funcName} at {frame.f_code.co_filename}:{frame.f_lineno}")
        for key, value in kwargs.items():
            debugPrint(f" {key}: {value}")
class TokenType(enum.Enum):
    """Every token the tokenizer can produce.

    Each member's value is the literal keyword as written in the script source
    (IDENTIFIER is the catch-all with an empty literal). `_S`/`_L` suffixes are
    the short/long (1-byte/2-byte offset) variants of the same schedule command;
    the suffix-less "generic" members let the compiler pick the variant.
    """
    # Schedule commands
    IF_WEEKEVENTREG_S = "if_week_event_reg_s"
    IF_WEEKEVENTREG_L = "if_week_event_reg_l"
    IF_TIMERANGE_S = "if_time_range_s"
    IF_TIMERANGE_L = "if_time_range_l"
    RETURN_S = "return_s"
    RETURN_L = "return_l"
    RETURN_NONE = "return_none"
    RETURN_EMPTY = "return_empty"
    IF_MISC_S = "if_misc_s"
    IF_SCENE_S = "if_scene_s"
    IF_SCENE_L = "if_scene_l"
    IF_DAY_S = "if_day_s"
    IF_DAY_L = "if_day_l"
    NOP = "nop"
    RETURN_TIME = "return_time"
    IF_BEFORETIME_S = "if_before_time_s"
    IF_BEFORETIME_L = "if_before_time_l"
    IF_SINCETIME_S = "if_since_time_s"
    IF_SINCETIME_L = "if_since_time_l"
    BRANCH_S = "branch_s"
    BRANCH_L = "branch_l"
    # Generics: compiled to a short version first, widened to long if needed
    IF_WEEKEVENTREG = "if_week_event_reg"
    IF_TIMERANGE = "if_time_range"
    IF_MISC = "if_misc"
    IF_SCENE = "if_scene"
    IF_DAY = "if_day"
    IF_BEFORETIME = "if_before_time"
    IF_SINCETIME = "if_since_time"
    BRANCH = "branch"
    # Extra tokens: language syntax that never emits a schedule command by itself
    ELSE = "else"
    BRACE_OPEN = "{"
    BRACE_CLOSE = "}"
    ARGS = "(args)"
    NOT = "not"
    LABEL = "label"
    IDENTIFIER = ""
@dataclasses.dataclass
class TokenProperties:
    """Static metadata for a TokenType (see tokenPropertiesDict)."""
    # C macro emitted for this command; None for tokens that emit no code
    macro: str|None = None
    # Size in bytes of the emitted schedule command
    cmdLength: int|None = None
    # True for non-command tokens (braces, `else`, `not`, labels, args, identifiers)
    isExtraToken: bool = False
    # `if_`-style command: branches depending on a runtime check
    isConditionalBranch: bool = False
    # `branch`-style command: always jumps
    isUnconditionalBranch: bool = False
    # Command is followed by a parenthesized argument list
    hasArguments: bool = False
    # Suffix-less command; the compiler chooses the `_s` or `_l` variant
    isGeneric: bool = False
    # The emitted command tests the opposite condition, so the `if`/`else`
    # bodies must be swapped when linearizing
    needsToInvert: bool = False
    # True for the `_s` (1-byte branch offset) variant
    isShort: bool = False
    # The `_s`/`_l` counterparts of this command, when they exist
    shortVersion: TokenType|None=None
    longVersion: TokenType|None=None
    @property
    def isAnyBranch(self) -> bool:
        """True for both conditional and unconditional branch commands."""
        return self.isConditionalBranch or self.isUnconditionalBranch
# Master table: per-token metadata (emitted macro, encoded size, branch kind, etc).
# NOTE(review): IF_SINCETIME_S/_L map to the SCHEDULE_CMD_CHECK_BEFORE_TIME_* macros
# *without* needsToInvert, while IF_BEFORETIME_S/_L use the same macros *with*
# needsToInvert — i.e. "since" appears to be expressed as the non-inverted form of
# the same underlying command. Confirm against the schedule macro definitions.
tokenPropertiesDict: dict[TokenType, TokenProperties] = {
    # Schedule commands
    TokenType.IF_WEEKEVENTREG_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_FLAG_S", cmdLength=0x04, isConditionalBranch=True, hasArguments=True, needsToInvert=True, isShort=True, shortVersion=TokenType.IF_WEEKEVENTREG_S, longVersion=TokenType.IF_WEEKEVENTREG_L),
    TokenType.IF_WEEKEVENTREG_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_FLAG_L", cmdLength=0x05, isConditionalBranch=True, hasArguments=True, needsToInvert=True, shortVersion=TokenType.IF_WEEKEVENTREG_S, longVersion=TokenType.IF_WEEKEVENTREG_L),
    TokenType.IF_TIMERANGE_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_TIME_RANGE_S", cmdLength=0x06, isConditionalBranch=True, hasArguments=True, needsToInvert=True, isShort=True, shortVersion=TokenType.IF_TIMERANGE_S, longVersion=TokenType.IF_TIMERANGE_L),
    TokenType.IF_TIMERANGE_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_TIME_RANGE_L", cmdLength=0x07, isConditionalBranch=True, hasArguments=True, needsToInvert=True, shortVersion=TokenType.IF_TIMERANGE_S, longVersion=TokenType.IF_TIMERANGE_L),
    TokenType.RETURN_S: TokenProperties(macro="SCHEDULE_CMD_RET_VAL_S", cmdLength=0x02, hasArguments=True, isShort=True),
    TokenType.RETURN_L: TokenProperties(macro="SCHEDULE_CMD_RET_VAL_L", cmdLength=0x03, hasArguments=True),
    TokenType.RETURN_NONE: TokenProperties(macro="SCHEDULE_CMD_RET_NONE", cmdLength=0x01),
    TokenType.RETURN_EMPTY: TokenProperties(macro="SCHEDULE_CMD_RET_EMPTY", cmdLength=0x01),
    # Note: IF_MISC has no long variant (no longVersion)
    TokenType.IF_MISC_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_MISC_S", cmdLength=0x03, isConditionalBranch=True, hasArguments=True, needsToInvert=True, isShort=True, shortVersion=TokenType.IF_MISC_S),
    TokenType.IF_SCENE_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_NOT_IN_SCENE_S", cmdLength=0x04, isConditionalBranch=True, hasArguments=True, isShort=True, shortVersion=TokenType.IF_SCENE_S, longVersion=TokenType.IF_SCENE_L),
    TokenType.IF_SCENE_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_NOT_IN_SCENE_L", cmdLength=0x05, isConditionalBranch=True, hasArguments=True, shortVersion=TokenType.IF_SCENE_S, longVersion=TokenType.IF_SCENE_L),
    TokenType.IF_DAY_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_NOT_IN_DAY_S", cmdLength=0x04, isConditionalBranch=True, hasArguments=True, isShort=True, shortVersion=TokenType.IF_DAY_S, longVersion=TokenType.IF_DAY_L),
    TokenType.IF_DAY_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_NOT_IN_DAY_L", cmdLength=0x05, isConditionalBranch=True, hasArguments=True, shortVersion=TokenType.IF_DAY_S, longVersion=TokenType.IF_DAY_L),
    TokenType.NOP: TokenProperties(macro="SCHEDULE_CMD_NOP", cmdLength=0x04, hasArguments=True),
    TokenType.RETURN_TIME: TokenProperties(macro="SCHEDULE_CMD_RET_TIME", cmdLength=0x06, hasArguments=True),
    TokenType.IF_BEFORETIME_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_BEFORE_TIME_S", cmdLength=0x04, isConditionalBranch=True, hasArguments=True, needsToInvert=True, isShort=True, shortVersion=TokenType.IF_BEFORETIME_S, longVersion=TokenType.IF_BEFORETIME_L),
    TokenType.IF_BEFORETIME_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_BEFORE_TIME_L", cmdLength=0x05, isConditionalBranch=True, hasArguments=True, needsToInvert=True, shortVersion=TokenType.IF_BEFORETIME_S, longVersion=TokenType.IF_BEFORETIME_L),
    TokenType.IF_SINCETIME_S: TokenProperties(macro="SCHEDULE_CMD_CHECK_BEFORE_TIME_S", cmdLength=0x04, isConditionalBranch=True, hasArguments=True, isShort=True, shortVersion=TokenType.IF_SINCETIME_S, longVersion=TokenType.IF_SINCETIME_L),
    TokenType.IF_SINCETIME_L: TokenProperties(macro="SCHEDULE_CMD_CHECK_BEFORE_TIME_L", cmdLength=0x05, isConditionalBranch=True, hasArguments=True, shortVersion=TokenType.IF_SINCETIME_S, longVersion=TokenType.IF_SINCETIME_L),
    TokenType.BRANCH_S: TokenProperties(macro="SCHEDULE_CMD_BRANCH_S", cmdLength=0x02, isUnconditionalBranch=True, hasArguments=True, isShort=True, shortVersion=TokenType.BRANCH_S, longVersion=TokenType.BRANCH_L),
    TokenType.BRANCH_L: TokenProperties(macro="SCHEDULE_CMD_BRANCH_L", cmdLength=0x03, isUnconditionalBranch=True, hasArguments=True, shortVersion=TokenType.BRANCH_S, longVersion=TokenType.BRANCH_L),
    # Generics: no macro/cmdLength of their own; lowered to a `_s`/`_l` variant first
    TokenType.IF_WEEKEVENTREG: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, needsToInvert=True, shortVersion=TokenType.IF_WEEKEVENTREG_S, longVersion=TokenType.IF_WEEKEVENTREG_L),
    TokenType.IF_TIMERANGE: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, needsToInvert=True, shortVersion=TokenType.IF_TIMERANGE_S, longVersion=TokenType.IF_TIMERANGE_L),
    TokenType.IF_MISC: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, needsToInvert=True, shortVersion=TokenType.IF_MISC_S),
    TokenType.IF_SCENE: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, shortVersion=TokenType.IF_SCENE_S, longVersion=TokenType.IF_SCENE_L),
    TokenType.IF_DAY: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, shortVersion=TokenType.IF_DAY_S, longVersion=TokenType.IF_DAY_L),
    TokenType.IF_BEFORETIME: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, needsToInvert=True, shortVersion=TokenType.IF_BEFORETIME_S, longVersion=TokenType.IF_BEFORETIME_L),
    TokenType.IF_SINCETIME: TokenProperties(isConditionalBranch=True, hasArguments=True, isGeneric=True, shortVersion=TokenType.IF_SINCETIME_S, longVersion=TokenType.IF_SINCETIME_L),
    TokenType.BRANCH: TokenProperties(isUnconditionalBranch=True, hasArguments=True, isGeneric=True, shortVersion=TokenType.BRANCH_S, longVersion=TokenType.BRANCH_L),
    # Extra tokens
    TokenType.ELSE: TokenProperties(isExtraToken=True),
    TokenType.BRACE_OPEN: TokenProperties(isExtraToken=True),
    TokenType.BRACE_CLOSE: TokenProperties(isExtraToken=True),
    TokenType.ARGS: TokenProperties(isExtraToken=True),
    TokenType.NOT: TokenProperties(isExtraToken=True),
    TokenType.LABEL: TokenProperties(isExtraToken=True),
    TokenType.IDENTIFIER: TokenProperties(isExtraToken=True),
}
# Reverse map: source literal -> token type, built from the enum values
tokenLiterals: dict[str, TokenType] = {x.value: x for x in TokenType}
# A label is a word followed by a colon (whitespace allowed before the colon)
regex_label = re.compile(r"(?P<label>\w+)\s*:")
# Any bare word: a command keyword, a script name, or an unknown identifier
regex_identifier = re.compile(r"(?P<identifier>\w+)")
# Single-character tokens: the braces
regex_individualTokens = re.compile(r"(?P<individual>[\{\}])")
@dataclasses.dataclass
class Token:
    # Kind of token, plus the literal text it was read from. Some token types
    # (LABEL, ARGS, IDENTIFIER) carry a variable literal instead of a fixed one.
    tokenType: TokenType
    tokenLiteral: str
    # Position in the original file, kept for error reporting
    filename: str
    lineNumber: int
    columnNumber: int

    def getProperties(self) -> TokenProperties:
        """Look up this token's static metadata in the properties table."""
        return tokenPropertiesDict[self.tokenType]

    def newFromTokenType(self, newType: TokenType) -> Token:
        """Clone this token as `newType`, taking the literal from the new type."""
        return Token(newType, newType.value, self.filename, self.lineNumber, self.columnNumber)

    def newFromTokenTypePreserveLiteral(self, newType: TokenType) -> Token:
        """Clone this token as `newType` while keeping the original literal."""
        return Token(newType, self.tokenLiteral, self.filename, self.lineNumber, self.columnNumber)

    def __str__(self) -> str:
        pieces = [f"{self.filename}:{self.lineNumber}:{self.columnNumber}\n"]
        pieces.append(f" {self.tokenType.name}")
        if self.tokenLiteral != self.tokenType.value:
            pieces.append(f" '{self.tokenLiteral}'")
        return "".join(pieces)
class TokenIterator:
    """Sequential cursor over a token list.

    get()/unget() allow one-token lookahead even across recursive parse calls.
    """

    def __init__(self, tokens: list[Token]):
        # Copy so external mutation of the argument can't affect iteration
        self.tokens = list(tokens)
        self.index = 0

    def get(self) -> Token|None:
        """Return the next token and advance, or None when exhausted."""
        if self.index < len(self.tokens):
            current = self.tokens[self.index]
            self.index += 1
            return current
        return None

    def unget(self) -> None:
        """Step back one token. Raises SchcError when already at the start."""
        if self.index <= 0:
            raise SchcError("bad programming, can't unget")
        self.index -= 1

    def remainingTokens(self) -> int:
        """Number of tokens not yet consumed."""
        return len(self.tokens) - self.index

    def reset(self) -> None:
        """Rewind the cursor to the first token."""
        self.index = 0
# Strips comments
def preprocess(contents: str, filename: str) -> str:
    """Blank out `//` line comments and `/* */` block comments from `contents`.

    Every comment character is replaced with a space and newlines are kept, so
    the output has exactly the same length and line/column layout as the input,
    keeping later error positions accurate. `filename` is used for diagnostics.
    """
    result: list[str] = []
    lineNumber = 1
    columnNumber = 1
    blockComment = False
    lineComment = False
    contentsLength = len(contents)
    i = 0
    while i < contentsLength:
        char = contents[i]
        if blockComment and lineComment:
            # Being in both comment states at once should be impossible.
            # Bug fix: the message previously hard-coded "(unknown)" instead of `filename`.
            eprint(f"Error: internal error. {filename}:{lineNumber}:{columnNumber}")
            debugPrint(f" if blockComment and lineComment:")
            debugPrint(f" internal index: {i}")
            sys.exit(1)
        if char == "\n":
            # Newlines always survive; they also end a line comment
            lineComment = False
            result.append(char)
            lineNumber += 1
            columnNumber = 1
            i += 1
        elif blockComment or lineComment:
            if blockComment and contents[i:i+2] == "*/":
                # end of block comment
                blockComment = False
                result.append(" " * 2)
                columnNumber += 2
                i += 2
                continue
            result.append(" ")
            columnNumber += 1
            i += 1
        elif contents[i:i+2] == "/*":
            # block comment
            endIndex = contents.find("*/", i+2)
            if endIndex == -1:
                # Bug fix: the message previously hard-coded "(unknown)" instead of `filename`.
                eprint(f"Error: Unterminated comment at {filename}:{lineNumber}:{columnNumber}")
                debugPrint(f" internal index: {i}")
                sys.exit(1)
            blockComment = True
            result.append(" " * 2)
            columnNumber += 2
            i += 2
        elif contents[i:i+2] == "//":
            # line comment
            lineComment = True
            result.append(" " * 2)
            columnNumber += 2
            i += 2
        else:
            result.append(char)
            columnNumber += 1
            i += 1
    return "".join(result)
# Takes a preprocessed input and converts them into a list of tokens
def tokenize(contents: str, filename: str) -> TokenIterator:
    """Convert preprocessed (comment-free) script text into a TokenIterator.

    Line/column numbers are tracked so every Token can point back at its
    origin for error reporting.
    """
    tokens: list[Token] = []
    lineNumber = 1
    columnNumber = 1
    contentsLength = len(contents)
    i = 0
    while i < contentsLength:
        char = contents[i]
        if char == "(":
            # Command arguments are handled in a special way,
            # all the arguments are grouped together as a single token
            lineNumberStart = lineNumber
            columnNumberStart = columnNumber
            parenCount = 0
            subIndex = i+1
            parenEndFound = False
            while subIndex < contentsLength:
                # We need to pair this parenthesis, allowing inner parenthesis
                subChar = contents[subIndex]
                columnNumber += 1
                if subChar == ")":
                    parenCount -= 1
                elif subChar == "(":
                    parenCount += 1
                elif subChar == "\n":
                    lineNumber += 1
                    columnNumber = 1
                if parenCount < 0:
                    # This ")" pairs with the opening "(" that started the scan
                    parenEndFound = True
                    break
                subIndex += 1
            if not parenEndFound:
                fatalError("Unterminated parenthesis", filename, lineNumber, columnNumber, i=i, char=char)
            # Everything between the parentheses (exclusive) becomes one ARGS token
            parenContents = contents[i+1:subIndex]
            tokens.append(Token(TokenType.ARGS, parenContents, filename, lineNumberStart, columnNumberStart))
            i = subIndex + 1
            columnNumber += 1
            continue
        if char == "\n":
            lineNumber += 1
            columnNumber = 1
        elif char.isspace():
            columnNumber += 1
        else:
            isIdentifier = False
            # Look for tokens
            if (reMatch := regex_label.match(contents, pos=i)) is not None:
                literal = reMatch["label"]
                tokenType = TokenType.LABEL
            elif (reMatch := regex_identifier.match(contents, pos=i)) is not None:
                literal = reMatch["identifier"]
                # May be a known keyword; None means a free identifier (checked below)
                tokenType = tokenLiterals.get(literal)
                isIdentifier = True
            elif (reMatch := regex_individualTokens.match(contents, pos=i)) is not None:
                literal = reMatch["individual"]
                tokenType = tokenLiterals.get(literal)
            else:
                fatalError(f"Unrecognized token found (starts with '{char}')", filename, lineNumber, columnNumber, i=i, char=char)
            if tokenType is None:
                if isIdentifier:
                    # Non recognized token, let say it is a generic identifier
                    tokenType = TokenType.IDENTIFIER
                else:
                    fatalError(f"Unrecognized token '{literal}' found", filename, lineNumber, columnNumber, i=i, char=char)
            tokens.append(Token(tokenType, literal, filename, lineNumber, columnNumber))
            # Calculate how long the found token is
            spanStart, spanEnd = reMatch.span()
            matchLen = spanEnd - spanStart
            columnNumber += matchLen
            i += matchLen
            continue
        i += 1
    return TokenIterator(tokens)
# Tree
@dataclasses.dataclass
class Expression:
    # The command/label token this node represents
    token: Token
    # The grouped "(...)" arguments token, when the command takes arguments
    args: Token|None = None
    # Body of the `if` check
    left: list[Expression] = dataclasses.field(default_factory=list)
    # Body of the `else`
    right: list[Expression] = dataclasses.field(default_factory=list)
    # True when this expression follows a `not` operator
    negated: bool = False

    def toStr(self, depth=0) -> str:
        """Render this subtree as script-like text, indented by `depth`."""
        indent = " " * depth
        pieces = [indent]
        if self.negated:
            pieces.append(f"not ")
        pieces.append(f"{self.token.tokenLiteral}")
        if self.token.tokenType == TokenType.LABEL:
            pieces.append(f":")
        if self.args is not None:
            pieces.append(f" ({self.args.tokenLiteral})")
        if not self.left:
            pieces.append(f"\n")
        else:
            pieces.append(f" {{\n")
            pieces.extend(child.toStr(depth+1) for child in self.left)
            if self.right:
                pieces.append(f"{indent}}} else {{\n")
                pieces.extend(child.toStr(depth+1) for child in self.right)
            pieces.append(f"{indent}}}\n")
        return "".join(pieces)

    def __str__(self) -> str:
        return self.toStr()
# Parses the tokens into a basic AST
def makeTree(tokens: TokenIterator, inputPath: str, *, depth: int=0) -> list[Expression]:
    """Recursively parse tokens into a list of Expressions (one nesting level).

    Returns when the matching closing brace is consumed (or tokens run out).
    `else` bodies are folded into the owning `if_` expression's `right` list
    instead of producing their own node. `i` is only used as debug context.
    """
    exprs: list[Expression] = []
    currentExpr: Expression|None = None
    foundElse = False
    i = 0
    while (token := tokens.get()) is not None:
        tokenProperties = token.getProperties()
        if token.tokenType == TokenType.ARGS:
            # Arguments attach to the most recent command, which must not have args yet
            if currentExpr is None or currentExpr.args is not None:
                fatalError(f"Invalid syntax, args following invalid token", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            currentExpr.args = token
        elif not tokenProperties.isExtraToken or token.tokenType == TokenType.NOT:
            # A schedule command (possibly preceded by `not`)
            if currentExpr is not None and currentExpr.token.getProperties().isAnyBranch:
                # A branch command must have received a body before the next command starts
                if len(currentExpr.left) == 0:
                    fatalError(f"invalid syntax", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            negate = False
            if token.tokenType == TokenType.NOT:
                negate = True
                # Get the token that is being negated
                tokenAux = tokens.get()
                if tokenAux is None:
                    fatalError(f"`not` operator followed by nothing", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
                # `not if_before_time` is just `if_since_time` (and vice versa); suggest the simpler form
                if tokenAux.tokenType in { TokenType.IF_BEFORETIME, TokenType.IF_BEFORETIME_S, TokenType.IF_BEFORETIME_L }:
                    warning(f"Negating a '{tokenAux.tokenLiteral}' command. Consider using a '{TokenType.IF_SINCETIME.value}' command instead", inputPath, token.lineNumber, token.columnNumber)
                if tokenAux.tokenType in { TokenType.IF_SINCETIME, TokenType.IF_SINCETIME_S, TokenType.IF_SINCETIME_L }:
                    warning(f"Negating a '{tokenAux.tokenLiteral}' command. Consider using a '{TokenType.IF_BEFORETIME.value}' command instead", inputPath, token.lineNumber, token.columnNumber)
                token = tokenAux
                tokenProperties = token.getProperties()
                # Only branching (if_/branch) commands can be negated
                if not tokenProperties.isAnyBranch:
                    fatalError(f"`not` operator followed by invalid `{token.tokenLiteral}` token", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            currentExpr = Expression(token)
            currentExpr.negated = negate
            foundElse = False
            exprs.append(currentExpr)
            if not tokenProperties.hasArguments:
                # Nothing else can attach to an argument-less command
                currentExpr = None
        elif token.tokenType == TokenType.ELSE:
            # `else` must follow a completed `if_` (with args) and can't repeat
            if currentExpr is None or currentExpr.args is None or foundElse:
                fatalError(f"Invalid syntax", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            foundElse = True
            # Peek next token
            nextToken = tokens.get()
            if nextToken is None:
                fatalError(f"Invalid syntax: missing expression after `{token.tokenLiteral}`", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            tokens.unget()
            if nextToken.tokenType != TokenType.BRACE_OPEN:
                # `else` with no braces, try to parse it
                if len(currentExpr.right) != 0:
                    fatalError(f"Invalid syntax", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
                # Instead of storing the `else` as its own token, just store it as part of the corresponding `if` check
                currentExpr.right = makeTree(tokens, inputPath, depth=depth+1)
                return exprs
            # A braced `else` body is handled by the BRACE_OPEN branch on the next iteration
        elif token.tokenType == TokenType.BRACE_OPEN:
            # The body of an `if` or an `else`
            if currentExpr is None or currentExpr.args is None:
                fatalError(f"Invalid syntax: Opening braces doesn't follow a valid expression", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            if foundElse:
                if len(currentExpr.right) != 0:
                    fatalError(f"Invalid syntax: Double body for an `else`", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
                # Instead of storing the `else` as its own token, just store it as part of the corresponding `if` check
                currentExpr.right = makeTree(tokens, inputPath, depth=depth+1)
            else:
                if len(currentExpr.left) != 0:
                    fatalError(f"Invalid syntax: Double body for an `if_`", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
                currentExpr.left = makeTree(tokens, inputPath, depth=depth+1)
        elif token.tokenType == TokenType.BRACE_CLOSE:
            # End of this nesting level
            if len(exprs) == 0:
                warning(f"Braces with empty body", inputPath, token.lineNumber, token.columnNumber)
            return exprs
        elif token.tokenType == TokenType.LABEL:
            # We need to check the token following a label is valid
            # NOTE(review): when the label is the very last token, get() returns None
            # without advancing and unget() steps back over the label itself; harmless
            # only because fatalError below aborts — confirm this ordering is intended.
            nextToken = tokens.get()
            tokens.unget()
            if nextToken is None:
                fatalError(f"Labels should be followed by another valid expression", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            if nextToken.tokenType == TokenType.LABEL:
                fatalError(f"Labels can't be followed by another label", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            if nextToken.getProperties().isExtraToken:
                fatalError(f"Found label followed by not admitted token `{nextToken.tokenLiteral}`", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
            # The label becomes its own expression node
            currentExpr = Expression(token)
            foundElse = False
            exprs.append(currentExpr)
        elif token.tokenType == TokenType.IDENTIFIER:
            fatalError(f"Invalid syntax, unknown identifier found inside script's body", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
        else:
            fatalError(f"This code should be unreachable.\n Tell me dear user, is this a vanilla bug on the compiler?\n Or are you trying to implement new features and you forgot to add a check somewhere?\n You can try to ping me, I probably won't remember how this whole thing worked,\n but we can have a laugh together.\n Remember to enable the debug prints, they may help you either way (pass `-d`).\n ", inputPath, token.lineNumber, token.columnNumber, i=i, depth=depth, token=token, currentExpr=currentExpr, foundElse=foundElse)
        i += 1
    return exprs
@dataclasses.dataclass
class ScriptFunction:
    # Script name token and its parsed body
    name: Token
    tree: list[Expression]

    def toStr(self) -> str:
        """Render the whole script back as `name { body }` text."""
        body = "".join(expr.toStr(1) for expr in self.tree)
        return f"{self.name.tokenLiteral} {{\n{body}}}"

    def __str__(self) -> str:
        return self.toStr()
# Parses the token list into a list of script functions
def makeScriptFunctions(tokens: TokenIterator, inputPath: str) -> list[ScriptFunction]:
    """Group the top-level token stream into named scripts (`name { body }`)."""
    funcs: list[ScriptFunction] = []
    pendingName: Token|None = None
    i = 0
    while (token := tokens.get()) is not None:
        if token.tokenType == TokenType.IDENTIFIER:
            # A bare identifier at the top level names the next script
            if pendingName is not None:
                fatalError(f"Invalid syntax, script name followed by script name", inputPath, token.lineNumber, token.columnNumber, i=i, token=token)
            pendingName = token
        elif token.tokenType == TokenType.BRACE_OPEN:
            if pendingName is None:
                fatalError(f"Invalid syntax, missing script name", inputPath, token.lineNumber, token.columnNumber, i=i, token=token)
            body = makeTree(tokens, inputPath)
            if not body:
                fatalError(f"Invalid syntax, no commands found inside script's body", inputPath, token.lineNumber, token.columnNumber, i=i, token=token)
            funcs.append(ScriptFunction(pendingName, body))
            pendingName = None
            # makeTree consumes the matching closing brace, so there's no need to consume it here
        else:
            fatalError(f"Invalid syntax, `{token.tokenLiteral}` found outside of script's body", inputPath, token.lineNumber, token.columnNumber, i=i, token=token)
        i += 1
    return funcs
def normalizeTreeImpl(tree: list[Expression], postLabel: Expression, depth: int, autoLabelName: str) -> tuple[list[Expression], bool]:
    """Recursive worker for normalizeTree().

    Fills empty `if_`/`else` bodies with an explicit branch to the expression
    that follows (labelled on demand), so no compiler-induced fallthrough is
    left implicit. `postLabel` is the label that represents "after this whole
    subtree". Returns the rewritten list and whether `postLabel` was targeted.
    """
    newTree: list[Expression] = []
    usedLabel = False
    currentPostLabel: Expression = postLabel
    i = 0
    while i < len(tree):
        expr = tree[i]
        newTree.append(expr)
        usedCurrentPostLabel = False
        shouldAddPostLabel = True
        if i + 1 < len(tree):
            if tree[i + 1].token.tokenType == TokenType.LABEL:
                # Re-use label if there's already one
                currentPostLabel = tree[i + 1]
                shouldAddPostLabel = False
            else:
                # dot (.) is used to ensure no name crashes with user-declared labels
                labelToken = expr.token.newFromTokenType(TokenType.LABEL)
                labelToken.tokenLiteral = f".{autoLabelName}.{depth}_{i}"
                currentPostLabel = Expression(labelToken)
        auxUsed = False
        if expr.token.getProperties().isConditionalBranch:
            if len(expr.left) == 0:
                # Empty `if` body: jump explicitly to whatever follows
                branchExpr = Expression(expr.token.newFromTokenType(TokenType.BRANCH), currentPostLabel.token)
                expr.left.append(branchExpr)
                if currentPostLabel == postLabel:
                    usedLabel = True
                else:
                    usedCurrentPostLabel = True
            else:
                expr.left, auxUsed = normalizeTreeImpl(expr.left, currentPostLabel, depth+1, f"{autoLabelName}_left")
                if currentPostLabel == postLabel:
                    usedLabel = usedLabel or auxUsed
            if len(expr.right) == 0:
                # Empty (or missing) `else` body: same explicit jump
                branchExpr = Expression(expr.token.newFromTokenType(TokenType.BRANCH), currentPostLabel.token)
                expr.right.append(branchExpr)
                if currentPostLabel == postLabel:
                    usedLabel = True
                else:
                    usedCurrentPostLabel = True
            else:
                # NOTE(review): this overwrites `auxUsed` from the left-body recursion;
                # if only the left body targeted currentPostLabel, the label might not
                # be appended below — confirm whether that case can occur in practice.
                expr.right, auxUsed = normalizeTreeImpl(expr.right, currentPostLabel, depth+1, f"{autoLabelName}_right")
                if currentPostLabel == postLabel:
                    usedLabel = usedLabel or auxUsed
        # Materialize the auto-generated label only if something branched to it
        if currentPostLabel != postLabel and (usedCurrentPostLabel or auxUsed) and shouldAddPostLabel:
            newTree.append(currentPostLabel)
        currentPostLabel = postLabel
        i += 1
    return newTree, usedLabel
# Searches for `if_` and `else`s with empty bodies and inserts in them an unconditional branch to avoid compiler-induced fallthroughs
def normalizeTree(tree: list[Expression]) -> list[Expression]:
    """Normalize a script tree; warn and append the end-label if anything branched past the script."""
    endLabel = Expression(Token(TokenType.LABEL, ".autolabel.placeholder", "", -1, -1))
    normalized, endLabelUsed = normalizeTreeImpl(tree, endLabel, 0, "_autolabel")
    if endLabelUsed:
        eprint("Warning: branching outside the script")
        normalized.append(endLabel)
    return normalized
# For linearizing a tree
@dataclasses.dataclass
class LabeledExpression:
    # Index relative to the parent list
    index: int
    token: Token
    args: Token|None
    # Label used to jump into this command
    labelName: str
    # True when this command may switch between `_s` and `_l` variants, i.e.
    # the original script used a suffix-less generic command. False when the
    # user explicitly wrote a `_s`/`_l` suffix.
    canChange: bool
    # Name of the label this command branches to; None when it isn't a branch
    branchTarget: str|None = None

    def toStr(self) -> str:
        """One-line debug rendering: index, label, command, args and branch target."""
        text = f"/* {self.index:03} */ {self.labelName:<24}: {self.token.tokenLiteral}"
        if self.canChange:
            text += "*"
        if self.args is not None:
            text += f" ({self.args.tokenLiteral})"
        if self.branchTarget is not None:
            text += f" -> {self.branchTarget}"
        return text

    def __str__(self) -> str:
        return self.toStr()
@dataclasses.dataclass
class LabeledScriptFunction:
    # Script name token and its linearized command list
    name: Token
    labeledList: list[LabeledExpression]

    def toStr(self) -> str:
        """Render the linearized script, one labeled command per line."""
        lines = [f"{self.name.tokenLiteral} {{"]
        lines.extend(expr.toStr() for expr in self.labeledList)
        lines.append("}")
        return "\n".join(lines)

    def __str__(self) -> str:
        return self.toStr()
# Takes a tree and linearizes it, preserving the control flow by using labels (user-defined or autogenerated)
def convertTreeIntoLabeledList(tree: list[Expression], index: int = 0) -> tuple[list[LabeledExpression], int]:
    """Flatten `tree` into a list of LabeledExpressions.

    `index` is the running expression counter across recursion; the updated
    value is returned so sibling calls keep indices unique.
    """
    result: list[LabeledExpression] = []
    # To track the current label
    labelName: str|None = None
    for expr in tree:
        token = expr.token
        if token.tokenType == TokenType.LABEL:
            # Keep the labelname but ignore the label itself
            labelName = token.tokenLiteral
            continue
        tokenProperties = token.getProperties()
        left = expr.left
        right = expr.right
        # Some commands need to invert their `if_`/`else` bodies to match the corresponding command
        if tokenProperties.needsToInvert:
            left, right = right, left
        # Invert the bodies if the expression was negated
        if expr.negated:
            left, right = right, left
        currentIndex = index
        index += 1
        # Linearize the left body
        subResults, index = convertTreeIntoLabeledList(left, index)
        # Expressions always jump into the right body if their check evaluates to True
        targetLabel = None
        if len(right) == 1 and right[0].token.getProperties().isUnconditionalBranch:
            # If an `if_` only has 1 expression and it is a branch then incorporate it as part of the `if_`,
            # avoiding redundant branches
            branchExpr = right[0]
            if branchExpr.args is None:
                fatalError(f"Branch command without arguments?", branchExpr.token.filename, branchExpr.token.lineNumber, branchExpr.token.columnNumber)
            targetLabel = branchExpr.args.tokenLiteral.strip()
        else:
            sub, index = convertTreeIntoLabeledList(right, index)
            subResults += sub
            if len(sub) != 0:
                # Get the label name of the first expression
                targetLabel = sub[0].labelName
        # If there's no user-defined label, then autogenerate one. Use dots to avoid name clashes with user-defined ones
        if labelName is None:
            labelName = f".index.{currentIndex}"
        # Process generics into non-generic short version for now.
        # A different pass will check if the short commands produced from generics will require a long version instead
        canChange = False
        if tokenProperties.isGeneric:
            canChange = True
            newTokenType = tokenProperties.shortVersion
            assert newTokenType is not None, token
            token = token.newFromTokenTypePreserveLiteral(newTokenType)
            tokenProperties = token.getProperties()
        # Take the target label of a `branch` command
        if tokenProperties.isUnconditionalBranch:
            if expr.args is None:
                fatalError(f"Branch command `{expr.token.tokenLiteral}` without arguments?", expr.token.filename, expr.token.lineNumber, expr.token.columnNumber)
            # NOTE(review): unlike the folded-branch case above, this literal is not
            # .strip()ed — surrounding whitespace in the args could break the later
            # string comparison against label names. Confirm whether that's intended.
            targetLabel = expr.args.tokenLiteral
        linearExpr = LabeledExpression(currentIndex, token, expr.args, labelName, canChange)
        # Reset the label, so we don't accidentally reuse it
        labelName = None
        if tokenProperties.isAnyBranch:
            linearExpr.branchTarget = targetLabel
        result += [linearExpr] + subResults
    return result, index
# Checks for every short command that was produced from a generic and calculate if the branch distance will
# fit on the short command, if it doesn't then change the command into a long version.
#
# The algorithm is basic and linear, so if an already-processed expression's branch distance will no longer fit
# because of latter processed expressions changed to long commands (which uses more bytes), then those won't
# be re-updated.
# To work-around this, this function returns a boolean on the second element of the returned tuple indicating
# if this pass modified any expression, so this function needs to be called again until that boolean is False
#
# Returns a 2-tuple with a list of the expressions and a boolean indicating if any expression was modified.
# Please note this function also modifies the parameter
def removeGenerics(labeledList: list[LabeledExpression]) -> tuple[list[LabeledExpression], bool]:
    modifiedAnything = False
    # Byte offset of the command currently being examined
    offset = 0
    for labeledExpr in labeledList:
        tokenProperties = labeledExpr.token.getProperties()
        assert tokenProperties.cmdLength is not None, labeledExpr
        # Branch offsets are relative to the *next* command
        nextOffset = offset + tokenProperties.cmdLength
        if labeledExpr.canChange:
            if tokenProperties.isAnyBranch:
                if tokenProperties.isShort:
                    # There's no point on trying to change a long branch
                    # find the target expression
                    targetIndex = -1
                    targetExpression = None
                    # Byte offset of the branch target, accumulated below
                    subOffset = 0
                    for j, auxExpr in enumerate(labeledList):
                        if labeledExpr.branchTarget == auxExpr.labelName:
                            targetIndex = j
                            targetExpression = auxExpr
                            break
                        targetProperties = auxExpr.token.getProperties()
                        assert targetProperties.cmdLength is not None, auxExpr
                        subOffset += targetProperties.cmdLength
                    if targetIndex < 0 or targetExpression is None:
                        fatalError(f"Not able to find target '{labeledExpr.branchTarget}' for expression '{labeledExpr.token.tokenLiteral}'", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
                    diff = subOffset - nextOffset
                    # Widen to the long variant when the relative jump doesn't fit in a signed byte
                    if diff >= 0x7F or diff <= -0x80:
                        longTokenType = tokenProperties.longVersion
                        if longTokenType is None:
                            fatalError(f"Command '{labeledExpr.token.tokenLiteral}' will require a branch way too big for a short branch, but there's no long equivalent", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
                        labeledExpr.token = labeledExpr.token.newFromTokenTypePreserveLiteral(longTokenType)
                        tokenProperties = labeledExpr.token.getProperties()
                        modifiedAnything = True
        assert tokenProperties.cmdLength is not None, labeledExpr
        offset += tokenProperties.cmdLength
    return labeledList, modifiedAnything
def getTargetOffset(labeledExpr: LabeledExpression, labeledList: list[LabeledExpression], offsetList: list[int]) -> int:
if labeledExpr.branchTarget is None:
fatalError(f"Command '{labeledExpr.token.tokenLiteral}' requested a target offset but it doesn't have a label", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
for i, targetExpr in enumerate(labeledList):
if labeledExpr.branchTarget == targetExpr.labelName:
return offsetList[i]
fatalError(f"Command '{labeledExpr.token.tokenLiteral}' requested label '{labeledExpr.branchTarget}', but it was not found", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
# Generate a string containing all the macros based on the labeled expression
def emitLabeledListMacros(labeledList: list[LabeledExpression], debuggingLevel: int) -> list[str]:
result: list[str] = []
# Precompute the offsets of each expression
offsetList: list[int] = []
offset = 0
for labeledExpr in labeledList:
offsetList.append(offset)
cmdLength = labeledExpr.token.getProperties().cmdLength
assert cmdLength is not None, labeledExpr
offset += cmdLength
# To avoid reading outside of the list
offsetList.append(offset)
# Use the same amount of pads for every entry
offsetWidth = len(f"{offsetList[-2]:X}")
for i, labeledExpr in enumerate(labeledList):
currentOffset = offsetList[i]
nextOffset = offsetList[i+1]
tokenProperties = labeledExpr.token.getProperties()
assert tokenProperties.macro is not None, labeledExpr
currentMacro = f" /* 0x{currentOffset:0{offsetWidth}X} */ {tokenProperties.macro}("
if tokenProperties.isUnconditionalBranch:
targetOffset = getTargetOffset(labeledExpr, labeledList, offsetList)
diff = targetOffset - nextOffset
if tokenProperties.isShort:
if diff not in range(-0x80, 0x7F):
fatalError(f"Trying to use a short command, but the branch distance is too big to fit on a single byte ({diff})", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
else:
if diff not in range(-0x8000, 0x7FFF):
fatalError(f"Trying to use a long command, but the branch distance is too big to fit on a single byte ({diff})", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
currentMacro += f"0x{targetOffset:0{offsetWidth}X} - 0x{nextOffset:0{offsetWidth}X}"
else:
if labeledExpr.args is not None:
currentMacro += f"{labeledExpr.args.tokenLiteral}"
if tokenProperties.isConditionalBranch:
targetOffset = getTargetOffset(labeledExpr, labeledList, offsetList)
diff = targetOffset - nextOffset
if tokenProperties.isShort:
if diff not in range(-0x80, 0x7F):
fatalError(f"Trying to use a short command, but the branch distance is too big to fit on a single byte ({diff})", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
else:
if diff not in range(-0x8000, 0x7FFF):
fatalError(f"Trying to use a long command, but the branch distance is too big to fit on a single byte ({diff})", labeledExpr.token.filename, labeledExpr.token.lineNumber, labeledExpr.token.columnNumber)
currentMacro += f", 0x{targetOffset:0{offsetWidth}X} - 0x{nextOffset:0{offsetWidth}X}"
currentMacro += "),"
if debuggingLevel >= 1:
currentMacro += f" /* {labeledExpr.token.filename}:{labeledExpr.token.lineNumber}:{labeledExpr.token.columnNumber} */"
result.append(currentMacro)
return result
def main():
    """
    Command-line entry point.

    Parses the arguments, then runs the compilation pipeline
    (preprocess -> tokenize -> parse -> normalize -> linearize -> widen
    generics -> emit macros) and writes the generated C source either to
    stdout or to the requested output path.
    """
    argParser = argparse.ArgumentParser(description="Compiler for the high level schedule language", prog=__prog_name__)

    argParser.add_argument("input", help="schl (schedule language) file path", type=Path)
    argParser.add_argument("-o", "--output", help="Output path. Will print to stdout if omitted", type=Path)
    argParser.add_argument("-g", type=int, nargs="?", const=1, default=0, dest="debuggingLevel", metavar="level", help="Emit debugging information on the generated macros. Level 0 means no debugging information. -g is like -g1. Default is -g0")
    argParser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")

    debugGroup = argParser.add_argument_group("Compiler debugging options")
    debugGroup.add_argument("-d", "--debug-prints", help="Enables debug prints for fatal errors", action="store_true")
    debugGroup.add_argument("--print-tokens", help="Prints the processed tokens to stdout", action="store_true")
    debugGroup.add_argument("--print-raw-tree", help="Prints the raw tree to stdout", action="store_true")
    debugGroup.add_argument("--print-tree", help="Prints the processed and normalized tree to stdout", action="store_true")
    debugGroup.add_argument("--print-labeleds", help="Prints the linearized labeled expressions to stdout", action="store_true")
    debugGroup.add_argument("--print-labeleds-post", help="Prints the linearized labeled expressions after processing the generics to stdout", action="store_true")

    args = argParser.parse_args()

    inputPath: Path = args.input
    outputPath: Path|None = args.output
    debuggingLevel: int = args.debuggingLevel

    global DEBUG
    DEBUG = args.debug_prints

    def dumpStage(enabled: bool, title: str, items) -> None:
        # Debug helper: print a labeled dump of an intermediate compiler stage
        if not enabled:
            return
        print(title)
        for item in items:
            print(item)
        print()

    if not inputPath.exists():
        eprint(f"Error: Input file '{inputPath}' not found")
        exit(1)

    inputContents = inputPath.read_text("UTF-8")

    preprocessed = preprocess(inputContents, str(inputPath))
    tokens = tokenize(preprocessed, str(inputPath))
    dumpStage(args.print_tokens, "print tokens:", tokens.tokens)

    scriptFuncs = makeScriptFunctions(tokens, str(inputPath))
    dumpStage(args.print_raw_tree, "print raw tree:", scriptFuncs)

    # The parser should have consumed every token
    assert tokens.remainingTokens() == 0, tokens.remainingTokens()

    for func in scriptFuncs:
        func.tree = normalizeTree(func.tree)
    dumpStage(args.print_tree, "print tree:", scriptFuncs)

    # Flatten each function's tree into a linear list of labeled expressions
    labeledFuncs: list[LabeledScriptFunction] = []
    for func in scriptFuncs:
        linearized, _ = convertTreeIntoLabeledList(func.tree)
        labeledFuncs.append(LabeledScriptFunction(func.name, linearized))
    dumpStage(args.print_labeleds, "print labeleds:", labeledFuncs)

    # Widen generic short branches until a fixed point is reached
    for labeledFunc in labeledFuncs:
        keepGoing = True
        while keepGoing:
            labeledFunc.labeledList, keepGoing = removeGenerics(labeledFunc.labeledList)
    dumpStage(args.print_labeleds_post, "print labeleds post:", labeledFuncs)

    outputLines: list[str] = [f"/* Generated by {__prog_name__} version {__version__} */", ""]
    for labeledFunc in labeledFuncs:
        outputLines.append(f"static ScheduleScript {labeledFunc.name.tokenLiteral}[] = {{")
        outputLines.extend(emitLabeledListMacros(labeledFunc.labeledList, debuggingLevel))
        outputLines.append("};")
        outputLines.append("")

    if outputPath is None:
        print("\n".join(outputLines))
    else:
        outputPath.parent.mkdir(parents=True, exist_ok=True)
        outputPath.write_text("\n".join(outputLines))
# Script entry point: only run the compiler when executed directly, not on import
if __name__ == "__main__":
    main()