#!/usr/bin/env python3
# usage: ./import.py path/to/file.c path/to/asm.s [make flags]
"""Import a function (C source + target assembly) for use with the permuter.

The script locates the compile command for the C file by dry-running 'make',
preprocesses the C file, assembles the target assembly, and writes everything
into a fresh directory below 'nonmatchings/'.
"""
import sys
import os
import re
import subprocess
import shutil
import argparse
import shlex
import toml
from typing import Callable, Dict, List, Match, Mapping, Optional, Pattern, Set, Tuple
from collections import defaultdict

from strip_other_fns import strip_other_fns_and_write

# Raw string: '\label' is assembler macro-argument syntax and must reach the
# assembler with its backslash; in a normal string '\l' is an invalid escape.
ASM_PRELUDE: str = r"""
.set noat
.set noreorder
.set gp=64
.macro glabel label
    .global \label
    .type \label, @function
    \label:
.endm
"""

DEFAULT_AS_CMDLINE: List[str] = ["mips-linux-gnu-as", "-march=vr4300", "-mabi=32"]

CPP: List[str] = ["cpp", "-P", "-undef"]

STUB_FN_MACROS: List[str] = [
    "-D_Static_assert(x, y)=",
    "-D__attribute__(x)=",
    "-DGLOBAL_ASM(...)=",
]

SETTINGS_FILES = ["permuter_settings.toml", "tools/permuter_settings.toml"]


def formatcmd(cmdline: List[str]) -> str:
    """Return the command line as one string with every argument shell-quoted."""
    return " ".join(shlex.quote(arg) for arg in cmdline)


def parse_asm(asm_file: str) -> Tuple[str, str]:
    """Read an assembly file and return (function name, .text contents).

    Only lines that belong to the '.text' section are kept. The function name
    is taken from the first 'glabel <name>' line inside that section. Exits
    the process with status 1 if the file cannot be read, if no 'glabel' line
    is found, or if the name contains characters outside [a-zA-Z0-9_$].
    """
    func_name: Optional[str] = None
    asm_lines: List[str] = []
    try:
        with open(asm_file, encoding="utf-8") as f:
            cur_section = ".text"
            for line in f:
                stripped = line.strip()
                if stripped.startswith(".section"):
                    cur_section = line.split()[1]
                elif stripped in [
                    ".text",
                    ".rdata",
                    ".rodata",
                    ".late_rodata",
                    ".bss",
                    ".data",
                ]:
                    cur_section = stripped
                if cur_section == ".text":
                    if func_name is None and stripped.startswith("glabel "):
                        func_name = line.split()[1]
                    asm_lines.append(line)
    except OSError as e:
        print("Could not open assembly file:", e, file=sys.stderr)
        sys.exit(1)

    if func_name is None:
        print(
            "Missing function name in assembly file! "
            "The file should start with 'glabel function_name'.",
            file=sys.stderr,
        )
        sys.exit(1)

    if not re.fullmatch(r"[a-zA-Z0-9_$]+", func_name):
        print(f"Bad function name: {func_name}", file=sys.stderr)
        sys.exit(1)

    return func_name, "".join(asm_lines)


def create_directory(func_name: str) -> str:
    """Create and return a new, previously non-existent output directory.

    The first candidate is 'nonmatchings/<func_name>'; if it already exists,
    'nonmatchings/<func_name>-2', '-3', ... are tried in turn.
    """
    os.makedirs("nonmatchings/", exist_ok=True)
    ctr = 0
    while True:
        ctr += 1
        dirname = f"{func_name}-{ctr}" if ctr > 1 else func_name
        dirname = f"nonmatchings/{dirname}"
        try:
            # mkdir (not makedirs) so that an existing directory is detected.
            os.mkdir(dirname)
            return dirname
        except FileExistsError:
            pass


def find_makefile_dir(filename: str) -> str:
    """Return the nearest ancestor directory of 'filename' holding a makefile.

    The search starts in the directory containing 'filename' and walks
    upwards until the path stops getting shorter (filesystem root). Exits the
    process with status 1 if neither 'makefile' nor 'Makefile' is found.
    """
    old_dirname = None
    dirname = os.path.abspath(os.path.dirname(filename))
    while dirname and (not old_dirname or len(dirname) < len(old_dirname)):
        for fname in ["makefile", "Makefile"]:
            if os.path.isfile(os.path.join(dirname, fname)):
                return dirname
        old_dirname = dirname
        dirname = os.path.dirname(dirname)
    print(f"Missing makefile for file {filename}!", file=sys.stderr)
    sys.exit(1)


def fixup_build_command(
    parts: List[str], ignore_part: str
) -> Tuple[List[str], Optional[List[str]]]:
    """Turn a compile command from make's output into a reusable command.

    Drops '-MF <arg>' and '-o <arg>' pairs and every occurrence of
    'ignore_part' (the input file). If an argument mentions asm_processor /
    asm-processor / preprocess.py and is followed by two '--' separators, the
    arguments between the separators are returned as the assembler command and
    the wrapper itself is stripped from the compile command.

    Returns (compile command, assembler command or None).
    """
    res: List[str] = []
    skip_count = 0
    assembler: Optional[List[str]] = None
    for part in parts:
        if skip_count > 0:
            skip_count -= 1
            continue
        if part in ["-MF", "-o"]:
            skip_count = 1
            continue
        if part == ignore_part:
            continue
        res.append(part)

    try:
        # min() over an empty generator and list.index() both raise
        # ValueError, which means "no wrapper present": keep 'res' unchanged.
        ind0 = min(
            i
            for i, arg in enumerate(res)
            if any(
                cmd in arg
                for cmd in ["asm_processor", "asm-processor", "preprocess.py"]
            )
        )
        ind1 = res.index("--", ind0 + 1)
        ind2 = res.index("--", ind1 + 1)
        assembler = res[ind1 + 1 : ind2]
        res = res[ind0 + 1 : ind1] + res[ind2 + 1 :]
    except ValueError:
        pass

    return res, assembler


def find_build_command_line(
    c_file: str, make_flags: List[str]
) -> Tuple[List[str], List[str], str]:
    """Find the compile command that 'make' would use for 'c_file'.

    Runs 'make --always-make --dry-run --debug=j' in the makefile's directory
    and looks for a line that has the file (relative to that directory) as a
    separate argument, contains '-o' and lacks '-fsyntax-only'.

    Returns (compiler command, assembler command, makefile directory). The
    assembler defaults to DEFAULT_AS_CMDLINE unless the build line supplies
    one. Exits the process with status 1 if zero or several commands match.
    """
    # find_makefile_dir() takes a *file* path and strips its last component
    # itself; handing it the directory would skip the C file's own directory.
    makefile_dir = find_makefile_dir(c_file)
    rel_c_file = os.path.relpath(c_file, makefile_dir)
    make_cmd = ["make", "--always-make", "--dry-run", "--debug=j"] + make_flags
    debug_output = (
        subprocess.check_output(make_cmd, cwd=makefile_dir).decode("utf-8").split("\n")
    )
    output: List[List[str]] = []
    close_match = False

    assembler = DEFAULT_AS_CMDLINE
    for line in debug_output:
        # Normalize path spellings so they can match rel_c_file textually.
        while "//" in line:
            line = line.replace("//", "/")
        while "/./" in line:
            line = line.replace("/./", "/")
        if rel_c_file not in line:
            continue
        close_match = True
        parts = shlex.split(line)
        if rel_c_file not in parts:
            continue
        if "-o" not in parts:
            continue
        if "-fsyntax-only" in parts:
            continue
        cmdline, asmproc_assembler = fixup_build_command(parts, rel_c_file)
        if asmproc_assembler:
            assembler = asmproc_assembler
        output.append(cmdline)

    if not output:
        close_extra = (
            "\n(Found one possible candidate, but didn't match due to "
            "either spaces in paths, having -fsyntax-only, or missing an -o flag.)"
            if close_match
            else ""
        )
        print(
            "Failed to find compile command from makefile output. "
            f"Please ensure 'make -Bn --debug=j {formatcmd(make_flags)}' "
            f"contains a line with the string '{rel_c_file}'.{close_extra}",
            file=sys.stderr,
        )
        sys.exit(1)

    if len(output) > 1:
        output_lines = "\n".join(map(formatcmd, output))
        print(
            f"Error: found multiple compile commands for {rel_c_file}:\n{output_lines}\n"
            "Please modify the makefile such that if PERMUTER = 1, "
            "only a single compile command is included.",
            file=sys.stderr,
        )
        sys.exit(1)

    return output[0], assembler, makefile_dir


# (regex selecting the macros to preserve, function mapping macro name -> C type)
PreserveMacros = Tuple[Pattern[str], Callable[[str], str]]


def build_preserve_macros(
    cwd: str, preserve_regex: Optional[str]
) -> Optional[PreserveMacros]:
    """Build the macro-preservation settings.

    The 'preserve_macros' table is read from the first of SETTINGS_FILES that
    exists below 'cwd'; its keys are regexes and its values are type names.

    'preserve_regex' overrides which macros are preserved: an empty string
    disables preservation, None means "use the keys of the settings table".
    Returns None when nothing is to be preserved; otherwise a pair of the
    compiled (fully anchored) regex and a function returning the type for a
    macro name ("int" when no table entry matches).
    """
    data: Mapping[str, object] = {}
    for filename in SETTINGS_FILES:
        filename = os.path.join(cwd, filename)
        if os.path.exists(filename):
            # TOML files are UTF-8; do not depend on the locale's encoding.
            with open(filename, encoding="utf-8") as f:
                data = toml.load(f)
            break

    subdata = data.get("preserve_macros", {})
    assert isinstance(subdata, dict)
    regexes = []
    for regex, value in subdata.items():
        assert isinstance(value, str)
        regexes.append((re.compile(f"^(?:{regex})$"), value))

    if preserve_regex == "" or (preserve_regex is None and not regexes):
        return None

    if preserve_regex is None:
        global_regex_text = "(?:" + ")|(?:".join(subdata.keys()) + ")"
    else:
        global_regex_text = preserve_regex
    global_regex = re.compile(f"^(?:{global_regex_text})$")

    def type_fn(macro: str) -> str:
        for regex, value in regexes:
            if regex.match(macro):
                return value
        return "int"

    return global_regex, type_fn


def preprocess_c_with_macros(
    cpp_command: List[str], cwd: str, preserve_macros: PreserveMacros
) -> Tuple[str, List[str]]:
    """Import C file, preserving function macros. Subroutine of import_c_file.

    Returns (preprocessed source, names of preserved macros that are used by
    non-macro code). Raises subprocess.CalledProcessError if 'cpp' fails.
    """

    preserve_regex, preserve_type_fn = preserve_macros

    # Start by running 'cpp' in a mode that just processes ifdefs and includes.
    source = subprocess.check_output(
        cpp_command + ["-dD", "-fdirectives-only"], cwd=cwd, encoding="utf-8"
    )

    # Modify function macros that match preserved names so the preprocessor
    # doesn't touch them, and at the same time normalize their syntax. Some
    # of these instances may be in comments, but that's fine.
    def repl(match: Match[str]) -> str:
        name = match.group(1)
        after = "(" if match.group(2) == "(" else " "
        if preserve_regex.match(name):
            return f"_permuter define {name}{after}"
        else:
            return f"#define {name}{after}"

    source = re.sub(
        r"^\s*#\s*define\s+([a-zA-Z0-9_]+)([ \t\(]|$)",
        repl,
        source,
        flags=re.MULTILINE,
    )

    # Get rid of auto-inserted macros which the second cpp invocation will
    # warn about.
    source = re.sub(r"^#define __STDC_.*\n", "", source, flags=re.MULTILINE)

    # Now, run the preprocessor again for real.
    source = subprocess.check_output(
        CPP + STUB_FN_MACROS, cwd=cwd, encoding="utf-8", input=source
    )

    # Finally, find all function-like defines that we hid (some might have
    # been comments, so we couldn't do this before), and construct fake
    # function declarations for them in a specially demarcated section of
    # the file. When the compiler runs, this section will be replaced by
    # the real defines and the preprocessor invoked once more.
    late_defines: List[Tuple[str, str]] = []
    lines: List[str] = []
    # Maps a macro name ("" for non-macro code) to the tokens it mentions.
    # A trailing "*" marks a token that takes part in '##' token pasting.
    graph: Dict[str, Set[str]] = defaultdict(set)
    reg_token = re.compile(r"[a-zA-Z0-9_]+")
    for line in source.splitlines():
        is_macro = line.startswith("_permuter define ")
        params: List[str] = []
        if is_macro:
            # The macro name ends at the first "(" or the first space after
            # the prefix, whichever comes first (-1 means "not present").
            ind1 = line.find("(")
            ind2 = line.find(" ", len("_permuter define "))
            ind = min(ind1, ind2)
            if ind == -1:
                ind = len(line) if ind1 == ind2 == -1 else max(ind1, ind2)
            before = line[:ind]
            after = line[ind:]
            name = before.split()[2]
            late_defines.append((name, after))
            if after.startswith("("):
                params = [w.strip() for w in after[1 : after.find(")")].split(",")]
        else:
            lines.append(line)
            name = ""
        for m in reg_token.finditer(line):
            name2 = m.group(0)
            has_wildcard = False
            if is_macro and name2 not in params:
                wcbefore = line[: m.start()].rstrip().endswith("##")
                wcafter = line[m.end() :].lstrip().startswith("##")
                if wcbefore or wcafter:
                    graph[name].add(name2 + "*")
                    has_wildcard = True
            if not has_wildcard:
                graph[name].add(name2)

    # Prune away (recursively) unused macros, for cleanliness.
    used_anywhere: Set[str] = set()
    used_by_nonmacro = graph[""]
    queue = [""]
    while queue:
        name = queue.pop()
        if name not in used_anywhere:
            used_anywhere.add(name)
            if name.endswith("*"):
                wildcard = name[:-1]
                for name2 in graph:
                    if wildcard in name2:
                        queue.extend(graph[name2])
            else:
                queue.extend(graph[name])

    def get_decl(name: str, after: str) -> str:
        typ = preserve_type_fn(name)
        if after.startswith("("):
            return f"{typ} {name}();"
        else:
            return f"extern {typ} {name};"

    used_macros = [name for (name, after) in late_defines if name in used_by_nonmacro]

    return (
        "\n".join(
            ["#pragma _permuter latedefine start"]
            + [
                f"#pragma _permuter define {name}{after}"
                for (name, after) in late_defines
                if name in used_anywhere
            ]
            + [
                get_decl(name, after)
                for (name, after) in late_defines
                if name in used_by_nonmacro
            ]
            + ["#pragma _permuter latedefine end"]
            + lines
            + [""]
        ),
        used_macros,
    )


def import_c_file(
    compiler: List[str],
    cwd: str,
    in_file: str,
    preserve_macros: Optional[PreserveMacros],
) -> Tuple[str, List[str]]:
    """Preprocess 'in_file' and return (source, preserved macro names).

    The cpp command line inherits the -D/-U/-I options (attached or as a
    separate argument) and '-nostdinc' from the compiler command. Without
    'preserve_macros' a single plain cpp run is used and the macro list is
    empty. Exits the process with status 1 if preprocessing fails.
    """
    in_file = os.path.relpath(in_file, cwd)
    include_next = 0
    cpp_command = CPP + [in_file, "-D__sgi", "-D_LANGUAGE_C", "-DNON_MATCHING"]
    for arg in compiler:
        if include_next > 0:
            # Value belonging to a preceding bare -D/-U/-I.
            include_next -= 1
            cpp_command.append(arg)
            continue
        if arg in ["-D", "-U", "-I"]:
            cpp_command.append(arg)
            include_next = 1
            continue
        if arg.startswith(("-D", "-U", "-I")) or arg == "-nostdinc":
            cpp_command.append(arg)

    try:
        if preserve_macros is None:
            # Simple codepath, should work even if the more complex one breaks.
            return (
                subprocess.check_output(
                    cpp_command + STUB_FN_MACROS, cwd=cwd, encoding="utf-8"
                ),
                [],
            )

        return preprocess_c_with_macros(cpp_command, cwd, preserve_macros)

    except subprocess.CalledProcessError as e:
        print(
            "Failed to preprocess input file, when running command:\n"
            + formatcmd(e.cmd),
            file=sys.stderr,
        )
        sys.exit(1)


def finalize_compile_command(cmdline: List[str]) -> str:
    """Return the shell line that compiles "$INPUT" into "$OUTPUT".

    Arguments are shell-quoted, except a literal '|' which is kept as a pipe.
    "$INPUT" is inserted before the first '|' (or at the end if there is
    none) and '-o "$OUTPUT"' is appended at the very end.
    """
    quoted = [arg if arg == "|" else shlex.quote(arg) for arg in cmdline]
    # Appending a sentinel "|" makes index() fall back to "end of list".
    ind = (quoted + ["|"]).index("|")
    return " ".join(quoted[:ind] + ['"$INPUT"'] + quoted[ind:] + ["-o", '"$OUTPUT"'])


def write_compile_command(compiler: List[str], cwd: str, out_file: str) -> None:
    """Write an executable bash script to 'out_file' that runs the compiler.

    The script is invoked as '<script> <input> -o <output>': it resolves $1
    and $3 to absolute paths, changes into 'cwd' and runs the compile command.
    """
    with open(out_file, "w", encoding="utf-8") as f:
        f.write("#!/usr/bin/env bash\n")
        f.write('INPUT="$(readlink -f "$1")"\n')
        f.write('OUTPUT="$(readlink -f "$3")"\n')
        f.write(f"cd {shlex.quote(cwd)}\n")
        f.write(finalize_compile_command(compiler))
    os.chmod(out_file, 0o755)


def write_asm(asm_cont: str, out_file: str) -> None:
    """Write ASM_PRELUDE followed by 'asm_cont' to 'out_file'."""
    with open(out_file, "w", encoding="utf-8") as f:
        f.write(ASM_PRELUDE)
        f.write(asm_cont)


def compile_asm(assembler: List[str], cwd: str, in_file: str, out_file: str) -> None:
    """Assemble 'in_file' into 'out_file', running the assembler in 'cwd'.

    Both paths are made absolute first because the assembler runs in another
    directory. Exits the process with status 1 if the assembler fails.
    """
    in_file = os.path.abspath(in_file)
    out_file = os.path.abspath(out_file)
    cmdline = assembler + [in_file, "-o", out_file]
    try:
        subprocess.check_call(cmdline, cwd=cwd)
    except subprocess.CalledProcessError:
        print(
            f"Failed to assemble .s file, command line:\n{formatcmd(cmdline)}",
            file=sys.stderr,
        )
        sys.exit(1)


def compile_base(compile_script: str, in_file: str, out_file: str) -> None:
    """Compile 'in_file' into 'out_file' using the generated compile script.

    A compiler failure is not fatal: only a warning is printed, since the
    imported C file may need manual adjustment.
    """
    in_file = os.path.abspath(in_file)
    out_file = os.path.abspath(out_file)
    compile_cmd = [compile_script, in_file, "-o", out_file]
    try:
        subprocess.check_call(compile_cmd)
    except subprocess.CalledProcessError:
        print(
            "Warning: failed to compile .c file, you'll need to adjust it manually. "
            f"Command line:\n{formatcmd(compile_cmd)}"
        )


def write_to_file(cont: str, filename: str) -> None:
    """Write 'cont' to 'filename' as UTF-8, replacing any existing file."""
    with open(filename, "w", encoding="utf-8") as f:
        f.write(cont)


def try_strip_other_fns_and_write(
    source: str, func_name: str, base_c_file: str
) -> None:
    """Write 'source' to 'base_c_file' via strip_other_fns_and_write.

    If that helper raises, the traceback and a warning are printed and the
    unmodified source is written instead (best effort, never fatal).
    """
    try:
        strip_other_fns_and_write(source, func_name, base_c_file)
    except Exception:
        import traceback

        traceback.print_exc()
        print(
            f"Warning: failed to remove other functions. Edit {base_c_file} and remove them manually."
        )
        with open(base_c_file, "w", encoding="utf-8") as f:
            f.write(source)


def main() -> None:
    """Command-line entry point: parse arguments and import the function."""
    parser = argparse.ArgumentParser(
        description="Import a function for use with the permuter. "
        "Will create a new directory nonmatchings/<funcname>-<id>/."
    )
    parser.add_argument(
        "c_file",
        help="File containing the function. "
        "Assumes that the file can be built with 'make' to create an .o file.",
    )
    parser.add_argument(
        "asm_file",
        help="File containing assembly for the function. "
        "Must start with 'glabel <function_name>' and contain no other functions.",
    )
    parser.add_argument(
        "make_flags",
        nargs="*",
        help="Arguments to pass to 'make'. PERMUTER=1 will always be passed.",
    )
    parser.add_argument(
        "--keep", action="store_true", help="Keep the directory on error."
    )
    settings_files = ", ".join(SETTINGS_FILES[:-1]) + " or " + SETTINGS_FILES[-1]
    parser.add_argument(
        "--preserve-macros",
        metavar="REGEX",
        dest="preserve_macros_regex",
        help="Regex for which macros to preserve, or empty string for no macros. "
        f"By default, this is read from {settings_files} in the imported "
        "file's Makefile's directory. Type information is also read from this file.",
    )
    args = parser.parse_args()

    make_flags = args.make_flags + ["PERMUTER=1"]

    func_name, asm_cont = parse_asm(args.asm_file)
    print(f"Function name: {func_name}")

    compiler, assembler, cwd = find_build_command_line(args.c_file, make_flags)
    print(f"Compiler: {formatcmd(compiler)} {{input}} -o {{output}}")
    print(f"Assembler: {formatcmd(assembler)} {{input}} -o {{output}}")

    preserve_macros = build_preserve_macros(cwd, args.preserve_macros_regex)
    source, macros = import_c_file(compiler, cwd, args.c_file, preserve_macros)

    dirname = create_directory(func_name)
    base_c_file = f"{dirname}/base.c"
    base_o_file = f"{dirname}/base.o"
    target_s_file = f"{dirname}/target.s"
    target_o_file = f"{dirname}/target.o"
    compile_script = f"{dirname}/compile.sh"
    func_name_file = f"{dirname}/function.txt"

    try:
        # try_strip_other_fns_and_write(source, func_name, base_c_file)
        write_to_file(source, base_c_file)
        write_to_file(func_name, func_name_file)
        write_compile_command(compiler, cwd, compile_script)
        write_asm(asm_cont, target_s_file)
        compile_asm(assembler, cwd, target_s_file, target_o_file)
        compile_base(compile_script, base_c_file, base_o_file)
    except BaseException:
        # BaseException on purpose: the helpers report errors via sys.exit()
        # (SystemExit), which must trigger the cleanup as well. Always re-raised.
        if not args.keep:
            print(f"\nDeleting directory {dirname} (run with --keep to preserve it).")
            shutil.rmtree(dirname)
        raise

    if macros:
        macro_str = "macros: " + ", ".join(macros)
    else:
        macro_str = "no macros"
    print(f"Preserving {macro_str}. Use --preserve-macros='' to override.")

    print(f"\nDone. Imported into {dirname}")


if __name__ == "__main__":
    main()