mm/tools/decomp-permuter/src/randomizer.py

1941 lines
67 KiB
Python

import bisect
import copy
from dataclasses import dataclass, field
from random import Random
import typing
from typing import (
Any,
Callable,
Dict,
List,
Optional,
Sequence,
Set,
Tuple,
TypeVar,
Union,
)
from pycparser import c_ast as ca
from . import ast_util
from .ast_util import Block, Indices, Statement, Expression
from .ast_types import (
SimpleType,
Type,
TypeMap,
allowed_basic_type,
build_typemap,
decayed_expr_type,
get_decl_type,
resolve_typedefs,
same_type,
set_decl_name,
pointer_decay,
)
# Set to true to perform expression type detection eagerly. This can help when
# debugging crashes in the ast_types code.
DEBUG_EAGER_TYPES = False
# Randomize the type of introduced temporary variable with this probability
PROB_RANDOMIZE_TYPE = 0.3
# Reuse an existing var instead of introducing a new temporary one with this probability
PROB_REUSE_VAR = 0.5
# When wrapping statements in a new block, use a same-line `do { ... } while(0);`
# (as opposed to non-same-line `if (1) { ... }`) with this probability.
# This matches what macros often do.
PROB_INS_BLOCK_DOWHILE = 0.5
# Make a pointer to a temporary expression, rather than copy it by value, with
# this probability. (This always happens for expressions of struct type,
# regardless of this probability.)
PROB_TEMP_PTR = 0.05
# Instead of emitting an assignment statement, assign the temporary within the
# first expression it's used in with this probability.
PROB_TEMP_ASSIGN_AT_FIRST_USE = 0.1
# When creating a temporary for an expression, use the temporary for all equal
# expressions with this probability.
PROB_TEMP_REPLACE_ALL = 0.2
# When creating a temporary for an expression, use the temporary for an interval
# with maximal endpoint with this probability.
PROB_TEMP_REPLACE_MOST = 0.2
# When substituting a variable by its value, substitute all instances with this
# probability, rather than just a subrange or the complement of one.
PROB_EXPAND_REPLACE_ALL = 0.3
# When substituting a variable by its value, keep the variable assignment with
# this probability.
PROB_KEEP_REPLACED_VAR = 0.2
# Change the return type of an external function to void with this probability.
PROB_RET_VOID = 0.2
# Number larger than any node index. (If you're trying to compile a 1 GB large
# C file to matching asm, you have bigger problems than this limit.)
MAX_INDEX = 10 ** 9
# Generic type variable; random_weighted uses it to tie the type of the values
# it is given to the type of the value it returns.
T = TypeVar("T")
class RandomizationFailure(Exception):
    """Signals that the current randomization pass should be abandoned.

    Raised by `ensure` and by individual randomization passes when they find
    nothing suitable to change."""
def ensure(condition: Any) -> None:
    """Abort the randomization pass unless 'condition' holds.

    A falsy condition raises RandomizationFailure, so that another pass can
    be tried instead. Don't call this after making any modifications to the
    AST."""
    if condition:
        return
    raise RandomizationFailure
@dataclass
class Region:
    """A range of node indices that randomization can be restricted to.

    `start` and `end` are exclusive bounds. `indices` provides the start/end
    index of each node; it is None for the unbounded region and is left out
    of equality comparisons."""

    start: int
    end: int
    indices: Optional[Indices] = field(compare=False)

    @staticmethod
    def unbounded() -> "Region":
        """Create a region with no index tables, which contains everything."""
        return Region(-1, MAX_INDEX, None)

    def is_unbounded(self) -> bool:
        """Tell whether this region has no index tables attached."""
        return self.indices is None

    def contains_node(self, node: ca.Node) -> bool:
        """Check whether the region contains an entire node."""
        table = self.indices
        if table is None:
            return True
        if not self.start < table.starts[node]:
            return False
        return table.ends[node] < self.end

    def contains_pre(self, node: ca.Node) -> bool:
        """Check whether the region contains a point just before a given node."""
        table = self.indices
        if table is None:
            return True
        return self.contains_pre_index(table.starts[node])

    def contains_pre_index(self, index: int) -> bool:
        """Check whether the region contains a point just before a given node,
        as specified by its index."""
        if self.indices is None:
            return True
        return self.start < index and index < self.end
def reverse_start_indices(indices: Indices) -> Dict[int, ca.Node]:
    """Invert `indices.starts`, giving a map from start index to node.

    If several nodes share a start index, the one that comes last in
    `indices.starts` wins."""
    return {start: node for node, start in indices.starts.items()}
def get_randomization_region(
    top_node: ca.Node, indices: Indices, random: Random
) -> Region:
    """Choose the region that randomization should be restricted to.

    Every `_permuter randomizer start` / `_permuter randomizer end` pragma
    pair below top_node delimits one candidate region, and one of them is
    picked at random. Without any such pair the unbounded region is returned.

    Raises Exception for nested start pragmas, and AssertionError for
    unbalanced start/end pragmas."""
    regions: List[Region] = []
    open_start: Optional[int] = None

    class PragmaVisitor(ca.NodeVisitor):
        def visit_Pragma(self, node: ca.Pragma) -> None:
            nonlocal open_start
            text = node.string
            if text == "_permuter randomizer start":
                if open_start is not None:
                    raise Exception("nested PERM_RANDOMIZE not supported")
                open_start = indices.ends[node]
            elif text == "_permuter randomizer end":
                assert open_start is not None, "randomizer end without start"
                # The region spans from just after the start pragma to just
                # before the end pragma.
                regions.append(
                    Region(open_start + 1, indices.starts[node] - 1, indices)
                )
                open_start = None

    PragmaVisitor().visit(top_node)
    assert open_start is None, "randomizer start without end"
    return random.choice(regions) if regions else Region.unbounded()
def get_block_expressions(block: Block, region: Region) -> List[Expression]:
    """Collect every expression inside `block` that lies entirely within
    `region`, in the order replace_subexprs visits them."""
    found: List[Expression] = []

    def collect(expr: Expression) -> None:
        # Returning None tells replace_subexprs to leave the AST untouched.
        if not region.contains_node(expr):
            return
        found.append(expr)

    replace_subexprs(block, collect)
    return found
def compute_write_locations(
    top_node: ca.Node, indices: Indices
) -> Dict[str, List[int]]:
    """Map each variable name to the start indices of the nodes writing to it.

    Named declarations, increment/decrement operators applied to a plain
    identifier, and assignments whose target is a plain identifier count as
    writes. Each list is in increasing order."""
    writes: Dict[str, List[int]] = {}
    step_ops = ("p++", "p--", "++", "--")

    def add_write(var_name: str, loc: int) -> None:
        locs = writes.setdefault(var_name, [])
        # A list is appended to right after it is created, so it is only
        # empty here for the first write to a variable.
        if locs:
            assert (
                loc > locs[-1]
            ), "consistent traversal order should guarantee monotonicity here"
        locs.append(loc)

    class WriteVisitor(ca.NodeVisitor):
        def visit_Decl(self, node: ca.Decl) -> None:
            if node.name:
                add_write(node.name, indices.starts[node])
            self.generic_visit(node)

        def visit_UnaryOp(self, node: ca.UnaryOp) -> None:
            if node.op in step_ops and isinstance(node.expr, ca.ID):
                add_write(node.expr.name, indices.starts[node])
            self.generic_visit(node)

        def visit_Assignment(self, node: ca.Assignment) -> None:
            target = node.lvalue
            if isinstance(target, ca.ID):
                add_write(target.name, indices.starts[node])
            self.generic_visit(node)

    WriteVisitor().visit(top_node)
    return writes
def compute_read_locations(top_node: ca.Node, indices: Indices) -> Dict[str, List[int]]:
    """Map each variable name to the start indices of the ID nodes that
    find_var_reads reports for it. Each list is in increasing order."""
    reads: Dict[str, List[int]] = {}
    for id_node in find_var_reads(top_node):
        loc = indices.starts[id_node]
        locs = reads.setdefault(id_node.name, [])
        # A list is appended to right after it is created, so it is only
        # empty here for the first read of a variable.
        if locs:
            assert (
                loc > locs[-1]
            ), "consistent traversal order should guarantee monotonicity here"
        locs.append(loc)
    return reads
def find_var_reads(top_node: ca.Node) -> List[ca.ID]:
    """Collect the ID nodes below top_node that read a variable.

    The following identifiers are not considered reads:
    - the declared name and type of a declaration (only its initializer is
      searched),
    - a plain identifier whose address is taken (`&x`),
    - the field name of a struct reference (only the base is searched),
    - a plain identifier that is the target of an assignment.

    The nodes are returned in traversal order."""
    ret: List[ca.ID] = []

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, node: ca.Decl) -> None:
            if node.init:
                self.visit(node.init)

        def visit_ID(self, node: ca.ID) -> None:
            ret.append(node)

        def visit_UnaryOp(self, node: ca.UnaryOp) -> None:
            if node.op == "&" and isinstance(node.expr, ca.ID):
                return
            self.generic_visit(node)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            self.visit(node.name)

        def visit_Assignment(self, node: ca.Assignment) -> None:
            if isinstance(node.lvalue, ca.ID):
                # The target itself is written rather than read, but the
                # right-hand side still has to be searched: returning here
                # without visiting it would drop every read in `x = <expr>`.
                self.visit(node.rvalue)
            else:
                self.generic_visit(node)

    Visitor().visit(top_node)
    return ret
def visit_replace(top_node: ca.Node, callback: Callable[[ca.Node, bool], Any]) -> None:
    """Walk the AST below top_node, letting `callback` substitute nodes in place.

    For each visited node, `callback(node, is_expr)` is called. A truthy
    return value takes the node's place in its parent, and the children of
    the replaced node are not visited. `is_expr` is False for nodes visited
    in statement-like position (block items, loop/branch bodies and similar)
    or as lvalues (assignment targets and operands of `++`, `--` and `&`).

    Some children are never visited: the callee of a function call, the
    operand of `sizeof`, Typename entries of expression lists, and the
    children of declarator/type-like nodes. A replacement returned for
    top_node itself, or for the argument list of a call, is discarded.

    An EmptyStatement produced for the init or next clause of a `for` loop is
    turned into None."""
    lvalue_ops = ("p++", "p--", "++", "--", "&")

    def drop_empty_statement(node: Any) -> Any:
        return None if isinstance(node, ca.EmptyStatement) else node

    def walk(orig_node: ca.Node, toplevel: bool = False, *, lvalue: bool = False) -> Any:
        node: "ca.AnyNode" = typing.cast("ca.AnyNode", orig_node)
        replacement = callback(node, not (toplevel or lvalue))
        if replacement:
            return replacement

        if isinstance(node, ca.Assignment):
            node.lvalue = walk(node.lvalue, lvalue=True)
            node.rvalue = walk(node.rvalue)
        elif isinstance(node, ca.StructRef):
            # Only `a.b` keeps the base in lvalue position.
            node.name = walk(node.name, lvalue=(lvalue and node.type == "."))
        elif isinstance(node, ca.Cast):
            if node.expr:
                node.expr = walk(node.expr)
        elif isinstance(node, (ca.Constant, ca.ID)):
            pass
        elif isinstance(node, ca.UnaryOp):
            if node.op in lvalue_ops:
                node.expr = walk(node.expr, lvalue=True)
            elif node.op != "sizeof":
                node.expr = walk(node.expr)
        elif isinstance(node, ca.BinaryOp):
            node.left = walk(node.left)
            node.right = walk(node.right)
        elif isinstance(node, ca.FuncCall):
            # not worth replacing .name
            if node.args:
                walk(node.args, True)
        elif isinstance(node, ca.ExprList):
            for pos, sub in enumerate(node.exprs):
                if not isinstance(sub, ca.Typename):
                    node.exprs[pos] = walk(sub)
        elif isinstance(node, ca.ArrayRef):
            node.name = walk(node.name, lvalue=lvalue)
            node.subscript = walk(node.subscript)
        elif isinstance(node, ca.TernaryOp):
            node.cond = walk(node.cond)
            node.iftrue = walk(node.iftrue, True)
            node.iffalse = walk(node.iffalse, True)
        elif isinstance(node, ca.Return):
            if node.expr:
                node.expr = walk(node.expr)
        elif isinstance(node, ca.Decl):
            if node.init:
                node.init = walk(node.init, isinstance(node.init, ca.InitList))
        elif isinstance(node, ca.For):
            if node.init:
                node.init = drop_empty_statement(walk(node.init, True))
            if node.cond:
                node.cond = walk(node.cond)
            if node.next:
                node.next = drop_empty_statement(walk(node.next, True))
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.Compound):
            if node.block_items:
                for pos, sub in enumerate(node.block_items):
                    node.block_items[pos] = walk(sub, True)
        elif isinstance(node, (ca.Case, ca.Default)):
            if node.stmts:
                for pos, sub in enumerate(node.stmts):
                    node.stmts[pos] = walk(sub, True)
        elif isinstance(node, ca.While):
            node.cond = walk(node.cond)
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.DoWhile):
            node.stmt = walk(node.stmt, True)
            node.cond = walk(node.cond)
        elif isinstance(node, ca.Switch):
            node.cond = walk(node.cond)
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.Label):
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.If):
            node.cond = walk(node.cond)
            node.iftrue = walk(node.iftrue, True)
            if node.iffalse:
                node.iffalse = walk(node.iffalse, True)
        elif isinstance(
            node,
            (
                ca.TypeDecl,
                ca.PtrDecl,
                ca.ArrayDecl,
                ca.Typename,
                ca.IdentifierType,
                ca.Struct,
                ca.Union,
                ca.Enum,
                ca.EmptyStatement,
                ca.Pragma,
                ca.Break,
                ca.Continue,
                ca.Goto,
                ca.CompoundLiteral,
                ca.Typedef,
                ca.FuncDecl,
                ca.FuncDef,
                ca.EllipsisParam,
                ca.Enumerator,
                ca.EnumeratorList,
                ca.FileAST,
                ca.InitList,
                ca.NamedInitializer,
                ca.ParamList,
            ),
        ):
            # These nodes are kept as they are, children included.
            pass
        else:
            # Presumably lets a static type checker verify that every node
            # type is handled above; at runtime this is an ordinary assignment.
            _: None = node
            assert False, f"Node with unknown type: {node}"
        return node

    walk(top_node, True)
def replace_subexprs(top_node: ca.Node, callback: Callable[[Expression], Any]) -> None:
    """Run visit_replace over top_node, forwarding only the nodes it reports
    as expressions to `callback`. All other nodes are left in place."""

    def expressions_only(node: ca.Node, is_expr: bool) -> Any:
        return callback(typing.cast(Expression, node)) if is_expr else None

    visit_replace(top_node, expressions_only)
def replace_node(top_node: ca.Node, old: ca.Node, new: ca.Node) -> None:
    """Substitute `new` for the node `old` (matched by identity) wherever
    visit_replace encounters it below top_node."""

    def swap(node: ca.Node, _is_expr: bool) -> Optional[ca.Node]:
        if node is old:
            return new
        return None

    visit_replace(top_node, swap)
def random_bool(random: Random, prob: float) -> bool:
    """Return True with probability `prob`, drawing one number from `random`."""
    draw = random.random()
    return draw < prob
def random_weighted(random: Random, values: Sequence[Tuple[T, float]]) -> T:
    """Pick one value from (value, weight) pairs, with probability
    proportional to its weight.

    Weights must be non-negative and at least one must be positive; this is
    checked with assertions. Exactly one number is drawn from `random`."""
    total = 0.0
    for _, weight in values:
        assert weight >= 0, "Probabilities must be non-negative"
        total += weight
    assert total > 0, "Cannot pick randomly from empty set"

    target = random.uniform(0, total)
    running = 0.0
    for candidate, weight in values:
        running += weight
        if running > target:
            return candidate

    # Float imprecision
    for candidate, weight in values:
        if weight > 0:
            return candidate
    assert False, "unreachable"
def random_type(random: Random) -> SimpleType:
    """Build a random integer type.

    The type is `char`, `short`, `int`, `long` or `long long` (with `int`
    being the most likely), prefixed with `unsigned` half of the time and
    qualified as `volatile` half of the time."""
    type_names: List[str] = ["unsigned"] if random_bool(random, 0.5) else []
    base_names = random_weighted(
        random,
        [
            (["char"], 1),
            (["short"], 1),
            (["int"], 2),
            (["long"], 0.5),
            (["long", "long"], 0.5),
        ],
    )
    type_names.extend(base_names)
    idtype = ca.IdentifierType(names=type_names)
    quals = ["volatile"] if random_bool(random, 0.5) else []
    return ca.TypeDecl(declname=None, quals=quals, type=idtype)
def randomize_type(
    type: SimpleType, typemap: TypeMap, random: Random, *, ensure_changed: bool = False
) -> SimpleType:
    """Replace a basic integer type by a random one.

    A type that is not one of the allowed basic integer types is returned
    unchanged, unless `ensure_changed` is set, in which case
    RandomizationFailure is raised instead."""
    integer_names = ["int", "char", "long", "short", "signed", "unsigned"]
    if allowed_basic_type(type, typemap, integer_names):
        return random_type(random)
    if not ensure_changed:
        return type
    raise RandomizationFailure
def randomize_innermost_type(
    type: Type, typemap: TypeMap, random: Random, *, ensure_changed: bool = False
) -> Type:
    """Apply randomize_type to the innermost TypeDecl of a type.

    Every wrapper on the way down is shallow-copied, so the type passed in is
    not modified. `ensure_changed` is forwarded to randomize_type."""
    if not isinstance(type, ca.TypeDecl):
        wrapper = copy.copy(type)
        wrapper.type = randomize_innermost_type(
            type.type, typemap, random, ensure_changed=ensure_changed
        )
        return wrapper
    return randomize_type(type, typemap, random, ensure_changed=ensure_changed)
def get_insertion_points(
    fn: ca.FuncDef, region: Region, *, allow_within_decl: bool = False
) -> List[Tuple[Block, int, Optional[ca.Node]]]:
    """List the places within `region` where a statement could be inserted.

    Each entry is (block, index, statement at that index), where the
    statement is None for the position after the last statement of a block.
    Nested blocks are included. Positions directly before a declaration are
    left out unless `allow_within_decl` is set."""
    points: List[Tuple[Block, int, Optional[ca.Node]]] = []

    def walk(block: Block) -> None:
        stmts = ast_util.get_block_stmts(block, False)
        for pos, stmt in enumerate(stmts):
            if region.contains_pre(stmt):
                points.append((block, pos, stmt))
            ast_util.for_nested_blocks(stmt, walk)
        # The end of the block qualifies if its last statement (or, for an
        # empty block, the block itself) lies within the region.
        last_node: ca.Node = stmts[-1] if stmts else block
        if region.contains_node(last_node):
            points.append((block, len(stmts), None))

    walk(fn.body)
    if allow_within_decl:
        return points
    return [point for point in points if not isinstance(point[2], ca.Decl)]
def maybe_reuse_var(
    var: Optional[str],
    assign_before: ca.Node,
    orig_expr: Expression,
    type: SimpleType,
    reads: Dict[str, List[int]],
    writes: Dict[str, List[int]],
    indices: Indices,
    typemap: TypeMap,
    random: Random,
) -> Optional[str]:
    """Decide whether the existing variable `var` can hold a temporary.

    Returns `var` if it should be reused for a value assigned just before
    `assign_before` and consumed at `orig_expr`, and None otherwise. Reuse
    happens with probability PROB_REUSE_VAR, and only if the variable has a
    type similar to `type` and overwriting it clobbers no later read."""
    # The random draw comes first, so it happens even without a candidate.
    if not random_bool(random, PROB_REUSE_VAR) or var is None:
        return None
    var_type: SimpleType = decayed_expr_type(ca.ID(var), typemap)
    if not same_type(var_type, type, typemap, allow_similar=True):
        return None

    def first_at_or_after(locs: List[int], value: int) -> Optional[int]:
        pos = bisect.bisect_left(locs, value)
        return locs[pos] if pos < len(locs) else None

    assignment_ind = indices.starts[assign_before]
    expr_ind = indices.starts[orig_expr]
    next_write = first_at_or_after(writes.get(var, []), assignment_ind)
    next_read = first_at_or_after(reads.get(var, []), assignment_ind)
    # TODO: if write/read is within expr, search again from after it (since
    # we move expr, uses within it aren't relevant).
    if next_read is not None:
        # We don't want to overwrite a variable which we later read,
        # unless we write to it before that read
        if next_write is None or next_write >= next_read:
            return None
    if next_write is not None and next_write < expr_ind:
        # Our write will be overwritten before we manage to read from it.
        return None
    return var
def perm_temp_for_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Create a temporary variable for a random expression. The variable will
    be assigned at another random point (nearer the expression being more
    likely), possibly reuse an existing variable, possibly be of a different
    size/signedness, and possibly be used for other identical expressions as
    well. Only expressions within the given region may be chosen for
    replacement, but the assignment and the affected identical expressions may
    be outside of it.

    Raises RandomizationFailure (via ensure) if there is no candidate
    expression/place pair. The AST is modified in place."""
    # A place is (block, index within the block, statement at that index);
    # the assignment is inserted before that statement.
    Place = Tuple[Block, int, Statement]
    # Per-expression count of the candidate places seen so far, used to make
    # places further away from the expression less likely.
    einds: Dict[ca.Node, int] = {}
    writes: Dict[str, List[int]] = compute_write_locations(fn, indices)
    reads: Dict[str, List[int]] = compute_read_locations(fn, indices)
    typemap = build_typemap(ast)
    # Weighted list of ((place, expression, variable to maybe reuse), weight).
    candidates: List[Tuple[Tuple[Place, Expression, Optional[str]], float]] = []

    # Step 0: decide whether to make a pointer to the chosen expression, or to
    # copy it by value.
    should_make_ptr = random_bool(random, PROB_TEMP_PTR)

    def surrounding_writes(expr: Expression, base: Expression) -> Tuple[int, int]:
        """Compute the previous and next write to a variable included in expr,
        starting from base. If none, default to -1 or MAX_INDEX respectively.
        If base itself writes to an included variable (e.g. if it is an
        increment expression), the \"next\" write will be defined as the node
        itself, while the \"previous\" will continue searching to the left."""
        sub_reads = find_var_reads(expr)
        prev_write = -1
        next_write = MAX_INDEX
        base_index = indices.starts[base]
        for sub_read in sub_reads:
            var_name = sub_read.name
            if var_name not in writes:
                continue
            # Find the last write that is strictly before the start of base,
            # and the first write that is on or after it.
            wr = writes[var_name]
            ind = bisect.bisect_left(wr, base_index)
            if ind > 0:
                prev_write = max(prev_write, wr[ind - 1])
            if ind < len(wr):
                next_write = min(next_write, wr[ind])
        return prev_write, next_write

    # Step 1: assign probabilities to each place/expression
    def rec(block: Block, reuse_cands: List[str]) -> None:
        stmts = ast_util.get_block_stmts(block, False)
        # Copy, so that variables declared in this block are not visible to
        # sibling blocks.
        reuse_cands = reuse_cands[:]
        assignment_cands: List[Place] = []  # places to insert before
        past_decls = False
        for index, stmt in enumerate(stmts):
            if isinstance(stmt, ca.Decl):
                assert stmt.name, "Anonymous declarations cannot happen in functions"
                if not isinstance(stmt.type, ca.ArrayDecl):
                    reuse_cands.append(stmt.name)
                    if not isinstance(stmt.type, ca.PtrDecl):
                        # Make non-pointers more common
                        reuse_cands.append(stmt.name)
            elif not isinstance(stmt, ca.Pragma):
                past_decls = True
            # Only positions at or after the first statement that is neither a
            # declaration nor a pragma are assignment candidates.
            if past_decls:
                assignment_cands.append((block, index, stmt))

            ast_util.for_nested_blocks(stmt, lambda b: rec(b, reuse_cands))

            def visitor(expr: Expression) -> None:
                if DEBUG_EAGER_TYPES:
                    decayed_expr_type(expr, typemap)

                if not region.contains_node(expr):
                    return

                orig_expr = expr
                if should_make_ptr:
                    if not ast_util.is_lvalue(expr):
                        return
                    expr = ca.UnaryOp("&", expr)

                eind = einds.get(expr, 0)
                prev_write, _ = surrounding_writes(expr, orig_expr)

                # Walk the candidate places from the nearest one upwards.
                for place in assignment_cands[::-1]:
                    # If expr contains an ID which is written to within
                    # [place, expr), bail out; we're trying to move the
                    # assignment too high up.
                    # TODO: also fail on moving past function calls, or
                    # possibly-aliasing writes.
                    if indices.starts[place[2]] <= prev_write:
                        break

                    # Make far-away places less likely, and similarly for
                    # trivial expressions.
                    eind += 1
                    prob = 1 / eind
                    if isinstance(orig_expr, (ca.ID, ca.Constant)):
                        prob *= 0.15 if should_make_ptr else 0.5
                    reuse_cand = random.choice(reuse_cands) if reuse_cands else None
                    candidates.append(((place, expr, reuse_cand), prob))

                einds[expr] = eind

            # The visitor returns None throughout, so this only inspects the
            # expressions of stmt without replacing anything.
            replace_subexprs(stmt, visitor)

    rec(fn.body, [])

    # Step 2: decide on a place/expression
    ensure(candidates)
    place: Optional[Place]
    place, expr, reuse_cand = random_weighted(random, candidates)

    if random_bool(random, PROB_TEMP_ASSIGN_AT_FIRST_USE):
        # Don't emit a statement for the assignment, emit an assignment
        # expression at the first use instead.
        place = None

    type: SimpleType = decayed_expr_type(expr, typemap)

    # Always use pointers when replacing structs
    if (
        not should_make_ptr
        and isinstance(type, ca.TypeDecl)
        and isinstance(type.type, (ca.Struct, ca.Union))
        and ast_util.is_lvalue(expr)
    ):
        should_make_ptr = True
        expr = ca.UnaryOp("&", expr)
        type = decayed_expr_type(expr, typemap)

    # From here on, expr is what gets assigned to the variable (`&e` in the
    # pointer case), while orig_expr is the expression found in the AST.
    if should_make_ptr:
        assert isinstance(expr, ca.UnaryOp)
        assert not isinstance(expr.expr, ca.Typename)
        orig_expr = expr.expr
    else:
        orig_expr = expr
    # print("replacing:", to_c(expr))

    # Step 3: decide on a variable to hold the expression
    if place is not None:
        assign_before = place[2]
    else:
        assign_before = orig_expr
    reused_var = maybe_reuse_var(
        reuse_cand,
        assign_before,
        orig_expr,
        type,
        reads,
        writes,
        indices,
        typemap,
        random,
    )
    if reused_var is not None:
        reused = True
        var = reused_var
    else:
        reused = False
        # Pick the first of new_var, new_var2, new_var3, ... that has no
        # recorded write (which includes declarations) in the function.
        var = "new_var"
        counter = 1
        while var in writes:
            counter += 1
            var = f"new_var{counter}"

    # Step 4: possibly expand the replacement to include duplicate expressions.
    prev_write, next_write = surrounding_writes(expr, orig_expr)
    # Duplicates located before the assignment cannot use the variable.
    prev_write = max(prev_write, indices.starts[assign_before] - 1)
    replace_cands: List[Expression] = []

    def find_duplicates(e: Expression) -> None:
        if prev_write < indices.starts[e] <= next_write and ast_util.equal_ast(
            e, orig_expr
        ):
            replace_cands.append(e)

    if ast_util.is_effectful(expr):
        # Effectful expressions are only replaced at the chosen occurrence.
        replace_cands = [orig_expr]
    else:
        replace_subexprs(fn.body, find_duplicates)
    assert orig_expr in replace_cands
    # Choose a contiguous run [lo_index, hi_index) of duplicates that
    # includes the chosen expression.
    if random_bool(random, PROB_TEMP_REPLACE_ALL):
        lo_index = 0
        hi_index = len(replace_cands)
    else:
        index = replace_cands.index(orig_expr)
        lo_index = random.randint(0, index)
        hi_index = random.randint(index + 1, len(replace_cands))
        if random_bool(random, PROB_TEMP_REPLACE_MOST):
            if random_bool(random, 0.5):
                lo_index = 0
            else:
                hi_index = len(replace_cands)
    replace_cand_set = set(replace_cands[lo_index:hi_index])

    # Step 5: replace the chosen expression
    def replacer(e: Expression) -> Optional[Expression]:
        if e in replace_cand_set:
            ret: Expression = ca.ID(var)
            if place is None and e is orig_expr:
                # No separate assignment statement: assign at this use.
                ret = ca.Assignment("=", ret, expr)
            if should_make_ptr:
                ret = ca.UnaryOp("*", ret)
            return ret
        return None

    replace_subexprs(fn.body, replacer)

    # Step 6: insert the assignment and any new variable declaration
    if place is not None:
        block, index, _ = place
        assignment = ca.Assignment("=", ca.ID(var), expr)
        ast_util.insert_statement(block, index, assignment)
    if not reused:
        if random_bool(random, PROB_RANDOMIZE_TYPE):
            type = randomize_type(type, typemap, random)
        ast_util.insert_decl(fn, var, type, random)
def perm_expand_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Replace a random variable by its contents.

    A read of a variable within the region is picked at random, and it and/or
    other reads of the value from the same write are replaced by copies of
    the expression that was written. The write is possibly removed as well.

    Raises RandomizationFailure if there is no read within the region, if the
    chosen read has no preceding write, if that write is not a plain `=`
    assignment or an initialized declaration (without initializer list), or
    if the replacement would duplicate or keep an effectful expression."""
    all_writes: Dict[str, List[int]] = compute_write_locations(fn, indices)
    all_reads: Dict[str, List[int]] = compute_read_locations(fn, indices)

    # Step 1: pick out a variable to replace
    # Maps the index of each read within the region to the variable read.
    rev: Dict[int, str] = {}
    for var, locs in all_reads.items():
        for index in locs:
            if region.contains_pre_index(index):
                rev[index] = var
    ensure(rev)
    index = random.choice(list(rev.keys()))
    var = rev[index]

    # Step 2: find the assignment it uses
    reads = all_reads[var]
    writes = all_writes.get(var, [])
    i = bisect.bisect_left(writes, index)
    # if i == 0, there is no write to replace the read by.
    ensure(i > 0)
    # `before` is the write providing the value being read, and `after` the
    # next write to the variable (or MAX_INDEX if there is none).
    before = writes[i - 1]
    after = MAX_INDEX if i == len(writes) else writes[i]
    rev_indices = reverse_start_indices(indices)
    write = rev_indices[before]
    if (
        isinstance(write, ca.Decl)
        and write.init
        and not isinstance(write.init, ca.InitList)
    ):
        repl_expr = write.init
    elif isinstance(write, ca.Assignment) and write.op == "=":
        repl_expr = write.rvalue
    else:
        raise RandomizationFailure

    # Step 3: pick the range of reads to replace
    repl_cands = [
        i for i in reads if before < i < after and region.contains_pre_index(i)
    ]
    assert repl_cands, "index is always in repl_cands"
    myi = repl_cands.index(index)
    if not random_bool(random, PROB_EXPAND_REPLACE_ALL) and len(repl_cands) > 1:
        # Keep using the variable for a bit in the middle
        side = random.randrange(3)
        H = len(repl_cands)
        loi = 0 if side == 0 else random.randint(0, myi)
        hii = H if side == 1 else random.randint(myi + 1, H)
        if loi == 0 and hii == H:
            # Don't exclude every read; exclude only the chosen one.
            loi, hii = myi, myi + 1
        # The reads in [loi, hii), which include the chosen one, keep using
        # the variable; the remaining ones get replaced.
        repl_cands[loi:hii] = []
        keep_var = True
    else:
        keep_var = random_bool(random, PROB_KEEP_REPLACED_VAR)
    repl_cands_set = set(repl_cands)

    # Don't duplicate effectful expressions.
    if ast_util.is_effectful(repl_expr):
        ensure(len(repl_cands) == 1 and not keep_var)

    # Step 4: do the replacement
    def callback(expr: ca.Node, is_expr: bool) -> Optional[ca.Node]:
        if indices.starts[expr] in repl_cands_set:
            # Each replaced read gets a copy of its own.
            return copy.deepcopy(repl_expr)
        if expr == write and isinstance(write, ca.Assignment) and not keep_var:
            # Drop the assignment: in expression position only its target
            # remains, in statement position it becomes an empty statement.
            if is_expr:
                return write.lvalue
            else:
                return ca.EmptyStatement()
        return None

    visit_replace(fn.body, callback)
    # A declaration is kept, but loses its initializer.
    if not keep_var and isinstance(write, ca.Decl):
        write.init = None
def perm_randomize_internal_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of pre-existing local variables. Function parameters
    are not included -- those are handled by perm_randomize_function_type.
    Only variables mentioned within the given region are affected.

    Raises RandomizationFailure if no suitable declaration exists, or if the
    chosen variable does not have a basic integer type."""
    mentioned: Set[str] = set()

    class MentionCollector(ca.NodeVisitor):
        def visit_ID(self, node: ca.ID) -> None:
            if region.contains_node(node):
                mentioned.add(node.name)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            # Only the base of a struct reference is searched, not the field.
            self.visit(node.name)

    MentionCollector().visit(fn)
    typemap = build_typemap(ast)
    found: List[ca.Decl] = []

    class DeclCollector(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            has_plain_type = isinstance(decl.type, ca.TypeDecl)
            if has_plain_type and decl.name and decl.name in mentioned:
                found.append(decl)
            self.generic_visit(decl)

    DeclCollector().visit(fn)
    ensure(found)
    chosen = random.choice(found)
    assert isinstance(chosen.type, ca.TypeDecl), "checked above"
    chosen.type = randomize_type(chosen.type, typemap, random, ensure_changed=True)
    set_decl_name(chosen)
def perm_randomize_external_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of global variables. Only variables mentioned within the
    given region are affected.

    One name mentioned in the region is picked at random, and the innermost
    type of all its file-level declarations is replaced by the same random
    integer type. The declarations are replaced within `ast.ext` by modified
    shallow copies, so the original Decl objects are left untouched.

    Raises RandomizationFailure if no name is mentioned in the region, if the
    chosen name has no file-level declaration, or if its innermost type is
    not a basic integer type."""
    names: Set[str] = set()

    class IdVisitor(ca.NodeVisitor):
        def visit_ID(self, node: ca.ID) -> None:
            if region.contains_node(node):
                names.add(node.name)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            # Only the base of a struct reference is searched, not the field.
            self.visit(node.name)

    IdVisitor().visit(fn)
    ensure(names)
    # The iteration order of a set of strings differs between interpreter
    # runs (string hashes are randomized per process), so pick from a sorted
    # list to keep the choice reproducible for a given random seed.
    name = random.choice(sorted(names))

    decls: List[Tuple[ca.Decl, int]] = []
    for i, item in enumerate(ast.ext):
        if isinstance(item, ca.Decl) and item.name == name:
            decls.append((copy.copy(item), i))
    ensure(decls)

    chosen = random.choice(decls)[0]
    decl_type = get_decl_type(chosen)
    typemap = build_typemap(ast)
    new_type = randomize_innermost_type(decl_type, typemap, random, ensure_changed=True)

    # Nothing has been modified up to this point, so the failures above leave
    # the AST intact. Now commit the new type to every declaration.
    for decl, i in decls:
        decl.type = copy.deepcopy(new_type)
        ast.ext[i] = decl
        set_decl_name(decl)
def perm_randomize_function_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of function parameters and returns. Only functions
    called within the given region are affected, plus the current function.

    One such function is picked at random, and either its return type or the
    type of one of its parameters is changed. The change is applied to the
    function's definition and all of its declarations in `ast.ext`.

    Raises RandomizationFailure if the chosen function has no declaration or
    definition in the file, has no parameters when a parameter was to be
    changed, or if the selected type is not a basic integer type."""
    assert fn.decl.name is not None, "function definitions have names"
    names: Set[str] = {fn.decl.name}

    class IdVisitor(ca.NodeVisitor):
        def visit_FuncCall(self, node: ca.FuncCall) -> None:
            if region.contains_node(node) and isinstance(node.name, ca.ID):
                names.add(node.name.name)
            self.generic_visit(node)

    IdVisitor().visit(fn)
    # The iteration order of a set of strings differs between interpreter
    # runs (string hashes are randomized per process), so pick from a sorted
    # list to keep the choice reproducible for a given random seed.
    name = random.choice(sorted(names))

    # Find the declarations of function with the given name. For performance
    # reasons, the part of the AST they live in are shared between all
    # randomization runs, so if we mutated them in place bad things would
    # happen. Thus, we replace the AST parts we plan to change with mutable
    # copies.
    all_decls: List[Tuple[ca.Decl, int, "ca.ExternalDeclaration"]] = []
    main_decl: Optional[ca.Decl] = None
    for i, item in enumerate(ast.ext):
        if (
            isinstance(item, ca.Decl)
            and isinstance(item.type, ca.FuncDecl)
            and item.name == name
        ):
            new_decl = copy.copy(item)
            ast.ext[i] = new_decl
            all_decls.append((new_decl, i, new_decl))
        if isinstance(item, ca.FuncDef) and item.decl.name == name:
            assert isinstance(
                item.decl.type, ca.FuncDecl
            ), "function definitions have function types"
            new_fndef = copy.copy(item)
            new_decl = copy.copy(item.decl)
            new_fndef.decl = new_decl
            ast.ext[i] = new_fndef
            all_decls.append((new_decl, i, new_fndef))
            main_decl = new_decl

    # Change the type within the function definition if there is one (since we
    # need to keep names there), or else within an arbitrary of the (typically
    # just one) declarations. We later mirror the change to all declarations.
    ensure(all_decls)
    if not main_decl:
        main_decl = random.choice(all_decls)[0]

    typemap = build_typemap(ast)
    # Work on a deep copy of the function type, since the shallow copies made
    # above still share it with the original AST.
    main_fndecl = copy.deepcopy(main_decl.type)
    assert isinstance(main_fndecl, ca.FuncDecl), "checked above"
    main_decl.type = main_fndecl

    if random_bool(random, 0.5):
        # Replace the return type, changing integer signedness/size as well as
        # switching to/from void (which we should perhaps avoid if the function
        # call result is used, but eh, it's annoying to tell).
        ret_type = pointer_decay(main_fndecl.type, typemap)
        if allowed_basic_type(ret_type, typemap, ["void"]):
            main_fndecl.type = random_type(random)
        elif random_bool(random, PROB_RET_VOID):
            idtype = ca.IdentifierType(names=["void"])
            main_fndecl.type = ca.TypeDecl(declname=None, quals=[], type=idtype)
        else:
            main_fndecl.type = randomize_type(
                ret_type, typemap, random, ensure_changed=True
            )
        set_decl_name(main_decl)
    else:
        # Replace a parameter, changing integer signedness/size.
        if not main_fndecl.args:
            raise RandomizationFailure
        ensure(main_fndecl.args.params)
        param_index = random.randrange(len(main_fndecl.args.params))
        arg = main_fndecl.args.params[param_index]
        if isinstance(arg, (ca.ID, ca.EllipsisParam)):
            raise RandomizationFailure
        arg_type = arg.type if isinstance(arg, ca.Typename) else get_decl_type(arg)
        param_type = pointer_decay(arg_type, typemap)
        arg.type = randomize_type(param_type, typemap, random, ensure_changed=True)
        if isinstance(arg, ca.Decl):
            set_decl_name(arg)

    # Commit the changes by writing them back to the AST, for all declarations.
    for decl, ext_index, new_node in all_decls:
        ast.ext[ext_index] = new_node
        if decl is not main_decl:
            decl.type = copy.deepcopy(main_decl.type)
def perm_refer_to_var(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Add `if (variable) {}` or `if (struct.member) {}` in a random place.
    This will get optimized away but may affect regalloc.

    Raises RandomizationFailure if the region contains no variable or struct
    member expression, if the chosen one is effectful, or if there is nowhere
    to insert the statement."""
    # Find expression to insert, searching within the randomization region.
    usable: List[Expression] = []
    for candidate in get_block_expressions(fn.body, region):
        if isinstance(candidate, (ca.StructRef, ca.ID)):
            usable.append(candidate)
    ensure(usable)
    expr = random.choice(usable)
    ensure(not ast_util.is_effectful(expr))

    # Test the address of struct/union values rather than the value itself.
    typemap = build_typemap(ast)
    type: Type = resolve_typedefs(decayed_expr_type(expr, typemap), typemap)
    is_aggregate = isinstance(type, ca.TypeDecl) and isinstance(
        type.type, (ca.Struct, ca.Union)
    )
    if is_aggregate:
        expr = ca.UnaryOp("&", expr)

    if random_bool(random, 0.5):
        expr = ca.UnaryOp("!", expr)

    # Insert it wherever -- possibly outside the randomization region, since regalloc
    # can act at a distance. (Except before a declaration.)
    ins_cands = get_insertion_points(fn, Region.unbounded())
    ensure(ins_cands)

    cond = copy.deepcopy(expr)
    # Repeat the condition up to two times: if (x && x && x) {} sometimes helps.
    repeats = random.choice((0, 0, 0, 0, 0, 1, 2, 2))
    for _ in range(repeats):
        cond = ca.BinaryOp("&&", cond, copy.deepcopy(expr))

    stmt = ca.If(cond=cond, iftrue=ca.Compound(block_items=[]), iffalse=None)
    target_block, target_index, _ = random.choice(ins_cands)
    ast_util.insert_statement(target_block, target_index, stmt)
def perm_ins_block(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Wrap a random range of statements within `if (1) { ... }` or
    `do { ... } while(0)`. Control flow can have remote effects, so this
    mostly ignores the region restriction.

    The do-while form is only used when all wrapped statements lie within the
    region. Leading declarations and pragmas of the block are never wrapped."""
    blocks: List[Block] = []

    def collect(block: Block) -> None:
        blocks.append(block)
        for stmt in ast_util.get_block_stmts(block, False):
            ast_util.for_nested_blocks(stmt, collect)

    collect(fn.body)
    block = random.choice(blocks)
    stmts = ast_util.get_block_stmts(block, True)

    # Count the declarations and pragmas at the start of the block.
    decl_count = 0
    for stmt in stmts:
        if not isinstance(stmt, (ca.Decl, ca.Pragma)):
            break
        decl_count += 1

    upper = len(stmts) + 1
    first = random.randrange(decl_count, upper)
    second = random.randrange(decl_count, upper)
    lo, hi = min(first, second), max(first, second)

    wrapped = stmts[lo:hi]
    new_block = ca.Compound(block_items=wrapped)
    if random_bool(random, PROB_INS_BLOCK_DOWHILE) and all(
        region.contains_node(n) for n in wrapped
    ):
        cond = ca.Constant(type="int", value="0")
        replacement = [
            ca.Pragma("_permuter sameline start"),
            ca.DoWhile(cond=cond, stmt=new_block),
            ca.Pragma("_permuter sameline end"),
        ]
    else:
        cond = ca.Constant(type="int", value="1")
        replacement = [ca.If(cond=cond, iftrue=new_block, iffalse=None)]
    # Splice the wrapper into the block in place of the wrapped statements.
    stmts[lo:hi] = replacement
def perm_empty_stmt(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Inserts a no-op statement, one of:
    - if (1) {}  (sometimes multiple of them)
    - if (0) {}
    - label:
    - goto label; label:;
    - ;
    Control flow can have remote effects, so this
    ignores the region restriction.

    The inserted statements are surrounded by `_permuter sameline` pragmas.
    Raises RandomizationFailure if there is nowhere to insert them."""
    # Insert the statement wherever, except before a declaration.
    cands = get_insertion_points(fn, Region.unbounded())
    ensure(cands)

    def empty_if(value: str) -> ca.If:
        cond = ca.Constant(type="int", value=value)
        return ca.If(cond=cond, iftrue=ca.Compound([]), iffalse=None)

    # The label name is drawn up front, whether or not it ends up being used.
    label_name = f"dummy_label_{random.randint(1, 10**6)}"
    stmts: List[Statement] = []
    kind = random.randrange(5)
    if kind == 0:  # if (1) or multiple if (1)
        count = random.choice([1, random.randint(2, 6)])
        stmts.extend(empty_if("1") for _ in range(count))
    elif kind == 1:  # if (0)
        stmts = [empty_if("0")]
    elif kind == 2:  # label:
        stmts = [ca.Label(label_name, ca.EmptyStatement())]
    elif kind == 3:  # goto label; label:
        stmts = [
            ca.Goto(label_name),
            ca.Label(label_name, ca.EmptyStatement()),
        ]
    elif kind == 4:  # ;
        stmts = [ca.EmptyStatement()]

    tob, toi, _ = random.choice(cands)
    wrapped = [
        ca.Pragma("_permuter sameline start"),
        *stmts,
        ca.Pragma("_permuter sameline end"),
    ]
    # Inserting in reverse at the same index leaves the statements in order.
    for stmt in reversed(wrapped):
        ast_util.insert_statement(tob, toi, stmt)
def perm_sameline(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Mark a random run of statements in the region to be emitted on one line.

    Two insertion points are picked and `_permuter sameline start` /
    `_permuter sameline end` pragmas are inserted at them. At least three
    insertion points are required."""
    points = get_insertion_points(fn, region)
    total = len(points)
    ensure(total >= 3)

    # Scale the largest possible span by four uniform factors, which biases
    # the result towards small intervals.
    span: float = total - 2
    for _ in range(4):
        span *= random.uniform(0, 1)
    length = int(span) + 2
    first = random.randrange(total - length)
    last = first + length

    # The end marker goes in first: inserting a statement may shift the
    # indices that come after it.
    end_block, end_index, _ = points[last]
    ast_util.insert_statement(
        end_block, end_index, ca.Pragma("_permuter sameline end")
    )
    start_block, start_index, _ = points[first]
    ast_util.insert_statement(
        start_block, start_index, ca.Pragma("_permuter sameline start")
    )
def perm_associative(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Swap the operands of a random binary operation, e.g. a+b into b+a.

    Ordering comparisons are handled as well; their operator is mirrored so
    that a < b turns into b > a."""
    swappable = ("+", "*", "|", "&", "^", "<", ">", "<=", ">=", "==", "!=")
    mirrored = {"<": ">", ">": "<"}
    found: List[ca.BinaryOp] = []

    class Visitor(ca.NodeVisitor):
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if node.op in swappable and region.contains_node(node):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    binop = random.choice(found)
    binop.left, binop.right = binop.right, binop.left
    # Only the leading character flips; a trailing "=" is kept as is.
    flipped = mirrored.get(binop.op[0])
    if flipped is not None:
        binop.op = flipped + binop.op[1:]
def perm_condition(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Change if(x) into if(x != 0), or vice versa. Also handles for/while/do-while.

    Every if/while/do-while/for in the function body is a candidate; the
    `region` argument is not consulted. Statements without a condition make
    the pass fail with RandomizationFailure."""
    found: List[Union[ca.If, ca.While, ca.DoWhile, ca.For]] = []

    class Visitor(ca.NodeVisitor):
        def collect(
            self, node: Union[ca.If, ca.While, ca.DoWhile, ca.For]
        ) -> None:
            found.append(node)
            self.generic_visit(node)

        # All four statement kinds are gathered the same way.
        visit_If = visit_While = visit_DoWhile = visit_For = collect

    Visitor().visit(fn.body)
    ensure(found)

    target = random.choice(found)
    cond = target.cond
    if not cond:
        raise RandomizationFailure

    comparisons = ["==", "!=", "<", ">", "<=", ">="]
    if (
        isinstance(cond, ca.BinaryOp)
        and cond.op in comparisons
        and random_bool(random, 0.9)
    ):
        # Explicit comparison: only `x == 0` / `x != 0` can be shortened.
        ensure(cond.op in ["==", "!="])
        rhs = cond.right
        ensure(
            isinstance(rhs, ca.Constant)
            and rhs.value in ["0", "0U", "0.0", "0.0f"]
        )
        if cond.op == "==":
            target.cond = ca.UnaryOp("!", cond.left)
        else:
            target.cond = cond.left
        return

    # Implicit truth test: spell out the comparison against zero. A leading
    # `!` is usually peeled off and expressed as `== 0` instead.
    operand = cond
    op = "!="
    if isinstance(cond, ca.UnaryOp) and cond.op == "!" and random_bool(random, 0.9):
        assert not isinstance(cond.expr, ca.Typename)
        operand = cond.expr
        op = "=="
    zero = random_weighted(
        random,
        [
            (ca.Constant("int", "0"), 0.8),
            (ca.Constant("unsigned int", "0U"), 0.2),
            (ca.Constant("float", "0.0f"), 0.05),
        ],
    )
    target.cond = ca.BinaryOp(op, operand, zero)
def perm_add_self_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Introduce a "x = x;" somewhere.

    `x` is the name of any declaration found in the function body, and the
    statement is placed at a random insertion point within the region."""
    points = get_insertion_points(fn, region)
    declared: List[str] = []

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            if decl.name:
                declared.append(decl.name)
            self.generic_visit(decl)

    Visitor().visit(fn.body)
    ensure(declared)
    ensure(points)

    name = random.choice(declared)
    block, index, _ = random.choice(points)
    self_assign = ca.Assignment("=", ca.ID(name), ca.ID(name))
    ast_util.insert_statement(block, index, self_assign)
def perm_dummy_comma_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Wrap a random expression x from the region in a comma expression (0, x)."""
    exprs = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    dummy = ca.Constant("int", "0")
    replace_node(fn.body, target, ca.ExprList([dummy, target]))
def perm_reorder_stmts(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Move a statement to another random place.

    Source and destination are both drawn from the region's insertion points
    (including points within declaration lists). Nearby destinations are
    favoured. A declaration with an initializer may be split into a
    declaration that stays put and an assignment that is inserted at the
    destination."""
    points = get_insertion_points(fn, region, allow_within_decl=True)

    # Figure out candidate statements to be moved. Don't move pragmas; it can
    # cause assertion failures. Don't move blocks; statements are generally not
    # reordered across basic blocks, and we don't want to risk moving a block
    # to inside itself.
    movable = [
        pos
        for pos, (_, _, stmt) in enumerate(points)
        if stmt is not None
        and not isinstance(stmt, ca.Pragma)
        and not ast_util.has_nested_block(stmt)
    ]
    ensure(movable)
    src_pos = random.choice(movable)

    def distance(pos: int) -> int:
        """Number of steps the statement would move; 0 means it stays put."""
        return max(src_pos - pos, pos - (src_pos + 1))

    # Move distance 1, 2, 3, ... with probabilities
    # 23%, 12%, 8%, 6%, 4%, 3%, 3%, 2%, 2%, 2%, ...
    weighted_targets = [
        (pos, (distance(pos) + 1) ** -1.5)
        for pos in range(len(points))
        if distance(pos) != 0
    ]
    ensure(weighted_targets)
    dst_pos = random_weighted(random, weighted_targets)

    # From here on the indices are positions within the respective blocks.
    fromb, fromi, from_stmt = points[src_pos]
    tob, toi, to_stmt = points[dst_pos]
    if fromb == tob:
        ensure(toi != fromi and toi != fromi + 1)

    if isinstance(from_stmt, ca.Decl):
        # Moving a declaration is tricky, when also preserving C89 compatibility.
        # We can move it to after another declaration, or to the start of a block.
        # Alternatively, if the declaration includes an initializer, and we move
        # it forwards, we can split that out as an assignment.
        # We don't allow moving the declaration or assignment past the next
        # occurrence of the variable.
        ensure(from_stmt.name)
        var_name = from_stmt.name
        # NOTE(review): for an end-of-block destination this reads the end of
        # the source block (fromb), not of the destination block; confirm
        # that this is intended.
        to_index = indices.starts[to_stmt] if to_stmt else indices.ends[fromb]
        mentions: List[ca.Node] = []

        class Visitor(ca.NodeVisitor):
            def visit_ID(self, node: ca.ID) -> None:
                if node.name == var_name and indices.starts[node] < to_index:
                    mentions.append(node)

            def visit_TypeDecl(self, node: ca.TypeDecl) -> None:
                if node.declname == var_name and indices.starts[node] < to_index:
                    mentions.append(node)

        Visitor().visit(fn.body)
        ensure(len(mentions) <= 1)

        to_block_stmts = ast_util.get_block_stmts(tob, False)
        lands_among_decls = toi == 0 or isinstance(to_block_stmts[toi - 1], ca.Decl)
        if not lands_among_decls:
            splittable = (
                from_stmt.name
                and from_stmt.init
                and not isinstance(from_stmt.init, ca.InitList)
                and len(mentions) > 0
            )
            if not splittable:
                raise RandomizationFailure
            # Leave the declaration where it is and move only its
            # initializer, as a plain assignment.
            assignment = ca.Assignment("=", ca.ID(from_stmt.name), from_stmt.init)
            ast_util.insert_statement(tob, toi, assignment)
            from_stmt.init = None
            return
    else:
        # Don't put statements before declarations.
        ensure(not isinstance(to_stmt, ca.Decl))

    # Removing the statement shifts later positions in the same block by one.
    if fromb == tob and fromi < toi:
        toi -= 1
    moved = ast_util.get_block_stmts(fromb, True).pop(fromi)
    ast_util.insert_statement(tob, toi, moved)
def perm_compound_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Convert a statement of the form `x = x op y` to `x op= y`, or vice versa.

    Any compound assignment in the region can be expanded; a plain assignment
    qualifies only if its right-hand side is a binary operation with a
    supported operator whose left operand equals the assigned lvalue."""
    operators = ["+", "-", "*", "/", "<<", ">>", "^", "|", "&"]
    found: List[ca.Assignment] = []

    def is_self_update(node: ca.Assignment) -> bool:
        """Whether `node` has the shape `x = x op y` for a supported op."""
        rhs = node.rvalue
        return (
            isinstance(rhs, ca.BinaryOp)
            and ast_util.equal_ast(node.lvalue, rhs.left)
            and rhs.op in operators
        )

    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            if region.contains_node(node) and (
                node.op != "=" or is_self_update(node)
            ):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    assign = random.choice(found)
    if assign.op == "=":
        # x = x op y  ->  x op= y
        rhs = assign.rvalue
        assert isinstance(rhs, ca.BinaryOp)
        assign.op = rhs.op + "="
        assign.rvalue = rhs.right
    else:
        # x op= y  ->  x = x op y (the lvalue is copied, not shared)
        operator = assign.op[:-1]
        assign.rvalue = ca.BinaryOp(
            operator, copy.deepcopy(assign.lvalue), assign.rvalue
        )
        assign.op = "="
def perm_inequalities(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Adjusts inequalities to equivalent versions that sometimes produce different code.

    For example, a > b and a >= b + 1, a < b to a <= b - 1 (and vice versa).
    The result is not simplified: 'a <= (b + 1)' becomes 'a < ((b + 1) + 1)'."""
    inequalities = ["<", ">", "<=", ">="]
    found: List[ca.BinaryOp] = []

    class Visitor(ca.NodeVisitor):
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if node.op in inequalities and region.contains_node(node):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    cmp = random.choice(found)

    def offset_by_one(op: str) -> Callable[[ca.Node], ca.BinaryOp]:
        """Return a builder for `<operand> <op> 1`."""
        return lambda operand: ca.BinaryOp(op, operand, ca.Constant("int", "1"))

    plus1 = offset_by_one("+")
    minus1 = offset_by_one("-")

    # Don't change the operator, change both operands (can produce fake matches sometimes)
    # Ex: a > b -> a + 1 > b + 1
    if random.random() < 0.25:
        change = random.choice([plus1, minus1])
        cmp.left = change(cmp.left)
        cmp.right = change(cmp.right)
        return

    # For each operator: the replacement operator, followed by the adjustment
    # for the left operand and the alternative one for the right operand.
    # Exactly one of the two adjustments is applied.
    rewrites = {
        "<": ("<=", plus1, minus1),
        ">=": (">", plus1, minus1),
        ">": (">=", minus1, plus1),
        "<=": ("<", minus1, plus1),
    }
    new_op, adjust_left, adjust_right = rewrites[cmp.op]
    cmp.op = new_op
    if random_bool(random, 0.5):
        cmp.left = adjust_left(cmp.left)
    else:
        cmp.right = adjust_right(cmp.right)
def perm_add_mask(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Add a random amount of masks of 0xFF[FFFFFFFFFFFFFF] to a random expression of integer type.

    In some cases this mask is optimized out but affects regalloc.
    The regalloc change seems to cycle with slight differences every n masks."""
    typemap = build_typemap(ast)

    # Pick the expression that receives the mask; it has to be an integer.
    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    target_type: SimpleType = decayed_expr_type(target, typemap)
    integer_names = ["int", "char", "long", "short", "signed", "unsigned"]
    ensure(allowed_basic_type(target_type, typemap, integer_names))

    # Mask as if restricting the value to 8, 16, 32, or 64-bit width.
    # Sometimes use an unsigned mask like '0xFFu'
    width = random.choice(["0xFF", "0xFFFF", "0xFFFFFFFF", "0xFFFFFFFFFFFFFFFF"])
    suffix = random.choice(["", "u"])
    mask = width + suffix

    def masked(operand: Expression) -> ca.BinaryOp:
        """Build `<operand> & <mask>` with a fresh constant node."""
        return ca.BinaryOp("&", operand, ca.Constant("int", mask))

    result = masked(target)
    if random_bool(random, 0.3):
        # Occasionally stack up to 11 further copies of the same mask.
        for _ in range(random.randrange(12)):
            result = masked(result)
    replace_node(fn.body, target, result)
def perm_xor_zero(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Add ^0 to a random expression of integer type, or *1 to floats.

    Floats are multiplied by 1.0f and doubles by 1.0. Expressions of any
    other type make the pass fail with RandomizationFailure."""
    typemap = build_typemap(ast)

    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    target_type: SimpleType = decayed_expr_type(target, typemap)

    # (accepted basic type names, operator, constant type, constant text),
    # tried in order.
    identities = [
        (["int", "char", "long", "short", "signed", "unsigned"], "^", "int", "0"),
        (["float"], "*", "float", "1.0f"),
        (["double"], "*", "double", "1.0"),
    ]
    for names, op, const_type, const_text in identities:
        if allowed_basic_type(target_type, typemap, names):
            neutral = ca.Constant(const_type, const_text)
            replace_node(fn.body, target, ca.BinaryOp(op, target, neutral))
            return
    raise RandomizationFailure
def perm_float_literal(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Replace a random `float` literal in the region by an equivalent spelling.

    The literal is lowercased first. Possible respellings:
    - drop the suffix:                1.5f -> 1.5
    - drop a zero fraction entirely:  1.0f -> 1,   1.f -> 1
    - toggle the leading zero:        0.5f -> .5f, .5f -> 0.5f
    - toggle a trailing zero:         1.0f -> 1.f, 1.5f -> 1.50f

    The pass is abandoned (through `ensure`) when the region holds no `float`
    constant or when the chosen constant does not end in an `f` suffix."""
    literals: List[ca.Constant] = []

    class Visitor(ca.NodeVisitor):
        def visit_Constant(self, node: ca.Constant) -> None:
            if node.type == "float" and region.contains_node(node):
                literals.append(node)

    Visitor().visit(fn.body)
    ensure(literals)

    node = random.choice(literals)
    value: str = node.value.lower()
    # Every rewrite below slices off a trailing 'f'. Without that suffix the
    # slices would cut into the digits and change the value.
    ensure(value.endswith("f"))
    # 'e' marks a decimal exponent, 'p' a hexadecimal one. Digits appended
    # after an exponent would scale the value (1e5f -> 1e50f).
    has_exponent = "e" in value or "p" in value

    # Without the suffix the literal is a double.
    choices: List[str] = [value[:-1]]

    # A literal whose fraction is zero or empty can become a plain integer.
    if value.endswith(".0f"):
        choices.append(value[:-3] or "0")
    elif value.endswith(".f"):
        choices.append(value[:-2] or "0")

    # Leading zero: 0.5f <-> .5f
    if value.startswith("0."):
        fraction = value[2:-1]
        if fraction[:1].isdigit():
            choices.append("." + value[2:])
        elif not fraction:
            # "0.f" has no fraction digits; a bare ".f" is not a literal.
            choices.append(".0f")
    elif value.startswith("."):
        choices.append("0" + value)

    # Trailing zero: 1.0f <-> 1.f, 1.5f -> 1.50f
    if value.endswith(".0f"):
        choices.append((value[:-3] or "0") + ".f")
    elif "." in value and not has_exponent:
        choices.append(value[:-1] + "0f")

    value = random.choice(choices)
    if value.endswith("f"):
        literal_type = "float"
    elif "." in value or "e" in value or "p" in value:
        literal_type = "double"
    else:
        literal_type = "int"
    replace_node(fn.body, node, ca.Constant(literal_type, value))
def perm_cast_simple(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Cast a random expression to a simple type (integral or floating point only).

    Only expressions that already have an integral or floating point type are
    cast. The target is, with equal chance, an integral type (optionally
    unsigned) or one of float/double."""
    typemap = build_typemap(ast)

    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    target_type: SimpleType = decayed_expr_type(target, typemap)
    castable = ["int", "char", "long", "short", "signed", "unsigned", "float", "double"]
    ensure(allowed_basic_type(target_type, typemap, castable))

    integral_names = [["int"], ["char"], ["long"], ["short"], ["long", "long"]]
    floating_names = [["float"], ["double"]]

    type_names: List[str]
    if random_bool(random, 0.5):
        # Integral target; the signedness is drawn before the base type.
        sign: List[str] = random.choice([[], ["unsigned"]])
        base: List[str] = random.choice(integral_names)
        type_names = sign + base
    else:
        type_names = random.choice(floating_names)

    # Wrap the original expression in a cast to the chosen type.
    cast_type = ca.Typename(
        None, [], ca.TypeDecl(None, [], ca.IdentifierType(type_names))
    )
    replace_node(fn.body, target, ca.Cast(cast_type, target))
# struct_ref # type of a # easiest conversion
################################################################
# (a + b).c; # impossible #
# (a + b)->c; # s* # a[b].c
# (*(a + b)).c; # s* # a[b].c
# (*(a + b))->c; # s** # (*(a[b])).c
# (&(a + b)).c; # impossible #
# (&(a + b))->c; # impossible #
# (*(&(a + b))).c; # impossible #
# (*(&(a + b)))->c; # imp: a+b=rvalue #
# (&(*(a + b))).c; # impossible #
# (&(*(a + b)))->c; # s* # a[b].c (-&* req.)
################################################################
# (a[b]).c; # s* # (a + b)->c
# (a[b])->c; # s** # (*(a + b))->c
# (*(a[b])).c; # s** # (*(a + b))->c
# (*(a[b]))->c; # s*** # (*(*(a + b)))->c
# (&(a[b])).c; # impossible #
# (&(a[b]))->c; # s* # (&(*(a + b)))->c
# (*(&(a[b]))).c; # s* # (*(&(a + b)))->c
# (*(&(a[b])))->c; # s** # (*(&(*(a + b))))->c
# (&(*(a[b]))).c; # impossible #
# (&(*(a[b])))->c; # s** # (&(*(*(a + b))))->c
################################################################
# a.c # s # (&a)->c
# a->c # s* # (*a).c
# (*a).c # s* # a->c
# (*a)->c # s** # (*(*a)).c
# (&a).c # impossible #
# (&a)->c # s # (*(&a)).c
def perm_struct_ref(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Permute struct references: (a + b)->c, and (*(a + b)).c, a[b].c, (&a[b])->c

    A random StructRef from the region is first brought into a simplified
    form (`->` written as `.` on a dereference, array subscripts written as
    pointer arithmetic), and the simplifications are then randomly undone.
    The pass is abandoned unless at least one rewrite step was applied."""
    struct_refs: List[ca.StructRef] = []

    class Visitor(ca.NodeVisitor):
        def visit_StructRef(self, node: ca.StructRef) -> None:
            if region.contains_node(node):
                struct_refs.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(struct_refs)

    # TODO: Split into separate perm? Need a separate one for arrayrefs, (a + b)[1] to a[b + 1]
    def randomize_associative_binop(left: ca.Node, right: ca.BinaryOp) -> ca.BinaryOp:
        """Try moving parentheses to the left side sometimes (sadly, it seems to matter)"""
        regroup = random_bool(random, 0.5)
        if regroup and right.op in ["+", "-"]:
            # ((a + b) - c)
            inner = ca.BinaryOp("+", left, right.left)
            return ca.BinaryOp(right.op, inner, right.right)
        # (a + (b - c))
        return ca.BinaryOp("+", left, right)

    # Conversions
    def to_array(node: ca.BinaryOp) -> ca.ArrayRef:
        """Change a BinaryOp, a + b, to an ArrayRef, a[b]
        The operator is expected to be + or -"""
        # TODO: Permute binops like to_binop() does
        if node.op == "-":
            # Convert to a[-b]
            node.right = ca.UnaryOp("-", node.right)
        return ca.ArrayRef(node.left, node.right)

    def to_binop(node: ca.ArrayRef) -> ca.BinaryOp:
        """Change an ArrayRef, a[b], to a BinaryOp, a + b
        If b is also BinaryOp, such as a[b - 1], sometimes change the order of operations,
        ie: a + (b - 1) vs (a + b) - 1"""
        subscript = node.subscript
        if isinstance(subscript, ca.BinaryOp):
            return randomize_associative_binop(node.name, subscript)
        return ca.BinaryOp("+", node.name, subscript)

    def wrap_or_cancel(node: Expression, op: str, inverse: str) -> Expression:
        """Apply unary `op` to `node`, or strip an outer `inverse` operator
        if there is one, since the two would cancel out."""
        if isinstance(node, ca.UnaryOp) and node.op == inverse:
            assert not isinstance(node.expr, ca.Typename)
            return node.expr
        return ca.UnaryOp(op, node)

    def deref(node: Expression) -> Expression:
        """Surround the given node with a dereference operator"""
        return wrap_or_cancel(node, "*", "&")

    def addr(node: Expression) -> Expression:
        """Surround the given node with an address-of operator"""
        return wrap_or_cancel(node, "&", "*")

    def rec(node: ca.Node) -> Any:
        """Recurse down the StructRef tree, finding the parent of the leaf BinaryOp/ArrayRef.
        Throws RandomizationFailure when a UnaryOp other than * or & was encountered."""
        if isinstance(node, ca.UnaryOp):
            ensure(node.op in ["&", "*"])
            inner = node.expr
        elif isinstance(node, ca.StructRef):
            inner = node.name
        else:
            return None
        # The deepest UnaryOp/StructRef on the chain wins.
        return rec(inner) or node

    # TODO
    def apply_child(  # type: ignore
        parent: Union[ca.StructRef, ca.UnaryOp], func
    ) -> None:
        """Replace the single operand of `parent` by `func(operand)`."""
        if isinstance(parent, ca.StructRef):
            parent.name = func(parent.name)
        elif isinstance(parent, ca.UnaryOp):
            parent.expr = func(parent.expr)

    def get_child(parent: Union[ca.StructRef, ca.UnaryOp]) -> ca.Node:
        """Return the single operand of `parent`."""
        if isinstance(parent, ca.StructRef):
            return parent.name
        elif isinstance(parent, ca.UnaryOp):
            return parent.expr

    struct_ref = random.choice(struct_refs)
    parent: Union[ca.StructRef, ca.UnaryOp]

    # Step 1: Find the parent of the leaf node
    parent = rec(struct_ref)
    changed = False

    # Step 2: Simplify (...)->c to (*(...)).c
    if struct_ref.type == "->":
        struct_ref.type = "."
        # Check (before dereferencing) whether deref would remove the parent node
        deref_removes_parent = (
            parent is struct_ref.name
            and isinstance(parent, ca.UnaryOp)
            and parent.op == "&"
        )
        struct_ref.name = deref(struct_ref.name)
        if deref_removes_parent:
            parent = struct_ref
        elif parent is struct_ref and isinstance(
            struct_ref.name, ca.UnaryOp
        ):  # Check to make mypy happy
            parent = struct_ref.name
        changed = True

    # Simple StructRefs only need their type permuted
    if isinstance(get_child(parent), (ca.ArrayRef, ca.BinaryOp)):
        # For binops, a lhs like &(a+b)->c is impossible, because a + b is an rvalue
        # Step 3: Simplify further by converting ArrayRef to BinaryOp
        if isinstance(get_child(parent), ca.ArrayRef):
            apply_child(parent, to_binop)
            apply_child(parent, deref)
            parent = typing.cast("Union[ca.StructRef, ca.UnaryOp]", get_child(parent))
            changed = True

        # Step 4: Convert back to ArrayRef
        # (the sanity check makes sure there's at least one dereference)
        if (
            random_bool(random, 0.5)
            and isinstance(parent, ca.UnaryOp)
            and parent.op == "*"
        ):
            apply_child(parent, to_array)
            apply_child(parent, addr)
            changed = True

    # Step 5: Convert the StructRef type back
    if random_bool(random, 0.5):
        struct_ref.name = addr(struct_ref.name)
        struct_ref.type = "->"
        changed = True

    ensure(changed)
def perm_split_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Split assignments of the form a = b . c . d ...; into a = b; a = a . c . d ...;, a = c . d ...; a = b . a;, etc.

    One operand of a random binary operation on the right-hand side is first
    assigned to the target on its own, and the operand is replaced by a copy
    of the target. The operand's type has to be the same as, or similar to,
    the target's type."""
    found = []

    # Look for assignments of the form 'var = binaryOp' (ignores op=)
    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            if (
                node.op == "="
                and isinstance(node.rvalue, ca.BinaryOp)
                and region.contains_node(node)
            ):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    assign = random.choice(found)
    var = assign.lvalue

    # The assignment itself has to be an insertion point, so that the new
    # statement can be placed right in front of it.
    slot = next(
        (
            (block, index)
            for block, index, stmt in get_insertion_points(fn, region)
            if stmt is assign
        ),
        None,
    )
    if slot is None:
        raise RandomizationFailure
    ins_block, ins_index = slot

    def binops_in_order(node: ca.BinaryOp):  # type: ignore
        """Yield `node` and all BinaryOps reachable from it through chains of
        BinaryOp operands, left subtree first."""
        if isinstance(node.left, ca.BinaryOp):
            yield from binops_in_order(node.left)
        yield node
        if isinstance(node.right, ca.BinaryOp):
            yield from binops_in_order(node.right)

    binops = list(binops_in_order(typing.cast(ca.BinaryOp, assign.rvalue)))
    split = random.choice(binops)

    typemap = build_typemap(ast)
    vartype = decayed_expr_type(var, typemap)

    # Choose which side to move to a new assignment
    attr = "left" if random_bool(random, 0.5) else "right"
    side = getattr(split, attr)
    sidetype = decayed_expr_type(side, typemap)
    ensure(same_type(vartype, sidetype, typemap, allow_similar=True))
    setattr(split, attr, copy.deepcopy(var))

    # The assignment is always inserted before the original
    new_assign = ca.Assignment("=", copy.deepcopy(var), side)
    ast_util.insert_statement(ins_block, ins_index, new_assign)
def perm_remove_ast(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Delete parts of the function that might be unnecessary (mistakes or unnecessary changes from an improved base.c).

    A random node from the region is replaced by one of its children:
    - a cast by the expression it casts
    - `a & constant` by `a`
    - an if without else by its body
    - a while or do-while loop by its body"""
    # Pairs of (node to remove, child that takes its place).
    removals: List[Tuple[ca.Node, ca.Node]] = []

    class Visitor(ca.NodeVisitor):
        def visit_Cast(self, node: ca.Cast) -> None:
            if region.contains_node(node):
                removals.append((node, node.expr))
            self.generic_visit(node)

        # Replace (a & constant) with (a).
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if region.contains_node(node) and node.op == "&":
                # With two constant operands, either one may be kept.
                for constant, kept in (
                    (node.left, node.right),
                    (node.right, node.left),
                ):
                    if isinstance(constant, ca.Constant):
                        removals.append((node, kept))
            self.generic_visit(node)

        # Remove if statements that don't have an else
        def visit_If(self, node: ca.If) -> None:
            if not node.iffalse and region.contains_node(node):
                removals.append((node, node.iftrue))
            self.generic_visit(node)

        # Remove loops
        def visit_loop(self, node: Union[ca.While, ca.DoWhile]) -> None:
            if region.contains_node(node):
                removals.append((node, node.stmt))
            self.generic_visit(node)

        visit_While = visit_DoWhile = visit_loop

    Visitor().visit(fn.body)
    ensure(removals)

    doomed, replacement = random.choice(removals)
    replace_node(fn.body, doomed, replacement)
def perm_duplicate_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Duplicate an assignment, sometimes forcing IDO to reuse a register.

    The assignment (plain `=` only) is picked from within the region, while
    the copy may be inserted at any insertion point of the function."""
    assignments = []

    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            if region.contains_node(node) and node.op == "=":
                assignments.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(assignments)
    original = random.choice(assignments)

    points = get_insertion_points(fn, Region.unbounded())
    ensure(points)
    duplicate = copy.deepcopy(original)
    block, index, _ = random.choice(points)
    ast_util.insert_statement(block, index, duplicate)
def perm_pad_var_decl(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Inserts an unused variable to adjust stack offsets. Probably only useful with --stack-diffs enabled.

    The variable is called `pad`, or `pad2`, `pad3`, ... if that name is
    already declared in the function body, and gets a random type."""
    taken: Set[str] = set()

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            if decl.name:
                taken.add(decl.name)
            self.generic_visit(decl)

    Visitor().visit(fn.body)

    # Find the first name in the sequence pad, pad2, pad3, ... that is free.
    name = "pad"
    suffix = 1
    while name in taken:
        suffix += 1
        name = f"pad{suffix}"

    pad_type = random_type(random)
    ast_util.insert_decl(fn, name, pad_type, random)
class Randomizer:
    """Applies random permutation passes to a function, using a seeded RNG."""

    def __init__(self, rng_seed: int) -> None:
        # All randomness of the passes is drawn from this one generator.
        self.random = Random(rng_seed)

    def randomize(self, ast: ca.FileAST, fn_index: int) -> None:
        """Apply one randomly chosen pass to the function at `ast.ext[fn_index]`.

        Passes are drawn according to their weights. A pass that raises
        RandomizationFailure is discarded and another one is drawn, until one
        completes."""
        fn = ast.ext[fn_index]
        assert isinstance(fn, ca.FuncDef)
        indices = ast_util.compute_node_indices(fn)
        region = get_randomization_region(fn, indices, self.random)

        # (pass, relative weight)
        weighted_passes = [
            (perm_temp_for_expr, 100),
            (perm_expand_expr, 20),
            (perm_reorder_stmts, 20),
            (perm_add_mask, 15),
            (perm_xor_zero, 10),
            (perm_cast_simple, 10),
            (perm_refer_to_var, 10),
            (perm_float_literal, 10),
            (perm_randomize_internal_type, 10),
            (perm_randomize_external_type, 5),
            (perm_randomize_function_type, 5),
            (perm_split_assignment, 10),
            (perm_sameline, 10),
            (perm_ins_block, 10),
            (perm_struct_ref, 10),
            (perm_empty_stmt, 10),
            (perm_condition, 10),
            (perm_dummy_comma_expr, 5),
            (perm_add_self_assignment, 5),
            (perm_associative, 5),
            (perm_inequalities, 5),
            (perm_compound_assignment, 5),
            (perm_remove_ast, 5),
            (perm_duplicate_assignment, 5),
            (perm_pad_var_decl, 1),
        ]

        succeeded = False
        while not succeeded:
            chosen = random_weighted(self.random, weighted_passes)
            try:
                chosen(fn, ast, indices, region, self.random)
            except RandomizationFailure:
                # This pass did not apply here; draw again.
                continue
            succeeded = True