mirror of https://github.com/zeldaret/mm.git
1941 lines
67 KiB
Python
1941 lines
67 KiB
Python
import bisect
|
|
import copy
|
|
from dataclasses import dataclass, field
|
|
from random import Random
|
|
import typing
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
List,
|
|
Optional,
|
|
Sequence,
|
|
Set,
|
|
Tuple,
|
|
TypeVar,
|
|
Union,
|
|
)
|
|
|
|
from pycparser import c_ast as ca
|
|
|
|
from . import ast_util
|
|
from .ast_util import Block, Indices, Statement, Expression
|
|
from .ast_types import (
|
|
SimpleType,
|
|
Type,
|
|
TypeMap,
|
|
allowed_basic_type,
|
|
build_typemap,
|
|
decayed_expr_type,
|
|
get_decl_type,
|
|
resolve_typedefs,
|
|
same_type,
|
|
set_decl_name,
|
|
pointer_decay,
|
|
)
|
|
|
|
# Set to true to perform expression type detection eagerly. This can help when
# debugging crashes in the ast_types code.
DEBUG_EAGER_TYPES = False

# Randomize the type of an introduced temporary variable with this probability.
PROB_RANDOMIZE_TYPE = 0.3

# Reuse an existing variable instead of introducing a new temporary one with
# this probability.
PROB_REUSE_VAR = 0.5

# When wrapping statements in a new block, use a same-line `do { ... } while(0);`
# (as opposed to non-same-line `if (1) { ... }`) with this probability.
# This matches what macros often do.
PROB_INS_BLOCK_DOWHILE = 0.5

# Make a pointer to a temporary expression, rather than copy it by value, with
# this probability. (This always happens for expressions of struct type,
# regardless of this probability.)
PROB_TEMP_PTR = 0.05

# Instead of emitting an assignment statement, assign the temporary within the
# first expression it's used in with this probability.
PROB_TEMP_ASSIGN_AT_FIRST_USE = 0.1

# When creating a temporary for an expression, use the temporary for all equal
# expressions with this probability.
PROB_TEMP_REPLACE_ALL = 0.2

# When creating a temporary for an expression, use the temporary for an interval
# with maximal endpoint with this probability.
PROB_TEMP_REPLACE_MOST = 0.2

# When substituting a variable by its value, substitute all instances with this
# probability, rather than just a subrange or the complement of one.
PROB_EXPAND_REPLACE_ALL = 0.3

# When substituting a variable by its value, keep the variable assignment with
# this probability.
PROB_KEEP_REPLACED_VAR = 0.2

# Change the return type of an external function to void with this probability.
PROB_RET_VOID = 0.2

# Number larger than any node index. (If you're trying to compile a 1 GB large
# C file to matching asm, you have bigger problems than this limit.)
MAX_INDEX = 10 ** 9

# Generic type variable for helpers that are polymorphic in their element type.
T = TypeVar("T")
|
|
|
|
|
|
class RandomizationFailure(Exception):
    """Raised when a randomization pass cannot be applied and should be abandoned."""
|
|
|
|
|
|
def ensure(condition: Any) -> None:
    """Give up on the current randomization pass unless 'condition' is truthy,
    so that another pass can be tried instead.

    Raises RandomizationFailure when the condition does not hold. Don't call
    this after making any modifications to the AST."""
    if condition:
        return
    raise RandomizationFailure
|
|
|
|
|
|
@dataclass
class Region:
    """An open interval (start, end) of node indices in which randomization is
    allowed. When `indices` is None the region is unbounded and contains
    everything. `indices` is ignored when comparing regions for equality."""

    start: int
    end: int
    indices: Optional[Indices] = field(compare=False)

    @staticmethod
    def unbounded() -> "Region":
        """Build a region that places no restriction on nodes."""
        return Region(start=-1, end=MAX_INDEX, indices=None)

    def is_unbounded(self) -> bool:
        """Tell whether this region was created without index information."""
        return self.indices is None

    def contains_node(self, node: ca.Node) -> bool:
        """Check whether the region contains an entire node, i.e. both its
        start and its end index lie strictly inside the region."""
        node_indices = self.indices
        if node_indices is None:
            return True
        if not self.start < node_indices.starts[node]:
            return False
        return node_indices.ends[node] < self.end

    def contains_pre(self, node: ca.Node) -> bool:
        """Check whether the region contains a point just before a given node."""
        node_indices = self.indices
        if node_indices is None:
            return True
        return self.contains_pre_index(node_indices.starts[node])

    def contains_pre_index(self, index: int) -> bool:
        """Check whether the region contains a point just before a given node,
        as specified by its index."""
        if self.indices is None:
            return True
        return self.start < index and index < self.end
|
|
|
|
|
|
def reverse_start_indices(indices: Indices) -> Dict[int, ca.Node]:
    """Invert `indices.starts`, giving a map from start index to node. If
    several nodes share a start index, the one iterated last wins."""
    return {start: node for node, start in indices.starts.items()}
|
|
|
|
|
|
def get_randomization_region(
    top_node: ca.Node, indices: Indices, random: Random
) -> Region:
    """Choose the region that randomization should be restricted to.

    Regions are delimited by pragma nodes whose string is
    "_permuter randomizer start" / "_permuter randomizer end". One of the
    delimited regions is picked at random; if there are none, the unbounded
    region is returned. Nested start markers raise an Exception, and
    unbalanced markers trip an assertion."""
    regions: List[Region] = []
    open_start: Optional[int] = None

    class PragmaVisitor(ca.NodeVisitor):
        def visit_Pragma(self, node: ca.Pragma) -> None:
            nonlocal open_start
            marker = node.string
            if marker == "_permuter randomizer start":
                if open_start is not None:
                    raise Exception("nested PERM_RANDOMIZE not supported")
                # The region begins right after the start pragma...
                open_start = indices.ends[node]
            elif marker == "_permuter randomizer end":
                assert open_start is not None, "randomizer end without start"
                # ...and stops right before the end pragma.
                region = Region(open_start + 1, indices.starts[node] - 1, indices)
                regions.append(region)
                open_start = None

    PragmaVisitor().visit(top_node)
    assert open_start is None, "randomizer start without end"
    return random.choice(regions) if regions else Region.unbounded()
|
|
|
|
|
|
def get_block_expressions(block: Block, region: Region) -> List[Expression]:
    """Collect every expression inside 'block' that lies entirely within
    'region', in the order in which replace_subexprs visits them."""
    found: List[Expression] = []

    def collect(expr: Expression) -> None:
        # Returning None means replace_subexprs leaves the expression as is.
        if not region.contains_node(expr):
            return
        found.append(expr)

    replace_subexprs(block, collect)
    return found
|
|
|
|
|
|
def compute_write_locations(
    top_node: ca.Node, indices: Indices
) -> Dict[str, List[int]]:
    """Map each variable name to the increasing list of start indices of the
    nodes that write to it: named declarations, increments/decrements of a
    plain identifier, and assignments whose left-hand side is a plain
    identifier."""
    writes: Dict[str, List[int]] = {}
    step_ops = ("p++", "p--", "++", "--")

    def record(var_name: str, loc: int) -> None:
        locs = writes.setdefault(var_name, [])
        # A list is only ever empty at the moment it is created.
        assert (
            not locs or loc > locs[-1]
        ), "consistent traversal order should guarantee monotonicity here"
        locs.append(loc)

    class WriteVisitor(ca.NodeVisitor):
        def visit_Decl(self, node: ca.Decl) -> None:
            name = node.name
            if name:
                record(name, indices.starts[node])
            self.generic_visit(node)

        def visit_UnaryOp(self, node: ca.UnaryOp) -> None:
            operand = node.expr
            if node.op in step_ops and isinstance(operand, ca.ID):
                record(operand.name, indices.starts[node])
            self.generic_visit(node)

        def visit_Assignment(self, node: ca.Assignment) -> None:
            target = node.lvalue
            if isinstance(target, ca.ID):
                record(target.name, indices.starts[node])
            self.generic_visit(node)

    WriteVisitor().visit(top_node)
    return writes
|
|
|
|
|
|
def compute_read_locations(top_node: ca.Node, indices: Indices) -> Dict[str, List[int]]:
    """Map each variable name to the increasing list of start indices of the
    identifier nodes that find_var_reads reports as reads of it."""
    reads: Dict[str, List[int]] = {}
    for read in find_var_reads(top_node):
        loc = indices.starts[read]
        locs = reads.setdefault(read.name, [])
        # A list is only ever empty at the moment it is created.
        assert (
            not locs or loc > locs[-1]
        ), "consistent traversal order should guarantee monotonicity here"
        locs.append(loc)
    return reads
|
|
|
|
|
|
def find_var_reads(top_node: ca.Node) -> List[ca.ID]:
    """Return the identifier nodes below 'top_node' that are read, in
    traversal order.

    The following identifiers are not counted as reads:
    - the target of an assignment whose left-hand side is a plain identifier
      (the right-hand side is still searched for reads);
    - a plain identifier whose address is taken (`&var`);
    - anything in a struct reference other than its base expression;
    - anything in a declaration other than its initializer."""
    ret: List[ca.ID] = []

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, node: ca.Decl) -> None:
            # Only the initializer is searched; the declared type is skipped.
            if node.init:
                self.visit(node.init)

        def visit_ID(self, node: ca.ID) -> None:
            ret.append(node)

        def visit_UnaryOp(self, node: ca.UnaryOp) -> None:
            # Taking the address of a variable does not read its value.
            if node.op == "&" and isinstance(node.expr, ca.ID):
                return
            self.generic_visit(node)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            # Only descend into the base expression, not the member part.
            self.visit(node.name)

        def visit_Assignment(self, node: ca.Assignment) -> None:
            if isinstance(node.lvalue, ca.ID):
                # The target itself is written rather than read, but the
                # value being assigned may well read other variables.
                self.visit(node.rvalue)
            else:
                self.generic_visit(node)

    Visitor().visit(top_node)
    return ret
|
|
|
|
|
|
def visit_replace(top_node: ca.Node, callback: Callable[[ca.Node, bool], Any]) -> None:
    """Walk the AST below 'top_node', offering each visited node to 'callback'
    before descending into it.

    The callback receives the node and a flag that is True only when the node
    is visited neither as a top-level item (statement, block, ...) nor in
    lvalue position. If it returns a truthy value, that value takes the
    node's place in its parent and the node's children are not visited.
    Replacements offered for 'top_node' itself or for a function call's
    argument list are discarded. Function call names, `sizeof` operands and
    type names inside expression lists are never visited."""
    # Operators whose operand is treated as being in lvalue position.
    lvalue_ops = ("p++", "p--", "++", "--", "&")

    def drop_empty(stmt: Any) -> Any:
        # For-loop init/next slots hold None rather than an empty statement.
        return None if isinstance(stmt, ca.EmptyStatement) else stmt

    def walk_all(stmts: List[Any]) -> None:
        for pos, sub in enumerate(stmts):
            stmts[pos] = walk(sub, True)

    def walk(orig_node: ca.Node, toplevel: bool = False, *, lvalue: bool = False) -> Any:
        node: "ca.AnyNode" = typing.cast("ca.AnyNode", orig_node)
        replacement = callback(node, not (toplevel or lvalue))
        if replacement:
            return replacement
        if isinstance(node, ca.Assignment):
            node.lvalue = walk(node.lvalue, lvalue=True)
            node.rvalue = walk(node.rvalue)
        elif isinstance(node, ca.StructRef):
            # `a.b` keeps the lvalue-ness of the whole reference; `a->b` does not.
            node.name = walk(node.name, lvalue=(lvalue and node.type == "."))
        elif isinstance(node, ca.Cast):
            if node.expr:
                node.expr = walk(node.expr)
        elif isinstance(node, (ca.Constant, ca.ID)):
            pass
        elif isinstance(node, ca.UnaryOp):
            if node.op in lvalue_ops:
                node.expr = walk(node.expr, lvalue=True)
            elif node.op != "sizeof":
                node.expr = walk(node.expr)
        elif isinstance(node, ca.BinaryOp):
            node.left = walk(node.left)
            node.right = walk(node.right)
        elif isinstance(node, ca.FuncCall):
            # not worth replacing .name
            if node.args:
                walk(node.args, True)
        elif isinstance(node, ca.ExprList):
            for pos, sub in enumerate(node.exprs):
                if not isinstance(sub, ca.Typename):
                    node.exprs[pos] = walk(sub)
        elif isinstance(node, ca.ArrayRef):
            node.name = walk(node.name, lvalue=lvalue)
            node.subscript = walk(node.subscript)
        elif isinstance(node, ca.TernaryOp):
            node.cond = walk(node.cond)
            node.iftrue = walk(node.iftrue, True)
            node.iffalse = walk(node.iffalse, True)
        elif isinstance(node, ca.Return):
            if node.expr:
                node.expr = walk(node.expr)
        elif isinstance(node, ca.Decl):
            if node.init:
                node.init = walk(node.init, isinstance(node.init, ca.InitList))
        elif isinstance(node, ca.For):
            if node.init:
                node.init = drop_empty(walk(node.init, True))
            if node.cond:
                node.cond = walk(node.cond)
            if node.next:
                node.next = drop_empty(walk(node.next, True))
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.Compound):
            if node.block_items:
                walk_all(node.block_items)
        elif isinstance(node, (ca.Case, ca.Default)):
            if node.stmts:
                walk_all(node.stmts)
        elif isinstance(node, ca.While):
            node.cond = walk(node.cond)
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.DoWhile):
            node.stmt = walk(node.stmt, True)
            node.cond = walk(node.cond)
        elif isinstance(node, ca.Switch):
            node.cond = walk(node.cond)
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.Label):
            node.stmt = walk(node.stmt, True)
        elif isinstance(node, ca.If):
            node.cond = walk(node.cond)
            node.iftrue = walk(node.iftrue, True)
            if node.iffalse:
                node.iffalse = walk(node.iffalse, True)
        elif isinstance(
            node,
            (
                ca.TypeDecl,
                ca.PtrDecl,
                ca.ArrayDecl,
                ca.Typename,
                ca.IdentifierType,
                ca.Struct,
                ca.Union,
                ca.Enum,
                ca.EmptyStatement,
                ca.Pragma,
                ca.Break,
                ca.Continue,
                ca.Goto,
                ca.CompoundLiteral,
                ca.Typedef,
                ca.FuncDecl,
                ca.FuncDef,
                ca.EllipsisParam,
                ca.Enumerator,
                ca.EnumeratorList,
                ca.FileAST,
                ca.InitList,
                ca.NamedInitializer,
                ca.ParamList,
            ),
        ):
            # Nothing below these node kinds is visited.
            pass
        else:
            # Static exhaustiveness check: every node kind must be handled above.
            _: None = node
            assert False, f"Node with unknown type: {node}"
        return node

    walk(top_node, True)
|
|
|
|
|
|
def replace_subexprs(top_node: ca.Node, callback: Callable[[Expression], Any]) -> None:
    """Run visit_replace over 'top_node', but only offer 'callback' the nodes
    that visit_replace flags as expressions. A truthy return value from the
    callback replaces the expression."""

    def only_exprs(node: ca.Node, is_expr: bool) -> Any:
        return callback(typing.cast(Expression, node)) if is_expr else None

    visit_replace(top_node, only_exprs)
|
|
|
|
|
|
def replace_node(top_node: ca.Node, old: ca.Node, new: ca.Node) -> None:
    """Substitute 'new' for the node object 'old' (matched by identity)
    wherever visit_replace encounters it below 'top_node'."""

    def swap(node: ca.Node, _is_expr: bool) -> Optional[ca.Node]:
        if node is old:
            return new
        return None

    visit_replace(top_node, swap)
|
|
|
|
|
|
def random_bool(random: Random, prob: float) -> bool:
    """Draw one number from 'random' and return True with probability 'prob'."""
    draw = random.random()
    return draw < prob
|
|
|
|
|
|
def random_weighted(random: Random, values: Sequence[Tuple[T, float]]) -> T:
    """Pick one value from a sequence of (value, weight) pairs, with
    probability proportional to its weight.

    Weights must be non-negative and must not all be zero; both conditions
    are checked with assertions. Exactly one number is drawn from 'random'."""
    total = 0.0
    for _, weight in values:
        assert weight >= 0, "Probabilities must be non-negative"
        total += weight
    assert total > 0, "Cannot pick randomly from empty set"

    threshold = random.uniform(0, total)
    running = 0.0
    for choice, weight in values:
        running += weight
        if running > threshold:
            return choice

    # Float imprecision
    for choice, weight in values:
        if weight > 0:
            return choice
    assert False, "unreachable"
|
|
|
|
|
|
def random_type(random: Random) -> SimpleType:
    """Build a random integer type declaration: optionally `unsigned`, one of
    char/short/int/long/long long (int being the most likely), and optionally
    `volatile`."""
    names: List[str] = ["unsigned"] if random_bool(random, 0.5) else []
    base_choices = [
        (["char"], 1),
        (["short"], 1),
        (["int"], 2),
        (["long"], 0.5),
        (["long", "long"], 0.5),
    ]
    names += random_weighted(random, base_choices)
    idtype = ca.IdentifierType(names=names)
    quals = ["volatile"] if random_bool(random, 0.5) else []
    return ca.TypeDecl(declname=None, quals=quals, type=idtype)
|
|
|
|
|
|
def randomize_type(
    type: SimpleType, typemap: TypeMap, random: Random, *, ensure_changed: bool = False
) -> SimpleType:
    """Swap a basic integer type for a freshly generated random one.

    Types that allowed_basic_type does not accept as being built purely from
    int/char/long/short/signed/unsigned are returned unchanged, or cause a
    RandomizationFailure when 'ensure_changed' is set."""
    integer_words = ["int", "char", "long", "short", "signed", "unsigned"]
    if not allowed_basic_type(type, typemap, integer_words):
        if ensure_changed:
            raise RandomizationFailure
        return type
    return random_type(random)
|
|
|
|
|
|
def randomize_innermost_type(
    type: Type, typemap: TypeMap, random: Random, *, ensure_changed: bool = False
) -> Type:
    """Apply randomize_type to the innermost TypeDecl of a possibly nested
    type. Each wrapping layer is shallow-copied on the way, so the type
    passed in is not modified."""
    if not isinstance(type, ca.TypeDecl):
        wrapper = copy.copy(type)
        wrapper.type = randomize_innermost_type(
            type.type, typemap, random, ensure_changed=ensure_changed
        )
        return wrapper
    return randomize_type(type, typemap, random, ensure_changed=ensure_changed)
|
|
|
|
|
|
def get_insertion_points(
    fn: ca.FuncDef, region: Region, *, allow_within_decl: bool = False
) -> List[Tuple[Block, int, Optional[ca.Node]]]:
    """List the places in 'fn' where a statement may be inserted, restricted
    to 'region'.

    Each entry is (block, index, statement): insert into 'block' at position
    'index', i.e. just before 'statement'. The statement is None for the
    position at the very end of a block. Unless 'allow_within_decl' is set,
    positions directly before a declaration are left out."""
    points: List[Tuple[Block, int, Optional[ca.Node]]] = []

    def scan(block: Block) -> None:
        stmts = ast_util.get_block_stmts(block, False)
        # Used to decide whether the end of the block is inside the region;
        # stays as the block itself when the block has no statements.
        tail: ca.Node = block
        for pos, stmt in enumerate(stmts):
            if region.contains_pre(stmt):
                points.append((block, pos, stmt))
            ast_util.for_nested_blocks(stmt, scan)
            tail = stmt
        if region.contains_node(tail):
            points.append((block, len(stmts), None))

    scan(fn.body)
    if allow_within_decl:
        return points
    return [point for point in points if not isinstance(point[2], ca.Decl)]
|
|
|
|
|
|
def maybe_reuse_var(
    var: Optional[str],
    assign_before: ca.Node,
    orig_expr: Expression,
    type: SimpleType,
    reads: Dict[str, List[int]],
    writes: Dict[str, List[int]],
    indices: Indices,
    typemap: TypeMap,
    random: Random,
) -> Optional[str]:
    """Decide whether the existing variable 'var' can hold a temporary that is
    assigned just before 'assign_before' and used at 'orig_expr'.

    Returns 'var' if it is reused, otherwise None. One random number is
    always drawn, even when 'var' is None. Reuse is rejected when the types
    differ, when the old value of the variable is still read afterwards, or
    when the variable is overwritten before 'orig_expr' is reached."""
    # Draw first so the amount of randomness consumed doesn't depend on 'var'.
    wants_reuse = random_bool(random, PROB_REUSE_VAR)
    if var is None or not wants_reuse:
        return None
    var_type: SimpleType = decayed_expr_type(ca.ID(var), typemap)
    if not same_type(var_type, type, typemap, allow_similar=True):
        return None

    def first_at_or_after(locs: List[int], value: int) -> Optional[int]:
        pos = bisect.bisect_left(locs, value)
        return locs[pos] if pos < len(locs) else None

    assignment_ind = indices.starts[assign_before]
    expr_ind = indices.starts[orig_expr]
    next_write = first_at_or_after(writes.get(var, []), assignment_ind)
    next_read = first_at_or_after(reads.get(var, []), assignment_ind)
    # TODO: if write/read is within expr, search again from after it (since
    # we move expr, uses within it aren't relevant).
    if next_read is not None:
        if next_write is None or next_write >= next_read:
            # We don't want to overwrite a variable which we later read,
            # unless we write to it before that read
            return None
    if next_write is not None and next_write < expr_ind:
        # Our write will be overwritten before we manage to read from it.
        return None
    return var
|
|
|
|
|
|
def perm_temp_for_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Create a temporary variable for a random expression. The variable will
    be assigned at another random point (nearer the expression being more
    likely), possibly reuse an existing variable, possibly be of a different
    size/signedness, and possibly be used for other identical expressions as
    well. Only expressions within the given region may be chosen for
    replacement, but the assignment and the affected identical expressions may
    be outside of it.

    Raises RandomizationFailure (via ensure) when there is no candidate
    place/expression pair. The function body 'fn' is modified in place."""
    # A place is (block, index within block, statement to insert before).
    Place = Tuple[Block, int, Statement]
    # Per expression node: how many places have been offered for it so far.
    # The count carries over when the same node is visited again, so places
    # offered on later visits get lower weights.
    einds: Dict[ca.Node, int] = {}
    writes: Dict[str, List[int]] = compute_write_locations(fn, indices)
    reads: Dict[str, List[int]] = compute_read_locations(fn, indices)
    typemap = build_typemap(ast)
    # Weighted list of ((place, expression, variable to maybe reuse), weight).
    candidates: List[Tuple[Tuple[Place, Expression, Optional[str]], float]] = []

    # Step 0: decide whether to make a pointer to the chosen expression, or to
    # copy it by value.
    should_make_ptr = random_bool(random, PROB_TEMP_PTR)

    def surrounding_writes(expr: Expression, base: Expression) -> Tuple[int, int]:
        """Compute the previous and next write to a variable included in expr,
        starting from base. If none, default to -1 or MAX_INDEX respectively.
        If base itself writes to an included variable (e.g. if it is an
        increment expression), the "next" write will be defined as the node
        itself, while the "previous" will continue searching to the left."""
        sub_reads = find_var_reads(expr)
        prev_write = -1
        next_write = MAX_INDEX
        base_index = indices.starts[base]
        for sub_read in sub_reads:
            var_name = sub_read.name
            if var_name not in writes:
                continue
            # Find the last write that is strictly before base_index,
            # and the first write that is on or after it.
            wr = writes[var_name]
            ind = bisect.bisect_left(wr, base_index)
            if ind > 0:
                prev_write = max(prev_write, wr[ind - 1])
            if ind < len(wr):
                next_write = min(next_write, wr[ind])
        return prev_write, next_write

    # Step 1: assign probabilities to each place/expression
    def rec(block: Block, reuse_cands: List[str]) -> None:
        stmts = ast_util.get_block_stmts(block, False)
        # Copy, so declarations seen in this block don't leak to the caller.
        reuse_cands = reuse_cands[:]
        assignment_cands: List[Place] = []  # places to insert before
        past_decls = False
        for index, stmt in enumerate(stmts):
            if isinstance(stmt, ca.Decl):
                assert stmt.name, "Anonymous declarations cannot happen in functions"
                if not isinstance(stmt.type, ca.ArrayDecl):
                    reuse_cands.append(stmt.name)
                    if not isinstance(stmt.type, ca.PtrDecl):
                        # Make non-pointers more common
                        reuse_cands.append(stmt.name)
            elif not isinstance(stmt, ca.Pragma):
                past_decls = True
            # Only statements from the first non-declaration, non-pragma
            # statement onwards are valid places for the assignment.
            if past_decls:
                assignment_cands.append((block, index, stmt))

            ast_util.for_nested_blocks(stmt, lambda b: rec(b, reuse_cands))

            def visitor(expr: Expression) -> None:
                if DEBUG_EAGER_TYPES:
                    decayed_expr_type(expr, typemap)

                if not region.contains_node(expr):
                    return

                orig_expr = expr
                if should_make_ptr:
                    # Only lvalues can have their address taken.
                    if not ast_util.is_lvalue(expr):
                        return
                    expr = ca.UnaryOp("&", expr)

                eind = einds.get(expr, 0)
                prev_write, _ = surrounding_writes(expr, orig_expr)

                # Walk the places from the nearest one backwards.
                for place in assignment_cands[::-1]:
                    # If expr contains an ID which is written to within
                    # [place, expr), bail out; we're trying to move the
                    # assignment too high up.
                    # TODO: also fail on moving past function calls, or
                    # possibly-aliasing writes.
                    if indices.starts[place[2]] <= prev_write:
                        break

                    # Make far-away places less likely, and similarly for
                    # trivial expressions.
                    eind += 1
                    prob = 1 / eind
                    if isinstance(orig_expr, (ca.ID, ca.Constant)):
                        prob *= 0.15 if should_make_ptr else 0.5
                    reuse_cand = random.choice(reuse_cands) if reuse_cands else None
                    candidates.append(((place, expr, reuse_cand), prob))

                einds[expr] = eind

            # visitor always returns None, so this only collects candidates
            # and replaces nothing.
            replace_subexprs(stmt, visitor)

    rec(fn.body, [])

    # Step 2: decide on a place/expression
    ensure(candidates)
    place: Optional[Place]
    place, expr, reuse_cand = random_weighted(random, candidates)

    if random_bool(random, PROB_TEMP_ASSIGN_AT_FIRST_USE):
        # Don't emit a statement for the assignment, emit an assignment
        # expression at the first use instead.
        place = None

    type: SimpleType = decayed_expr_type(expr, typemap)

    # Always use pointers when replacing structs
    if (
        not should_make_ptr
        and isinstance(type, ca.TypeDecl)
        and isinstance(type.type, (ca.Struct, ca.Union))
        and ast_util.is_lvalue(expr)
    ):
        should_make_ptr = True
        expr = ca.UnaryOp("&", expr)
        type = decayed_expr_type(expr, typemap)

    # From here on 'expr' is what gets assigned to the temporary, while
    # 'orig_expr' is the node in the AST that gets replaced.
    if should_make_ptr:
        assert isinstance(expr, ca.UnaryOp)
        assert not isinstance(expr.expr, ca.Typename)
        orig_expr = expr.expr
    else:
        orig_expr = expr
    # print("replacing:", to_c(expr))

    # Step 3: decide on a variable to hold the expression
    if place is not None:
        assign_before = place[2]
    else:
        assign_before = orig_expr
    reused_var = maybe_reuse_var(
        reuse_cand,
        assign_before,
        orig_expr,
        type,
        reads,
        writes,
        indices,
        typemap,
        random,
    )
    if reused_var is not None:
        reused = True
        var = reused_var
    else:
        reused = False
        # Pick the first new_var/new_var2/... name that has no recorded write
        # in this function.
        var = "new_var"
        counter = 1
        while var in writes:
            counter += 1
            var = f"new_var{counter}"

    # Step 4: possibly expand the replacement to include duplicate expressions.
    prev_write, next_write = surrounding_writes(expr, orig_expr)
    # Uses before the point of assignment cannot use the temporary.
    prev_write = max(prev_write, indices.starts[assign_before] - 1)
    replace_cands: List[Expression] = []

    def find_duplicates(e: Expression) -> None:
        if prev_write < indices.starts[e] <= next_write and ast_util.equal_ast(
            e, orig_expr
        ):
            replace_cands.append(e)

    if ast_util.is_effectful(expr):
        # Effectful expressions are never merged with their duplicates.
        replace_cands = [orig_expr]
    else:
        replace_subexprs(fn.body, find_duplicates)

    assert orig_expr in replace_cands
    if random_bool(random, PROB_TEMP_REPLACE_ALL):
        lo_index = 0
        hi_index = len(replace_cands)
    else:
        # Choose a random interval of duplicates that includes orig_expr...
        index = replace_cands.index(orig_expr)
        lo_index = random.randint(0, index)
        hi_index = random.randint(index + 1, len(replace_cands))
        if random_bool(random, PROB_TEMP_REPLACE_MOST):
            # ...and possibly stretch it all the way to one of the two ends.
            if random_bool(random, 0.5):
                lo_index = 0
            else:
                hi_index = len(replace_cands)
    replace_cand_set = set(replace_cands[lo_index:hi_index])

    # Step 5: replace the chosen expression
    def replacer(e: Expression) -> Optional[Expression]:
        if e in replace_cand_set:
            ret: Expression = ca.ID(var)
            if place is None and e is orig_expr:
                # Without a separate assignment statement, the assignment is
                # emitted in place of the chosen expression itself.
                ret = ca.Assignment("=", ret, expr)
            if should_make_ptr:
                ret = ca.UnaryOp("*", ret)
            return ret
        return None

    replace_subexprs(fn.body, replacer)

    # Step 6: insert the assignment and any new variable declaration
    if place is not None:
        block, index, _ = place
        assignment = ca.Assignment("=", ca.ID(var), expr)
        ast_util.insert_statement(block, index, assignment)
    if not reused:
        if random_bool(random, PROB_RANDOMIZE_TYPE):
            type = randomize_type(type, typemap, random)
        ast_util.insert_decl(fn, var, type, random)
|
|
|
|
|
|
def perm_expand_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Replace a random variable by its contents.

    A read of some variable inside 'region' is chosen, the write preceding it
    is looked up, and some or all of the reads served by that write are
    replaced by copies of the written expression. Raises RandomizationFailure
    when no suitable read or write exists."""
    all_writes: Dict[str, List[int]] = compute_write_locations(fn, indices)
    all_reads: Dict[str, List[int]] = compute_read_locations(fn, indices)

    # Step 1: pick out a variable to replace
    read_owner: Dict[int, str] = {}
    for name, locs in all_reads.items():
        for loc in locs:
            if region.contains_pre_index(loc):
                read_owner[loc] = name
    ensure(read_owner)
    index = random.choice(list(read_owner))
    var = read_owner[index]

    # Step 2: find the assignment it uses
    reads = all_reads[var]
    writes = all_writes.get(var, [])
    pos = bisect.bisect_left(writes, index)
    # if pos == 0, there is no write to replace the read by.
    ensure(pos > 0)
    before = writes[pos - 1]
    after = writes[pos] if pos != len(writes) else MAX_INDEX
    write = reverse_start_indices(indices)[before]
    if (
        isinstance(write, ca.Decl)
        and write.init
        and not isinstance(write.init, ca.InitList)
    ):
        repl_expr = write.init
    elif isinstance(write, ca.Assignment) and write.op == "=":
        repl_expr = write.rvalue
    else:
        # Writes of any other kind don't have a value to substitute.
        raise RandomizationFailure

    # Step 3: pick out the range of reads to replace
    repl_cands = [
        loc for loc in reads if before < loc < after and region.contains_pre_index(loc)
    ]
    assert repl_cands, "index is always in repl_cands"
    chosen = repl_cands.index(index)
    # The random draw comes first, whatever the number of candidates.
    replace_all = random_bool(random, PROB_EXPAND_REPLACE_ALL)
    if replace_all or len(repl_cands) <= 1:
        keep_var = random_bool(random, PROB_KEEP_REPLACED_VAR)
    else:
        # Keep using the variable for a bit in the middle
        side = random.randrange(3)
        total = len(repl_cands)
        lo = 0 if side == 0 else random.randint(0, chosen)
        hi = total if side == 1 else random.randint(chosen + 1, total)
        if lo == 0 and hi == total:
            lo, hi = chosen, chosen + 1
        del repl_cands[lo:hi]
        keep_var = True
    repl_cands_set = set(repl_cands)

    # Don't duplicate effectful expressions.
    if ast_util.is_effectful(repl_expr):
        ensure(len(repl_cands) == 1 and not keep_var)

    # Step 4: do the replacement
    def callback(node: ca.Node, is_expr: bool) -> Optional[ca.Node]:
        if indices.starts[node] in repl_cands_set:
            return copy.deepcopy(repl_expr)
        if node == write and isinstance(write, ca.Assignment) and not keep_var:
            # The assignment goes away: as an expression only its target is
            # left, as a statement nothing is.
            return write.lvalue if is_expr else ca.EmptyStatement()
        return None

    visit_replace(fn.body, callback)
    if isinstance(write, ca.Decl) and not keep_var:
        write.init = None
|
|
|
|
|
|
def perm_randomize_internal_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of pre-existing local variables. Function parameters
    are not included -- those are handled by perm_randomize_function_type.
    Only variables mentioned within the given region are affected.

    Raises RandomizationFailure when no suitable declaration exists, or when
    the chosen declaration's type cannot be randomized."""
    names: Set[str] = set()

    class IdVisitor(ca.NodeVisitor):
        def visit_ID(self, node: ca.ID) -> None:
            if region.contains_node(node):
                names.add(node.name)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            # Only the base expression of a struct reference is searched.
            self.visit(node.name)

    IdVisitor().visit(fn)

    typemap = build_typemap(ast)
    decls: List[ca.Decl] = []

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            if isinstance(decl.type, ca.TypeDecl) and decl.name and decl.name in names:
                decls.append(decl)
            self.generic_visit(decl)

    # Search the body only. Walking all of 'fn' would also reach the
    # parameter declarations in the function's signature, and changing one of
    # those here would leave other declarations of the function out of sync.
    Visitor().visit(fn.body)

    ensure(decls)
    decl = random.choice(decls)
    assert isinstance(decl.type, ca.TypeDecl), "checked above"
    decl.type = randomize_type(decl.type, typemap, random, ensure_changed=True)
    set_decl_name(decl)
|
|
|
|
|
|
def perm_randomize_external_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of global variables. Only variables mentioned within the
    given region are affected.

    All top-level declarations of the chosen name are replaced in 'ast.ext' by
    shallow copies carrying the new type; the original declaration objects
    are left unmodified. Raises RandomizationFailure when the region mentions
    no identifier, when the chosen identifier has no top-level declaration,
    or when its type cannot be randomized."""
    names: Set[str] = set()

    class IdVisitor(ca.NodeVisitor):
        def visit_ID(self, node: ca.ID) -> None:
            if region.contains_node(node):
                names.add(node.name)

        def visit_StructRef(self, node: ca.StructRef) -> None:
            # Only the base expression of a struct reference is searched.
            self.visit(node.name)

    IdVisitor().visit(fn)

    ensure(names)
    # The iteration order of a set of strings varies between interpreter runs
    # (string hashes are randomized), so sort before choosing to keep the
    # result reproducible for a given state of 'random'.
    name = random.choice(sorted(names))

    # Work on copies: they are only written into the AST once a new type has
    # been successfully computed.
    decls: List[Tuple[ca.Decl, int]] = [
        (copy.copy(item), i)
        for i, item in enumerate(ast.ext)
        if isinstance(item, ca.Decl) and item.name == name
    ]

    ensure(decls)
    decl_type = get_decl_type(random.choice(decls)[0])

    typemap = build_typemap(ast)
    new_type = randomize_innermost_type(decl_type, typemap, random, ensure_changed=True)

    # Give every declaration of the variable the same new type.
    for decl, i in decls:
        decl.type = copy.deepcopy(new_type)
        ast.ext[i] = decl
        set_decl_name(decl)
|
|
|
|
|
|
def perm_randomize_function_type(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Randomize types of function parameters and returns. Only functions
    called within the given region are affected, plus the current function.

    Either the return type or one parameter type of the chosen function is
    changed, and the change is mirrored to every declaration of that function
    in 'ast.ext'. Raises RandomizationFailure when the chosen function has no
    declaration, has no usable parameter, or has a type that cannot be
    randomized."""
    assert fn.decl.name is not None, "function definitions have names"
    names: Set[str] = {fn.decl.name}

    class IdVisitor(ca.NodeVisitor):
        def visit_FuncCall(self, node: ca.FuncCall) -> None:
            if region.contains_node(node) and isinstance(node.name, ca.ID):
                names.add(node.name.name)
            self.generic_visit(node)

    IdVisitor().visit(fn)

    # The iteration order of a set of strings varies between interpreter runs
    # (string hashes are randomized), so sort before choosing to keep the
    # result reproducible for a given state of 'random'.
    name = random.choice(sorted(names))

    # Find the declarations of function with the given name. For performance
    # reasons, the part of the AST they live in are shared between all
    # randomization runs, so if we mutated them in place bad things would
    # happen. Thus, we replace the AST parts we plan to change with mutable
    # copies.
    all_decls: List[Tuple[ca.Decl, int, "ca.ExternalDeclaration"]] = []
    main_decl: Optional[ca.Decl] = None
    for i, item in enumerate(ast.ext):
        if (
            isinstance(item, ca.Decl)
            and isinstance(item.type, ca.FuncDecl)
            and item.name == name
        ):
            new_decl = copy.copy(item)
            ast.ext[i] = new_decl
            all_decls.append((new_decl, i, new_decl))
        if isinstance(item, ca.FuncDef) and item.decl.name == name:
            assert isinstance(
                item.decl.type, ca.FuncDecl
            ), "function definitions have function types"
            new_fndef = copy.copy(item)
            new_decl = copy.copy(item.decl)
            new_fndef.decl = new_decl
            ast.ext[i] = new_fndef
            all_decls.append((new_decl, i, new_fndef))
            main_decl = new_decl

    # Change the type within the function definition if there is one (since we
    # need to keep names there), or else within an arbitrary one of the
    # (typically just one) declarations. We later mirror the change to all
    # declarations.
    ensure(all_decls)
    if not main_decl:
        main_decl = random.choice(all_decls)[0]

    typemap = build_typemap(ast)

    # Deep-copy the function type before editing it, as it is still shared
    # with the original declaration.
    main_fndecl = copy.deepcopy(main_decl.type)
    assert isinstance(main_fndecl, ca.FuncDecl), "checked above"
    main_decl.type = main_fndecl

    if random_bool(random, 0.5):
        # Replace the return type, changing integer signedness/size as well as
        # switching to/from void (which we should perhaps avoid if the function
        # call result is used, but eh, it's annoying to tell).
        type = pointer_decay(main_fndecl.type, typemap)
        if allowed_basic_type(type, typemap, ["void"]):
            main_fndecl.type = random_type(random)
        elif random_bool(random, PROB_RET_VOID):
            idtype = ca.IdentifierType(names=["void"])
            main_fndecl.type = ca.TypeDecl(declname=None, quals=[], type=idtype)
        else:
            main_fndecl.type = randomize_type(
                type, typemap, random, ensure_changed=True
            )
        set_decl_name(main_decl)
    else:
        # Replace a parameter, changing integer signedness/size.
        if not main_fndecl.args:
            raise RandomizationFailure
        ensure(main_fndecl.args.params)
        ind = random.randrange(len(main_fndecl.args.params))
        arg = main_fndecl.args.params[ind]
        if isinstance(arg, (ca.ID, ca.EllipsisParam)):
            # These parameter kinds carry no type that could be changed.
            raise RandomizationFailure
        arg_type = arg.type if isinstance(arg, ca.Typename) else get_decl_type(arg)
        type = pointer_decay(arg_type, typemap)
        arg.type = randomize_type(type, typemap, random, ensure_changed=True)
        if isinstance(arg, ca.Decl):
            set_decl_name(arg)

    # Commit the changes by writing them back to the AST, for all declarations.
    for decl, ext_index, new_node in all_decls:
        ast.ext[ext_index] = new_node
        if decl is not main_decl:
            decl.type = copy.deepcopy(main_decl.type)
|
|
|
|
|
|
def perm_refer_to_var(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Insert a no-op `if (x) {}` that merely mentions a variable or member.

    The mentioned expression (an identifier or a struct member access) is
    picked from inside the randomization region; the statement itself may be
    placed anywhere in the function. The empty branch gets optimized away but
    may affect regalloc."""
    # Collect the expressions worth referring to, within the region only.
    referable: List[Expression] = []
    for candidate in get_block_expressions(fn.body, region):
        if isinstance(candidate, (ca.StructRef, ca.ID)):
            referable.append(candidate)
    ensure(referable)
    expr = random.choice(referable)
    ensure(not ast_util.is_effectful(expr))

    # A struct/union value cannot serve as a condition; use its address.
    typemap = build_typemap(ast)
    expr_type: Type = resolve_typedefs(decayed_expr_type(expr, typemap), typemap)
    is_aggregate = isinstance(expr_type, ca.TypeDecl) and isinstance(
        expr_type.type, (ca.Struct, ca.Union)
    )
    if is_aggregate:
        expr = ca.UnaryOp("&", expr)

    # Negate the condition half of the time.
    if random_bool(random, 0.5):
        expr = ca.UnaryOp("!", expr)

    # The statement may go anywhere -- possibly outside the randomization
    # region, since regalloc can act at a distance. (Except before a
    # declaration.)
    ins_cands = get_insertion_points(fn, Region.unbounded())
    ensure(ins_cands)

    # Repeat the condition up to two extra times: if (x && x && x) {}
    # sometimes helps.
    cond = copy.deepcopy(expr)
    extra_repeats = random.choice((0, 0, 0, 0, 0, 1, 2, 2))
    for _ in range(extra_repeats):
        cond = ca.BinaryOp("&&", cond, copy.deepcopy(expr))

    empty_if = ca.If(cond=cond, iftrue=ca.Compound(block_items=[]), iffalse=None)
    tob, toi, _ = random.choice(ins_cands)
    ast_util.insert_statement(tob, toi, empty_if)
|
|
|
|
|
|
def perm_ins_block(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Enclose a random run of statements in `if (1) { ... }` or
    `do { ... } while(0)`.

    Any block of the function may be picked, because control flow can have
    remote effects; the region only gates the same-line do-while form, which
    is used solely when every wrapped statement lies inside it."""
    blocks: List[Block] = []

    def collect(block: Block) -> None:
        # Record this block, then descend into blocks nested in its statements.
        blocks.append(block)
        for inner in ast_util.get_block_stmts(block, False):
            ast_util.for_nested_blocks(inner, collect)

    collect(fn.body)
    chosen = random.choice(blocks)
    stmts = ast_util.get_block_stmts(chosen, True)

    # Leading declarations and pragmas stay outside of the new block.
    first_movable = next(
        (
            pos
            for pos, stmt in enumerate(stmts)
            if not isinstance(stmt, (ca.Decl, ca.Pragma))
        ),
        len(stmts),
    )

    # Two random cut points, ordered so that lo <= hi (the run may be empty).
    cut_a = random.randrange(first_movable, len(stmts) + 1)
    cut_b = random.randrange(first_movable, len(stmts) + 1)
    lo, hi = min(cut_a, cut_b), max(cut_a, cut_b)

    wrapped = stmts[lo:hi]
    new_block = ca.Compound(block_items=wrapped)

    use_dowhile = random_bool(random, PROB_INS_BLOCK_DOWHILE) and all(
        region.contains_node(stmt) for stmt in wrapped
    )
    if use_dowhile:
        never = ca.Constant(type="int", value="0")
        replacement = [
            ca.Pragma("_permuter sameline start"),
            ca.DoWhile(cond=never, stmt=new_block),
            ca.Pragma("_permuter sameline end"),
        ]
    else:
        always = ca.Constant(type="int", value="1")
        replacement = [ca.If(cond=always, iftrue=new_block, iffalse=None)]

    # Splice the wrapper in place of the statements it now contains.
    stmts[lo:hi] = replacement
|
|
|
|
|
|
def perm_empty_stmt(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Insert a statement that does nothing, picked from:
    - if (1) {}  (sometimes several in a row)
    - if (0) {}
    - label:
    - goto label; label:;
    - ;
    The region restriction is ignored, since control flow can have remote
    effects."""

    # Any insertion point will do, except before a declaration.
    cands = get_insertion_points(fn, Region.unbounded())
    ensure(cands)

    # The label name is drawn unconditionally, even for kinds that never use
    # it, which keeps the sequence of random draws independent of the kind.
    label_name = f"dummy_label_{random.randint(1, 10**6)}"

    def empty_if(truth: str) -> ca.If:
        """Build `if (<truth>) {}` with an integer constant condition."""
        return ca.If(
            cond=ca.Constant(type="int", value=truth),
            iftrue=ca.Compound([]),
            iffalse=None,
        )

    noop: List[Statement]
    kind = random.randrange(5)
    if kind == 0:  # if (1) or multiple if (1)
        count = random.choice([1, random.randint(2, 6)])
        noop = [empty_if("1") for _ in range(count)]
    elif kind == 1:  # if (0)
        noop = [empty_if("0")]
    elif kind == 2:  # label:
        noop = [ca.Label(label_name, ca.EmptyStatement())]
    elif kind == 3:  # goto label; label:
        noop = [
            ca.Goto(label_name),
            ca.Label(label_name, ca.EmptyStatement()),
        ]
    else:  # ;
        noop = [ca.EmptyStatement()]

    tob, toi, _ = random.choice(cands)

    # Keep the whole group on one source line.
    framed = [
        ca.Pragma("_permuter sameline start"),
        *noop,
        ca.Pragma("_permuter sameline end"),
    ]
    # Inserting back-to-front at a fixed index leaves the group in order.
    for stmt in reversed(framed):
        ast_util.insert_statement(tob, toi, stmt)
|
|
|
|
|
|
def perm_sameline(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Mark a random, usually short, run of statements to be emitted on one line."""
    points = get_insertion_points(fn, region)
    total = len(points)
    ensure(total >= 3)

    # Multiplying by four uniform factors skews the length towards small values.
    scaled: float = total - 2
    for _ in range(4):
        scaled *= random.uniform(0, 1)
    length = int(scaled) + 2

    start = random.randrange(total - length)
    end = start + length

    # The end marker goes in first: inserting a statement may shift the
    # indices of insertion points that come after it.
    end_point = points[end]
    ast_util.insert_statement(
        end_point[0], end_point[1], ca.Pragma("_permuter sameline end")
    )
    start_point = points[start]
    ast_util.insert_statement(
        start_point[0], start_point[1], ca.Pragma("_permuter sameline start")
    )
|
|
|
|
|
|
def perm_associative(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Swap the operands of a binary operation: a+b becomes b+a.

    Ordering comparisons are mirrored along with the swap, so `a < b` turns
    into `b > a`."""
    swappable_ops = {"+", "*", "|", "&", "^", "<", ">", "<=", ">=", "==", "!="}
    # Operators whose direction flips when the operands trade places.
    mirrored = {"<": ">", "<=": ">=", ">": "<", ">=": "<="}
    found: List[ca.BinaryOp] = []

    class Visitor(ca.NodeVisitor):
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if node.op in swappable_ops and region.contains_node(node):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    binop = random.choice(found)
    binop.left, binop.right = binop.right, binop.left
    binop.op = mirrored.get(binop.op, binop.op)
|
|
|
|
|
|
def perm_condition(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Toggle a condition between its implicit and explicit comparison with zero.

    `if (x)` becomes `if (x != 0)` and `if (!x)` becomes `if (x == 0)`, or the
    other way around. Conditions of for/while/do-while loops are handled too."""
    # NOTE(review): candidates are not filtered by `region` here, unlike in
    # most sibling passes -- confirm whether that is intended.
    found: List[Union[ca.If, ca.While, ca.DoWhile, ca.For]] = []

    class Visitor(ca.NodeVisitor):
        def _collect(self, node: ca.Node) -> None:
            found.append(node)  # type: ignore
            self.generic_visit(node)

        # Every statement kind that carries a condition is collected alike.
        visit_If = visit_While = visit_DoWhile = visit_For = _collect

    Visitor().visit(fn.body)
    ensure(found)
    node = random.choice(found)
    if not node.cond:
        # E.g. `for (;;)` has no condition to rewrite.
        raise RandomizationFailure

    cond = node.cond
    comparison_ops = ("==", "!=", "<", ">", "<=", ">=")
    if (
        isinstance(cond, ca.BinaryOp)
        and cond.op in comparison_ops
        and random_bool(random, 0.9)
    ):
        # Explicit -> implicit; only possible for (in)equality against zero.
        ensure(cond.op in ("==", "!="))
        rhs = cond.right
        ensure(
            isinstance(rhs, ca.Constant) and rhs.value in ("0", "0U", "0.0", "0.0f")
        )
        node.cond = ca.UnaryOp("!", cond.left) if cond.op == "==" else cond.left
        return

    # Implicit -> explicit: `x` turns into `x != 0`, `!x` (usually) into `x == 0`.
    operand = cond
    op = "!="
    if (
        isinstance(operand, ca.UnaryOp)
        and operand.op == "!"
        and random_bool(random, 0.9)
    ):
        assert not isinstance(operand.expr, ca.Typename)
        operand = operand.expr
        op = "=="
    zero = random_weighted(
        random,
        [
            (ca.Constant("int", "0"), 0.8),
            (ca.Constant("unsigned int", "0U"), 0.2),
            (ca.Constant("float", "0.0f"), 0.05),
        ],
    )
    node.cond = ca.BinaryOp(op, operand, zero)
|
|
|
|
|
|
def perm_add_self_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Insert a redundant `x = x;` for some variable declared in the function body."""
    points = get_insertion_points(fn, region)
    declared: List[str] = []

    class DeclCollector(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            # Unnamed declarations cannot be assigned to.
            if decl.name:
                declared.append(decl.name)
            self.generic_visit(decl)

    DeclCollector().visit(fn.body)
    ensure(declared)
    ensure(points)

    name = random.choice(declared)
    block, index, _ = random.choice(points)
    self_assign = ca.Assignment("=", ca.ID(name), ca.ID(name))
    ast_util.insert_statement(block, index, self_assign)
|
|
|
|
|
|
def perm_dummy_comma_expr(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Wrap a random expression x of the region in a comma expression, (0, x)."""
    exprs = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    # The constant on the left is evaluated and discarded; x stays the value.
    dummy = ca.Constant("int", "0")
    replace_node(fn.body, target, ca.ExprList([dummy, target]))
|
|
|
|
|
|
def perm_reorder_stmts(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Move a statement to another random place.

    Both the statement and its destination are taken from the insertion points
    of `region`; destinations close to the source are favoured. Declarations
    may only be moved to the start of a block or directly after another
    declaration; otherwise, a declaration with a simple initializer keeps its
    place and only the initializer is moved, as a new assignment. A move that
    is not permitted aborts via `ensure`/RandomizationFailure."""
    # Each candidate is a (block, index within block, statement) triple; the
    # statement is None for some candidates (presumably the end-of-block
    # insertion points -- see the `to_stmt` handling below).
    cands = get_insertion_points(fn, region, allow_within_decl=True)

    # Figure out candidate statements to be moved. Don't move pragmas; it can
    # cause assertion failures. Don't move blocks; statements are generally not
    # reordered across basic blocks, and we don't want to risk moving a block
    # to inside itself.
    source_inds = []
    for i, c in enumerate(cands):
        stmt = c[2]
        if (
            stmt is not None
            and not isinstance(stmt, ca.Pragma)
            and not ast_util.has_nested_block(stmt)
        ):
            source_inds.append(i)

    ensure(source_inds)
    # At this point `fromi` is an index into `cands`, not into a block.
    fromi = random.choice(source_inds)

    weighted_cands = []
    for i in range(len(cands)):
        # Distance in candidate positions; it is 0 for the source's own slot
        # and for the slot right after it, both of which would be no-op moves.
        dist = max(fromi - i, i - (fromi + 1))
        if dist == 0:
            continue
        # Move distance 1, 2, 3, ... with probabilities
        # 23%, 12%, 8%, 6%, 4%, 3%, 3%, 2%, 2%, 2%, ...
        prob = (dist + 1) ** -1.5
        weighted_cands.append((i, prob))
    ensure(weighted_cands)
    toi = random_weighted(random, weighted_cands)

    # From here on, `fromi` and `toi` are indices within their blocks.
    fromb, fromi, from_stmt = cands[fromi]
    tob, toi, to_stmt = cands[toi]

    if fromb == tob:
        # Guard against a move that would leave the statement where it is.
        ensure(toi != fromi and toi != fromi + 1)

    if isinstance(from_stmt, ca.Decl):
        # Moving a declaration is tricky, when also preserving C89 compatibility.
        # We can move it to after another declaration, or to the start of a block.
        # Alternatively, if the declaration includes an initializer, and we move
        # it forwards, we can split that out as an assignment.
        # We don't allow moving the declaration or assignment past the next
        # occurrence of the variable.
        ensure(from_stmt.name)
        var_name = from_stmt.name
        # NOTE(review): with no destination statement this uses the end of the
        # *source* block (`fromb`), not of `tob` -- confirm this is intended
        # when the two blocks differ.
        to_index = indices.starts[to_stmt] if to_stmt else indices.ends[fromb]
        uses = 0

        # Count mentions of the variable that start before the destination.
        # The declaration itself is counted through its TypeDecl.
        class Visitor(ca.NodeVisitor):
            def visit_ID(self, node: ca.ID) -> None:
                nonlocal uses
                if node.name == var_name and indices.starts[node] < to_index:
                    uses += 1

            def visit_TypeDecl(self, node: ca.TypeDecl) -> None:
                nonlocal uses
                if node.declname == var_name and indices.starts[node] < to_index:
                    uses += 1

        Visitor().visit(fn.body)
        ensure(uses <= 1)

        to_block_stmts = ast_util.get_block_stmts(tob, False)
        if toi == 0 or isinstance(to_block_stmts[toi - 1], ca.Decl):
            # Fine to move
            pass
        elif (
            from_stmt.name
            and from_stmt.init
            and not isinstance(from_stmt.init, ca.InitList)
            and uses > 0
        ):
            # Leave the declaration in place and move only its initializer,
            # turned into a plain assignment at the destination.
            assignment = ca.Assignment("=", ca.ID(from_stmt.name), from_stmt.init)
            ast_util.insert_statement(tob, toi, assignment)
            from_stmt.init = None
            return
        else:
            raise RandomizationFailure
    else:
        # Don't put statements before declarations.
        ensure(not isinstance(to_stmt, ca.Decl))

    if fromb == tob and fromi < toi:
        # Removing the statement below shifts later indices of the block down.
        toi -= 1

    stmt = ast_util.get_block_stmts(fromb, True).pop(fromi)
    ast_util.insert_statement(tob, toi, stmt)
|
|
|
|
|
|
def perm_compound_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Convert a statement of the form `x = x op y` to `x op= y`, or vice versa.

    Candidates are the assignments within `region` that either already use a
    compound operator, or are plain assignments whose right-hand side is a
    binary operation with the assigned expression as its left operand.
    Fails (via `ensure`) when the region holds no such assignment."""
    cands: List[ca.Assignment] = []
    # Every binary operator that has a compound-assignment form in C. `%` is
    # included so that `x = x % y` can be folded just like `x %= y` can be
    # expanded below.
    operators = ["+", "-", "*", "/", "%", "<<", ">>", "^", "|", "&"]

    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            if region.contains_node(node):
                if node.op != "=" or (
                    isinstance(node.rvalue, ca.BinaryOp)
                    and ast_util.equal_ast(node.lvalue, node.rvalue.left)
                    and node.rvalue.op in operators
                ):
                    cands.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(cands)
    node = random.choice(cands)

    if node.op == "=":
        # x = x op y  ->  x op= y
        assert isinstance(node.rvalue, ca.BinaryOp)
        node.op = node.rvalue.op + node.op
        node.rvalue = node.rvalue.right
    else:
        # x op= y  ->  x = x op y; the lvalue is copied so that the tree does
        # not contain the same node object twice.
        operator = node.op[:-1]
        node.op = "="
        node.rvalue = ca.BinaryOp(operator, copy.deepcopy(node.lvalue), node.rvalue)
|
|
|
|
|
|
def perm_inequalities(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Rewrite an inequality into an equivalent form that may compile differently.

    For example a > b turns into a >= b + 1, and a < b into a <= b - 1 (and
    vice versa). Nothing is simplified afterwards, so 'a <= (b + 1)' becomes
    'a < ((b + 1) + 1)'."""
    inequalities = ["<", ">", "<=", ">="]
    found: List[ca.BinaryOp] = []

    class Visitor(ca.NodeVisitor):
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if node.op in inequalities and region.contains_node(node):
                found.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(found)

    cmp = random.choice(found)

    def offset(operand: ca.Node, sign: str) -> ca.BinaryOp:
        """Build `operand + 1` or `operand - 1`, depending on `sign`."""
        return ca.BinaryOp(sign, operand, ca.Constant("int", "1"))

    if random.random() < 0.25:
        # Keep the operator and shift both operands the same way (can produce
        # fake matches sometimes). Ex: a > b -> a + 1 > b + 1
        sign = random.choice(["+", "-"])
        cmp.left = offset(cmp.left, sign)
        cmp.right = offset(cmp.right, sign)
        return

    # Old operator -> (new operator, sign for the left operand, sign for the
    # right operand). Only one of the two operands gets adjusted.
    rewrites = {
        "<": ("<=", "+", "-"),
        ">=": (">", "+", "-"),
        ">": (">=", "-", "+"),
        "<=": ("<", "-", "+"),
    }
    new_op, left_sign, right_sign = rewrites[cmp.op]
    cmp.op = new_op
    if random_bool(random, 0.5):
        cmp.left = offset(cmp.left, left_sign)
    else:
        cmp.right = offset(cmp.right, right_sign)
|
|
|
|
|
|
def perm_add_mask(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """AND a random integer-typed expression with 0xFF[FFFFFFFFFFFFFF], once or several times.

    In some cases this mask is optimized out but affects regalloc.
    The regalloc change seems to cycle with slight differences every n masks."""
    typemap = build_typemap(ast)

    # Pick the expression to mask; it has to be of integer type.
    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    target_type: SimpleType = decayed_expr_type(target, typemap)
    integer_names = ["int", "char", "long", "short", "signed", "unsigned"]
    ensure(allowed_basic_type(target_type, typemap, integer_names))

    # Mask as if restricting the value to 8, 16, 32, or 64-bit width,
    # sometimes with an unsigned suffix like '0xFFu'.
    widths: List[str] = ["0xFF", "0xFFFF", "0xFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
    mask = random.choice(widths) + random.choice(["", "u"])

    # One mask always; 30% of the time up to eleven more of the same.
    applications = 1
    if random_bool(random, 0.3):
        applications += random.randrange(12)

    masked: Expression = target
    for _ in range(applications):
        masked = ca.BinaryOp("&", masked, ca.Constant("int", mask))

    replace_node(fn.body, target, masked)
|
|
|
|
|
|
def perm_xor_zero(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Apply an identity operation to a random expression: ^0 for integers, *1 for floats/doubles."""
    typemap = build_typemap(ast)

    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)
    target_type: SimpleType = decayed_expr_type(target, typemap)

    # (accepted basic type names, operator, constant type, constant literal),
    # tried in order.
    identities = [
        (["int", "char", "long", "short", "signed", "unsigned"], "^", "int", "0"),
        (["float"], "*", "float", "1.0f"),
        (["double"], "*", "double", "1.0"),
    ]
    for type_names, op, const_type, literal in identities:
        if allowed_basic_type(target_type, typemap, type_names):
            new_expr = ca.BinaryOp(op, target, ca.Constant(const_type, literal))
            break
    else:
        # Neither an integer nor a floating point expression.
        raise RandomizationFailure

    replace_node(fn.body, target, new_expr)
|
|
|
|
|
|
def perm_float_literal(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Replace a float literal by another spelling of (nearly) the same value.

    A constant of type "float" within `region` is picked and rewritten by
    dropping its `f` suffix (making it a double), turning it into an integer
    when it has no fractional part, or adding/removing a redundant leading or
    trailing zero. Only decimal literals with an `f` suffix are handled; hex
    float literals and unsuffixed ones are rejected via `ensure`, since
    rewriting them textually would corrupt them."""
    cands: List[ca.Constant] = []

    class Visitor(ca.NodeVisitor):
        def visit_Constant(self, node: ca.Constant) -> None:
            if node.type == "float" and region.contains_node(node):
                cands.append(node)

    Visitor().visit(fn.body)
    ensure(cands)

    node = random.choice(cands)

    value: str = node.value.lower()
    # The rewrites strip the last character as the suffix and edit the digits
    # around the decimal point, which is only sound for decimal `...f`
    # literals (in a hex float, `f` can be a digit and the exponent is `p`).
    ensure(value.endswith("f") and "x" not in value)

    value = random.choice(_float_literal_variants(value))
    if value.endswith("f"):
        new_type = "float"
    elif "." in value or "e" in value:
        # Without the suffix, a decimal point or an exponent makes a double.
        new_type = "double"
    else:
        new_type = "int"

    replace_node(fn.body, node, ca.Constant(new_type, value))


def _float_literal_variants(value: str) -> List[str]:
    """Return alternative spellings for the float literal `value`.

    `value` must be a lowercase decimal literal that ends in the `f` suffix,
    such as "1.0f", ".5f", "2.f" or "1.5e-3f". The exponent, if any, is split
    off first and re-attached unchanged, so that edits to the mantissa can
    never alter it. The first variant is always the literal without its
    suffix."""
    body = value[:-1]
    mantissa, e_mark, exp_digits = body.partition("e")
    exponent = e_mark + exp_digits
    whole, point, frac = mantissa.partition(".")

    # Dropping the suffix turns the float into a double.
    variants = [body]

    if not point:
        # E.g. "1e5f": there are no fraction digits to play with, so the only
        # other spelling offered is the one with an explicit ".0".
        variants.append(f"{whole}.0{exponent}f")
        return variants

    # 1.0f / 1.f -> 1 (an integer); not possible when there is an exponent.
    if not exponent and frac in ("", "0"):
        variants.append(whole or "0")

    # Toggle the leading zero: 0.5f <-> .5f. A literal without fraction
    # digits ("0.f") gets one, because ".f" would not be a valid literal.
    if whole == "0":
        variants.append(f".{frac or '0'}{exponent}f")
    elif whole == "":
        variants.append("0" + value)

    # Toggle the trailing zero: 1.0f -> 1.f, otherwise 1.5f -> 1.50f.
    if frac == "0":
        variants.append(f"{whole or '0'}.{exponent}f")
    else:
        variants.append(f"{mantissa}0{exponent}f")

    return variants
|
|
|
|
|
|
def perm_cast_simple(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Wrap a random arithmetic expression in a cast to an integral or floating point type."""
    typemap = build_typemap(ast)

    exprs: List[Expression] = get_block_expressions(fn.body, region)
    ensure(exprs)
    target = random.choice(exprs)

    # Only expressions that already have a basic arithmetic type are cast.
    target_type: SimpleType = decayed_expr_type(target, typemap)
    castable = ["int", "char", "long", "short", "signed", "unsigned", "float", "double"]
    ensure(allowed_basic_type(target_type, typemap, castable))

    new_type: List[str]
    if random_bool(random, 0.5):
        # Integral type, unsigned half of the time.
        sign: List[str] = random.choice([[], ["unsigned"]])
        base: List[str] = random.choice(
            [["int"], ["char"], ["long"], ["short"], ["long", "long"]]
        )
        new_type = sign + base
    else:
        # Floating point type.
        new_type = random.choice([["float"], ["double"]])

    # Build `(new_type) target` and put it where the expression was.
    cast_to = ca.Typename(None, [], ca.TypeDecl(None, [], ca.IdentifierType(new_type)))
    replace_node(fn.body, target, ca.Cast(cast_to, target))
|
|
|
|
|
|
# struct_ref # type of a # easiest conversion
|
|
################################################################
|
|
# (a + b).c; # impossible #
|
|
# (a + b)->c; # s* # a[b].c
|
|
# (*(a + b)).c; # s* # a[b].c
|
|
# (*(a + b))->c; # s** # (*(a[b])).c
|
|
# (&(a + b)).c; # impossible #
|
|
# (&(a + b))->c; # impossible #
|
|
# (*(&(a + b))).c; # impossible #
|
|
# (*(&(a + b)))->c; # imp: a+b=rvalue #
|
|
# (&(*(a + b))).c; # impossible #
|
|
# (&(*(a + b)))->c; # s* # a[b].c (-&* req.)
|
|
################################################################
|
|
# (a[b]).c; # s* # (a + b)->c
|
|
# (a[b])->c; # s** # (*(a + b))->c
|
|
# (*(a[b])).c; # s** # (*(a + b))->c
|
|
# (*(a[b]))->c; # s*** # (*(*(a + b)))->c
|
|
# (&(a[b])).c; # impossible #
|
|
# (&(a[b]))->c; # s* # (&(*(a + b)))->c
|
|
# (*(&(a[b]))).c; # s* # (*(&(a + b)))->c
|
|
# (*(&(a[b])))->c; # s** # (*(&(*(a + b))))->c
|
|
# (&(*(a[b]))).c; # impossible #
|
|
# (&(*(a[b])))->c; # s** # (&(*(*(a + b))))->c
|
|
################################################################
|
|
# a.c # s # (&a)->c
|
|
# a->c # s* # (*a).c
|
|
# (*a).c # s* # a->c
|
|
# (*a)->c # s** # (*(*a)).c
|
|
# (&a).c # impossible #
|
|
# (&a)->c # s # (*(&a)).c
|
|
def perm_struct_ref(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Permute struct references: (a + b)->c, and (*(a + b)).c, a[b].c, (&a[b])->c

    A random StructRef within `region` is first brought into a simplified form
    (`.` access, pointer arithmetic instead of array indexing) and then
    randomly converted back, one step at a time. Fails via `ensure` when no
    step changed anything."""
    cands: List[ca.StructRef] = []

    class Visitor(ca.NodeVisitor):
        def visit_StructRef(self, node: ca.StructRef) -> None:
            if region.contains_node(node):
                cands.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(cands)

    # TODO: Split into separate perm? Need a separate one for arrayrefs, (a + b)[1] to a[b + 1]
    def randomize_associative_binop(left: ca.Node, right: ca.BinaryOp) -> ca.BinaryOp:
        """Try moving parentheses to the left side sometimes (sadly, it seems to matter)"""
        if random_bool(random, 0.5) and right.op in ["+", "-"]:
            # ((a + b) - c)
            return ca.BinaryOp(
                right.op, ca.BinaryOp("+", left, right.left), right.right
            )
        else:
            # (a + (b - c))
            return ca.BinaryOp("+", left, right)

    # Conversions
    def to_array(node: ca.BinaryOp) -> ca.ArrayRef:
        """Change a BinaryOp, a + b, to an ArrayRef, a[b]
        The operator is expected to be + or -"""
        # TODO: Permute binops like to_binop() does
        if node.op == "-":
            # Convert to a[-b]
            node.right = ca.UnaryOp("-", node.right)
        return ca.ArrayRef(node.left, node.right)

    def to_binop(node: ca.ArrayRef) -> ca.BinaryOp:
        """Change an ArrayRef, a[b], to a BinaryOp, a + b
        If b is also BinaryOp, such as a[b - 1], sometimes change the order of operations,
        ie: a + (b - 1) vs (a + b) - 1"""
        if isinstance(node.subscript, ca.BinaryOp):
            return randomize_associative_binop(node.name, node.subscript)
        return ca.BinaryOp("+", node.name, node.subscript)

    def deref(node: Expression) -> Expression:
        """Surround the given node with a dereference operator

        An existing outer `&` is removed instead, so *(&x) is returned as x."""
        if isinstance(node, ca.UnaryOp) and node.op == "&":
            assert not isinstance(node.expr, ca.Typename)
            return node.expr
        return ca.UnaryOp("*", node)

    def addr(node: Expression) -> Expression:
        """Surround the given node with an address-of operator

        An existing outer `*` is removed instead, so &(*x) is returned as x."""
        if isinstance(node, ca.UnaryOp) and node.op == "*":
            assert not isinstance(node.expr, ca.Typename)
            return node.expr
        return ca.UnaryOp("&", node)

    def rec(node: ca.Node) -> Any:
        """Recurse down the StructRef tree, finding the parent of the leaf BinaryOp/ArrayRef.
        Throws RandomizationFailure when a UnaryOp other than * or & was encountered."""
        # Returns the deepest StructRef/UnaryOp on the chain, or None when
        # `node` itself is neither of the two.
        if isinstance(node, ca.UnaryOp):
            ensure(node.op in ["&", "*"])
            return rec(node.expr) or node
        if isinstance(node, ca.StructRef):
            return rec(node.name) or node
        return None

    # TODO
    def apply_child(  # type: ignore
        parent: Union[ca.StructRef, ca.UnaryOp], func
    ) -> None:
        """Replace the operand of `parent` (`name` of a StructRef, `expr` of a
        UnaryOp) by the result of calling `func` on it."""
        if isinstance(parent, ca.StructRef):
            parent.name = func(parent.name)
        elif isinstance(parent, ca.UnaryOp):
            parent.expr = func(parent.expr)

    def get_child(parent: Union[ca.StructRef, ca.UnaryOp]) -> ca.Node:
        """Return the operand of `parent`: `name` of a StructRef, `expr` of a UnaryOp."""
        if isinstance(parent, ca.StructRef):
            return parent.name
        elif isinstance(parent, ca.UnaryOp):
            return parent.expr

    struct_ref = random.choice(cands)
    parent: Union[ca.StructRef, ca.UnaryOp]

    # Step 1: Find the parent of the leaf node
    parent = rec(struct_ref)

    # Tracks whether any step actually modified the tree.
    changed = False

    # Step 2: Simplify (...)->c to (*(...)).c
    if struct_ref.type == "->":
        struct_ref.type = "."
        # check if deref would remove the parent node
        if (
            parent is struct_ref.name
            and isinstance(parent, ca.UnaryOp)
            and parent.op == "&"
        ):
            # deref() strips that `&`, so the StructRef becomes the parent.
            struct_ref.name = deref(struct_ref.name)
            parent = struct_ref
        else:
            struct_ref.name = deref(struct_ref.name)
            # If the StructRef itself was the parent, the `*` that was just
            # added sits between it and the leaf and takes over that role.
            if parent is struct_ref and isinstance(
                struct_ref.name, ca.UnaryOp
            ):  # Check to make mypy happy
                parent = struct_ref.name
        changed = True

    # Simple StructRefs only need their type permuted
    if isinstance(get_child(parent), (ca.ArrayRef, ca.BinaryOp)):
        # For binops, a lhs like &(a+b)->c is impossible, because a + b is an rvalue

        # Step 3: Simplify further by converting ArrayRef to BinaryOp
        if isinstance(get_child(parent), ca.ArrayRef):
            # a[b] -> *(a + b); the operand produced by deref() becomes the
            # new parent.
            apply_child(parent, to_binop)
            apply_child(parent, deref)
            parent = typing.cast("Union[ca.StructRef, ca.UnaryOp]", get_child(parent))
            changed = True

        # Step 4: Convert back to ArrayRef
        if random_bool(random, 0.5):
            # Sanity check that there's at least one dereference
            if isinstance(parent, ca.UnaryOp) and parent.op == "*":
                # *(a + b) -> *(&a[b])
                apply_child(parent, to_array)
                apply_child(parent, addr)
                changed = True

    # Step 5: Convert the StructRef type back
    if random_bool(random, 0.5):
        struct_ref.name = addr(struct_ref.name)
        struct_ref.type = "->"
        changed = True

    ensure(changed)
|
|
|
|
|
|
def perm_split_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Split an assignment a = b . c . d ...; in two, reusing a as the intermediate:
    a = b; a = a . c . d ...;, or a = c . d ...; a = b . a;, etc."""
    assigns: List[ca.Assignment] = []

    # Plain assignments of a binary operation are the candidates (no op=).
    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            is_candidate = (
                node.op == "="
                and isinstance(node.rvalue, ca.BinaryOp)
                and region.contains_node(node)
            )
            if is_candidate:
                assigns.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(assigns)

    assign = random.choice(assigns)
    target = assign.lvalue

    # Find the insertion point that belongs to the chosen assignment; the new
    # statement will go there.
    location = next(
        (
            (block, index)
            for block, index, stmt in get_insertion_points(fn, region)
            if stmt is assign
        ),
        None,
    )
    if location is None:
        raise RandomizationFailure
    ins_block, ins_index = location

    def inorder(binop: ca.BinaryOp) -> typing.Iterator[ca.BinaryOp]:
        """Yield `binop` and all binary operations below it, left to right."""
        if isinstance(binop.left, ca.BinaryOp):
            yield from inorder(binop.left)
        yield binop
        if isinstance(binop.right, ca.BinaryOp):
            yield from inorder(binop.right)

    binops = list(inorder(typing.cast(ca.BinaryOp, assign.rvalue)))
    split = random.choice(binops)

    typemap = build_typemap(ast)
    vartype = decayed_expr_type(target, typemap)

    # Choose which operand gets computed by the new, earlier assignment. It
    # needs a type similar to that of the target it is stored in.
    attr = "left" if random_bool(random, 0.5) else "right"
    side = getattr(split, attr)
    sidetype = decayed_expr_type(side, typemap)
    ensure(same_type(vartype, sidetype, typemap, allow_similar=True))
    setattr(split, attr, copy.deepcopy(target))

    # The assignment is always inserted before the original
    new_assign = ca.Assignment("=", copy.deepcopy(target), side)
    ast_util.insert_statement(ins_block, ins_index, new_assign)
|
|
|
|
|
|
def perm_remove_ast(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Strip a construct that might be unnecessary (mistakes or unnecessary changes from an improved base.c).

    One of the following, within `region`, is replaced by what it wraps: a
    cast, an `&` with a constant operand, an else-less `if`, or a
    while/do-while loop."""
    # (node to remove, node that takes its place)
    removals: List[Tuple[ca.Node, ca.Node]] = []

    class Visitor(ca.NodeVisitor):
        # (type)expr -> expr
        def visit_Cast(self, node: ca.Cast) -> None:
            if region.contains_node(node):
                removals.append((node, node.expr))
            self.generic_visit(node)

        # Replace (a & constant) with (a).
        def visit_BinaryOp(self, node: ca.BinaryOp) -> None:
            if region.contains_node(node) and node.op == "&":
                sides = ((node.left, node.right), (node.right, node.left))
                for mask, kept in sides:
                    if isinstance(mask, ca.Constant):
                        removals.append((node, kept))
            self.generic_visit(node)

        # Remove if statements that don't have an else
        def visit_If(self, node: ca.If) -> None:
            if not node.iffalse and region.contains_node(node):
                removals.append((node, node.iftrue))
            self.generic_visit(node)

        # Remove loops, keeping just their body
        def _visit_loop(self, node: Union[ca.While, ca.DoWhile]) -> None:
            if region.contains_node(node):
                removals.append((node, node.stmt))
            self.generic_visit(node)

        visit_While = visit_DoWhile = _visit_loop

    Visitor().visit(fn.body)
    ensure(removals)

    victim, remainder = random.choice(removals)
    replace_node(fn.body, victim, remainder)
|
|
|
|
|
|
def perm_duplicate_assignment(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Copy a plain assignment from the region to a random spot in the function.

    This sometimes forces IDO to reuse a register."""
    plain_assignments = []

    class Visitor(ca.NodeVisitor):
        def visit_Assignment(self, node: ca.Assignment) -> None:
            # Compound assignments (+= and friends) are not duplicated.
            if region.contains_node(node) and node.op == "=":
                plain_assignments.append(node)
            self.generic_visit(node)

    Visitor().visit(fn.body)
    ensure(plain_assignments)
    original = random.choice(plain_assignments)

    # The copy may end up anywhere, also outside of the region.
    points = get_insertion_points(fn, Region.unbounded())
    ensure(points)

    duplicate = copy.deepcopy(original)
    block, index, _ = random.choice(points)
    ast_util.insert_statement(block, index, duplicate)
|
|
|
|
|
|
def perm_pad_var_decl(
    fn: ca.FuncDef, ast: ca.FileAST, indices: Indices, region: Region, random: Random
) -> None:
    """Inserts an unused variable to adjust stack offsets. Probably only useful with --stack-diffs enabled.

    The variable is called `pad`, or `pad2`, `pad3`, ... if that name is
    already declared somewhere in the function, and gets a random type."""
    taken: Set[str] = set()

    class Visitor(ca.NodeVisitor):
        def visit_Decl(self, decl: ca.Decl) -> None:
            if decl.name:
                taken.add(decl.name)
            self.generic_visit(decl)

    # Walk the whole definition rather than only its body, so that parameter
    # names count as taken too: a local that reuses a parameter's name in the
    # outermost block is a redeclaration error in C.
    Visitor().visit(fn)

    var = "pad"
    counter = 1
    while var in taken:
        counter += 1
        var = f"pad{counter}"

    var_type = random_type(random)
    ast_util.insert_decl(fn, var, var_type, random)
|
|
|
|
|
|
class Randomizer:
    """Applies randomly chosen permutation passes, driven by a seeded RNG."""

    def __init__(self, rng_seed: int) -> None:
        # All randomness flows through this generator, so a seed fully
        # determines the sequence of permutations.
        self.random = Random(rng_seed)

    def randomize(self, ast: ca.FileAST, fn_index: int) -> None:
        """Apply one successful permutation pass to the function at `ast.ext[fn_index]`.

        Passes are drawn according to their weights; a pass that raises
        RandomizationFailure is discarded and another one is drawn, until one
        runs to completion."""
        fn = ast.ext[fn_index]
        assert isinstance(fn, ca.FuncDef)
        indices = ast_util.compute_node_indices(fn)
        region = get_randomization_region(fn, indices, self.random)

        # (pass, relative weight)
        weighted_passes = [
            (perm_temp_for_expr, 100),
            (perm_expand_expr, 20),
            (perm_reorder_stmts, 20),
            (perm_add_mask, 15),
            (perm_xor_zero, 10),
            (perm_cast_simple, 10),
            (perm_refer_to_var, 10),
            (perm_float_literal, 10),
            (perm_randomize_internal_type, 10),
            (perm_randomize_external_type, 5),
            (perm_randomize_function_type, 5),
            (perm_split_assignment, 10),
            (perm_sameline, 10),
            (perm_ins_block, 10),
            (perm_struct_ref, 10),
            (perm_empty_stmt, 10),
            (perm_condition, 10),
            (perm_dummy_comma_expr, 5),
            (perm_add_self_assignment, 5),
            (perm_associative, 5),
            (perm_inequalities, 5),
            (perm_compound_assignment, 5),
            (perm_remove_ast, 5),
            (perm_duplicate_assignment, 5),
            (perm_pad_var_decl, 1),
        ]

        while True:
            chosen_pass = random_weighted(self.random, weighted_passes)
            try:
                chosen_pass(fn, ast, indices, region, self.random)
            except RandomizationFailure:
                # This pass was not applicable; draw another one.
                continue
            return
|