diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
index 7862e2c..b137726 100644
--- a/tests/test_acceptance.py
+++ b/tests/test_acceptance.py
@@ -16,6 +16,7 @@
     remove_force_delay,
     pre_apply_args,
     deduplicate,
+    inline_variables,
 )
 
 acceptance_test_path = Path(__file__).parent.parent / "examples/acceptance_tests"
@@ -44,6 +45,8 @@ def acceptance_test_dirs():
     pre_apply_args.ApplyLambdaTransformer,
     # Apply deduplication
     deduplicate.Deduplicate,
+    # Inline single-use variables in guaranteed positions
+    inline_variables.InlineVariableOptimizer,
 ]
 
 
@@ -116,6 +119,7 @@ def test_acceptance_tests(self, _, dirpath, rewriter):
                 pre_evaluation.PreEvaluationOptimizer,
                 remove_force_delay.ForceDelayRemover,
                 pre_apply_args.ApplyLambdaTransformer,
+                inline_variables.InlineVariableOptimizer,
             ):
                 self.assertGreaterEqual(
                     expected_spent_budget,
diff --git a/tests/test_misc.py b/tests/test_misc.py
index a77d77e..c6c3e07 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -1701,6 +1701,191 @@ def test_force_delay_removal(self):
         p = remove_force_delay.ForceDelayRemover().visit(p)
         self.assertEqual(p.term, Error(), "Force-Delay was not removed.")
 
+    def test_inline_variables_single_use_guaranteed(self):
+        """Variable used once in a guaranteed position is inlined."""
+        from uplc.optimizer.inline_variables import InlineVariableOptimizer
+        from uplc.transformer.unique_variables import UniqueVariableTransformer
+
+        # [(lam x [addInteger x (con integer 1)]) (con integer 10)]
+        p = Program(
+            (1, 0, 0),
+            Apply(
+                Lambda(
+                    "x",
+                    Apply(
+                        Apply(BuiltIn(BuiltInFun.AddInteger), Variable("x")),
+                        BuiltinInteger(1),
+                    ),
+                ),
+                BuiltinInteger(10),
+            ),
+        )
+        p = UniqueVariableTransformer().visit(p)
+        p_inlined = InlineVariableOptimizer().visit(p)
+        # The Lambda binding should have been removed (outer Apply(Lambda, val) is gone)
+        self.assertNotIsInstance(
+            p_inlined.term.f,
+            Lambda,
+            "Variable was not inlined - Lambda binding should be gone",
+        )
+        # Result must still be correct
+        r = eval(p_inlined)
+        self.assertEqual(r.result, BuiltinInteger(11))
+
+    def test_inline_variables_not_inlined_when_used_twice(self):
+        """Variable used twice is NOT inlined."""
+        from uplc.optimizer.inline_variables import InlineVariableOptimizer
+        from uplc.transformer.unique_variables import UniqueVariableTransformer
+
+        # [(lam x [x x]) (con integer 5)]
+        p = Program(
+            (1, 0, 0),
+            Apply(
+                Lambda("x", Apply(Variable("x"), Variable("x"))),
+                BuiltinInteger(5),
+            ),
+        )
+        p = UniqueVariableTransformer().visit(p)
+        before = p.dumps()
+        p_after = InlineVariableOptimizer().visit(p)
+        self.assertEqual(
+            before, p_after.dumps(), "Double-use variable should NOT be inlined"
+        )
+
+    def test_inline_variables_not_inlined_inside_delay(self):
+        """Variable used inside Delay is NOT inlined (not guaranteed)."""
+        from uplc.optimizer.inline_variables import InlineVariableOptimizer
+        from uplc.transformer.unique_variables import UniqueVariableTransformer
+
+        # [(lam x (delay x)) (con integer 42)]
+        p = Program(
+            (1, 0, 0),
+            Apply(
+                Lambda("x", Delay(Variable("x"))),
+                BuiltinInteger(42),
+            ),
+        )
+        p = UniqueVariableTransformer().visit(p)
+        before = p.dumps()
+        p_after = InlineVariableOptimizer().visit(p)
+        self.assertEqual(
+            before, p_after.dumps(), "Variable inside Delay should NOT be inlined"
+        )
+
+    def test_inline_variables_not_inlined_inside_lambda(self):
+        """Variable used only inside a nested Lambda body is NOT inlined."""
+        from uplc.optimizer.inline_variables import InlineVariableOptimizer
+        from uplc.transformer.unique_variables import UniqueVariableTransformer
+
+        # [(lam x (lam y x)) (con integer 5)]
+        p = Program(
+            (1, 0, 0),
+            Apply(
+                Lambda("x", Lambda("y", Variable("x"))),
+                BuiltinInteger(5),
+            ),
+        )
+        p = UniqueVariableTransformer().visit(p)
+        before = p.dumps()
+        p_after = InlineVariableOptimizer().visit(p)
+        self.assertEqual(
+            before, p_after.dumps(), "Variable inside Lambda body should NOT be inlined"
+        )
+
+    def test_inline_variables_o3_preserves_semantics(self):
+        """O3 compilation with inline_variables produces the same results as O0."""
+        with open("examples/fibonacci.uplc", "r") as f:
+            p = parse(f.read())
+        p0 = tools.compile(p, compiler_config.OPT_O0_CONFIG)
+        p3 = tools.compile(p, compiler_config.OPT_O3_CONFIG)
+        for i in range(5):
+            r0 = eval(p0, BuiltinInteger(i))
+            r3 = eval(p3, BuiltinInteger(i))
+            self.assertEqual(
+                r0.result,
+                r3.result,
+                f"O3 result differs from O0 for input {i}",
+            )
+
+    def test_inline_variables_case_scrutinee_guaranteed(self):
+        """A Case scrutinee variable is counted once and reported as guaranteed."""
+        from uplc.optimizer.inline_variables import (
+            GuaranteedExecutionChecker,
+            VariableOccurrenceCounter,
+        )
+
+        # body = (case x (lam y y)) -- x is in scrutinee, guaranteed
+        body = Case(Variable("x"), [Lambda("y", Variable("y"))])
+        counter = VariableOccurrenceCounter()
+        counter.visit(body)
+        self.assertEqual(counter.counts.get("x", 0), 1)
+        self.assertTrue(
+            GuaranteedExecutionChecker("x").visit(body),
+            "x in Case scrutinee should be in guaranteed position",
+        )
+
+    def test_inline_variables_case_branch_not_guaranteed(self):
+        """Variable inside Case branch is NOT in a guaranteed position."""
+        from uplc.optimizer.inline_variables import GuaranteedExecutionChecker
+
+        # body = (case (con integer 0) (lam y x)) -- x is only in branch, not scrutinee
+        body = Case(BuiltinInteger(0), [Lambda("y", Variable("x"))])
+        self.assertFalse(
+            GuaranteedExecutionChecker("x").visit(body),
+            "x inside Case branch should NOT be in guaranteed position",
+        )
+
+    def test_inline_variables_constr_field_guaranteed(self):
+        """Variable inside a Constr field is in a guaranteed position."""
+        from uplc.optimizer.inline_variables import (
+            GuaranteedExecutionChecker,
+            InlineVariableOptimizer,
+        )
+        from uplc.transformer.unique_variables import UniqueVariableTransformer
+
+        # GuaranteedExecutionChecker: x inside Constr fields should be guaranteed
+        body = Constr(0, [Variable("x"), BuiltinInteger(1)])
+        self.assertTrue(
+            GuaranteedExecutionChecker("x").visit(body),
+            "x inside Constr fields should be in guaranteed position",
+        )
+
+        # Also verify the optimizer actually inlines through a Constr field
+        # [(lam x (constr 0 x (con integer 1))) (con integer 5)]
+        p = Program(
+            (1, 0, 0),
+            Apply(
+                Lambda("x", Constr(0, [Variable("x"), BuiltinInteger(1)])),
+                BuiltinInteger(5),
+            ),
+        )
+        p = UniqueVariableTransformer().visit(p)
+        p_inlined = InlineVariableOptimizer().visit(p)
+        # After inlining, Apply(Lambda, val) is replaced by the Constr directly
+        self.assertIsInstance(
+            p_inlined.term,
+            Constr,
+            "Variable in Constr field was not inlined - Apply(Lambda,...) should become Constr directly",
+        )
+
+    def test_inline_variables_program_visit(self):
+        """GuaranteedExecutionChecker.visit_Program delegates to the term."""
+        from uplc.optimizer.inline_variables import GuaranteedExecutionChecker
+
+        # Program wrapping a body where x is in a guaranteed position
+        prog = Program((1, 0, 0), Variable("x"))
+        self.assertTrue(
+            GuaranteedExecutionChecker("x").visit(prog),
+            "x at top-level of Program should be in guaranteed position",
+        )
+
+        # Program wrapping a body where x is NOT guaranteed (inside Delay)
+        prog_not_guaranteed = Program((1, 0, 0), Delay(Variable("x")))
+        self.assertFalse(
+            GuaranteedExecutionChecker("x").visit(prog_not_guaranteed),
+            "x inside Delay in Program should NOT be in guaranteed position",
+        )
+
     def test_compiler_options(self):
         with open("examples/fibonacci.uplc", "r") as f:
             p = parse(f.read())
diff --git a/tests/test_roundtrips.py b/tests/test_roundtrips.py
index 6dfc1a4..028feda 100644
--- a/tests/test_roundtrips.py
+++ b/tests/test_roundtrips.py
@@ -11,6 +11,7 @@
 from uplc.flat_decoder import unzigzag
 from uplc.flat_encoder import zigzag
 from uplc.optimizer import pre_evaluation, pre_apply_args, deduplicate
+from uplc.optimizer import inline_variables
 from uplc.tools import unflatten
 from uplc.transformer import unique_variables, debrujin_variables, undebrujin_variables
 from uplc.ast import *
@@ -658,6 +659,80 @@ def test_apply_lambda_no_semantic_change_and_size_increase(self, p, max_increase
                 "Rewrite result was exception but orig result is not an exception",
             )
 
+    @hypothesis.given(uplc_program_valid)
+    @hypothesis.settings(max_examples=1000, deadline=datetime.timedelta(seconds=1))
+    @hypothesis.example(
+        parse(
+            "(program 1.0.0 [(lam x [(builtin addInteger) x (con integer 1)]) (con integer 10)])"
+        )
+    )
+    @hypothesis.example(parse("(program 1.0.0 [(lam x (lam y x)) (con integer 0)])"))
+    @hypothesis.example(parse("(program 1.0.0 [(lam x (delay x)) (con integer 0)])"))
+    def test_inline_variables_no_semantic_change(self, p):
+        code = dumps(p)
+        orig_p = parse(code).term
+        rewrite_p = (
+            inline_variables.InlineVariableOptimizer()
+            .visit(unique_variables.UniqueVariableTransformer().visit(p))
+            .term
+        )
+        params = []
+        try:
+            orig_res = orig_p
+            for _ in range(100):
+                if isinstance(orig_res, Exception):
+                    break
+                if isinstance(orig_res, BoundStateLambda) or isinstance(
+                    orig_res, ForcedBuiltIn
+                ):
+                    p = BuiltinUnit()
+                    params.append(p)
+                    orig_res = Apply(orig_res, p)
+                if isinstance(orig_res, BoundStateDelay):
+                    orig_res = Force(orig_res)
+                orig_res = eval(orig_res).result
+            if not isinstance(orig_res, Exception):
+                orig_res = unique_variables.UniqueVariableTransformer().visit(orig_res)
+        except unique_variables.FreeVariableError:
+            self.fail(f"Free variable error occurred after evaluation in {code}")
+        try:
+            rewrite_res = rewrite_p
+            for _ in range(100):
+                if isinstance(rewrite_res, Exception):
+                    break
+                if isinstance(rewrite_res, BoundStateLambda) or isinstance(
+                    rewrite_res, ForcedBuiltIn
+                ):
+                    p = params.pop(0)
+                    rewrite_res = Apply(rewrite_res, p)
+                if isinstance(rewrite_res, BoundStateDelay):
+                    rewrite_res = Force(rewrite_res)
+                rewrite_res = eval(rewrite_res).result
+            if not isinstance(rewrite_res, Exception):
+                rewrite_res = unique_variables.UniqueVariableTransformer().visit(
+                    rewrite_res
+                )
+        except unique_variables.FreeVariableError:
+            self.fail(f"Free variable error occurred after evaluation in {code}")
+        if not isinstance(rewrite_res, Exception):
+            if isinstance(orig_res, Exception):
+                self.assertIsInstance(
+                    orig_res,
+                    RuntimeError,
+                    "Original code resulted in something different than a runtime error (exceeding budget) and rewritten result is ok",
+                )
+            self.assertEqual(
+                orig_res,
+                rewrite_res,
+                f"Two programs evaluate to different results after optimization in {code}",
+            )
+        else:
+            self.assertIsInstance(
+                orig_res,
+                Exception,
+                "Rewrite result was exception but orig result is not an exception",
+            )
+
     @hypothesis.given(hst.integers(), hst.booleans())
     def test_zigzag(self, i, b):
         self.assertEqual(i, unzigzag(zigzag(i, b), b)), "Incorrect roundtrip"
diff --git a/uplc/__init__.py b/uplc/__init__.py
index be7c956..e6cfc93 100644
--- a/uplc/__init__.py
+++ b/uplc/__init__.py
@@ -3,7 +3,6 @@
 import importlib.metadata
 import logging
 
-
 __version__ = importlib.metadata.version(__package__ or __name__)
 __author__ = "nielstron"
 __author_email__ = "niels@opshin.dev"
diff --git a/uplc/compiler_config.py b/uplc/compiler_config.py
index 9df3fee..85881de 100644
--- a/uplc/compiler_config.py
+++ b/uplc/compiler_config.py
@@ -10,6 +10,7 @@ class CompilationConfig:
     remove_force_delay: Optional[bool] = None
     fold_apply_lambda_increase: Optional[Union[int, float]] = None
     deduplicate: Optional[bool] = None
+    inline_variables: Optional[bool] = None
 
     def update(
         self, other: Optional["CompilationConfig"] = None, **kwargs
@@ -34,7 +35,7 @@ def update(
     constant_folding_keep_traces=True,
 )
 OPT_O3_CONFIG = OPT_O2_CONFIG.update(
-    deduplicate=True, constant_folding_keep_traces=False
+    deduplicate=True, constant_folding_keep_traces=False, inline_variables=True
 )
 OPT_CONFIGS = [OPT_O0_CONFIG, OPT_O1_CONFIG, OPT_O2_CONFIG, OPT_O3_CONFIG]
 
@@ -65,6 +66,10 @@ def update(
         "__alts__": ["--dedup"],
         "help": "Deduplicate identical subterms by introducing a let-binding. This reduces size but may increase runtime slightly.",
     },
+    "inline_variables": {
+        "__alts__": ["--iv"],
+        "help": "Inline variables that are used exactly once in a position guaranteed to be executed. This reduces size and runtime but may change the order of traces.",
+    },
 }
 for k in ARGPARSE_ARGS:
     assert (
diff --git a/uplc/optimizer/inline_variables.py b/uplc/optimizer/inline_variables.py
new file mode 100644
index 0000000..53b24dd
--- /dev/null
+++ b/uplc/optimizer/inline_variables.py
@@ -0,0 +1,153 @@
+"""
+Inlines variable bindings (Apply(Lambda(var, body), value)) when:
+  1. var appears exactly once in body (counting all occurrences)
+  2. that single occurrence is in a position guaranteed to be executed
+     (not inside a Lambda body or Delay term)
+
+This is semantically correct because:
+  - The variable will always be evaluated (guaranteed position)
+  - Inlining doesn't duplicate evaluation (single occurrence)
+  - Any side effects (crashes) will still occur (guaranteed position)
+
+NOTE: This optimization may reorder evaluation relative to other sub-expressions
+in the body (e.g. traces), so it is an O3-only optimization.
+NOTE: This optimization requires unique variable names.
+"""
+
+from collections import defaultdict
+
+from ..util import NodeTransformer, NodeVisitor
+from ..ast import *
+
+
+class VariableOccurrenceCounter(NodeVisitor):
+    """Counts ALL occurrences of each variable name in the AST,
+    including those inside lambdas and delays.
+
+    After a visit, ``counts`` maps every variable name that was seen to its
+    number of occurrences.
+    """
+
+    def __init__(self):
+        # name -> occurrences; defaultdict so the first sighting starts at 0
+        self.counts = defaultdict(int)
+
+    def visit_Variable(self, node: Variable):
+        self.counts[node.name] += 1
+
+
+class GuaranteedExecutionChecker(NodeVisitor):
+    """Checks if a specific variable occurs in a position that is guaranteed
+    to be executed when the enclosing expression is evaluated.
+
+    A position is guaranteed if it is NOT nested inside a Lambda body or a
+    Delay term (because Lambda bodies only execute when the lambda is called,
+    and Delay terms only execute when forced).
+
+    The scrutinee of a Case is guaranteed (evaluated unconditionally), but the
+    case branches are not (only one branch is taken).
+    All fields of a Constr are guaranteed (all evaluated to build the value).
+
+    The check is deliberately conservative: a Lambda or Delay always answers
+    False, even when it is applied or forced right away.
+    """
+
+    def __init__(self, var_name: str):
+        # Name of the variable whose occurrence is searched for
+        self.var_name = var_name
+
+    def visit_Variable(self, node: Variable) -> bool:
+        return node.name == self.var_name
+
+    # Lambda bodies are NOT guaranteed to execute
+    def visit_Lambda(self, node: Lambda) -> bool:
+        return False
+
+    def visit_BoundStateLambda(self, node: BoundStateLambda) -> bool:
+        return False
+
+    # Delay terms are NOT guaranteed to execute until forced
+    def visit_Delay(self, node: Delay) -> bool:
+        return False
+
+    def visit_BoundStateDelay(self, node: BoundStateDelay) -> bool:
+        return False
+
+    def visit_Apply(self, node: Apply) -> bool:
+        # Both function and argument are always evaluated (call-by-value)
+        return self.visit(node.f) or self.visit(node.x)
+
+    def visit_Force(self, node: Force) -> bool:
+        # The forced term is always evaluated
+        return self.visit(node.term)
+
+    def visit_Case(self, node: Case) -> bool:
+        # Only the scrutinee is unconditionally evaluated; branches are not
+        return self.visit(node.scrutinee)
+
+    def visit_Constr(self, node: Constr) -> bool:
+        # All constructor fields are evaluated unconditionally
+        return any(self.visit(f) for f in node.fields)
+
+    def visit_Program(self, node: Program) -> bool:
+        return self.visit(node.term)
+
+    def generic_visit(self, node: AST) -> bool:
+        # For any other node (constants, builtins, errors, ...) the variable
+        # is not present, so return False.
+        return False
+
+
+class Substitute(NodeTransformer):
+    """Substitutes all occurrences of var_name with value.
+
+    The same value object is returned for every match, so callers should
+    only use this when var_name occurs once (or sharing the node is fine).
+    """
+
+    def __init__(self, var_name: str, value: AST):
+        self.var_name = var_name
+        self.value = value
+
+    def visit_Variable(self, node: Variable) -> AST:
+        if node.name == self.var_name:
+            return self.value
+        return node
+
+
+class InlineVariableOptimizer(NodeTransformer):
+    """Inlines variable bindings that are used exactly once in a guaranteed
+    execution position.
+
+    For Apply(Lambda(var, body), value):
+      - If var appears exactly once in body
+      - AND that occurrence is in a guaranteed-execution position
+    then replace with body[var := value].
+
+    Requires unique variable names (run UniqueVariableTransformer first).
+    """
+
+    @staticmethod
+    def _is_inlinable(var_name: str, body: AST) -> bool:
+        """Return whether var_name occurs exactly once in body and that
+        occurrence sits in a guaranteed-execution position."""
+        counter = VariableOccurrenceCounter()
+        counter.visit(body)
+        # Exactly one occurrence is required: with more, inlining would
+        # duplicate value; with none, it would drop its evaluation.
+        if counter.counts.get(var_name, 0) != 1:
+            return False
+        return GuaranteedExecutionChecker(var_name).visit(body)
+
+    def visit_Apply(self, node: Apply) -> AST:
+        """Replace a qualifying Apply(Lambda(var, body), value) by
+        body[var := value]; recurse into every other application."""
+        binder = node.f
+        if isinstance(binder, Lambda) and self._is_inlinable(
+            binder.var_name, binder.term
+        ):
+            # Re-visit the result: the inlined body and value may contain
+            # further bindings that can be inlined.
+            inlined = Substitute(binder.var_name, node.x).visit(binder.term)
+            return self.visit(inlined)
+        return super().generic_visit(node)
diff --git a/uplc/tools.py b/uplc/tools.py
index d2d1303..f7aa5ff 100644
--- a/uplc/tools.py
+++ b/uplc/tools.py
@@ -14,6 +14,7 @@
 )
 from .lexer import strip_comments, Lexer
 from .optimizer.deduplicate import Deduplicate
+from .optimizer.inline_variables import InlineVariableOptimizer
 from .optimizer.pre_apply_args import ApplyLambdaTransformer
 from .optimizer.pre_evaluation import PreEvaluationOptimizer
 from .optimizer.remove_force_delay import ForceDelayRemover
@@ -152,6 +153,11 @@ def compile(
                 if config.unique_variable_names and config.deduplicate is not None
                 else NoOp()
             ),
+            (
+                InlineVariableOptimizer()
+                if config.unique_variable_names and config.inline_variables
+                else NoOp()
+            ),
         ]:
             x = step.visit(x)
         prev_dump = new_dump