"""Print the device code loopy generates for a small select kernel.

This is the example script ``examples/python/loop_unroll_codegen.py``.

The kernel assigns ``a``, ``b`` or ``c`` to ``out[i]`` depending on the value
of ``i`` (``0 <= i < 8``).  Code is generated for it twice:

1. with iname ``i`` tagged ``"vec"`` and the axis of ``out`` tagged with
   :class:`loopy.kernel.array.VectorArrayDimTag`;
2. with iname ``i`` tagged ``"ilp.unr"`` and no array axis tag.

Only code generation is exercised; no kernel is executed.
"""

import numpy as np

import loopy as lp
from loopy.kernel.array import VectorArrayDimTag
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2  # noqa: F401


# Types of the three scalar inputs; the type of ``out`` is left to inference.
_ARG_DTYPES = {"a": np.float32, "b": np.float32, "c": np.float32}


def make_select_kernel():
    """Return a fresh, untagged copy of the select kernel."""
    return lp.make_kernel(
        "{ [i]: 0<= i <8}",
        "out[i] = a if i == 0 else (b if i == 1 else c)")


def emit_device_code(knl):
    """Add the argument dtypes to *knl* and return its generated device code."""
    # Pass a copy so both variants start from the same dtype mapping even if
    # the callee were to modify the dictionary it receives.
    knl = lp.add_and_infer_dtypes(knl, dict(_ARG_DTYPES))
    return lp.generate_code_v2(knl).device_code()


# Variant 1: "vec"-tagged iname with a vector-tagged output axis
# --------------------------------------------------------------
knl = lp.tag_inames(make_select_kernel(), {"i": "vec"})

try:
    knl = lp.tag_array_axes(knl, "out", [VectorArrayDimTag()])
    print(emit_device_code(knl))
except Exception as err:
    # Report the failure instead of aborting, so that the second variant
    # below is still generated and printed.
    print(err)

# Variant 2: "ilp.unr"-tagged iname, no array axis tag
# ----------------------------------------------------
print("No Vector Array Tag.")
knl = lp.tag_inames(make_select_kernel(), {"i": "ilp.unr"})
print(emit_device_code(knl))


# ---------------------------------------------------------------------------
# Patch context: the same change set also edits loopy/codegen/loop.py
# (index 44bfa07cc..ff56f1e53).  Its import hunk (@@ -31,7 +31,9 @@) reads:
#
#   -from loopy.symbolic import flatten
#   +from loopy.symbolic import ConstantFoldingMapper, SubstitutionMapper, flatten
#   +from pymbolic.mapper.substitutor import make_subst_func
#   +from loopy.transform.parameter import fix_parameters
#
# followed by the hunk
#   @@ -151,9 +153,41 @@ def generate_unroll_loop(codegen_state, sched_index):
# whose body is the text after this point.
# ---------------------------------------------------------------------------
def generate_unroll_loop(codegen_state, sched_index): result = [] + fold_consts = ConstantFoldingMapper() + for i in range(length): idx_aff = lower_bound_aff + i new_codegen_state = codegen_state.fix(iname, idx_aff) + original_knl_ = new_codegen_state.kernel.copy() + context = new_codegen_state.var_subst_map + # Add in the other variables as variables. + + from loopy.kernel.instruction import Assignment + #new_knl = fix_parameters(original_knl_, **context) + + subst_func = make_subst_func(context) + mymapper = SubstitutionMapper(subst_func) + + new_insns = [] + for insn in new_codegen_state.kernel.instructions: + """ + new_insn = mymapper(insn) + new_insns.append(fold_consts(new_insn)) + + """ + if isinstance(insn, Assignment): + # We can update the evaluation of this potentially. + new_expr = mymapper(insn.expression) + new_expr = fold_consts(new_expr) + new_insns.append(insn.copy(expression=new_expr)) + else: + new_insns.append(insn) + + new_knl = original_knl_.copy(instructions=new_insns) + new_codegen_state = new_codegen_state.copy(kernel=new_knl) + + #new_codegen_state = new_codegen_state.copy(kernel=new_knl) + result.append( build_loop_nest(new_codegen_state, sched_index+1))