diff --git a/.gitignore b/.gitignore index 267f5ee03c..5a5df47e61 100644 --- a/.gitignore +++ b/.gitignore @@ -169,6 +169,7 @@ MANIFEST ## Editor Files .vscode/ .vs/ +.idea/ ## Build Files */bin/lpython diff --git a/ISSUES/Issue2496.py b/ISSUES/Issue2496.py new file mode 100644 index 0000000000..8dab66fc9a --- /dev/null +++ b/ISSUES/Issue2496.py @@ -0,0 +1,22 @@ +from numpy import array, empty, int16 +from lpython import (i16, i32, c_p_pointer, Pointer, CPtr, TypeVar) + + +Tn = TypeVar("Tn") +Tm = TypeVar("Tm") +Tl = TypeVar("Tl") + + +def THIS_WORKS(Anm_l4: CPtr, Tn: i32, Tm: i32, l: i32) -> i16[Tn, Tm]: + A_nm: i16[Tn, Tm] = empty((Tn, Tm), dtype=int16) + return A_nm + + +def THIS_DOESNT_WORK(d: i16[Tm, Tn], b: CPtr, Tm: i32, Tn: i32) -> None: + B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([Tm * Tn])) + i: i32 + j: i32 + for i in range(Tm): + for j in range(Tn): + d[i, j] = B[(i * Tn) + j] + diff --git a/ISSUES/Issue2499.py b/ISSUES/Issue2499.py new file mode 100644 index 0000000000..797650693c --- /dev/null +++ b/ISSUES/Issue2499.py @@ -0,0 +1,18 @@ +from lpython import i32, i16, Const +VR_SIZE: i32 = 32_768 +l: Const[i32] = VR_SIZE +n: Const[i32] = 15 +m: Const[i32] = 3 +k: i32 +M2: Const[i32] = 5 +A_ik: i16 +jj: i32 +ii: i32 +i: i32 +for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # zero-out rows of C + pass + for k in range(0, m): # rows of B + for i in range(0, M2): + pass diff --git a/ISSUES/Issue2503.py b/ISSUES/Issue2503.py new file mode 100644 index 0000000000..0b0068fc06 --- /dev/null +++ b/ISSUES/Issue2503.py @@ -0,0 +1,25 @@ +from lpython import (i16, i32, Const) +from numpy import empty, int16 +dim: Const[i32] = 10 + + +def foo(): + """Negative indices produce random results each run.""" + A: i16[dim] = empty((dim,), dtype=int16) + ww: i32 + for ww in range(dim): + A[ww] = i16(ww + 1) + print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1]) + + +def bar(dim_: i32): + """Negative indices always produce zero when 'dim' is a parameter.""" + A: i16[dim_] = empty((dim_,), dtype=int16) + ww: i32 + for ww in range(dim_): + A[ww] = i16(ww + 1) + print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1]) + + +foo() +bar(10) \ No newline at end of file diff --git a/ISSUES/Issue2509.py b/ISSUES/Issue2509.py new file mode 100644 index 0000000000..e379eee4b9 --- /dev/null +++ b/ISSUES/Issue2509.py @@ -0,0 +1,5 @@ + +def main(option: bool = False): + print("option: ", option) + +main() diff --git a/ISSUES/SIGSEGV/Issue2498.py b/ISSUES/SIGSEGV/Issue2498.py new file mode 100644 index 0000000000..cbfc0860b8 --- /dev/null +++ b/ISSUES/SIGSEGV/Issue2498.py @@ -0,0 +1,20 @@ +from numpy import array, empty, int16 +from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64, + ccall, sizeof, Array, Allocatable, TypeVar, Const) + + +rows = TypeVar("rows") +cols = TypeVar("cols") + + +def spot_print_lpython_array(a: i16[:], rows: i32, cols: i32) -> i16[rows, cols]: + pass + + +def main() -> i32: + print ("hello, world!") + return 0 + + +if __name__ == "__main__": + main() diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py new file mode 100644 index 0000000000..917166f0bd --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py @@ -0,0 +1,117 @@ +import numpy +from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64) + +######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO +######## BRING UP! AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4 +######## FEBRUARY 2024. + +# https://numpy.org/devdocs/reference/typing.html +######## from numpy.typing import NDArray + + +# plan for 30 Jan 2024 -- +# step 0: comment out this code and ./build_baryon.sh to run on APU +# emulator; or ./run_full_emulation.sh to run in CPython. +# step 1: side-by-side numpy implementation in full-emulation +# - get there line-by-line +# = focus on gvml_add_u16 first + + +def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32, + A: CPtr, B: CPtr, C: CPtr) -> \ + None: ######## NDArray[numpy.int16]: + VR_SIZE: i32 = 32_768 + + # In the primary example, n = 15, m = 3, l = 32_768, + # M1 = 1, M2 = 5 + + # source GSI L4 arrays + pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m])) + pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l])) + + # source numpy arrays + ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16) + ######## for row in range(n): + ######## A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)] + A_nm: Array[i16, n, m] + row : i32 + for row in range(n): + col : i32 + for col in range(m): + A_nm[row, col] = pA_nm[(row * m):((row * m) + col)] + + ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16) + ######## for row in range(m): + ######## B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)] + + # # destination numpy array + ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16) + + # destination GSI L4 array + pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l])) + + # First, accumulate outer product without blocking. This is + # the code we would -ultimately- like to compile. Notice that + # all GSI-specific L1, L4, MMB are hidden. + + k: i32 + ######## for k in range(0, m): + ######## C_nl += numpy.outer(A_nm[:,k], B_ml[k,:]) + ######## pass + + # expect + # [[ 5 8 11 ... 20 23 26], + # [ 8 14 20 ... 38 44 50], + # [11 20 29 ... 56 65 74], ... + # + # [ 8 14 20 ... 38 44 50], + # [11 20 29 ... 56 65 74], + # [14 26 38 ... 74 86 98]] + set_breakpoint_here_and_inspect_C_nl : i32 = 0 + + # Second, with explicit blocking. This is a stepping-stone + # for our back-end. Notice that L1 and MMB are hidden. + + # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16) + # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16) + A_ik: i16 + jj: i32 + ii: i32 + i: i32 + for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # zero-out rows of C + ######## C_nl[i + ii, :] = 0 + pass + for k in range(0, m): # rows of B + # B_1l[0, :] = B_ml[k, :] + for i in range(0, M2): + ######## A_ik = A_nm[i + ii, k] + # broadcast a single element of A + # T_1l[0, :] = A_ik + # pointwise (Hadamard) product: + # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :]) + # C_nl[i + ii, :] += T_1l[0, :] + # optimization without the temporaries + ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik + pass + + set_breakpoint_here_and_inspect_C_nl = 0 + + ######## return C_nl + +def main(): + n : i32 = 15 + m : i32 = 3 + l : i32 = 32_768 + M1 : i32 = 1 + M2 : i32 = 5 + A_l4 : CPtr + B_l4 : CPtr + C_l4 : CPtr + numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4) + print ("hello, world!") + + +if __name__ == "__main__": + main() diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt new file mode 100644 index 0000000000..85662064e2 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt @@ -0,0 +1,55 @@ +└─(10:46:54 on vector-backend ✖ ✭)──> lpython ../ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py 1 ↵ ──(Mon,Feb05)─┘ +Internal Compiler Error: Unhandled exception +Traceback (most recent call last): + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 1872 + err = compile_python_to_object_file(arg_file, tmp_o, runtime_library_dir, + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 824 + res = fe.get_llvm3(*asr, pass_manager, diagnostics, infile); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/lpython/python_evaluator.cpp", line 71 + run_fn, infile); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 9282 + v.visit_asr((ASR::asr_t&)asr); + File "../libasr/asr.h", line 5057 + File "../libasr/asr.h", line 5033 + File "../libasr/asr.h", line 5058 + File "../libasr/asr.h", line 4766 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 932 + ASR::symbol_t *mod = x.m_symtab->get_symbol(item); + File "../libasr/asr.h", line 5060 + File "../libasr/asr.h", line 4774 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 2976 + finish_module_init_function_prototype(x); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3927 + ASR::Function_t *s = ASR::down_cast(item.second); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3683 + visit_procedures(x); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3880 + this->visit_stmt(*x.m_body[i]); + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 4827 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603 + create_loop(x.m_name, [=]() { + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321 + start_new_block(loopbody); { + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609 + this->visit_stmt(*x.m_body[i]); + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 4834 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5063 + this->visit_stmt(*(block->m_body[i])); + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 4827 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603 + create_loop(x.m_name, [=]() { + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321 + start_new_block(loopbody); { + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609 + this->visit_stmt(*x.m_body[i]); + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 4800 + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4283 + handle_array_section_association_to_pointer(x); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4247 + LCOMPILERS_ASSERT(target_rank > 0); +AssertFailed: target_rank > 0 +(lp) ┌─(~/Documents/GitHub/lpython/integration_tests)───────────────────────────────────────(brian@MacBook-Pro:s001)─┐ diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py new file mode 100644 index 0000000000..a26e5c230e --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py @@ -0,0 +1,116 @@ +import numpy +from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64) + +######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO +######## BRING UP! AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4 +######## FEBRUARY 2024. + +# https://numpy.org/devdocs/reference/typing.html +######## from numpy.typing import NDArray + + +# plan for 30 Jan 2024 -- +# step 0: comment out this code and ./build_baryon.sh to run on APU +# emulator; or ./run_full_emulation.sh to run in CPython. +# step 1: side-by-side numpy implementation in full-emulation +# - get there line-by-line +# = focus on gvml_add_u16 first + + +def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32, + A: CPtr, B: CPtr, C: CPtr) -> \ + None: ######## NDArray[numpy.int16]: + VR_SIZE: i32 = 32_768 + + # In the primary example, n = 15, m = 3, l = 32_768, + # M1 = 1, M2 = 5 + + # source GSI L4 arrays + pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m])) + pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l])) + + # source numpy arrays + ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16) + ######## for row in range(n): + ######## A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)] + A_nm: Array[i16, n, m] + row : i32 + for row in range(n): + A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)] + + + ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16) + ######## for row in range(m): + ######## B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)] + + # # destination numpy array + ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16) + + # destination GSI L4 array + pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l])) + + # First, accumulate outer product without blocking. This is + # the code we would -ultimately- like to compile. Notice that + # all GSI-specific L1, L4, MMB are hidden. + + k: i32 + ######## for k in range(0, m): + ######## C_nl += numpy.outer(A_nm[:,k], B_ml[k,:]) + ######## pass + + # expect + # [[ 5 8 11 ... 20 23 26], + # [ 8 14 20 ... 38 44 50], + # [11 20 29 ... 56 65 74], ... + # + # [ 8 14 20 ... 38 44 50], + # [11 20 29 ... 56 65 74], + # [14 26 38 ... 74 86 98]] + set_breakpoint_here_and_inspect_C_nl : i32 = 0 + + # Second, with explicit blocking. This is a stepping-stone + # for our back-end. Notice that L1 and MMB are hidden. + + # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16) + # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16) + A_ik: i16 + jj: i32 + ii: i32 + i: i32 + for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # zero-out rows of C + ######## C_nl[i + ii, :] = 0 + pass + for k in range(0, m): # rows of B + # B_1l[0, :] = B_ml[k, :] + for i in range(0, M2): + ######## A_ik = A_nm[i + ii, k] + # broadcast a single element of A + # T_1l[0, :] = A_ik + # pointwise (Hadamard) product: + # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :]) + # C_nl[i + ii, :] += T_1l[0, :] + # optimization without the temporaries + ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik + pass + + set_breakpoint_here_and_inspect_C_nl = 0 + + ######## return C_nl + +def main(): + n : i32 = 15 + m : i32 = 3 + l : i32 = 32_768 + M1 : i32 = 1 + M2 : i32 = 5 + A_l4 : CPtr + B_l4 : CPtr + C_l4 : CPtr + numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4) + print ("hello, world!") + + +if __name__ == "__main__": + main() diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.txt b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.txt new file mode 100644 index 0000000000..d9604ba789 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.txt @@ -0,0 +1,33 @@ +└─(10:50:28 on vector-backend ✖ ✭)──> lpython ../ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py 1 ↵ ──(Mon,Feb05)─┘ +Traceback (most recent call last): + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 1872 + err = compile_python_to_object_file(arg_file, tmp_o, runtime_library_dir, + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 824 + res = fe.get_llvm3(*asr, pass_manager, diagnostics, infile); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/lpython/python_evaluator.cpp", line 71 + run_fn, infile); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 9276 + pass_manager.apply_passes(al, &asr, co.po, diagnostics); + File "/Users/brian/Documents/GitHub/lpython/src/libasr/pass/pass_manager.h", line 299 + apply_passes(al, asr, _passes, pass_options, diagnostics); + File "/Users/brian/Documents/GitHub/lpython/src/libasr/pass/pass_manager.h", line 160 + _passes_db[passes[i]](al, *asr, pass_options); + File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/pass/array_op.cpp", line 1910 + u.visit_TranslationUnit(unit); + File "../libasr/asr.h", line 5277 + File "../libasr/asr.h", line 5060 + File "../libasr/asr.h", line 4774 + File "../libasr/pass/pass_utils.h", line 317 + File "../libasr/asr.h", line 5290 + File "../libasr/asr.h", line 5060 + File "../libasr/asr.h", line 4775 + File "../libasr/pass/pass_utils.h", line 298 + File "../libasr/asr.h", line 5303 + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 4805 + File "../libasr/asr.h", line 5441 + File "../libasr/asr.h", line 5077 + File "../libasr/asr.h", line 5077 + Binary file "/usr/lib/system/libsystem_platform.dylib", local address: 0x18046da23 +Segfault: Signal SIGSEGV (segmentation fault) received +(lp) ┌─(~/Documents/GitHub/lpython/integration_tests)───────────────────────────────────────(brian@MacBook-Pro:s001)─┐ diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2487.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2487.py new file mode 100644 index 0000000000..2bf167c2ff --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2487.py @@ -0,0 +1,8 @@ +from lpython import CPtr + +A : CPtr + +def foo(a : CPtr) -> None: + pass + +foo(A) diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2491.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2491.py new file mode 100644 index 0000000000..baca03ced2 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2491.py @@ -0,0 +1,13 @@ +import numpy +from numpy import empty + +from lpython import (i16) + + +def main(): + A_nm: i16[15, 3] = empty((15, 3), dtype=numpy.int16) + print ("hello, world!") + + +if __name__ == "__main__": + main() diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2492.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2492.py new file mode 100644 index 0000000000..635a5e8134 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2492.py @@ -0,0 +1,22 @@ +from numpy import empty, int16 + +from lpython import (i16, i32, CPtr, ccall, sizeof) + + +@ccall +def _lfortran_malloc(size : i32) -> CPtr: + """borrowed from bindc_07.py in integration_tests""" + pass + + +def main(): + n : i32 = 15 + m : i32 = 3 + + # Emulate getting stuff from the C side. + Anm_l4 : CPtr = _lfortran_malloc( (n * m) * i32(sizeof(i16)) ) + A_nm: i16[n, m] = empty((n, m), dtype=int16) + + +if __name__ == "__main__": + main() diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2494.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2494.py new file mode 100644 index 0000000000..745b743cce --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2494.py @@ -0,0 +1,15 @@ +from numpy import empty, int16 + +from lpython import (i16, i32, Allocatable) + + +# doesn't work: +# def to_lpython_array(n: i32, m: i32) -> Array[i16, n, m]: #ndarray(Any, dtype=int16): +# works: +# def to_lpython_array(n: i32, m: i32) -> Array[i16, 15, 3]: #ndarray(Any, dtype=int16): +# doesn't work: +def to_lpython_array(n: i32, m: i32) -> Allocatable[i16[:]]: + A_nm: i16[n, m] = empty((n, m), dtype=int16) + return A_nm + + diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2495.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2495.py new file mode 100644 index 0000000000..07ad62e468 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2495.py @@ -0,0 +1,7 @@ +from lpython import i16, Allocatable +from numpy import empty, int16 + + +def foo() -> Allocatable[i16[:]]: + result: i16[1] = empty((1,), dtype=int16) + return result \ No newline at end of file diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2497.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2497.py new file mode 100644 index 0000000000..ca17294ab7 --- /dev/null +++ b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2497.py @@ -0,0 +1,48 @@ +import numpy +from numpy import array, empty, int16 +from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64, + ccall, sizeof, Array, Allocatable, TypeVar, Const) + + +@ccall +def _lfortran_malloc(size : i32) -> CPtr: + """Borrow from bindc_07.py in integration_tests.""" + pass + + +rows = TypeVar("rows") +cols = TypeVar("cols") + + +def load_lpython_array_from_c_fortran_array(b: CPtr, rows: i32, cols: i32) -> i16[rows, cols]: + """Load an LPython array from a C / Fortran array.""" + B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([rows * cols])) + D: i16[rows, cols] = empty((rows, cols), dtype=int16) + i: i32 + j: i32 + for i in range(rows): + for j in range(cols): + D[i, j] = B[(i * cols) + j] + return D + + +def spot_print_lpython_array(a: i16[:], n: i32, m: i32) -> None: + pass + + +def main() -> i32: + + # "Const" lets these appear in type declarations such as i16[n, m] + n : Const[i32] = 15 + m : Const[i32] = 3 + + Anm_l4 : CPtr = _lfortran_malloc((n * m) * i32(sizeof(i16))) + + Anm: i16[n, m] = load_lpython_array_from_c_fortran_array(Anm_l4, n, m) + spot_print_lpython_array(Anm, n, m) + + return 0 + + +if __name__ == "__main__": + main() diff --git a/integration_tests/matmul_apu_backend.py b/integration_tests/matmul_apu_backend.py new file mode 100644 index 0000000000..10c4b7bec8 --- /dev/null +++ b/integration_tests/matmul_apu_backend.py @@ -0,0 +1,44 @@ +from numpy import empty, uint16 +from lpython import Annotation, u16, SIMD, L1, i32, ccall, i16, i64, CPtr, TypeVar + + +n = TypeVar("n") +m = TypeVar("m") +l = TypeVar("l") + +def matmul_test(l: i32, n: i32, m: i32, M1: i32, M2: i32, V: i32, + A: u16[n, m], B: u16[m, l], C: u16[n, l]): + L1cache: Annotation[u16[M2 + 1, V], L1] = empty((M2 + 1, V), dtype=uint16) + B_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) + C_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) + T_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) + i: i32; j: i32; jj: i32; ii: i32; kk: i32; k: i32; A_ik: u16 + + L1_B_index: i32 = 0 + L1_C_base: i32 = 1 + + # I think in the APU code, M1=1, so the loops over kk and k get fused + # Closer to the APU code + + # Wrong value on purpose to check that all entries will be overriden + C[:,:] = u16(1) + for jj in range(0, l, V): + for ii in range(0, n, M2): + for i in range(M2): + C_vr[:] = u16(0) + L1cache[L1_C_base+i,:] = C_vr[:] + for kk in range(0, m, M1): + for k in range(M1): + L1cache[L1_B_index,:] = B[kk+k, jj:jj+V] + B_vr[:] = L1cache[L1_B_index,:] + for i in range(M2): + A_ik = A[ii+i,kk+k] + C_vr[:] = L1cache[L1_C_base+i,:] + T_vr[:] = A_ik + T_vr[:] = B_vr[:] * T_vr[:] + C_vr[:] = C_vr[:] + T_vr[:] + L1cache[L1_C_base+i,:] = C_vr[:] + for i in range(M2): + C[ii+i,jj:jj+V] = L1cache[L1_C_base+i,:] + + print(C) diff --git a/integration_tests/matmul_integration.asr b/integration_tests/matmul_integration.asr new file mode 100644 index 0000000000..eb0e0fbafd --- /dev/null +++ b/integration_tests/matmul_integration.asr @@ -0,0 +1,2999 @@ +(TranslationUnit + (SymbolTable + 1 + { + __main__: + (Module + (SymbolTable + 2 + { + __lcompilers_dummy: + (Variable + 2 + __lcompilers_dummy + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + __main__global_stmts: + (Function + (SymbolTable + 244 + { + + }) + __main__global_stmts + (FunctionType + [] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [main] + [] + [(If + (StringCompare + (Var 2 __name__) + Eq + (StringConstant + "__main__" + (Character 1 8 ()) + ) + (Logical 4) + () + ) + [(= + (Var 2 __lcompilers_dummy) + (FunctionCall + 2 main + () + [((LogicalConstant + .true. + (Logical 4) + ))] + (Integer 4) + () + () + ) + () + )] + [] + )] + () + Public + .false. + .false. + () + ), + __name__: + (Variable + 2 + __name__ + [] + Local + (StringConstant + "__main__" + (Character 1 8 ()) + ) + (StringConstant + "__main__" + (Character 1 8 ()) + ) + Default + (Character 1 8 ()) + () + Source + Public + Required + .false. + ), + _lfortran_malloc: + (Function + (SymbolTable + 211 + { + _lpython_return_variable: + (Variable + 211 + _lpython_return_variable + [] + ReturnVar + () + () + Default + (CPtr) + () + BindC + Public + Required + .false. + ), + size: + (Variable + 211 + size + [] + In + () + () + Default + (Integer 4) + () + BindC + Public + Required + .true. + ) + }) + _lfortran_malloc + (FunctionType + [(Integer 4)] + (CPtr) + BindC + Interface + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 211 size)] + [] + (Var 211 _lpython_return_variable) + Public + .false. + .false. + () + ), + clear_row: + (Function + (SymbolTable + 216 + { + a: + (Variable + 216 + a + [] + InOut + () + () + Default + (Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + () + Source + Public + Required + .false. + ), + cols: + (Variable + 216 + cols + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + j: + (Variable + 216 + j + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + row: + (Variable + 216 + row + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + clear_row + (FunctionType + [(Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + (Integer 4) + (Integer 4)] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 216 a) + (Var 216 row) + (Var 216 cols)] + [(DoLoop + () + ((Var 216 j) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 216 cols) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 216 a) + [(() + (Var 216 row) + ()) + (() + (Var 216 j) + ())] + (Integer 2) + RowMajor + () + ) + (Cast + (IntegerConstant 0 (Integer 4)) + IntegerToInteger + (Integer 2) + (IntegerConstant 0 (Integer 2)) + ) + () + )] + )] + () + Public + .false. + .false. + () + ), + cols: + (Variable + 2 + cols + [] + Local + () + () + Default + (TypeParameter + cols + ) + () + Source + Public + Required + .false. + ), + init_c_fortran_array: + (Function + (SymbolTable + 212 + { + B: + (Variable + 212 + B + [] + Local + () + () + Default + (Pointer + (Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + ) + () + Source + Public + Required + .false. + ), + b: + (Variable + 212 + b + [] + In + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + block: + (Block + (SymbolTable + 218 + { + block: + (Block + (SymbolTable + 219 + { + _mod: + (ExternalSymbol + 219 + _mod + 86 _mod + lpython_builtin + [] + _mod + Private + ), + _mod@__lpython_overloaded_2___mod: + (ExternalSymbol + 219 + _mod@__lpython_overloaded_2___mod + 86 __lpython_overloaded_2___mod + lpython_builtin + [] + __lpython_overloaded_2___mod + Public + ) + }) + block + [(= + (ArrayItem + (Var 212 B) + [(() + (IntegerBinOp + (IntegerBinOp + (Var 212 i) + Mul + (Var 212 cols) + (Integer 4) + () + ) + Add + (Var 212 j) + (Integer 4) + () + ) + ())] + (Integer 2) + RowMajor + () + ) + (Cast + (FunctionCall + 219 _mod@__lpython_overloaded_2___mod + 219 _mod + [((IntegerBinOp + (Var 212 i) + Add + (Var 212 j) + (Integer 4) + () + )) + ((Var 212 mod))] + (Integer 4) + () + () + ) + IntegerToInteger + (Integer 2) + () + ) + () + )] + ) + }) + block + [(DoLoop + () + ((Var 212 j) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 212 cols) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(BlockCall + -1 + 218 block + )] + )] + ), + cols: + (Variable + 212 + cols + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + i: + (Variable + 212 + i + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + j: + (Variable + 212 + j + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + mod: + (Variable + 212 + mod + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + rows: + (Variable + 212 + rows + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + init_c_fortran_array + (FunctionType + [(CPtr) + (Integer 4) + (Integer 4) + (Integer 4)] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 212 b) + (Var 212 rows) + (Var 212 cols) + (Var 212 mod)] + [(CPtrToPointer + (Var 212 b) + (Var 212 B) + (ArrayConstant + [(IntegerBinOp + (Var 212 rows) + Mul + (Var 212 cols) + (Integer 4) + () + )] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + (ArrayConstant + [(IntegerConstant 0 (Integer 4))] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + ) + (DoLoop + () + ((Var 212 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 212 rows) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(BlockCall + -1 + 212 block + )] + )] + () + Public + .false. + .false. + () + ), + load_lpython_array_from_c_fortran_array: + (Function + (SymbolTable + 214 + { + B: + (Variable + 214 + B + [] + Local + () + () + Default + (Pointer + (Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + ) + () + Source + Public + Required + .false. + ), + D: + (Variable + 214 + D + [rows + cols] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 214 rows)) + ((IntegerConstant 0 (Integer 4)) + (Var 214 cols))] + PointerToDataArray + ) + () + Source + Public + Required + .false. + ), + _lpython_return_variable: + (Variable + 214 + _lpython_return_variable + [rows + cols] + ReturnVar + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 214 rows)) + ((IntegerConstant 0 (Integer 4)) + (Var 214 cols))] + PointerToDataArray + ) + () + Source + Public + Required + .false. + ), + b: + (Variable + 214 + b + [] + In + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + cols: + (Variable + 214 + cols + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + i: + (Variable + 214 + i + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + j: + (Variable + 214 + j + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + rows: + (Variable + 214 + rows + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + load_lpython_array_from_c_fortran_array + (FunctionType + [(CPtr) + (Integer 4) + (Integer 4)] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (FunctionParam + 1 + (Integer 4) + () + )) + ((IntegerConstant 0 (Integer 4)) + (FunctionParam + 2 + (Integer 4) + () + ))] + PointerToDataArray + ) + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 214 b) + (Var 214 rows) + (Var 214 cols)] + [(CPtrToPointer + (Var 214 b) + (Var 214 B) + (ArrayConstant + [(IntegerBinOp + (Var 214 rows) + Mul + (Var 214 cols) + (Integer 4) + () + )] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + (ArrayConstant + [(IntegerConstant 0 (Integer 4))] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + ) + (= + (Var 214 D) + (ArrayConstant + [] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 214 rows)) + ((IntegerConstant 0 (Integer 4)) + (Var 214 cols))] + PointerToDataArray + ) + RowMajor + ) + () + ) + (DoLoop + () + ((Var 214 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 214 rows) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 214 j) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 214 cols) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 214 D) + [(() + (Var 214 i) + ()) + (() + (Var 214 j) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 214 B) + [(() + (IntegerBinOp + (IntegerBinOp + (Var 214 i) + Mul + (Var 214 cols) + (Integer 4) + () + ) + Add + (Var 214 j) + (Integer 4) + () + ) + ())] + (Integer 2) + RowMajor + () + ) + () + )] + )] + ) + (= + (Var 214 _lpython_return_variable) + (Var 214 D) + () + ) + (Return)] + (Var 214 _lpython_return_variable) + Public + .false. + .false. + () + ), + main: + (Function + (SymbolTable + 217 + { + A_ik: + (Variable + 217 + A_ik + [] + Local + () + () + Default + (Integer 2) + () + Source + Public + Required + .false. + ), + Anm: + (Variable + 217 + Anm + [] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 15 (Const + (Integer 4) + ))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 3 (Const + (Integer 4) + )))] + FixedSizeArray + ) + () + Source + Public + Required + .false. + ), + Anm_l4: + (Variable + 217 + Anm_l4 + [] + Local + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + B1l: + (Variable + 217 + B1l + [] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + () + Source + Public + Required + .false. + ), + Bml: + (Variable + 217 + Bml + [] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 3 (Const + (Integer 4) + ))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + () + Source + Public + Required + .false. + ), + Bml_l4: + (Variable + 217 + Bml_l4 + [] + Local + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + Cnl: + (Variable + 217 + Cnl + [] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 15 (Const + (Integer 4) + ))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + () + Source + Public + Required + .false. + ), + Cnl_l4: + (Variable + 217 + Cnl_l4 + [] + Local + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + M1: + (Variable + 217 + M1 + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + M2: + (Variable + 217 + M2 + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + T1l: + (Variable + 217 + T1l + [] + Local + () + () + Default + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + () + Source + Public + Required + .false. + ), + VR_SIZE: + (Variable + 217 + VR_SIZE + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + _lpython_return_variable: + (Variable + 217 + _lpython_return_variable + [] + ReturnVar + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + i: + (Variable + 217 + i + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + ii: + (Variable + 217 + ii + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + jj: + (Variable + 217 + jj + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + k: + (Variable + 217 + k + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + l: + (Variable + 217 + l + [] + Local + (IntegerConstant 32768 (Integer 4)) + (IntegerConstant 32768 (Integer 4)) + Parameter + (Const + (Integer 4) + ) + () + Source + Public + Required + .false. + ), + m: + (Variable + 217 + m + [] + Local + (IntegerConstant 3 (Integer 4)) + (IntegerConstant 3 (Integer 4)) + Parameter + (Const + (Integer 4) + ) + () + Source + Public + Required + .false. + ), + n: + (Variable + 217 + n + [] + Local + (IntegerConstant 15 (Integer 4)) + (IntegerConstant 15 (Integer 4)) + Parameter + (Const + (Integer 4) + ) + () + Source + Public + Required + .false. + ), + optimize: + (Variable + 217 + optimize + [] + In + () + () + Default + (Logical 4) + () + Source + Public + Required + .false. + ), + ww: + (Variable + 217 + ww + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + main + (FunctionType + [(Logical 4)] + (Integer 4) + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [_lfortran_malloc + init_c_fortran_array + zero_c_fortran_array + load_lpython_array_from_c_fortran_array] + [(Var 217 optimize)] + [(= + (Var 217 M1) + (IntegerConstant 1 (Integer 4)) + () + ) + (= + (Var 217 M2) + (IntegerConstant 5 (Integer 4)) + () + ) + (= + (Var 217 Anm_l4) + (FunctionCall + 2 _lfortran_malloc + () + [((IntegerBinOp + (IntegerBinOp + (Var 217 n) + Mul + (Var 217 m) + (Integer 4) + (IntegerConstant 45 (Integer 4)) + ) + Mul + (Cast + (SizeOfType + (Integer 2) + (Integer 8) + () + ) + IntegerToInteger + (Integer 4) + () + ) + (Integer 4) + () + ))] + (CPtr) + () + () + ) + () + ) + (= + (Var 217 Bml_l4) + (FunctionCall + 2 _lfortran_malloc + () + [((IntegerBinOp + (IntegerBinOp + (Var 217 m) + Mul + (Var 217 l) + (Integer 4) + (IntegerConstant 98304 (Integer 4)) + ) + Mul + (Cast + (SizeOfType + (Integer 2) + (Integer 8) + () + ) + IntegerToInteger + (Integer 4) + () + ) + (Integer 4) + () + ))] + (CPtr) + () + () + ) + () + ) + (= + (Var 217 Cnl_l4) + (FunctionCall + 2 _lfortran_malloc + () + [((IntegerBinOp + (IntegerBinOp + (Var 217 n) + Mul + (Var 217 l) + (Integer 4) + (IntegerConstant 491520 (Integer 4)) + ) + Mul + (Cast + (SizeOfType + (Integer 2) + (Integer 8) + () + ) + IntegerToInteger + (Integer 4) + () + ) + (Integer 4) + () + ))] + (CPtr) + () + () + ) + () + ) + (SubroutineCall + 2 init_c_fortran_array + () + [((Var 217 Anm_l4)) + ((Var 217 n)) + ((Var 217 m)) + ((IntegerConstant 11 (Integer 4)))] + () + ) + (SubroutineCall + 2 init_c_fortran_array + () + [((Var 217 Bml_l4)) + ((Var 217 m)) + ((Var 217 l)) + ((IntegerConstant 13 (Integer 4)))] + () + ) + (SubroutineCall + 2 zero_c_fortran_array + () + [((Var 217 Cnl_l4)) + ((Var 217 n)) + ((Var 217 l))] + () + ) + (Print + [(Var 217 Anm_l4)] + () + () + ) + (= + (Var 217 Anm) + (FunctionCall + 2 load_lpython_array_from_c_fortran_array + () + [((Var 217 Anm_l4)) + ((Var 217 n)) + ((Var 217 m))] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 217 n)) + ((IntegerConstant 0 (Integer 4)) + (Var 217 m))] + FixedSizeArray + ) + () + () + ) + () + ) + (Print + [(Var 217 Anm)] + () + () + ) + (= + (Var 217 Bml) + (FunctionCall + 2 load_lpython_array_from_c_fortran_array + () + [((Var 217 Bml_l4)) + ((Var 217 m)) + ((Var 217 l))] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 217 m)) + ((IntegerConstant 0 (Integer 4)) + (Var 217 l))] + FixedSizeArray + ) + () + () + ) + () + ) + (= + (Var 217 Cnl) + (FunctionCall + 2 load_lpython_array_from_c_fortran_array + () + [((Var 217 Cnl_l4)) + ((Var 217 n)) + ((Var 217 l))] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (Var 217 n)) + ((IntegerConstant 0 (Integer 4)) + (Var 217 l))] + FixedSizeArray + ) + () + () + ) + () + ) + (= + (Var 217 B1l) + (ArrayConstant + [] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + RowMajor + ) + () + ) + (= + (Var 217 T1l) + (ArrayConstant + [] + (Array + (Integer 2) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4))) + ((IntegerConstant 0 (Const + (Integer 4) + )) + (IntegerConstant 32768 (Const + (Integer 4) + )))] + FixedSizeArray + ) + RowMajor + ) + () + ) + (= + (Var 217 VR_SIZE) + (IntegerConstant 32768 (Integer 4)) + () + ) + (Print + [(StringConstant + "optimized by hand ? " + (Character 1 20 ()) + ) + (Var 217 optimize) + (StringConstant + "\n" + (Character 1 1 ()) + )] + () + () + ) + (If + (Var 217 optimize) + [(DoLoop + () + ((Var 217 jj) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Add + (IntegerBinOp + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 VR_SIZE) + GtE + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant -1 (Integer 4)) + (Integer 4) + () + ) + Add + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 VR_SIZE) + Lt + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Var 217 VR_SIZE)) + [(DoLoop + () + ((Var 217 ii) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 n) + Add + (IntegerBinOp + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 M2) + GtE + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant -1 (Integer 4)) + (Integer 4) + () + ) + Add + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 M2) + Lt + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Var 217 M2)) + [(DoLoop + () + ((Var 217 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 M2) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (Cast + (IntegerConstant 0 (Integer 4)) + IntegerToInteger + (Integer 2) + (IntegerConstant 0 (Integer 2)) + ) + () + )] + )] + ) + (DoLoop + () + ((Var 217 k) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 m) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 2 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 217 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 M2) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (IntegerBinOp + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + Add + (IntegerBinOp + (ArrayItem + (Var 217 Bml) + [(() + (Var 217 k) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + Mul + (ArrayItem + (Var 217 Anm) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 k) + ())] + (Integer 2) + RowMajor + () + ) + (Integer 2) + () + ) + (Integer 2) + () + ) + () + )] + )] + )] + )] + )] + )] + [(DoLoop + () + ((Var 217 jj) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Add + (IntegerBinOp + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 VR_SIZE) + GtE + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant -1 (Integer 4)) + (Integer 4) + () + ) + Add + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 VR_SIZE) + Lt + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Var 217 VR_SIZE)) + [(DoLoop + () + ((Var 217 ii) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 n) + Add + (IntegerBinOp + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 M2) + GtE + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant -1 (Integer 4)) + (Integer 4) + () + ) + Add + (IntegerBinOp + (Cast + (IntegerCompare + (Var 217 M2) + Lt + (IntegerConstant 0 (Integer 4)) + (Logical 4) + () + ) + LogicalToInteger + (Integer 4) + () + ) + Mul + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Integer 4) + () + ) + (Var 217 M2)) + [(DoLoop + () + ((Var 217 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 M2) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (Cast + (IntegerConstant 0 (Integer 4)) + IntegerToInteger + (Integer 2) + (IntegerConstant 0 (Integer 2)) + ) + () + )] + )] + ) + (DoLoop + () + ((Var 217 k) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 m) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 2 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 B1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Bml) + [(() + (Var 217 k) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + () + )] + ) + (DoLoop + () + ((Var 217 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 M2) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(= + (Var 217 A_ik) + (ArrayItem + (Var 217 Anm) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 k) + ())] + (Integer 2) + RowMajor + () + ) + () + ) + (DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 T1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (Var 217 A_ik) + () + )] + ) + (DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 T1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (IntegerBinOp + (ArrayItem + (Var 217 T1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + Mul + (ArrayItem + (Var 217 B1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (Integer 2) + () + ) + () + )] + ) + (DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (IntegerBinOp + (ArrayItem + (Var 217 Cnl) + [(() + (IntegerBinOp + (Var 217 i) + Add + (Var 217 ii) + (Integer 4) + () + ) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + Add + (ArrayItem + (Var 217 T1l) + [(() + (IntegerConstant 0 (Integer 4)) + ()) + (() + (Var 217 ww) + ())] + (Integer 2) + RowMajor + () + ) + (Integer 2) + () + ) + () + )] + )] + )] + )] + )] + )] + ) + (Print + [(StringConstant + "Expect:" + (Character 1 7 ()) + )] + () + () + ) + (Print + [(StringConstant + "[[ 5 8 11 ... 20 23 26]," + (Character 1 25 ()) + )] + () + () + ) + (Print + [(StringConstant + " [ 8 14 20 ... 38 44 50]," + (Character 1 25 ()) + )] + () + () + ) + (Print + [(StringConstant + " [11 20 29 ... 56 65 74], ..." + (Character 1 29 ()) + )] + () + () + ) + (Print + [(StringConstant + "" + (Character 1 0 ()) + )] + () + () + ) + (Print + [(StringConstant + " [ 8 14 20 ... 38 44 50]," + (Character 1 25 ()) + )] + () + () + ) + (Print + [(StringConstant + " [11 20 29 ... 56 65 74]," + (Character 1 25 ()) + )] + () + () + ) + (Print + [(StringConstant + " [14 26 38 ... 74 86 98]]" + (Character 1 25 ()) + )] + () + () + ) + (Print + [(StringConstant + "" + (Character 1 0 ()) + )] + () + () + ) + (Print + [(StringConstant + "Actual:" + (Character 1 7 ()) + )] + () + () + ) + (DoLoop + () + ((Var 217 ww) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (IntegerConstant 3 (Integer 4)) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 2 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(Print + [(ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 0 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 1 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 2 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (StringConstant + "..." + (Character 1 3 ()) + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 3 (Integer 4)) + (Integer 4) + (IntegerConstant 32765 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 2 (Integer 4)) + (Integer 4) + (IntegerConstant 32766 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + )] + () + () + )] + ) + (Print + [(StringConstant + "..." + (Character 1 3 ()) + )] + () + () + ) + (DoLoop + () + ((Var 217 ww) + (IntegerBinOp + (Var 217 n) + Sub + (IntegerConstant 3 (Integer 4)) + (Integer 4) + (IntegerConstant 12 (Integer 4)) + ) + (IntegerBinOp + (Var 217 n) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 14 (Integer 4)) + ) + (IntegerConstant 1 (Integer 4))) + [(Print + [(ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 0 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 1 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerConstant 2 (Integer 4)) + ())] + (Integer 2) + RowMajor + () + ) + (StringConstant + "..." + (Character 1 3 ()) + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 3 (Integer 4)) + (Integer 4) + (IntegerConstant 32765 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 2 (Integer 4)) + (Integer 4) + (IntegerConstant 32766 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + ) + (ArrayItem + (Var 217 Cnl) + [(() + (Var 217 ww) + ()) + (() + (IntegerBinOp + (Var 217 l) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + (IntegerConstant 32767 (Integer 4)) + ) + ())] + (Integer 2) + RowMajor + () + )] + () + () + )] + ) + (= + (Var 217 _lpython_return_variable) + (IntegerConstant 0 (Integer 4)) + () + ) + (Return)] + (Var 217 _lpython_return_variable) + Public + .false. + .false. + () + ), + rows: + (Variable + 2 + rows + [] + Local + () + () + Default + (TypeParameter + rows + ) + () + Source + Public + Required + .false. + ), + spot_print: + (Function + (SymbolTable + 215 + { + a: + (Variable + 215 + a + [] + InOut + () + () + Default + (Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + () + Source + Public + Required + .false. + ) + }) + spot_print + (FunctionType + [(Array + (Integer 2) + [(() + ())] + DescriptorArray + )] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 215 a)] + [(Return)] + () + Public + .false. + .false. + () + ), + zero_c_fortran_array: + (Function + (SymbolTable + 213 + { + B: + (Variable + 213 + B + [] + Local + () + () + Default + (Pointer + (Array + (Integer 2) + [(() + ())] + DescriptorArray + ) + ) + () + Source + Public + Required + .false. + ), + b: + (Variable + 213 + b + [] + In + () + () + Default + (CPtr) + () + Source + Public + Required + .false. + ), + cols: + (Variable + 213 + cols + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + i: + (Variable + 213 + i + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + j: + (Variable + 213 + j + [] + Local + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + rows: + (Variable + 213 + rows + [] + In + () + () + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + zero_c_fortran_array + (FunctionType + [(CPtr) + (Integer 4) + (Integer 4)] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. + ) + [] + [(Var 213 b) + (Var 213 rows) + (Var 213 cols)] + [(CPtrToPointer + (Var 213 b) + (Var 213 B) + (ArrayConstant + [(IntegerBinOp + (Var 213 rows) + Mul + (Var 213 cols) + (Integer 4) + () + )] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + (ArrayConstant + [(IntegerConstant 0 (Integer 4))] + (Array + (Integer 4) + [((IntegerConstant 0 (Integer 4)) + (IntegerConstant 1 (Integer 4)))] + PointerToDataArray + ) + RowMajor + ) + ) + (DoLoop + () + ((Var 213 i) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 213 rows) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(DoLoop + () + ((Var 213 j) + (IntegerConstant 0 (Integer 4)) + (IntegerBinOp + (Var 213 cols) + Sub + (IntegerConstant 1 (Integer 4)) + (Integer 4) + () + ) + (IntegerConstant 1 (Integer 4))) + [(= + (ArrayItem + (Var 213 B) + [(() + (IntegerBinOp + (IntegerBinOp + (Var 213 i) + Mul + (Var 213 cols) + (Integer 4) + () + ) + Add + (Var 213 j) + (Integer 4) + () + ) + ())] + (Integer 2) + RowMajor + () + ) + (Cast + (IntegerConstant 0 (Integer 4)) + IntegerToInteger + (Integer 2) + (IntegerConstant 0 (Integer 2)) + ) + () + )] + )] + )] + () + Public + .false. + .false. + () + ) + }) + __main__ + [lpython_builtin + numpy] + .false. + .false. + ), + lpython_builtin: + (IntrinsicModule lpython_builtin), + main_program: + (Program + (SymbolTable + 245 + { + __main__global_stmts: + (ExternalSymbol + 245 + __main__global_stmts + 2 __main__global_stmts + __main__ + [] + __main__global_stmts + Public + ) + }) + main_program + [__main__] + [(SubroutineCall + 245 __main__global_stmts + 2 __main__global_stmts + [] + () + )] + ), + numpy: + (Module numpy) + }) + [] +) diff --git a/integration_tests/matmul_integration.py b/integration_tests/matmul_integration.py new file mode 100644 index 0000000000..ae88adaa0b --- /dev/null +++ b/integration_tests/matmul_integration.py @@ -0,0 +1,326 @@ +import numpy +from numpy import array, empty, int16, uint16 +from lpython import (i16, i32, c_p_pointer, Pointer, CPtr, + ccall, sizeof, TypeVar, Const, + # Annotation, SIMD # TODO + ) + + +# https://numpy.org/devdocs/reference/typing.html +# from numpy.typing import NDArray + + +# plan for 30 Jan 2024 -- +# step 0: comment out this code and ./build_baryon.sh to run on APU +# emulator; or ./run_full_emulation.sh to run in CPython. +# step 1: side-by-side numpy implementation in full-emulation +# - get there line-by-line +# = focus on gvml_add_u16 first + + +@ccall +def _lfortran_malloc(size : i32) -> CPtr: + """Borrow from bindc_07.py in integration_tests.""" + pass + + +def init_c_fortran_array(b: CPtr, rows: i32, cols: i32, mod: i32) -> None: + """Initialize a C / Fortran array with test data. A C / Fortran + array is, mathematically, a 2D structure. Its 2D indices are + converted inline to a 1D index into the 1D physical array.""" + B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([rows * cols])) + i: i32 + j: i32 + for i in range(rows): + for j in range(cols): + B[(i * cols) + j] = i16((i + j) % mod) + + +def zero_c_fortran_array(b: CPtr, rows: i32, cols: i32) -> None: + """Zero out a C / Fortran array.""" + B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([rows * cols])) + i: i32 + j: i32 + for i in range(rows): + for j in range(cols): + B[(i * cols) + j] = i16(0) + + +rows = TypeVar("rows") +cols = TypeVar("cols") + + +def load_lpython_array_from_c_fortran_array( + b: CPtr, rows: i32, cols: i32) -> i16[rows, cols]: + """Load an LPython array from a C / Fortran array.""" + B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([rows * cols])) + D: i16[rows, cols] = empty((rows, cols), dtype=int16) + i: i32 + j: i32 + for i in range(rows): + for j in range(cols): + D[i, j] = B[(i * cols) + j] + return D + + +def spot_print(Anl: i16[:, :], n: i32, l: i32) -> None: + j: i32 + for j in range(0, 3): + spot_print_row(Anl, l, j) + print("...") + for j in range(n - 3, n): + spot_print_row(Anl, l, j) + + +def spot_print_row(Anl: i16[:, :], cols: i32, row: i32) -> None: + if (cols > 3): + print(Anl[row, 0], Anl[row, 1], Anl[row, 2], "...", + Anl[row, cols - 3], Anl[row, cols - 2], Anl[row, cols - 1]) + else: + print(Anl[row, 0], Anl[row, 1], Anl[row, 2]) + + +def clear_row(a: i16[:, :], row: i32, cols: i32) -> None: + # Due to Issue 2500, I cannot broadcast a constant. + j: i32 + for j in range(cols): + a[row, j] = i16(0) + + +def broadcast_i16_row( + a: i16[:, :], row: i32, val: i16, + cols: i32) -> None: + # Due to Issue 2500, I cannot broadcast a constant. + j: i32 + for j in range(cols): + a[row, j] = i16(val) + + +def broadcast_copy_row( + dest: i16[:, :], dest_row: i32, + src: i16[:, :], src_row: i32, + cols: i32) -> None: + # Due to Issue 2500, I cannot broadcast. + j: i32 + for j in range(cols): + dest[dest_row, j] = src[src_row, j] + + +def hadamard_product_in_place_row( + dest: i16[:, :], dest_row: i32, + src: i16[:, :], src_row: i32, + cols: i32) -> None: + j: i32 + for j in range(cols): + dest[dest_row, j] *= src[src_row, j] + + +def accumulate_in_place_row( + dest: i16[:, :], dest_row: i32, + src: i16[:, :], src_row: i32, + cols: i32) -> None: + j: i32 + for j in range(cols): + dest[dest_row, j] += src[src_row, j] + + +def accumulate_in_place_outer_product_row( + dest: i16[:, :], dest_row: i32, + src1: i16[:, :], src1_row: i32, + src2: i16[:, :], + cols: i32) -> None: + ww: i32 + for ww in range(0, cols): + dest[dest_row, ww] += src1[src1_row, ww] * src2[dest_row, src1_row] + + +def print_expected() -> None: + print("\nExpected result:") + print("[[ 5 8 11 ... 20 23 26],") + print(" [ 8 14 20 ... 38 44 50],") + print(" [11 20 29 ... 56 65 74], ...") + print("") + print(" [ 8 14 20 ... 38 44 50],") + print(" [11 20 29 ... 56 65 74],") + print(" [14 26 38 ... 74 86 98]]") + print("") + + +def main() -> i32: + + # "Const" lets these appear in type declarations such as i16[n, m] + n : Const[i32] = 15 + m : Const[i32] = 3 + l : Const[i32] = 32_768 + + M1 : i32 = 1 + M2 : i32 = 5 # Issue 2499 -- can't be Const + + Anm_l4 : CPtr = _lfortran_malloc((n * m) * i32(sizeof(i16))) + Bml_l4 : CPtr = _lfortran_malloc((m * l) * i32(sizeof(i16))) + Cnl_l4 : CPtr = _lfortran_malloc((n * l) * i32(sizeof(i16))) + + init_c_fortran_array(Anm_l4, n, m, 11) + init_c_fortran_array(Bml_l4, m, l, 13) + zero_c_fortran_array(Cnl_l4, n, l) + + print("\nInputs:") + + Anm: i16[n, m] = load_lpython_array_from_c_fortran_array(Anm_l4, n, m) + print("Anm[", n, ",", m, "]") + spot_print(Anm, n, m) + + Bml: i16[m, l] = load_lpython_array_from_c_fortran_array(Bml_l4, m, l) + print("Bml[", m, ",", l, "]") + spot_print(Bml, m, l) + + Cnl: i16[n, l] = load_lpython_array_from_c_fortran_array(Cnl_l4, n, l) + print("Cnl[", n, ",", l, "]") + spot_print(Cnl, n, l) + + print_expected() + + VR_SIZE: i32 = 32_768 + # ---------------------------------------------------------------- + print("\nhand-blocked accumulated outer product; block size = M2 =", M2) + hand_optimized_to_remove_temporaries(Anm, Bml, Cnl, n, m, l, VR_SIZE, M2) + + print("\nActual result:") + spot_print(Cnl, n, l) + # ---------------------------------------------------------------- + with_liberal_use_of_temporaries(Anm, Bml, Cnl, n, m, l, VR_SIZE, M2) + + print("\nActual result:") + spot_print(Cnl, n, l) + # ---------------------------------------------------------------- + blocked_and_tiled_with_temporaries(Anm, Bml, Cnl, n, m, l, VR_SIZE, M1, M2) + + print("\nActual result:") + spot_print(Cnl, n, l) + # ---------------------------------------------------------------- + unblocked_accumulated_outer_product(Anm, Bml, Cnl, n, m, l) + + print("\nActual result:") + spot_print(Cnl, n, l) + # ---------------------------------------------------------------- + return 0 + + +def unblocked_accumulated_outer_product( + Anm: i16[:, :], Bml: i16[:, :], Cnl: i16[:, :], + n: i32, m: i32, l: i32) -> None: + print("\nunblocked, naive Accumulated Outer Product for reference") + i: i32 + k: i32 + for i in range(0, n): + clear_row(Cnl, i, l) + for k in range(0, m): # rows of B + accumulate_in_place_outer_product_row(Cnl, i, Bml, k, Anm, l) + + +def blocked_and_tiled_with_temporaries( + Anm: i16[:, :], Bml: i16[:, :], Cnl: i16[:, :], + n: i32, m: i32, l: i32, VR_SIZE: i32, + M1: i32, M2: i32): + # L1cache: Annotation[u16[VR_SIZE], SIMD] = empty((M2 + 1), VR_SIZE, dtype=uint16) # TODO + # B_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) # TODO + # C_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) # TODO + # T_vr: Annotation[u16[V], SIMD] = empty((V,), dtype=uint16) # TODO + L1_B_index: i32 = 0 + L1_C_base: i32 = 1 + k: i32 + kk: i32 + jj: i32 + i: i32 + ii: i32 + A_ik: i16 + print("\nbloced and tiled with temporaries") + B1l: i16[1, l] = empty((1, l), dtype=int16) + T1l: i16[1, l] = empty((1, l), dtype=int16) + for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # Zero-out rows of C. + clear_row(Cnl, i + ii, l) + # L1cache[(L1_C_base + i), :] = C_vr[:] # TODO + for kk in range(0, m, M1): + for k in range(0, M1): # rows of Bml + # L1cache[L1_B_index, :] = Bml[(kk + k), jj : (jj + VR_SIZE)] # TODO + # B_vr[:] = l1cache[L1_B_index, :] # TODO + # ------------------------------------------------------------ + # B_1l[0, :] = B_ml[(k + kk), :] + broadcast_copy_row(B1l, 0, Bml, k + kk, l) + for i in range(0, M2): + # C_vr[:] = L1cache[(L1_C_base + i), :] # TODO + # -------------------------------------------------------- + # T_vr[:] = A_ik # TODO + # -------------------------------------------------------- + # T1l[0, :] = Anm[(i + ii), (k + kk)] + A_ik = Anm[i + ii, k + kk] + broadcast_i16_row(T1l, 0, A_ik, l) + # -------------------------------------------------------- + # T_vr[:] = B_vr[:] * T_vr[:] # Hadamard product # TODO + # T_vr[:] *= B_vr[:] # Hadamard product alternative # TODO + # -------------------------------------------------------- + # T1l[0, :] = np.multiply(B1l[0, :], T1l[0, :]) + hadamard_product_in_place_row(T1l, 0, B1l, 0, l) + # -------------------------------------------------------- + # C_vr[:] = C_vr[:] + T_vr[:] # TODO + # C_vr[:] += T_vr[:] # Alternative # TODO + # -------------------------------------------------------- + # Cnl[i + ii, :] += T1l[0, :] + accumulate_in_place_row(Cnl, i + ii, T1l, 0, l) + # L1cache[(L1_C_base + i), :] = C_vr[:] # TODO + # for i in range(0, M2): # TODO + # Cnl[(ii + i), jj : (jj + VR_SIZE)] = L1cache[(L1_C_base + i), :] # TODO + + +def with_liberal_use_of_temporaries( + Anm: i16[:, :], Bml: i16[:, :], Cnl: i16[:, :], + n: i32, m: i32, l: i32, VR_SIZE: i32, M2: i32): + k: i32 + jj: i32 + ii: i32 + i: i32 + print("\nliberal usage of temporaries") + B1l: i16[1, l] = empty((1, l), dtype=int16) + T1l: i16[1, l] = empty((1, l), dtype=int16) + for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # Zero-out rows of C. + clear_row(Cnl, i + ii, l) + for k in range(0, m): # rows of B + # ------------------------------------------------------------ + # B_1l[0, :] = B_ml[k, :] + broadcast_copy_row(B1l, 0, Bml, k, l) + for i in range(0, M2): + # -------------------------------------------------------- + # T1l[0, :] = Anm[i + ii, k] + broadcast_i16_row(T1l, 0, Anm[i + ii, k], l) + # -------------------------------------------------------- + # T1l[0, :] = np.multiply(B1l[0, :], T1l[0, :]) + hadamard_product_in_place_row(T1l, 0, B1l, 0, l) + # -------------------------------------------------------- + # Cnl[i + ii, :] += T1l[0, :] + accumulate_in_place_row(Cnl, i + ii, T1l, 0, l) + + +def hand_optimized_to_remove_temporaries( + Anm: i16[:, :], Bml: i16[:, :], Cnl: i16[:, :], + n: i32, m: i32, l: i32, VR_SIZE: i32, M2: i32): + k: i32 + jj: i32 + ii: i32 + i: i32 + print("\noptimized by hand to remove temporaries") + for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C + for ii in range(0, n, M2): # each M2 block in A cols and B rows + for i in range(0, M2): # Zero-out rows of C. + clear_row(Cnl, i + ii, l) + for k in range(0, m): # rows of B + for i in range(0, M2): + accumulate_in_place_outer_product_row( + Cnl, i + ii, Bml, k, Anm, l) + + +if __name__ == "__main__": + main()