Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface,
}];
}

def fir_PrefetchOp : fir_Op<"prefetch", []> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without any side effects, I think this operation could pessimize some analysis/optimizations.

Do you know how LLVM handles the side effects of this operation?

Should it simply be given read/write effects on its arguments depending on the rw attribute?

let summary = "prefetch a memory reference";

let description = [{
The prefetch is a hint to the code generator that the memory reference will
be used in the near future. The prefetch is not guaranteed to be executed.

`rw` and `cacheType` are unit attributes: they are either present or absent
and never carry a value. `localityHint` is an `i32` in the range 0 to 3.

```
%a = ... -> !fir.ref<i32>
fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
// ...
fir.load %a : !fir.ref<i32> // use the prefetched value
```
}];

/// `memref' is the address to be prefetched
/// `rw' : access kind (unit attribute) >
///     absent is a read (0, default), present is a write (1)
/// `localityHint': temporal locality specifier >
///     value ranging from 0 - no locality to 3 - extremely local
/// `cacheType' : cache type (unit attribute) >
///     absent is the instruction cache (0, default), present is the data
///     cache (1)
/// NOTE: The numerical values used here are in reference to the LLVM LangRef
let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw,
    ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
    UnitAttr:$cacheType);

let assemblyFormat = "$memref attr-dict `:` type(operands)";
}

def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "copy constant size memory";

Expand Down
23 changes: 22 additions & 1 deletion flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3276,7 +3276,28 @@ class FirConverter : public Fortran::lower::AbstractConverter {
attachInliningDirectiveToStmt(dir, &eval);
},
[&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
  // Lower a prefetch directive: one fir.prefetch is emitted for every
  // entry listed on the directive, in source order.
  // The location does not depend on the entry, so compute it once; every
  // generated operation is attributed to the directive itself.
  const mlir::Location loc = genLocation(dir.source);
  for (const auto &p : prefetch.v) {
    Fortran::evaluate::ExpressionAnalyzer ea{bridge.getSemanticsContext()};
    Fortran::lower::SomeExpr expr{
        *ea.Analyze(std::get<Fortran::parser::DataRef>(p.value().u))};
    Fortran::lower::StatementContext stmtCtx;
    mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
                           loc, *this, expr, localSymbols, stmtCtx)
                           .getBase()};

    // fir.prefetch must be given the data address, not a descriptor.
    // A descriptor may show up either behind a reference (it has to be
    // loaded first) or directly as a box value (it must NOT be loaded:
    // fir.load only accepts memory reference types).
    if (fir::isa_ref_type(memRef.getType()) &&
        mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(memRef.getType())))
      memRef = fir::LoadOp::create(*builder, loc, memRef);
    if (mlir::isa<fir::BaseBoxType>(memRef.getType()))
      memRef = fir::BoxAddrOp::create(*builder, loc, memRef);

    // TODO: Don't use default value, instead get the following
    // info from the directive
    // Defaults: read access, highest temporal locality, data cache.
    const bool isWrite{false};
    const uint32_t localityHint{3};
    const bool isData{true};
    fir::PrefetchOp::create(*builder, loc, memRef, isWrite, localityHint,
                            isData);
  }
},
[&](const auto &) {}},
dir.u);
Expand Down
37 changes: 29 additions & 8 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3346,6 +3346,26 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
}
};

/// Lower `fir.prefetch` to `llvm.prefetch`.
///
/// The two unit attributes of the FIR operation (`rw` and `cacheType`) are
/// turned into i32 attributes holding 1 when the unit attribute is present
/// and 0 when it is absent. `localityHint` is forwarded unchanged.
struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
  using FIROpConversion::FIROpConversion;

  llvm::LogicalResult
  matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const override {
    // Presence of a unit attribute becomes an i32 flag.
    mlir::Type i32Ty = rewriter.getI32Type();
    auto toFlag = [&](mlir::UnitAttr unit) {
      return mlir::IntegerAttr::get(i32Ty, unit ? 1 : 0);
    };

    // The already converted address is the only operand of the operation.
    mlir::Value address = adaptor.getOperands().front();
    mlir::IntegerAttr rwFlag = toFlag(prefetch.getRwAttr());
    mlir::IntegerAttr locality = prefetch.getLocalityHintAttr();
    mlir::IntegerAttr cacheFlag = toFlag(prefetch.getCacheTypeAttr());

    mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(), address, rwFlag,
                                 locality, cacheFlag);
    // fir.prefetch has no results, so it is simply dropped.
    rewriter.eraseOp(prefetch);
    return mlir::success();
  }
};

/// `fir.load` --> `llvm.load`
struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
using FIROpConversion::FIROpConversion;
Expand Down Expand Up @@ -4423,14 +4443,15 @@ void fir::populateFIRToLLVMConversionPatterns(
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
SliceOpConversion, StoreOpConversion, StringLitOpConversion,
SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
XReboxOpConversion, ZeroOpConversion>(converter, options);
NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
ShiftOpConversion, SliceOpConversion, StoreOpConversion,
StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion,
XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
options);

// Patterns that are populated without a type converter do not trigger
// target materializations for the operands of the root op.
Expand Down
39 changes: 39 additions & 0 deletions flang/test/Integration/prefetch.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM

!===============================================================================
! Test lowering of prefetch directive
!===============================================================================

! End-to-end check that the prefetch directive reaches the emitted IR as calls
! to @llvm.prefetch.p0 for a whole array, an array element and a scalar.
! Every expected call has the same trailing operands: i32 0, i32 3, i32 1.
subroutine test_prefetch_01()
! Capture the stack slots first so the later checks can name the storage of
! j, i and a.
! LLVM: {{.*}} = alloca i32, i64 1, align 4
! LLVM: %[[VAR_J:.*]] = alloca i32, i64 1, align 4
! LLVM: %[[VAR_I:.*]] = alloca i32, i64 1, align 4
! LLVM: %[[VAR_A:.*]] = alloca [256 x i32], i64 1, align 4

integer :: i, j
integer :: a(256)

a = 23
! Whole-array prefetch: the pointer operand is the array's own stack slot.
! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_A]], i32 0, i32 3, i32 1)
!dir$ prefetch a
i = sum(a)

! Element prefetch: i is loaded, 64 is added, and the resulting element
! address of a is the pointer operand.
! LLVM: %[[LOAD_I:.*]] = load i32, ptr %[[VAR_I]], align 4
! LLVM: %{{.*}} = add nsw i32 %[[LOAD_I]], 64
! LLVM: %[[GEP_A:.*]] = getelementptr i32, ptr %[[VAR_A]], i64 {{.*}}

! One directive listing two items yields two calls, in the listed order.
! LLVM: call void @llvm.prefetch.p0(ptr %[[GEP_A]], i32 0, i32 3, i32 1)
! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_J]], i32 0, i32 3, i32 1)
do i = 1, (256 - 64)
!dir$ prefetch a(i+64), j
a(i) = a(i-32) + a(i+32) + j
end do
end subroutine test_prefetch_01
65 changes: 65 additions & 0 deletions flang/test/Lower/HLFIR/prefetch.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
! Test lowering of prefetch directive
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR

! Declares derived type t, whose 256x256 integer array component a is the
! target of the component prefetches in test_prefetch_02.
module test_prefetch_mod
implicit none
type :: t
integer :: a(256, 256)
end type t
end module test_prefetch_mod

! Prefetch of local, statically shaped data: a whole array, an array element
! selected by a computed subscript, and a scalar.
subroutine test_prefetch_01()
! Capture the hlfir.declare results of a, i and j for the checks below.
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

integer :: i, j
integer :: a(256)

a = 23

! Whole-array prefetch: the operand is the declared array reference itself.
! The expected attribute dictionary is {cacheType, localityHint = 3 : i32}.
! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256xi32>>
!dir$ prefetch a
i = sum(a)

! Element prefetch: i + 64 is computed, converted to i64 and used by
! hlfir.designate; the designated element reference is the operand.
! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>

! One directive listing two items yields two operations, in the listed order.
! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<i32>

do i = 1, (256 - 64)
!dir$ prefetch a(i+64), j
a(i) = a(i-32) + a(i+32) + j
end do
end subroutine test_prefetch_01

! Prefetch of a derived-type array component, of an allocatable array, and of
! a single element of the component.
subroutine test_prefetch_02(t1)
use test_prefetch_mod
! Capture the hlfir.declare results of the allocatable a and the dummy t1.
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
type(t), intent(inout) :: t1
integer, allocatable :: a(:, :)

! Component prefetch: the operand is the reference designating t1%a.
! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}}
! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256x256xi32>>
!dir$ prefetch t1%a
a = t1%a ** 2

do i = 1, 256
! Allocatable prefetch: the descriptor is loaded and fir.box_addr extracts the
! heap address, which is the operand of the prefetch.
! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box<!fir.heap<!fir.array<?x?xi32>>>) -> !fir.heap<!fir.array<?x?xi32>>
! HLFIR: fir.prefetch %[[A_BOX]] {cacheType, localityHint = 3 : i32} : !fir.heap<!fir.array<?x?xi32>>
!dir$ prefetch a
a(i, :) = a(i, :) + i
do j = 1, 256
! Component element prefetch: the operand is the designated scalar element.
! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
!dir$ prefetch t1%a(i, j)
t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
end do
end do
end subroutine test_prefetch_02
Loading