-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[Affine] Parallelize loops with MemoryEffects::Free ops. #172388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
If an operation deallocates memory allocated inside the loop, then it is as safe for parallelization as the allocation, which is already handled by `isLocallyDefined`.
|
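@llvm/pr-subscribers-mlir Author: Slava Zakharin (vzakhari) Changes: If an operation deallocates memory allocated inside the loop, then it is as safe for parallelization as the allocation, which is already handled by `isLocallyDefined`. Full diff: https://github.com/llvm/llvm-project/pull/172388.diff 2 Files Affected: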
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
index 3d1a73417d1ea..b126909439b88 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -134,6 +134,25 @@ static bool isLocallyDefined(Value v, Operation *enclosingOp) {
return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}
+/// Returns true if `op` has only Free memory effects on the values
+/// that are locally defined (i.e. they are allocated by operations
+/// nested within `enclosingOp`).
+static bool isDeallocationOfLocallyDefined(Operation *op,
+ Operation *enclosingOp) {
+ std::optional<SmallVector<MemoryEffects::EffectInstance>> effects =
+ getEffectsRecursively(op);
+ if (!effects)
+ return false;
+
+ for (const MemoryEffects::EffectInstance &effect : *effects) {
+ Value freed = effect.getValue();
+ if (!isa<MemoryEffects::Free>(effect.getEffect()) || !freed ||
+ !isLocallyDefined(freed, enclosingOp))
+ return false;
+ }
+ return true;
+}
+
bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
// Any memref-typed iteration arguments are treated as serializing.
if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
@@ -152,6 +171,7 @@ bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
loadAndStoreOps.push_back(op);
} else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
!hasSingleEffect<MemoryEffects::Allocate>(op) &&
+ !isDeallocationOfLocallyDefined(op, forOp) &&
!isMemoryEffectFree(op)) {
// Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
// as long as they don't escape the loop (which has been checked above).
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index bfd1720959861..95cf8f3a45f3f 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -272,19 +272,20 @@ func.func @nested_min_max(%m: memref<?xf32>, %lb0: index,
// Test in the presence of locally allocated memrefs.
-// CHECK: func @local_alloc
+// CHECK-LABEL: func @local_alloc
func.func @local_alloc() {
%cst = arith.constant 0.0 : f32
affine.for %i = 0 to 100 {
%m = memref.alloc() : memref<1xf32>
%ma = memref.alloca() : memref<1xf32>
affine.store %cst, %m[0] : memref<1xf32>
+ memref.dealloc %m : memref<1xf32>
}
// CHECK: affine.parallel
return
}
-// CHECK: func @local_alloc_cast
+// CHECK-LABEL: func @local_alloc_cast
func.func @local_alloc_cast() {
%cst = arith.constant 0.0 : f32
affine.for %i = 0 to 100 {
@@ -300,6 +301,7 @@ func.func @local_alloc_cast() {
affine.for %j = 0 to 8 {
affine.store %cst, %r[%j, %j] : memref<8x16xf32>
}
+ memref.dealloc %m : memref<128xf32>
}
// CHECK: affine.parallel
// CHECK: affine.parallel
@@ -313,6 +315,21 @@ func.func @local_alloc_cast() {
return
}
+// When memref.dealloc deallocates an out-of-loop allocation,
+// the loop should not be parallelized. This test is quite
+// artificial though.
+// CHECK-LABEL: func @local_dealloc
+func.func @local_dealloc() {
+ %cst = arith.constant 0.0 : f32
+ %m = memref.alloc() : memref<1xf32>
+ affine.for %i = 0 to 1 {
+ affine.store %cst, %m[%i] : memref<1xf32>
+ memref.dealloc %m : memref<1xf32>
+ }
+ // CHECK-NOT: affine.parallel
+ return
+}
+
// CHECK-LABEL: @iter_arg_memrefs
func.func @iter_arg_memrefs(%in: memref<10xf32>) {
%mi = memref.alloc() : memref<f32>
|
|
@llvm/pr-subscribers-mlir-affine Author: Slava Zakharin (vzakhari) Changes: If an operation deallocates memory allocated inside the loop, then it is as safe for parallelization as the allocation, which is already handled by `isLocallyDefined`. Full diff: https://github.com/llvm/llvm-project/pull/172388.diff 2 Files Affected:
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
index 3d1a73417d1ea..b126909439b88 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -134,6 +134,25 @@ static bool isLocallyDefined(Value v, Operation *enclosingOp) {
return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}
+/// Returns true if `op` has only Free memory effects on the values
+/// that are locally defined (i.e. they are allocated by operations
+/// nested within `enclosingOp`).
+static bool isDeallocationOfLocallyDefined(Operation *op,
+ Operation *enclosingOp) {
+ std::optional<SmallVector<MemoryEffects::EffectInstance>> effects =
+ getEffectsRecursively(op);
+ if (!effects)
+ return false;
+
+ for (const MemoryEffects::EffectInstance &effect : *effects) {
+ Value freed = effect.getValue();
+ if (!isa<MemoryEffects::Free>(effect.getEffect()) || !freed ||
+ !isLocallyDefined(freed, enclosingOp))
+ return false;
+ }
+ return true;
+}
+
bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
// Any memref-typed iteration arguments are treated as serializing.
if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
@@ -152,6 +171,7 @@ bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
loadAndStoreOps.push_back(op);
} else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
!hasSingleEffect<MemoryEffects::Allocate>(op) &&
+ !isDeallocationOfLocallyDefined(op, forOp) &&
!isMemoryEffectFree(op)) {
// Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
// as long as they don't escape the loop (which has been checked above).
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index bfd1720959861..95cf8f3a45f3f 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -272,19 +272,20 @@ func.func @nested_min_max(%m: memref<?xf32>, %lb0: index,
// Test in the presence of locally allocated memrefs.
-// CHECK: func @local_alloc
+// CHECK-LABEL: func @local_alloc
func.func @local_alloc() {
%cst = arith.constant 0.0 : f32
affine.for %i = 0 to 100 {
%m = memref.alloc() : memref<1xf32>
%ma = memref.alloca() : memref<1xf32>
affine.store %cst, %m[0] : memref<1xf32>
+ memref.dealloc %m : memref<1xf32>
}
// CHECK: affine.parallel
return
}
-// CHECK: func @local_alloc_cast
+// CHECK-LABEL: func @local_alloc_cast
func.func @local_alloc_cast() {
%cst = arith.constant 0.0 : f32
affine.for %i = 0 to 100 {
@@ -300,6 +301,7 @@ func.func @local_alloc_cast() {
affine.for %j = 0 to 8 {
affine.store %cst, %r[%j, %j] : memref<8x16xf32>
}
+ memref.dealloc %m : memref<128xf32>
}
// CHECK: affine.parallel
// CHECK: affine.parallel
@@ -313,6 +315,21 @@ func.func @local_alloc_cast() {
return
}
+// When memref.dealloc deallocates an out-of-loop allocation,
+// the loop should not be parallelized. This test is quite
+// artificial though.
+// CHECK-LABEL: func @local_dealloc
+func.func @local_dealloc() {
+ %cst = arith.constant 0.0 : f32
+ %m = memref.alloc() : memref<1xf32>
+ affine.for %i = 0 to 1 {
+ affine.store %cst, %m[%i] : memref<1xf32>
+ memref.dealloc %m : memref<1xf32>
+ }
+ // CHECK-NOT: affine.parallel
+ return
+}
+
// CHECK-LABEL: @iter_arg_memrefs
func.func @iter_arg_memrefs(%in: memref<10xf32>) {
%mi = memref.alloc() : memref<f32>
|
If an operation deallocates memory allocated inside the loop, then it is as safe for parallelization as the allocation, which is already handled by `isLocallyDefined`.