diff --git a/CLAUDE.md b/CLAUDE.md index b10a33b..08b345f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -87,7 +87,7 @@ uv run ruff format gpu_test/ - **Stack Type**: `!forth.stack` - untyped stack, programmer ensures type safety - **Operations**: All take stack as input and produce stack as output (except `forth.stack`) -- **Supported Words**: literals, `DUP DROP SWAP OVER ROT NIP TUCK PICK ROLL`, `+ - * / MOD`, `AND OR XOR NOT LSHIFT RSHIFT`, `= < > <> <= >= 0=`, `@ !`, `CELLS`, `IF ELSE THEN`, `BEGIN UNTIL`, `BEGIN WHILE REPEAT`, `DO LOOP I J K`, `TID-X/Y/Z BID-X/Y/Z BDIM-X/Y/Z GDIM-X/Y/Z GLOBAL-ID` (GPU indexing). +- **Supported Words**: literals, `DUP DROP SWAP OVER ROT NIP TUCK PICK ROLL`, `+ - * / MOD`, `AND OR XOR NOT LSHIFT RSHIFT`, `= < > <> <= >= 0=`, `@ !`, `CELLS`, `IF ELSE THEN`, `BEGIN UNTIL`, `BEGIN WHILE REPEAT`, `DO LOOP +LOOP I J K`, `LEAVE UNLOOP EXIT`, `TID-X/Y/Z BID-X/Y/Z BDIM-X/Y/Z GDIM-X/Y/Z GLOBAL-ID` (GPU indexing). - **Kernel Parameters**: Declared with `PARAM `, each becomes a `memref` function argument with `forth.param_name` attribute. 
Using a param name in code pushes its byte address onto the stack via `forth.param_ref` - **Conversion**: `!forth.stack` → `memref<256xi64>` with explicit stack pointer - **GPU**: Functions wrapped in `gpu.module`, `main` gets `gpu.kernel` attribute, configured with bare pointers for NVVM conversion diff --git a/gpu_test/test_kernels.py b/gpu_test/test_kernels.py index 5a99cfa..99ab192 100644 --- a/gpu_test/test_kernels.py +++ b/gpu_test/test_kernels.py @@ -154,6 +154,24 @@ def test_do_loop(kernel_runner: KernelRunner) -> None: assert result == [0, 1, 2, 3, 4] +def test_do_plus_loop(kernel_runner: KernelRunner) -> None: + """DO/+LOOP: write I values 0, 2, 4, 6, 8 to DATA[0..4].""" + result = kernel_runner.run( + forth_source=("PARAM DATA 256\n0\n10 0 DO\n I OVER CELLS DATA + !\n 1 +\n2 +LOOP\nDROP"), + output_count=5, + ) + assert result == [0, 2, 4, 6, 8] + + +def test_do_plus_loop_negative(kernel_runner: KernelRunner) -> None: + """DO/+LOOP with negative step: I runs from 10 down to 0 (inclusive); the first ten stores (10..1) are checked.""" + result = kernel_runner.run( + forth_source=("PARAM DATA 256\n0\n0 10 DO\n I OVER CELLS DATA + !\n 1 +\n-1 +LOOP\nDROP"), + output_count=10, + ) + assert result == [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + + def test_multi_while(kernel_runner: KernelRunner) -> None: """Multi-WHILE: two exit conditions from the same loop (interleaved CF). diff --git a/lib/Translation/ForthToMLIR/ForthToMLIR.cpp b/lib/Translation/ForthToMLIR/ForthToMLIR.cpp index edbe11f..fb992b2 100644 --- a/lib/Translation/ForthToMLIR/ForthToMLIR.cpp +++ b/lib/Translation/ForthToMLIR/ForthToMLIR.cpp @@ -397,6 +397,36 @@ std::pair ForthParser::emitPopFlag(Location loc, Value stack) { return {popFlag.getOutputStack(), popFlag.getFlag()}; } +void ForthParser::emitLoopEnd(Location loc, const LoopContext &ctx, Value step, + Value &stack) { + auto i64Type = builder.getI64Type(); + + // Load old counter, compute new = old + step, store. 
+ Value c0 = builder.create(loc, 0); + Value oldIdx = + builder.create(loc, ctx.counter, ValueRange{c0}); + Value newIdx = builder.create(loc, oldIdx, step); + builder.create(loc, newIdx, ctx.counter, ValueRange{c0}); + + // Crossing test: ((oldIdx - limit) XOR (newIdx - limit)) < 0 + // This correctly handles both positive and negative step values. + Value oldDiff = builder.create(loc, oldIdx, ctx.limit); + Value newDiff = builder.create(loc, newIdx, ctx.limit); + Value xorVal = builder.create(loc, oldDiff, newDiff); + Value zero = builder.create(loc, i64Type, + builder.getI64IntegerAttr(0)); + Value crossed = builder.create(loc, arith::CmpIPredicate::slt, + xorVal, zero); + + // If crossed → exit, otherwise → loop back to body. + builder.create(loc, crossed, ctx.exit, ValueRange{stack}, + ctx.body, ValueRange{stack}); + + // Continue after exit. + builder.setInsertionPointToStart(ctx.exit); + stack = ctx.exit->getArgument(0); +} + LogicalResult ForthParser::parseBody(Value &stack) { Type stackType = forth::StackType::get(context); @@ -644,27 +674,15 @@ LogicalResult ForthParser::parseBody(Value &stack) { Value c0 = builder.create(loc, 0); builder.create(loc, start, counter, ValueRange{c0}); - // Create check, body, and exit blocks. - auto *checkBlock = createStackBlock(parentRegion, loc); + // Create body and exit blocks (post-test loop: always enters once). auto *bodyBlock = createStackBlock(parentRegion, loc); auto *exitBlock = createStackBlock(parentRegion, loc); - // Branch to check. 
- builder.create(loc, checkBlock, ValueRange{s2}); - - // --- Check block: load counter, compare < limit --- - builder.setInsertionPointToStart(checkBlock); - Value checkC0 = builder.create(loc, 0); - Value idx = - builder.create(loc, counter, ValueRange{checkC0}); - Value cond = builder.create( - loc, arith::CmpIPredicate::slt, idx, limit); - builder.create( - loc, cond, bodyBlock, ValueRange{checkBlock->getArgument(0)}, - exitBlock, ValueRange{checkBlock->getArgument(0)}); + // Branch directly to body. + builder.create(loc, bodyBlock, ValueRange{s2}); // Push loop context for I/J/K. - loopStack.push_back({counter, limit, checkBlock, exitBlock}); + loopStack.push_back({counter, limit, bodyBlock, exitBlock}); // Continue parsing in body. builder.setInsertionPointToStart(bodyBlock); @@ -673,29 +691,32 @@ LogicalResult ForthParser::parseBody(Value &stack) { //=== LOOP === } else if (word == "LOOP") { consume(); - auto i64Type = builder.getI64Type(); if (loopStack.empty()) { return emitError("LOOP without matching DO"); } auto ctx = loopStack.pop_back_val(); - - // Increment counter: load, add 1, store. - Value c0 = builder.create(loc, 0); - Value idx = - builder.create(loc, ctx.counter, ValueRange{c0}); Value one = builder.create( - loc, i64Type, builder.getI64IntegerAttr(1)); - Value next = builder.create(loc, idx, one); - builder.create(loc, next, ctx.counter, ValueRange{c0}); + loc, builder.getI64Type(), builder.getI64IntegerAttr(1)); + emitLoopEnd(loc, ctx, one, stack); - // Branch back to check. - builder.create(loc, ctx.check, ValueRange{stack}); + //=== +LOOP === + } else if (word == "+LOOP") { + consume(); - // Continue after exit. - builder.setInsertionPointToStart(ctx.exit); - stack = ctx.exit->getArgument(0); + if (loopStack.empty()) { + return emitError("+LOOP without matching DO"); + } + + auto ctx = loopStack.pop_back_val(); + + // Pop step from data stack. 
+ auto popOp = builder.create( + loc, forth::StackType::get(context), builder.getI64Type(), stack); + stack = popOp.getOutputStack(); + Value step = popOp.getValue(); + emitLoopEnd(loc, ctx, step, stack); //=== Normal word === } else { diff --git a/lib/Translation/ForthToMLIR/ForthToMLIR.h b/lib/Translation/ForthToMLIR/ForthToMLIR.h index 264175b..c860dd2 100644 --- a/lib/Translation/ForthToMLIR/ForthToMLIR.h +++ b/lib/Translation/ForthToMLIR/ForthToMLIR.h @@ -92,7 +92,7 @@ class ForthParser { struct LoopContext { Value counter; // memref<1xi64> alloca for the loop counter Value limit; // i64 loop limit - Block *check; // condition check block + Block *body; // loop body block Block *exit; // loop exit block }; SmallVector loopStack; @@ -127,6 +127,11 @@ class ForthParser { /// Parse a sequence of Forth operations, handling control flow inline. LogicalResult parseBody(Value &stack); + /// Emit the common loop-end logic for LOOP and +LOOP: + /// load counter, add step, store, crossing test, cond_br to exit or body. + void emitLoopEnd(Location loc, const LoopContext &ctx, Value step, + Value &stack); + /// Parse a user-defined word definition. 
LogicalResult parseWordDefinition(); }; diff --git a/test/Conversion/ForthToMemRef/do-loop.mlir b/test/Conversion/ForthToMemRef/do-loop.mlir index 62fa347..034807a 100644 --- a/test/Conversion/ForthToMemRef/do-loop.mlir +++ b/test/Conversion/ForthToMemRef/do-loop.mlir @@ -1,6 +1,6 @@ // RUN: %warpforth-opt --convert-forth-to-memref %s | %FileCheck %s -// Test: DO...LOOP with I conversion to memref with CF-based control flow +// Test: DO...LOOP with I conversion to memref with post-test crossing check // Forth: 10 0 DO I LOOP // CHECK-LABEL: func.func private @main @@ -23,24 +23,20 @@ // CHECK: memref.store %{{.*}}, %[[COUNTER]] // CHECK: cf.br ^bb1 -// Loop header: load counter, compare < limit, cond_br +// Loop body: push I (load counter, push to stack), crossing test // CHECK: ^bb1(%{{.*}}: memref<256xi64>, %{{.*}}: index): // CHECK: memref.load %[[COUNTER]] -// CHECK: arith.cmpi slt -// CHECK: cf.cond_br %{{.*}}, ^bb2(%{{.*}}: memref<256xi64>, index), ^bb3(%{{.*}}: memref<256xi64>, index) - -// Loop body: push I (load counter, push to stack), increment counter -// CHECK: ^bb2(%{{.*}}: memref<256xi64>, %{{.*}}: index): -// CHECK: memref.load %[[COUNTER]] // CHECK: memref.store -// CHECK: memref.load %[[COUNTER]] -// CHECK: arith.constant 1 : i64 // CHECK: arith.addi // CHECK: memref.store %{{.*}}, %[[COUNTER]] -// CHECK: cf.br ^bb1 +// CHECK: arith.subi +// CHECK: arith.subi +// CHECK: arith.xori +// CHECK: arith.cmpi slt +// CHECK: cf.cond_br // Exit block -// CHECK: ^bb3(%{{.*}}: memref<256xi64>, %{{.*}}: index): +// CHECK: ^bb2(%{{.*}}: memref<256xi64>, %{{.*}}: index): // CHECK: return module { @@ -57,19 +53,19 @@ module { ^bb1(%3: !forth.stack): %c0_2 = arith.constant 0 : index %4 = memref.load %alloca[%c0_2] : memref<1xi64> - %5 = arith.cmpi slt, %4, %value_1 : i64 - cf.cond_br %5, ^bb2(%3 : !forth.stack), ^bb3(%3 : !forth.stack) - ^bb2(%6: !forth.stack): - %c0_3 = arith.constant 0 : index - %7 = memref.load %alloca[%c0_3] : memref<1xi64> - %8 = 
forth.push_value %6, %7 : !forth.stack, i64 -> !forth.stack - %c0_4 = arith.constant 0 : index - %9 = memref.load %alloca[%c0_4] : memref<1xi64> + %5 = forth.push_value %3, %4 : !forth.stack, i64 -> !forth.stack %c1_i64 = arith.constant 1 : i64 - %10 = arith.addi %9, %c1_i64 : i64 - memref.store %10, %alloca[%c0_4] : memref<1xi64> - cf.br ^bb1(%8 : !forth.stack) - ^bb3(%11: !forth.stack): + %c0_3 = arith.constant 0 : index + %6 = memref.load %alloca[%c0_3] : memref<1xi64> + %7 = arith.addi %6, %c1_i64 : i64 + memref.store %7, %alloca[%c0_3] : memref<1xi64> + %8 = arith.subi %6, %value_1 : i64 + %9 = arith.subi %7, %value_1 : i64 + %10 = arith.xori %8, %9 : i64 + %c0_i64 = arith.constant 0 : i64 + %11 = arith.cmpi slt, %10, %c0_i64 : i64 + cf.cond_br %11, ^bb2(%5 : !forth.stack), ^bb1(%5 : !forth.stack) + ^bb2(%12: !forth.stack): return } } diff --git a/test/Conversion/ForthToMemRef/leave.mlir b/test/Conversion/ForthToMemRef/leave.mlir index 60f4979..2c93ddc 100644 --- a/test/Conversion/ForthToMemRef/leave.mlir +++ b/test/Conversion/ForthToMemRef/leave.mlir @@ -7,10 +7,8 @@ // CHECK: %[[STACK:.*]] = memref.alloca() : memref<256xi64> // CHECK: cf.br ^bb1(%[[STACK]], %{{.*}} : memref<256xi64>, index) // CHECK: ^bb1(%{{.*}}: memref<256xi64>, %{{.*}}: index): -// CHECK: cf.cond_br %{{.*}}, ^bb2(%{{.*}}: memref<256xi64>, index), ^bb3(%{{.*}}: memref<256xi64>, index) +// CHECK: cf.cond_br %true, ^bb2(%{{.*}}: memref<256xi64>, index), ^bb3(%{{.*}}: memref<256xi64>, index) // CHECK: ^bb2(%{{.*}}: memref<256xi64>, %{{.*}}: index): -// CHECK-NEXT: cf.br ^bb3(%{{.*}}: memref<256xi64>, index) -// CHECK: ^bb3(%{{.*}}: memref<256xi64>, %{{.*}}: index): // CHECK: return module { @@ -25,13 +23,21 @@ module { memref.store %value, %alloca[%c0] : memref<1xi64> cf.br ^bb1(%output_stack_0 : !forth.stack) ^bb1(%3: !forth.stack): - %c0_2 = arith.constant 0 : index - %4 = memref.load %alloca[%c0_2] : memref<1xi64> - %5 = arith.cmpi slt, %4, %value_1 : i64 - cf.cond_br %5, ^bb2(%3 : 
!forth.stack), ^bb3(%3 : !forth.stack) - ^bb2(%6: !forth.stack): - cf.br ^bb3(%6 : !forth.stack) - ^bb3(%7: !forth.stack): + %true = arith.constant true + cf.cond_br %true, ^bb2(%3 : !forth.stack), ^bb3(%3 : !forth.stack) + ^bb2(%4: !forth.stack): return + ^bb3(%5: !forth.stack): + %c1_i64 = arith.constant 1 : i64 + %c0_2 = arith.constant 0 : index + %6 = memref.load %alloca[%c0_2] : memref<1xi64> + %7 = arith.addi %6, %c1_i64 : i64 + memref.store %7, %alloca[%c0_2] : memref<1xi64> + %8 = arith.subi %6, %value_1 : i64 + %9 = arith.subi %7, %value_1 : i64 + %10 = arith.xori %8, %9 : i64 + %c0_i64 = arith.constant 0 : i64 + %11 = arith.cmpi slt, %10, %c0_i64 : i64 + cf.cond_br %11, ^bb2(%5 : !forth.stack), ^bb1(%5 : !forth.stack) } } diff --git a/test/Pipeline/nested-control-flow.forth b/test/Pipeline/nested-control-flow.forth index 3e11155..41a520b 100644 --- a/test/Pipeline/nested-control-flow.forth +++ b/test/Pipeline/nested-control-flow.forth @@ -8,8 +8,7 @@ \ MID: gpu.module @warpforth_module \ MID: gpu.func @main(%arg0: memref<4xi64> {forth.param_name = "DATA"}) kernel \ MID: cf.br -\ MID: cf.cond_br -\ MID: gpu.return +\ MID: arith.xori PARAM DATA 4 3 0 DO 4 0 DO J I + LOOP LOOP DATA 0 CELLS + ! diff --git a/test/Pipeline/plus-loop-negative.forth b/test/Pipeline/plus-loop-negative.forth new file mode 100644 index 0000000..50afe63 --- /dev/null +++ b/test/Pipeline/plus-loop-negative.forth @@ -0,0 +1,7 @@ +\ RUN: %warpforth-translate --forth-to-mlir %s | %warpforth-opt --warpforth-pipeline | %FileCheck %s + +\ Verify that +LOOP with negative step through the full pipeline produces a gpu.binary +\ CHECK: gpu.binary @warpforth_module + +PARAM DATA 4 +0 10 DO I DATA 0 CELLS + ! 
-1 +LOOP diff --git a/test/Pipeline/plus-loop.forth b/test/Pipeline/plus-loop.forth new file mode 100644 index 0000000..6eb32ed --- /dev/null +++ b/test/Pipeline/plus-loop.forth @@ -0,0 +1,7 @@ +\ RUN: %warpforth-translate --forth-to-mlir %s | %warpforth-opt --warpforth-pipeline | %FileCheck %s + +\ Verify that +LOOP through the full pipeline produces a gpu.binary +\ CHECK: gpu.binary @warpforth_module + +PARAM DATA 4 +10 0 DO I DATA 0 CELLS + ! 2 +LOOP diff --git a/test/Translation/Forth/do-loop.forth b/test/Translation/Forth/do-loop.forth index 4a4a7ff..4c0b9bc 100644 --- a/test/Translation/Forth/do-loop.forth +++ b/test/Translation/Forth/do-loop.forth @@ -1,6 +1,6 @@ \ RUN: %warpforth-translate --forth-to-mlir %s | %FileCheck %s -\ Verify DO/LOOP generates loop counter with memref.alloca, pop, cmpi, cond_br +\ Verify DO/LOOP generates post-test loop with crossing test \ CHECK: %[[S0:.*]] = forth.stack !forth.stack \ CHECK-NEXT: %[[S1:.*]] = forth.literal %[[S0]] 10 : !forth.stack -> !forth.stack @@ -14,18 +14,18 @@ \ CHECK: ^bb1(%[[B1:.*]]: !forth.stack): \ CHECK-NEXT: %[[C0_2:.*]] = arith.constant 0 : index \ CHECK-NEXT: %[[LOAD1:.*]] = memref.load %[[ALLOCA]][%[[C0_2]]] : memref<1xi64> -\ CHECK-NEXT: %[[CMP:.*]] = arith.cmpi slt, %[[LOAD1]], %[[LIM]] : i64 -\ CHECK-NEXT: cf.cond_br %[[CMP]], ^bb2(%[[B1]] : !forth.stack), ^bb3(%[[B1]] : !forth.stack) -\ CHECK: ^bb2(%[[B2:.*]]: !forth.stack): -\ CHECK-NEXT: %[[C0_3:.*]] = arith.constant 0 : index -\ CHECK-NEXT: %[[LOAD2:.*]] = memref.load %[[ALLOCA]][%[[C0_3]]] : memref<1xi64> -\ CHECK-NEXT: %[[PUSH:.*]] = forth.push_value %[[B2]], %[[LOAD2]] : !forth.stack, i64 -> !forth.stack -\ CHECK-NEXT: %[[C0_4:.*]] = arith.constant 0 : index -\ CHECK-NEXT: %[[LOAD3:.*]] = memref.load %[[ALLOCA]][%[[C0_4]]] : memref<1xi64> +\ CHECK-NEXT: %[[PUSH:.*]] = forth.push_value %[[B1]], %[[LOAD1]] : !forth.stack, i64 -> !forth.stack \ CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : i64 -\ CHECK-NEXT: %[[ADDI:.*]] = arith.addi 
%[[LOAD3]], %[[C1]] : i64 -\ CHECK-NEXT: memref.store %[[ADDI]], %[[ALLOCA]][%[[C0_4]]] : memref<1xi64> -\ CHECK-NEXT: cf.br ^bb1(%[[PUSH]] : !forth.stack) -\ CHECK: ^bb3(%[[B3:.*]]: !forth.stack): +\ CHECK-NEXT: %[[C0_3:.*]] = arith.constant 0 : index +\ CHECK-NEXT: %[[OLD:.*]] = memref.load %[[ALLOCA]][%[[C0_3]]] : memref<1xi64> +\ CHECK-NEXT: %[[NEW:.*]] = arith.addi %[[OLD]], %[[C1]] : i64 +\ CHECK-NEXT: memref.store %[[NEW]], %[[ALLOCA]][%[[C0_3]]] : memref<1xi64> +\ CHECK-NEXT: %[[D1:.*]] = arith.subi %[[OLD]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[D2:.*]] = arith.subi %[[NEW]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[XOR:.*]] = arith.xori %[[D1]], %[[D2]] : i64 +\ CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 +\ CHECK-NEXT: %[[CROSSED:.*]] = arith.cmpi slt, %[[XOR]], %[[ZERO]] : i64 +\ CHECK-NEXT: cf.cond_br %[[CROSSED]], ^bb2(%[[PUSH]] : !forth.stack), ^bb1(%[[PUSH]] : !forth.stack) +\ CHECK: ^bb2(%[[B2:.*]]: !forth.stack): \ CHECK-NEXT: return 10 0 DO I LOOP diff --git a/test/Translation/Forth/leave-conditional.forth b/test/Translation/Forth/leave-conditional.forth index 1f3f327..5d56a65 100644 --- a/test/Translation/Forth/leave-conditional.forth +++ b/test/Translation/Forth/leave-conditional.forth @@ -2,19 +2,31 @@ \ Verify conditional LEAVE preserves the loop backedge for non-LEAVE paths. 
+\ Branch directly to body (post-test loop) \ CHECK: cf.br ^bb1(%{{.*}} : !forth.stack) -\ CHECK: ^bb1(%[[CHK:.*]]: !forth.stack): -\ CHECK: cf.cond_br %{{.*}}, ^bb2(%[[CHK]] : !forth.stack), ^bb[[EXIT:[0-9]+]](%[[CHK]] : !forth.stack) -\ CHECK: ^bb2(%[[B:.*]]: !forth.stack): -\ CHECK: cf.cond_br %{{.*}}, ^bb[[LEAVE:[0-9]+]](%{{.*}} : !forth.stack), ^bb[[JOIN:[0-9]+]](%{{.*}} : !forth.stack) -\ CHECK: ^bb[[EXIT]](%{{.*}}: !forth.stack): + +\ Body: I 5 = IF → cond_br to LEAVE or THEN merge +\ CHECK: ^bb1(%[[B:.*]]: !forth.stack): +\ CHECK: forth.pop_flag +\ CHECK-NEXT: cf.cond_br %{{[^,]*}}, ^bb[[LEAVE:[0-9]+]](%{{[^)]*}} : !forth.stack), ^bb[[JOIN:[0-9]+]](%{{[^)]*}} : !forth.stack) + +\ Exit: return +\ CHECK: ^bb[[EXIT:[0-9]+]](%{{.*}}: !forth.stack): \ CHECK: return + +\ LEAVE branch: unconditional jump to exit \ CHECK: ^bb[[LEAVE]](%{{.*}}: !forth.stack): -\ CHECK: cf.cond_br %{{.*}}, ^bb[[EXIT]](%{{.*}} : !forth.stack), ^bb[[DEAD:[0-9]+]](%{{.*}} : !forth.stack) +\ CHECK: cf.cond_br %true, ^bb[[EXIT]](%{{.*}} : !forth.stack), ^bb[[DEAD:[0-9]+]](%{{.*}} : !forth.stack) + +\ Join (THEN merge): 1 DROP, crossing test, loop back to body or exit \ CHECK: ^bb[[JOIN]](%{{.*}}: !forth.stack): -\ CHECK: cf.br ^bb1(%{{.*}} : !forth.stack) +\ CHECK: arith.xori +\ CHECK: arith.cmpi slt +\ CHECK: cf.cond_br + +\ Dead block from LEAVE \ CHECK: ^bb[[DEAD]](%{{.*}}: !forth.stack): -\ CHECK: cf.br ^bb[[JOIN]](%{{.*}} : !forth.stack) +\ CHECK: cf.br ^bb[[JOIN]] 10 0 DO I 5 = IF LEAVE THEN diff --git a/test/Translation/Forth/leave.forth b/test/Translation/Forth/leave.forth index 0a185c0..242b3c4 100644 --- a/test/Translation/Forth/leave.forth +++ b/test/Translation/Forth/leave.forth @@ -7,10 +7,8 @@ \ CHECK-NEXT: %[[S2:.*]] = forth.literal %[[S1]] 0 : !forth.stack -> !forth.stack \ CHECK: cf.br ^bb1(%{{.*}} : !forth.stack) \ CHECK: ^bb1(%[[B1:.*]]: !forth.stack): -\ CHECK: cf.cond_br %{{.*}}, ^bb2(%[[B1]] : !forth.stack), ^bb[[EXIT:[0-9]+]](%[[B1]] : !forth.stack) -\ CHECK: 
^bb2(%[[B2:.*]]: !forth.stack): \ CHECK-NEXT: %[[TRUE:.*]] = arith.constant true -\ CHECK-NEXT: cf.cond_br %[[TRUE]], ^bb[[EXIT:[0-9]+]](%[[B2]] : !forth.stack), ^bb{{[0-9]+}}(%[[B2]] : !forth.stack) +\ CHECK-NEXT: cf.cond_br %[[TRUE]], ^bb[[EXIT:[0-9]+]](%[[B1]] : !forth.stack), ^bb{{[0-9]+}}(%[[B1]] : !forth.stack) \ CHECK: ^bb[[EXIT]](%[[B3:.*]]: !forth.stack): \ CHECK-NEXT: return diff --git a/test/Translation/Forth/nested-control-flow.forth b/test/Translation/Forth/nested-control-flow.forth index 2cbed2f..8cb108c 100644 --- a/test/Translation/Forth/nested-control-flow.forth +++ b/test/Translation/Forth/nested-control-flow.forth @@ -12,6 +12,7 @@ 1 IF 2 IF 3 THEN THEN \ === IF inside DO === +\ After IF/THEN merge, set up DO loop: 10 0 DO \ CHECK: ^bb2(%[[B2:.*]]: !forth.stack): \ CHECK-NEXT: %[[L10:.*]] = forth.literal %[[B2]] 10 : !forth.stack -> !forth.stack \ CHECK-NEXT: %[[L0A:.*]] = forth.literal %[[L10]] 0 : !forth.stack -> !forth.stack @@ -30,43 +31,35 @@ \ CHECK: ^bb4(%[[B4:.*]]: !forth.stack): \ CHECK-NEXT: cf.br ^bb2(%[[B4]] : !forth.stack) -\ DO loop header: check index < limit +\ DO loop body (post-test: no check block): I 5 > IF I THEN \ CHECK: ^bb5(%[[B5:.*]]: !forth.stack): -\ CHECK: arith.cmpi slt -\ CHECK-NEXT: cf.cond_br %{{.*}}, ^bb6(%[[B5]] : !forth.stack), ^bb7(%[[B5]] : !forth.stack) - -\ DO loop body: I 5 > IF I THEN -\ CHECK: ^bb6(%[[B6:.*]]: !forth.stack): -\ CHECK: forth.push_value %[[B6]] +\ CHECK: forth.push_value %[[B5]] \ CHECK: forth.literal %{{.*}} 5 \ CHECK-NEXT: %{{.*}} = forth.gt \ CHECK: forth.pop_flag -\ CHECK-NEXT: cf.cond_br %{{.*}}, ^bb8(%{{.*}} : !forth.stack), ^bb9(%{{.*}} : !forth.stack) +\ CHECK-NEXT: cf.cond_br %{{[^,]*}}, ^bb7(%{{[^)]*}} : !forth.stack), ^bb8(%{{[^)]*}} : !forth.stack) \ === Nested DO with J === -\ After first DO loop exits: bb7 sets up nested DO (3 0 DO) -\ CHECK: ^bb7(%[[B7:.*]]: !forth.stack): -\ CHECK-NEXT: %{{.*}} = forth.literal %[[B7]] 3 +\ After first DO loop exits: sets up nested DO (3 0 
DO) +\ CHECK: ^bb6(%[[B6:.*]]: !forth.stack): +\ CHECK-NEXT: %{{.*}} = forth.literal %[[B6]] 3 3 0 DO 4 0 DO J I + LOOP LOOP \ IF I true branch: push loop index -\ CHECK: ^bb8(%[[B8:.*]]: !forth.stack): -\ CHECK: forth.push_value %[[B8]] -\ CHECK-NEXT: cf.br ^bb9 +\ CHECK: ^bb7(%[[B7:.*]]: !forth.stack): +\ CHECK: forth.push_value %[[B7]] +\ CHECK-NEXT: cf.br ^bb8 -\ Loop increment and back-edge -\ CHECK: ^bb9(%{{.*}}: !forth.stack): +\ Loop end with crossing test and back-edge +\ CHECK: ^bb8(%{{.*}}: !forth.stack): \ CHECK: arith.addi \ CHECK: memref.store -\ CHECK: cf.br ^bb5 - -\ Outer DO loop (3 0 DO) header -\ CHECK: ^bb10(%{{.*}}: !forth.stack): +\ CHECK: arith.xori \ CHECK: arith.cmpi slt \ CHECK: cf.cond_br -\ Inner DO setup (4 0 DO) -\ CHECK: ^bb11(%{{.*}}: !forth.stack): +\ Outer DO body (3 0 DO) with inner DO setup (4 0 DO) +\ CHECK: ^bb9(%{{.*}}: !forth.stack): \ CHECK: forth.literal %{{.*}} 4 \ CHECK: forth.literal %{{.*}} 0 \ CHECK: forth.pop @@ -75,78 +68,75 @@ \ === Triple-nested DO with K === \ After nested DO exits: sets up triple-nested DO (2 0 DO) -\ CHECK: ^bb12(%{{.*}}: !forth.stack): +\ CHECK: ^bb10(%{{.*}}: !forth.stack): \ CHECK: forth.literal %{{.*}} 2 2 0 DO 2 0 DO 2 0 DO K J I + + LOOP LOOP LOOP -\ Inner loop of J I + (bb13 header, bb14 body) -\ CHECK: ^bb13(%{{.*}}: !forth.stack): -\ CHECK: arith.cmpi slt -\ CHECK: cf.cond_br - -\ J I + body -\ CHECK: ^bb14(%{{.*}}: !forth.stack): +\ Inner loop of J I + (bb11 body) +\ CHECK: ^bb11(%{{.*}}: !forth.stack): \ CHECK: forth.push_value \ CHECK: forth.push_value \ CHECK: forth.add -\ Outer loop increment (bb15) -\ CHECK: ^bb15(%{{.*}}: !forth.stack): -\ CHECK: arith.addi -\ CHECK: cf.br ^bb10 +\ Inner loop crossing test +\ CHECK: arith.xori +\ CHECK: arith.cmpi slt +\ CHECK: cf.cond_br -\ Triple-nested outer loop header (bb16) -\ CHECK: ^bb16(%{{.*}}: !forth.stack): +\ Outer loop increment (bb12) +\ CHECK: ^bb12(%{{.*}}: !forth.stack): +\ CHECK: arith.addi +\ CHECK: arith.xori \ CHECK: 
arith.cmpi slt \ CHECK: cf.cond_br -\ Triple-nested middle loop setup (bb17) -\ CHECK: ^bb17(%{{.*}}: !forth.stack): +\ Triple-nested outer loop body (bb13) +\ CHECK: ^bb13(%{{.*}}: !forth.stack): \ CHECK: forth.literal %{{.*}} 2 \ CHECK: forth.literal %{{.*}} 0 \ === BEGIN/WHILE inside IF === \ After triple-nested exits: 5 IF BEGIN DUP WHILE 1 - REPEAT THEN -\ CHECK: ^bb18(%{{.*}}: !forth.stack): +\ CHECK: ^bb14(%{{.*}}: !forth.stack): \ CHECK: forth.literal %{{.*}} 5 \ CHECK: forth.pop_flag \ CHECK-NEXT: cf.cond_br 5 IF BEGIN DUP WHILE 1 - REPEAT THEN -\ bb25: IF true branch -> jump to begin/while header -\ CHECK: ^bb25(%{{.*}}: !forth.stack): -\ CHECK-NEXT: cf.br ^bb27 +\ bb19: IF true branch -> jump to begin/while header +\ CHECK: ^bb19(%{{.*}}: !forth.stack): +\ CHECK-NEXT: cf.br ^bb21 -\ bb26: IF false branch (and WHILE exit) -> jump to BEGIN/UNTIL -\ CHECK: ^bb26(%{{.*}}: !forth.stack): -\ CHECK-NEXT: cf.br ^bb30 +\ bb20: IF false branch (and WHILE exit) -> jump to BEGIN/UNTIL +\ CHECK: ^bb20(%{{.*}}: !forth.stack): +\ CHECK-NEXT: cf.br ^bb24 \ WHILE condition: DUP + pop_flag -\ CHECK: ^bb27(%{{.*}}: !forth.stack): +\ CHECK: ^bb21(%{{.*}}: !forth.stack): \ CHECK: forth.dup \ CHECK: forth.pop_flag \ CHECK-NEXT: cf.cond_br \ WHILE body: 1 - -\ CHECK: ^bb28(%[[B28:.*]]: !forth.stack): -\ CHECK-NEXT: %{{.*}} = forth.literal %[[B28]] 1 +\ CHECK: ^bb22(%[[B22:.*]]: !forth.stack): +\ CHECK-NEXT: %{{.*}} = forth.literal %[[B22]] 1 \ CHECK-NEXT: %{{.*}} = forth.sub \ === IF inside BEGIN/UNTIL === \ BEGIN/UNTIL header: DUP 10 < -\ CHECK: ^bb30(%{{.*}}: !forth.stack): +\ CHECK: ^bb24(%{{.*}}: !forth.stack): \ CHECK: forth.dup \ CHECK: forth.literal %{{.*}} 10 \ CHECK-NEXT: %{{.*}} = forth.lt BEGIN DUP 10 < IF 1 + THEN DUP 20 = UNTIL \ IF true branch: 1 + -\ CHECK: ^bb31(%[[B31:.*]]: !forth.stack): -\ CHECK-NEXT: %{{.*}} = forth.literal %[[B31]] 1 +\ CHECK: ^bb25(%[[B25:.*]]: !forth.stack): +\ CHECK-NEXT: %{{.*}} = forth.literal %[[B25]] 1 \ CHECK-NEXT: %{{.*}} = 
forth.add \ UNTIL condition: DUP 20 = -\ CHECK: ^bb32(%{{.*}}: !forth.stack): +\ CHECK: ^bb26(%{{.*}}: !forth.stack): \ CHECK: forth.dup \ CHECK: forth.literal %{{.*}} 20 \ CHECK-NEXT: %{{.*}} = forth.eq diff --git a/test/Translation/Forth/plus-loop-negative.forth b/test/Translation/Forth/plus-loop-negative.forth new file mode 100644 index 0000000..233af08 --- /dev/null +++ b/test/Translation/Forth/plus-loop-negative.forth @@ -0,0 +1,24 @@ +\ RUN: %warpforth-translate --forth-to-mlir %s | %FileCheck %s + +\ Verify +LOOP with negative step uses crossing test (handles negative direction) + +\ CHECK: %[[S0:.*]] = forth.stack !forth.stack +\ CHECK-NEXT: %[[S1:.*]] = forth.literal %[[S0]] 0 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[S2:.*]] = forth.literal %[[S1]] 10 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[OS:.*]], %[[VAL:.*]] = forth.pop %[[S2]] : !forth.stack -> !forth.stack, i64 +\ CHECK-NEXT: %[[OS2:.*]], %[[LIM:.*]] = forth.pop %[[OS]] : !forth.stack -> !forth.stack, i64 +\ CHECK: cf.br ^bb1(%[[OS2]] : !forth.stack) +\ CHECK: ^bb1(%[[B1:.*]]: !forth.stack): +\ CHECK: %[[STEP_S:.*]] = forth.literal %[[B1]] -1 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[POP_S:.*]], %[[STEP:.*]] = forth.pop %[[STEP_S]] : !forth.stack -> !forth.stack, i64 +\ CHECK: %[[OLD:.*]] = memref.load +\ CHECK: %[[NEW:.*]] = arith.addi %[[OLD]], %[[STEP]] : i64 +\ CHECK: %[[D1:.*]] = arith.subi %[[OLD]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[D2:.*]] = arith.subi %[[NEW]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[XOR:.*]] = arith.xori %[[D1]], %[[D2]] : i64 +\ CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 +\ CHECK-NEXT: %[[CROSSED:.*]] = arith.cmpi slt, %[[XOR]], %[[ZERO]] : i64 +\ CHECK-NEXT: cf.cond_br %[[CROSSED]], ^bb2(%[[POP_S]] : !forth.stack), ^bb1(%[[POP_S]] : !forth.stack) +\ CHECK: ^bb2(%{{.*}}: !forth.stack): +\ CHECK-NEXT: return +0 10 DO -1 +LOOP diff --git a/test/Translation/Forth/plus-loop-without-do-error.forth b/test/Translation/Forth/plus-loop-without-do-error.forth 
new file mode 100644 index 0000000..06bc674 --- /dev/null +++ b/test/Translation/Forth/plus-loop-without-do-error.forth @@ -0,0 +1,3 @@ +\ RUN: %not %warpforth-translate --forth-to-mlir %s 2>&1 | %FileCheck %s +\ CHECK: +LOOP without matching DO ++LOOP diff --git a/test/Translation/Forth/plus-loop.forth b/test/Translation/Forth/plus-loop.forth new file mode 100644 index 0000000..ac7f4ac --- /dev/null +++ b/test/Translation/Forth/plus-loop.forth @@ -0,0 +1,29 @@ +\ RUN: %warpforth-translate --forth-to-mlir %s | %FileCheck %s + +\ Verify +LOOP pops step from data stack and uses it as increment + +\ CHECK: %[[S0:.*]] = forth.stack !forth.stack +\ CHECK-NEXT: %[[S1:.*]] = forth.literal %[[S0]] 10 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[S2:.*]] = forth.literal %[[S1]] 0 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[OS:.*]], %[[VAL:.*]] = forth.pop %[[S2]] : !forth.stack -> !forth.stack, i64 +\ CHECK-NEXT: %[[OS2:.*]], %[[LIM:.*]] = forth.pop %[[OS]] : !forth.stack -> !forth.stack, i64 +\ CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() : memref<1xi64> +\ CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index +\ CHECK-NEXT: memref.store %[[VAL]], %[[ALLOCA]][%[[C0]]] : memref<1xi64> +\ CHECK-NEXT: cf.br ^bb1(%[[OS2]] : !forth.stack) +\ CHECK: ^bb1(%[[B1:.*]]: !forth.stack): +\ CHECK: %[[STEP_S:.*]] = forth.literal %[[B1]] 2 : !forth.stack -> !forth.stack +\ CHECK-NEXT: %[[POP_S:.*]], %[[STEP:.*]] = forth.pop %[[STEP_S]] : !forth.stack -> !forth.stack, i64 +\ CHECK-NEXT: %[[C0_2:.*]] = arith.constant 0 : index +\ CHECK-NEXT: %[[OLD:.*]] = memref.load %[[ALLOCA]][%[[C0_2]]] : memref<1xi64> +\ CHECK-NEXT: %[[NEW:.*]] = arith.addi %[[OLD]], %[[STEP]] : i64 +\ CHECK-NEXT: memref.store %[[NEW]], %[[ALLOCA]][%[[C0_2]]] : memref<1xi64> +\ CHECK-NEXT: %[[D1:.*]] = arith.subi %[[OLD]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[D2:.*]] = arith.subi %[[NEW]], %[[LIM]] : i64 +\ CHECK-NEXT: %[[XOR:.*]] = arith.xori %[[D1]], %[[D2]] : i64 +\ CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 
+\ CHECK-NEXT: %[[CROSSED:.*]] = arith.cmpi slt, %[[XOR]], %[[ZERO]] : i64 +\ CHECK-NEXT: cf.cond_br %[[CROSSED]], ^bb2(%[[POP_S]] : !forth.stack), ^bb1(%[[POP_S]] : !forth.stack) +\ CHECK: ^bb2(%[[B2:.*]]: !forth.stack): +\ CHECK-NEXT: return +10 0 DO 2 +LOOP diff --git a/test/Translation/Forth/unloop-exit.forth b/test/Translation/Forth/unloop-exit.forth index 1718718..e5058a4 100644 --- a/test/Translation/Forth/unloop-exit.forth +++ b/test/Translation/Forth/unloop-exit.forth @@ -4,13 +4,11 @@ \ CHECK: func.func private @FIND_FIVE(%{{.*}}: !forth.stack) -> !forth.stack \ CHECK: memref.alloca -\ CHECK: cf.br ^bb[[#CHECK:]] -\ CHECK: ^bb[[#CHECK]](%{{.*}}: !forth.stack): -\ CHECK: cf.cond_br %{{.*}}, ^bb[[#BODY:]](%{{.*}}), ^bb[[#EXIT:]](%{{.*}}) +\ CHECK: cf.br ^bb[[#BODY:]] \ CHECK: ^bb[[#BODY]](%{{.*}}: !forth.stack): \ CHECK: forth.eq \ CHECK: cf.cond_br %{{.*}}, ^bb[[#THEN:]](%{{.*}}), ^bb[[#ENDIF:]](%{{.*}}) -\ CHECK: ^bb[[#EXIT]](%{{.*}}: !forth.stack): +\ CHECK: ^bb[[#EXIT:]](%{{.*}}: !forth.stack): \ CHECK: return \ CHECK: ^bb[[#THEN]](%[[T:.*]]: !forth.stack): \ CHECK: cf.cond_br %true, ^bb[[#RET:]](%[[T]]{{.*}})