Skip to content

Commit

Permalink
ssa: removes map use for block traversals (#2235)
Browse files Browse the repository at this point in the history
This removes the use of a map in basic block traversals.
As a result, overall compilation performance improves as shown below:

### Zig
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/stdlibs
                             │ old_zig.txt │            new_zig.txt            │
                             │   sec/op    │   sec/op    vs base               │
Zig/Compile/test-opt.wasm-10    4.438 ± 1%   3.778 ± 0%  -14.87% (p=0.002 n=6)
Zig/Run/test-opt.wasm-10        18.77 ± 1%   18.76 ± 0%        ~ (p=0.818 n=6)
Zig/Compile/test.wasm-10        5.083 ± 0%   4.673 ± 0%   -8.07% (p=0.002 n=6)
Zig/Run/test.wasm-10            19.27 ± 1%   19.30 ± 1%        ~ (p=0.699 n=6)
geomean                         9.504        8.941        -5.92%

                             │ old_zig.txt  │            new_zig.txt             │
                             │     B/op     │     B/op      vs base              │
Zig/Compile/test-opt.wasm-10   396.7Mi ± 0%   394.7Mi ± 0%  -0.51% (p=0.002 n=6)
Zig/Run/test-opt.wasm-10       741.7Mi ± 0%   741.7Mi ± 0%       ~ (p=0.900 n=6)
Zig/Compile/test.wasm-10       660.0Mi ± 0%   659.5Mi ± 0%  -0.08% (p=0.002 n=6)
Zig/Run/test.wasm-10           1.296Gi ± 0%   1.296Gi ± 0%       ~ (p=0.892 n=6)
geomean                        712.6Mi        711.5Mi       -0.15%

                             │ old_zig.txt │            new_zig.txt            │
                             │  allocs/op  │  allocs/op   vs base              │
Zig/Compile/test-opt.wasm-10   363.2k ± 0%   362.6k ± 0%  -0.17% (p=0.002 n=6)
Zig/Run/test-opt.wasm-10       51.58k ± 0%   51.58k ± 0%       ~ (p=0.933 n=6)
Zig/Compile/test.wasm-10       515.2k ± 0%   515.4k ± 0%       ~ (p=0.485 n=6)
Zig/Run/test.wasm-10           2.156M ± 0%   2.156M ± 0%       ~ (p=0.998 n=6)
geomean                        379.8k        379.7k       -0.03%
```

### wasip1

```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/stdlibs
                                            │ old_wasip1.txt │            new_wasip1.txt             │
                                            │     sec/op     │    sec/op      vs base                │
Wasip1/Compile/src_archive_tar.test-10            2.198 ± 1%    2.067 ± 1%    -5.96% (p=0.001 n=7)
Wasip1/Run/src_archive_tar.test-10               398.8m ± 0%   398.8m ± 0%         ~ (p=0.902 n=7)
Wasip1/Compile/src_bufio.test-10                  1.492 ± 0%    1.409 ± 1%    -5.57% (p=0.001 n=7)
Wasip1/Run/src_bufio.test-10                     120.5m ± 1%   121.0m ± 1%    +0.44% (p=0.017 n=7)
Wasip1/Compile/src_bytes.test-10                  1.543 ± 0%    1.454 ± 0%    -5.72% (p=0.001 n=7)
Wasip1/Run/src_bytes.test-10                     469.0m ± 1%   467.4m ± 1%         ~ (p=0.209 n=7)
Wasip1/Compile/src_context.test-10                1.664 ± 0%    1.564 ± 1%    -6.00% (p=0.001 n=7)
Wasip1/Run/src_context.test-10                   31.54m ± 1%   31.57m ± 0%         ~ (p=0.445 n=6+7)
Wasip1/Compile/src_encoding_ascii85.test-10                     1.261 ±  ∞ ¹
geomean                                          527.3m        565.9m         -2.92%
¹ need >= 6 samples for confidence interval at level 0.95

                                            │ old_wasip1.txt │             new_wasip1.txt             │
                                            │      B/op      │      B/op       vs base                │
Wasip1/Compile/src_archive_tar.test-10          93.44Mi ± 0%   93.17Mi ± 0%    -0.30% (p=0.001 n=7)
Wasip1/Run/src_archive_tar.test-10              286.0Mi ± 0%   286.0Mi ± 0%         ~ (p=0.593 n=7)
Wasip1/Compile/src_bufio.test-10                74.38Mi ± 0%   74.13Mi ± 0%    -0.35% (p=0.001 n=7)
Wasip1/Run/src_bufio.test-10                    105.3Mi ± 0%   105.3Mi ± 0%         ~ (p=0.780 n=7)
Wasip1/Compile/src_bytes.test-10                75.58Mi ± 0%   75.32Mi ± 0%    -0.35% (p=0.001 n=7)
Wasip1/Run/src_bytes.test-10                    605.0Mi ± 0%   605.0Mi ± 0%         ~ (p=0.331 n=7)
Wasip1/Compile/src_context.test-10              78.33Mi ± 0%   78.07Mi ± 0%    -0.33% (p=0.001 n=7)
Wasip1/Run/src_context.test-10                  71.52Mi ± 0%   71.52Mi ± 0%         ~ (p=1.000 n=6+7)
Wasip1/Compile/src_encoding_ascii85.test-10                    70.38Mi ±  ∞ ¹
geomean                                         123.4Mi        115.7Mi         -0.17%
¹ need >= 6 samples for confidence interval at level 0.95

                                            │ old_wasip1.txt │             new_wasip1.txt              │
                                            │   allocs/op    │   allocs/op    vs base                  │
Wasip1/Compile/src_archive_tar.test-10           265.4k ± 0%   265.0k ± 0%    -0.16% (p=0.001 n=7)
Wasip1/Run/src_archive_tar.test-10               7.831k ± 0%   7.830k ± 0%         ~ (p=1.000 n=7)
Wasip1/Compile/src_bufio.test-10                 195.6k ± 0%   195.4k ± 0%    -0.12% (p=0.001 n=7)
Wasip1/Run/src_bufio.test-10                     3.728k ± 0%   3.728k ± 0%         ~ (p=1.000 n=7)   ¹
Wasip1/Compile/src_bytes.test-10                 204.1k ± 0%   203.7k ± 0%    -0.20% (p=0.001 n=7)
Wasip1/Run/src_bytes.test-10                     6.377k ± 0%   6.377k ± 0%         ~ (p=1.000 n=7)
Wasip1/Compile/src_context.test-10               221.7k ± 0%   221.6k ± 0%    -0.06% (p=0.001 n=7)
Wasip1/Run/src_context.test-10                   3.814k ± 0%   3.814k ± 1%         ~ (p=0.140 n=6+7)
Wasip1/Compile/src_encoding_ascii85.test-10                    182.3k ±  ∞ ²
geomean                                          33.71k        40.64k         -0.07%
¹ all samples are equal
² need >= 6 samples for confidence interval at level 0.95
```


### TinyGo
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/stdlibs
                                      │ old_tinygo.txt │            new_tinygo.txt             │
                                      │     sec/op     │    sec/op      vs base                │
TinyGo/Compile/container_heap.test-10    410.8m ± 1%     399.8m ± 0%    -2.69% (p=0.001 n=7)
TinyGo/Run/container_heap.test-10        14.41m ± 0%     14.29m ± 2%    -0.77% (p=0.026 n=7)
TinyGo/Compile/container_list.test-10    410.5m ± 1%     398.1m ± 0%    -3.02% (p=0.001 n=7)
TinyGo/Run/container_list.test-10        14.27m ± 2%     14.16m ± 1%         ~ (p=0.073 n=7)
TinyGo/Compile/container_ring.test-10    403.7m ± 1%     392.5m ± 2%    -2.77% (p=0.001 n=7)
TinyGo/Run/container_ring.test-10        14.24m ± 0%     14.27m ± 1%         ~ (p=0.259 n=7)
TinyGo/Compile/crypto_des.test-10        418.8m ± 0%     408.1m ± 0%    -2.56% (p=0.001 n=7)
TinyGo/Run/crypto_des.test-10            18.23m ± 0%     18.17m ± 1%         ~ (p=0.456 n=7)
TinyGo/Compile/crypto_md5.test-10        417.3m ± 2%     406.1m ± 1%    -2.68% (p=0.001 n=7)
TinyGo/Run/crypto_md5.test-10            20.50m ± 0%     20.45m ± 1%         ~ (p=0.128 n=7)
TinyGo/Compile/crypto_rc4.test-10        402.2m ± 1%     390.5m ± 0%    -2.90% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10            160.8m ± 0%     161.0m ± 1%         ~ (p=1.000 n=7)
TinyGo/Compile/crypto_sha1.test-10       417.2m ± 1%     404.5m ± 1%    -3.04% (p=0.001 n=7)
TinyGo/Run/crypto_sha1.test-10           15.93m ± 1%     15.90m ± 1%         ~ (p=0.710 n=7)
TinyGo/Compile/crypto_sha256.test-10     423.4m ± 1%     412.4m ± 1%    -2.60% (p=0.001 n=7)
TinyGo/Run/crypto_sha256.test-10         16.16m ±  ∞ ¹   16.05m ±  ∞ ¹       ~ (p=0.381 n=2+5)
geomean                                  94.17m          92.70m         -1.56%
¹ need >= 6 samples for confidence interval at level 0.95

                                      │ old_tinygo.txt │             new_tinygo.txt             │
                                      │      B/op      │      B/op       vs base                │
TinyGo/Compile/container_heap.test-10   48.55Mi ± 0%     48.30Mi ± 0%    -0.52% (p=0.001 n=7)
TinyGo/Run/container_heap.test-10       16.63Mi ± 0%     16.63Mi ± 0%         ~ (p=0.557 n=7)
TinyGo/Compile/container_list.test-10   48.53Mi ± 0%     48.29Mi ± 0%    -0.51% (p=0.001 n=7)
TinyGo/Run/container_list.test-10       16.40Mi ± 0%     16.40Mi ± 0%         ~ (p=0.364 n=7)
TinyGo/Compile/container_ring.test-10   47.78Mi ± 0%     47.53Mi ± 0%    -0.52% (p=0.001 n=7)
TinyGo/Run/container_ring.test-10       16.30Mi ± 0%     16.30Mi ± 0%         ~ (p=0.128 n=7)
TinyGo/Compile/crypto_des.test-10       48.67Mi ± 0%     48.42Mi ± 0%    -0.51% (p=0.001 n=7)
TinyGo/Run/crypto_des.test-10           16.76Mi ± 0%     16.76Mi ± 0%         ~ (p=0.902 n=7)
TinyGo/Compile/crypto_md5.test-10       48.73Mi ± 0%     48.48Mi ± 0%    -0.51% (p=0.001 n=7)
TinyGo/Run/crypto_md5.test-10           44.97Mi ± 0%     44.97Mi ± 0%         ~ (p=0.402 n=7)
TinyGo/Compile/crypto_rc4.test-10       47.76Mi ± 0%     47.52Mi ± 0%    -0.51% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10           29.28Mi ± 0%     29.28Mi ± 0%         ~ (p=0.104 n=7)
TinyGo/Compile/crypto_sha1.test-10      48.97Mi ± 0%     48.72Mi ± 0%    -0.52% (p=0.001 n=7)
TinyGo/Run/crypto_sha1.test-10          17.44Mi ± 0%     17.44Mi ± 0%         ~ (p=1.000 n=7)
TinyGo/Compile/crypto_sha256.test-10    48.81Mi ± 0%     48.56Mi ± 0%    -0.51% (p=0.001 n=7)
TinyGo/Run/crypto_sha256.test-10        17.53Mi ±  ∞ ¹   17.53Mi ±  ∞ ¹       ~ (p=0.381 n=2+5)
geomean                                 31.45Mi          31.37Mi         -0.26%
¹ need >= 6 samples for confidence interval at level 0.95

                                      │ old_tinygo.txt │            new_tinygo.txt             │
                                      │   allocs/op    │   allocs/op    vs base                │
TinyGo/Compile/container_heap.test-10    83.67k ± 0%     83.46k ± 0%    -0.25% (p=0.011 n=7)
TinyGo/Run/container_heap.test-10        374.9k ± 0%     374.9k ± 0%         ~ (p=1.000 n=7)
TinyGo/Compile/container_list.test-10    83.34k ± 0%     83.19k ± 0%    -0.19% (p=0.002 n=7)
TinyGo/Run/container_list.test-10        370.0k ± 0%     370.0k ± 0%         ~ (p=0.674 n=7)
TinyGo/Compile/container_ring.test-10    83.26k ± 0%     83.08k ± 0%    -0.22% (p=0.004 n=7)
TinyGo/Run/container_ring.test-10        367.6k ± 0%     367.6k ± 0%         ~ (p=0.249 n=7)
TinyGo/Compile/crypto_des.test-10        83.68k ± 0%     83.53k ± 0%    -0.18% (p=0.004 n=7)
TinyGo/Run/crypto_des.test-10            378.1k ± 0%     378.1k ± 0%         ~ (p=0.437 n=7)
TinyGo/Compile/crypto_md5.test-10        83.86k ± 0%     83.67k ± 0%    -0.23% (p=0.001 n=7)
TinyGo/Run/crypto_md5.test-10            393.3k ± 0%     393.3k ± 0%         ~ (p=0.592 n=7)
TinyGo/Compile/crypto_rc4.test-10        83.32k ± 0%     83.20k ± 0%    -0.14% (p=0.011 n=7)
TinyGo/Run/crypto_rc4.test-10            367.1k ± 0%     367.1k ± 0%         ~ (p=0.102 n=7)
TinyGo/Compile/crypto_sha1.test-10       84.05k ± 0%     83.87k ± 0%    -0.21% (p=0.002 n=7)
TinyGo/Run/crypto_sha1.test-10           392.7k ± 0%     392.7k ± 0%         ~ (p=1.000 n=7)
TinyGo/Compile/crypto_sha256.test-10     83.86k ± 0%     83.67k ± 0%    -0.24% (p=0.001 n=7)
TinyGo/Run/crypto_sha256.test-10         394.5k ±  ∞ ¹   394.5k ±  ∞ ¹       ~ (p=0.952 n=2+5)
geomean                                  178.2k          178.0k         -0.10%
```

### wazero compiled as wasip1 binary

```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero
               │  old.txt   │             new.txt              │
               │   sec/op   │   sec/op    vs base              │
Compilation-10   2.413 ± 0%   2.258 ± 1%  -6.42% (p=0.001 n=7)

               │   old.txt    │              new.txt               │
               │     B/op     │     B/op      vs base              │
Compilation-10   339.9Mi ± 0%   337.7Mi ± 0%  -0.63% (p=0.001 n=7)

               │   old.txt   │              new.txt              │
               │  allocs/op  │  allocs/op   vs base              │
Compilation-10   603.9k ± 0%   602.4k ± 0%  -0.25% (p=0.001 n=7)
```


Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
  • Loading branch information
mathetake committed Jun 7, 2024
1 parent f47fd2e commit 747609b
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 45 deletions.
6 changes: 5 additions & 1 deletion internal/engine/wazevo/ssa/basic_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ type (

// reversePostOrder is used to sort all the blocks in the function in reverse post order.
// This is used in builder.LayoutBlocks.
reversePostOrder int
reversePostOrder int32

// visited is used during various traversals.
visited int32

// child and sibling are the ones in the dominator tree.
child, sibling *basicBlock
Expand Down Expand Up @@ -274,6 +277,7 @@ func resetBasicBlock(bb *basicBlock) {
bb.unknownValues = bb.unknownValues[:0]
bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions)
bb.reversePostOrder = -1
bb.visited = 0
bb.loopNestingForestChildren = basicBlockVarLengthNil
bb.loopHeader = false
bb.sibling = nil
Expand Down
7 changes: 0 additions & 7 deletions internal/engine/wazevo/ssa/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ func NewBuilder() Builder {
varLengthPool: wazevoapi.NewVarLengthPool[Value](),
valueAnnotations: make(map[ValueID]string),
signatures: make(map[SignatureID]*Signature),
blkVisited: make(map[*basicBlock]int),
valueIDAliases: make(map[ValueID]Value),
redundantParameterIndexToValue: make(map[int]Value),
returnBlk: &basicBlock{id: basicBlockIDReturnBlock},
Expand Down Expand Up @@ -189,7 +188,6 @@ type builder struct {

// The following are used for optimization passes/deterministic compilation.
instStack []*Instruction
blkVisited map[*basicBlock]int
valueIDToInstruction []*Instruction
blkStack []*basicBlock
blkStack2 []*basicBlock
Expand Down Expand Up @@ -266,11 +264,6 @@ func (b *builder) Init(s *Signature) {
b.blkStack2 = b.blkStack2[:0]
b.dominators = b.dominators[:0]
b.loopNestingForestRoots = b.loopNestingForestRoots[:0]

for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
blk := b.basicBlocksPool.View(i)
delete(b.blkVisited, blk)
}
b.basicBlocksPool.Reset()

for v := ValueID(0); v < b.nextValueID; v++ {
Expand Down
22 changes: 5 additions & 17 deletions internal/engine/wazevo/ssa/pass.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() {
// passDeadBlockEliminationOpt searches for unreachable blocks and sets the basicBlock.invalid flag to true on each of them.
func passDeadBlockEliminationOpt(b *builder) {
entryBlk := b.entryBlk()
b.clearBlkVisited()
b.blkStack = append(b.blkStack, entryBlk)
for len(b.blkStack) > 0 {
reachableBlk := b.blkStack[len(b.blkStack)-1]
b.blkStack = b.blkStack[:len(b.blkStack)-1]
b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass.
reachableBlk.visited = 1

if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
panic(fmt.Sprintf("%s is not sealed", reachableBlk))
Expand All @@ -94,17 +93,18 @@ func passDeadBlockEliminationOpt(b *builder) {
}

for _, succ := range reachableBlk.success {
if _, ok := b.blkVisited[succ]; ok {
if succ.visited == 1 {
continue
}
b.blkStack = append(b.blkStack, succ)
}
}

for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
if _, ok := b.blkVisited[blk]; !ok {
if blk.visited != 1 {
blk.invalid = true
}
blk.visited = 0
}
}

Expand All @@ -121,7 +121,7 @@ func passRedundantPhiEliminationOpt(b *builder) {
// the maximum number of iteration was 22, which seems to be acceptable but not that small either since the
// complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands.
// -- Note --
// Currently, each iteration can run in an order of blocks, but it empirically converges quickly in practice when
// Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when
// running on the reverse post-order. It might be possible to optimize this further by using the dominator tree.
for {
changed := false
Expand Down Expand Up @@ -355,18 +355,6 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) {
b.valueRefCounts[id]++
}

// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places.
//
// The keys are first copied into the b.blkStack2 scratch slice and then deleted one by one,
// so the map is never modified while it is being ranged over. b.blkStack2 is truncated to
// zero length both before and after use.
func (b *builder) clearBlkVisited() {
b.blkStack2 = b.blkStack2[:0]
// Collect every key currently stored in the map.
for key := range b.blkVisited {
b.blkStack2 = append(b.blkStack2, key)
}
// Delete each collected key, which leaves the map empty.
for _, blk := range b.blkStack2 {
delete(b.blkVisited, blk)
}
b.blkStack2 = b.blkStack2[:0]
}

// passNopInstElimination eliminates the instructions which are essentially no-ops.
func passNopInstElimination(b *builder) {
if int(b.nextValueID) >= len(b.valueIDToInstruction) {
Expand Down
14 changes: 6 additions & 8 deletions internal/engine/wazevo/ssa/pass_blk_layouts.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ import (
//
// This heuristic is done in maybeInvertBranches function.
func passLayoutBlocks(b *builder) {
b.clearBlkVisited()

// We might end up splitting critical edges which adds more basic blocks,
// so we store the currently existing basic blocks in nonSplitBlocks temporarily.
// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks.
Expand All @@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) {
for _, blk := range nonSplitBlocks {
for i := range blk.preds {
pred := blk.preds[i].blk
if _, ok := b.blkVisited[pred]; ok || !pred.Valid() {
if pred.visited == 1 || !pred.Valid() {
continue
} else if pred.reversePostOrder < blk.reversePostOrder {
// This means the edge is critical, and this pred is the trampoline and yet to be inserted.
// Split edge trampolines must come before the destination in reverse post-order.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred)
b.blkVisited[pred] = 0 // mark as inserted, the value is not used.
pred.visited = 1 // mark as inserted.
}
}

// Now that we've already added all the potential trampoline blocks incoming to this block,
// we can add this block itself.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk)
b.blkVisited[blk] = 0 // mark as inserted, the value is not used.
blk.visited = 1 // mark as inserted.

if len(blk.success) < 2 {
// There won't be critical edge originating from this block.
Expand Down Expand Up @@ -116,7 +114,7 @@ func passLayoutBlocks(b *builder) {
if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {
// This can be lowered as fallthrough at the end of the block.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
trampoline.visited = 1 // mark as inserted.
} else {
uninsertedTrampolines = append(uninsertedTrampolines, trampoline)
}
Expand All @@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) {
if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.
// This means the critical edge was backward, so we insert after the current block immediately.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
trampoline.visited = 1 // mark as inserted.
} // If the target is forward, we can wait to insert until the target is inserted.
}
uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block.
Expand All @@ -142,7 +140,7 @@ func passLayoutBlocks(b *builder) {

if wazevoapi.SSAValidationEnabled {
for _, trampoline := range trampolines {
if _, ok := b.blkVisited[trampoline]; !ok {
if trampoline.visited != 1 {
panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b))
}
trampoline.validate(b)
Expand Down
2 changes: 1 addition & 1 deletion internal/engine/wazevo/ssa/pass_blk_layouts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ func TestBuilder_splitCriticalEdge(t *testing.T) {
predInfo := &basicBlockPredecessorInfo{blk: predBlk, branch: originalBrz}
trampoline := b.splitCriticalEdge(predBlk, dummyBlk, predInfo)
require.NotNil(t, trampoline)
require.Equal(t, 100, trampoline.reversePostOrder)
require.Equal(t, int32(100), trampoline.reversePostOrder)

require.Equal(t, trampoline, predInfo.blk)
require.Equal(t, originalBrz, predInfo.branch)
Expand Down
23 changes: 12 additions & 11 deletions internal/engine/wazevo/ssa/pass_cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ import (
// At the end of the pass, this function also does the loop detection and sets the basicBlock.loop flag.
func passCalculateImmediateDominators(b *builder) {
reversePostOrder := b.reversePostOrderedBasicBlocks[:0]
exploreStack := b.blkStack[:0]
b.clearBlkVisited()

entryBlk := b.entryBlk()

// Store the reverse postorder from the entrypoint into reversePostOrder slice.
// This calculation of reverse postorder is not described in the paper,
Expand All @@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) {
// which is a reasonable assumption as long as SSA Builder is properly used.
//
// First we push blocks in postorder by iteratively visiting successors of the entry block.
exploreStack = append(exploreStack, entryBlk)
entryBlk := b.entryBlk()
exploreStack := append(b.blkStack[:0], entryBlk)
// These flags are used to track the state of the block in the DFS traversal.
// We use the visited field of each block to store the state.
const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2
b.blkVisited[entryBlk] = visitStateSeen
entryBlk.visited = visitStateSeen
for len(exploreStack) > 0 {
tail := len(exploreStack) - 1
blk := exploreStack[tail]
exploreStack = exploreStack[:tail]
switch b.blkVisited[blk] {
switch blk.visited {
case visitStateUnseen:
// This is likely a bug in the frontend.
panic("BUG: unsupported CFG")
Expand All @@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) {
if succ.ReturnBlock() || succ.invalid {
continue
}
if b.blkVisited[succ] == visitStateUnseen {
b.blkVisited[succ] = visitStateSeen
if succ.visited == visitStateUnseen {
succ.visited = visitStateSeen
exploreStack = append(exploreStack, succ)
}
}
// Finally, we could pop this block once we pop all of its successors.
b.blkVisited[blk] = visitStateDone
blk.visited = visitStateDone
case visitStateDone:
// Note: at this point we push blk in postorder despite its name.
reversePostOrder = append(reversePostOrder, blk)
default:
panic("BUG")
}
}
// At this point, reversePostOrder has postorder actually, so we reverse it.
Expand All @@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) {
}

for i, blk := range reversePostOrder {
blk.reversePostOrder = i
blk.reversePostOrder = int32(i)
}

// Reuse the dominators slice if possible from the previous computation of function.
Expand Down

0 comments on commit 747609b

Please sign in to comment.