spirt/cfg.rs
1//! Control-flow graph (CFG) abstractions and utilities.
2
3use crate::transform::{InnerInPlaceTransform as _, Transformer};
4use crate::{
5 AttrSet, Const, ConstDef, ConstKind, Context, ControlNode, ControlNodeDef, ControlNodeKind,
6 ControlNodeOutputDecl, ControlRegion, ControlRegionDef, EntityOrientedDenseMap, FuncDefBody,
7 FxIndexMap, FxIndexSet, SelectionKind, Type, TypeKind, Value, spv,
8};
9use itertools::{Either, Itertools};
10use smallvec::SmallVec;
11use std::mem;
12use std::rc::Rc;
13
14/// The control-flow graph (CFG) of a function, as control-flow instructions
15/// ([`ControlInst`]s) attached to [`ControlRegion`]s, as an "action on exit", i.e.
16/// "terminator" (while intra-region control-flow is strictly structured).
17#[derive(Clone, Default)]
18pub struct ControlFlowGraph {
19 pub control_inst_on_exit_from: EntityOrientedDenseMap<ControlRegion, ControlInst>,
20
21 // HACK(eddyb) this currently only comes from `OpLoopMerge`, and cannot be
22 // inferred (because implies too strong of an ownership/uniqueness notion).
23 pub loop_merge_to_loop_header: FxIndexMap<ControlRegion, ControlRegion>,
24}
25
26#[derive(Clone)]
27pub struct ControlInst {
28 pub attrs: AttrSet,
29
30 pub kind: ControlInstKind,
31
32 pub inputs: SmallVec<[Value; 2]>,
33
34 // FIXME(eddyb) change the inline size of this to fit most instructions.
35 pub targets: SmallVec<[ControlRegion; 4]>,
36
37 /// `target_inputs[region][input_idx]` is the [`Value`] that
38 /// `Value::ControlRegionInput { region, input_idx }` will get on entry,
39 /// where `region` must be appear at least once in `targets` - this is a
40 /// separate map instead of being part of `targets` because it reflects the
41 /// limitations of φ ("phi") nodes, which (unlike "basic block arguments")
42 /// cannot tell apart multiple edges with the same source and destination.
43 pub target_inputs: FxIndexMap<ControlRegion, SmallVec<[Value; 2]>>,
44}
45
46#[derive(Clone)]
47pub enum ControlInstKind {
48 /// Reaching this point in the control-flow is undefined behavior, e.g.:
49 /// * a `SelectBranch` case that's known to be impossible
50 /// * after a function call, where the function never returns
51 ///
52 /// Optimizations can take advantage of this information, to assume that any
53 /// necessary preconditions for reaching this point, are never met.
54 Unreachable,
55
56 /// Leave the current function, optionally returning a value.
57 Return,
58
59 /// Leave the current invocation, similar to returning from every function
60 /// call in the stack (up to and including the entry-point), but potentially
61 /// indicating a fatal error as well.
62 ExitInvocation(ExitInvocationKind),
63
64 /// Unconditional branch to a single target.
65 Branch,
66
67 /// Branch to one of several targets, chosen by a single value input.
68 SelectBranch(SelectionKind),
69}
70
71#[derive(Clone)]
72pub enum ExitInvocationKind {
73 SpvInst(spv::Inst),
74}
75
76impl ControlFlowGraph {
77 /// Iterate over all [`ControlRegion`]s making up `func_def_body`'s CFG, in
78 /// reverse post-order (RPO).
79 ///
80 /// RPO iteration over a CFG provides certain guarantees, most importantly
81 /// that dominators are visited before the entire subgraph they dominate.
82 pub fn rev_post_order(
83 &self,
84 func_def_body: &FuncDefBody,
85 ) -> impl DoubleEndedIterator<Item = ControlRegion> {
86 let mut post_order = SmallVec::<[_; 8]>::new();
87 self.traverse_whole_func(func_def_body, &mut TraversalState {
88 incoming_edge_counts: EntityOrientedDenseMap::new(),
89
90 pre_order_visit: |_| {},
91 post_order_visit: |region| post_order.push(region),
92
93 // NOTE(eddyb) this doesn't impact semantics, but combined with
94 // the final reversal, it should keep targets in the original
95 // order in the cases when they didn't get deduplicated.
96 reverse_targets: true,
97 });
98 post_order.into_iter().rev()
99 }
100}
101
102// HACK(eddyb) this only serves to disallow accessing `private_count` field of
103// `IncomingEdgeCount`.
104mod sealed {
105 /// Opaque newtype for the count of incoming edges (into a [`ControlRegion`](crate::ControlRegion)).
106 ///
107 /// The private field prevents direct mutation or construction, forcing the
108 /// use of [`IncomingEdgeCount::ONE`] and addition operations to produce some
109 /// specific count (which would require explicit workarounds for misuse).
110 #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
111 pub(super) struct IncomingEdgeCount(usize);
112
113 impl IncomingEdgeCount {
114 pub(super) const ONE: Self = Self(1);
115 }
116
117 impl std::ops::Add for IncomingEdgeCount {
118 type Output = Self;
119 fn add(self, other: Self) -> Self {
120 Self(self.0 + other.0)
121 }
122 }
123
124 impl std::ops::AddAssign for IncomingEdgeCount {
125 fn add_assign(&mut self, other: Self) {
126 *self = *self + other;
127 }
128 }
129}
130use sealed::IncomingEdgeCount;
131
132struct TraversalState<PreVisit: FnMut(ControlRegion), PostVisit: FnMut(ControlRegion)> {
133 incoming_edge_counts: EntityOrientedDenseMap<ControlRegion, IncomingEdgeCount>,
134 pre_order_visit: PreVisit,
135 post_order_visit: PostVisit,
136
137 // FIXME(eddyb) should this be a generic parameter for "targets iterator"?
138 reverse_targets: bool,
139}
140
141impl ControlFlowGraph {
142 fn traverse_whole_func(
143 &self,
144 func_def_body: &FuncDefBody,
145 state: &mut TraversalState<impl FnMut(ControlRegion), impl FnMut(ControlRegion)>,
146 ) {
147 let func_at_body = func_def_body.at_body();
148
149 // Quick sanity check that this is the right CFG for `func_def_body`.
150 assert!(std::ptr::eq(func_def_body.unstructured_cfg.as_ref().unwrap(), self));
151 assert!(func_at_body.def().outputs.is_empty());
152
153 self.traverse(func_def_body.body, state);
154 }
155
156 fn traverse(
157 &self,
158 region: ControlRegion,
159 state: &mut TraversalState<impl FnMut(ControlRegion), impl FnMut(ControlRegion)>,
160 ) {
161 // FIXME(eddyb) `EntityOrientedDenseMap` should have an `entry` API.
162 if let Some(existing_count) = state.incoming_edge_counts.get_mut(region) {
163 *existing_count += IncomingEdgeCount::ONE;
164 return;
165 }
166 state.incoming_edge_counts.insert(region, IncomingEdgeCount::ONE);
167
168 (state.pre_order_visit)(region);
169
170 let control_inst = self
171 .control_inst_on_exit_from
172 .get(region)
173 .expect("cfg: missing `ControlInst`, despite having left structured control-flow");
174
175 let targets = control_inst.targets.iter().copied();
176 let targets = if state.reverse_targets {
177 Either::Left(targets.rev())
178 } else {
179 Either::Right(targets)
180 };
181 for target in targets {
182 self.traverse(target, state);
183 }
184
185 (state.post_order_visit)(region);
186 }
187}
188
189/// Minimal loop analysis, based on Tarjan's SCC (strongly connected components)
190/// algorithm, applied recursively (for every level of loop nesting).
191///
192/// Here "minimal" means that each loops is the smallest CFG subgraph possible
193/// (excluding any control-flow paths that cannot reach a backedge and cycle),
194/// i.e. each loop is a CFG SCC (strongly connected component).
195///
196/// These "minimal loops" contrast with the "maximal loops" that the greedy
197/// architecture of the structurizer would naively produce, with the main impact
198/// of the difference being where loop exits (`break`s) "merge" (or "reconverge"),
199/// which SPIR-V encodes via `OpLoopMerge`, and is significant for almost anything
200/// where shared memory and/or subgroup ops can allow observing when invocations
201/// "wait for others in the subgroup to exit the loop" (or when they fail to wait).
202///
203/// This analysis was added to because of two observations wrt "reconvergence":
204/// 1. syntactic loops (from some high-level language), when truly structured
205/// (i.e. only using `while`/`do`-`while` exit conditions, not `break` etc.),
206/// *always* map to "minimal loops" on a CFG, as the only loop exit edge is
207/// built-in, and no part of the syntactic "loop body" can be its successor
208/// 2. more pragmatically, compiling shader languages to SPIR-V seems to (almost?)
209/// always *either* fully preserve syntactic loops (via SPIR-V `OpLoopMerge`),
210/// *or* structurize CFGs in a way that produces "minimal loops", which can
211/// be misleading with explicit `break`s (moving user code from just before
212/// the `break` to after the loop), but is less impactful than "maximal loops"
213struct LoopFinder<'a> {
214 cfg: &'a ControlFlowGraph,
215
216 // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?).
217 loop_header_to_exit_targets: FxIndexMap<ControlRegion, FxIndexSet<ControlRegion>>,
218
219 /// SCC accumulation stack, where CFG nodes collect during the depth-first
220 /// traversal, and are only popped when their "SCC root" (loop header) is
221 /// (note that multiple SCCs on the stack does *not* indicate SCC nesting,
222 /// but rather a path between two SCCs, i.e. a loop *following* another).
223 scc_stack: Vec<ControlRegion>,
224 /// Per-CFG-node traversal state (often just pointing to a `scc_stack` slot).
225 scc_state: EntityOrientedDenseMap<ControlRegion, SccState>,
226}
227
228#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
229struct SccStackIdx(u32);
230
231#[derive(PartialEq, Eq)]
232enum SccState {
233 /// CFG node has been reached and ended up somewhere on the `scc_stack`,
234 /// where it will remain until the SCC it's part of will be completed.
235 Pending(SccStackIdx),
236
237 /// CFG node had been reached once, but is no longer on the `scc_stack`, its
238 /// parent SCC having been completed (or it wasn't in an SCC to begin with).
239 Complete(EventualCfgExits),
240}
241
242/// Summary of all the ways in which a CFG node may eventually leave the CFG.
243///
244// HACK(eddyb) a loop can reach a CFG subgraph that happens to always "diverge"
245// (e.g. ending in `unreachable`, `ExitInvocation`, or even infinite loops,
246// though those have other issues) and strictly speaking that would always be
247// an edge leaving the SCC of the loop (as it can't reach a backedge), but it
248// still shouldn't be treated as an exit because it doesn't reconverge to the
249// rest of the function, i.e. it can't reach any `return`s, which is what this
250// tracks in order to later make a more accurate decision wrt loop exits.
251//
252// NOTE(eddyb) only in the case where a loop *also* has non-"diverging" exits,
253// do the "diverging" ones not get treated as exits, as the presence of both
254// disambiguates `break`s from naturally "diverging" sections of the loop body
255// (at least for CFGs built from languages without labelled `break` or `goto`,
256// but even then it would be pretty convoluted to set up `break` to diverge,
257// while `break some_outer_label` to reconverge to the rest of the function).
258#[derive(Copy, Clone, Default, PartialEq, Eq)]
259struct EventualCfgExits {
260 // FIXME(eddyb) do the other situations need their own flags here?
261 may_return_from_func: bool,
262}
263
264impl std::ops::BitOr for EventualCfgExits {
265 type Output = Self;
266 fn bitor(self, other: Self) -> Self {
267 Self { may_return_from_func: self.may_return_from_func | other.may_return_from_func }
268 }
269}
270impl std::ops::BitOrAssign for EventualCfgExits {
271 fn bitor_assign(&mut self, other: Self) {
272 *self = *self | other;
273 }
274}
275
276impl<'a> LoopFinder<'a> {
277 fn new(cfg: &'a ControlFlowGraph) -> Self {
278 Self {
279 cfg,
280 loop_header_to_exit_targets: FxIndexMap::default(),
281 scc_stack: vec![],
282 scc_state: EntityOrientedDenseMap::new(),
283 }
284 }
285
286 /// Tarjan's SCC algorithm works by computing the "earliest" reachable node,
287 /// from every node (often using the name `lowlink`), which will be equal
288 /// to the origin node itself iff that node is an "SCC root" (loop header),
289 /// and always point to an "earlier" node if a cycle (via loop backedge) was
290 /// found from somewhere else in the SCC (i.e. from inside the loop body).
291 ///
292 /// Here we track stack indices (as the stack order is the traversal order),
293 /// and distinguish the acyclic case to avoid treating most nodes as self-loops.
294 //
295 // FIXME(eddyb) name of the function is a bit clunky wrt its return type.
296 fn find_earliest_scc_root_of(
297 &mut self,
298 node: ControlRegion,
299 ) -> (Option<SccStackIdx>, EventualCfgExits) {
300 let state_entry = self.scc_state.entry(node);
301 if let Some(state) = &state_entry {
302 return match *state {
303 SccState::Pending(scc_stack_idx) => {
304 // HACK(eddyb) this means that `EventualCfgExits`s will be
305 // inconsistently observed across the `Pending` nodes of a
306 // loop body, but that is sound as it cannot feed into any
307 // `Complete` state until the loop header itself is complete,
308 // and the monotonic nature of `EventualCfgExits` means that
309 // the loop header will still get to see the complete picture.
310 (Some(scc_stack_idx), EventualCfgExits::default())
311 }
312 SccState::Complete(eventual_cfg_exits) => (None, eventual_cfg_exits),
313 };
314 }
315 let scc_stack_idx = SccStackIdx(self.scc_stack.len().try_into().unwrap());
316 self.scc_stack.push(node);
317 *state_entry = Some(SccState::Pending(scc_stack_idx));
318
319 let control_inst = self
320 .cfg
321 .control_inst_on_exit_from
322 .get(node)
323 .expect("cfg: missing `ControlInst`, despite having left structured control-flow");
324
325 let mut eventual_cfg_exits = EventualCfgExits::default();
326
327 if let ControlInstKind::Return = control_inst.kind {
328 eventual_cfg_exits.may_return_from_func = true;
329 }
330
331 let earliest_scc_root = control_inst
332 .targets
333 .iter()
334 .flat_map(|&target| {
335 let (earliest_scc_root_of_target, eventual_cfg_exits_of_target) =
336 self.find_earliest_scc_root_of(target);
337 eventual_cfg_exits |= eventual_cfg_exits_of_target;
338
339 // HACK(eddyb) if one of the edges is already known to be a loop exit
340 // (from `OpLoopMerge` specifically), treat it almost like a backedge,
341 // but with the additional requirement that the loop header is already
342 // on the stack (i.e. this `node` is reachable from that loop header).
343 let root_candidate_from_loop_merge =
344 self.cfg.loop_merge_to_loop_header.get(&target).and_then(|&loop_header| {
345 match self.scc_state.get(loop_header) {
346 Some(&SccState::Pending(scc_stack_idx)) => Some(scc_stack_idx),
347 _ => None,
348 }
349 });
350
351 earliest_scc_root_of_target.into_iter().chain(root_candidate_from_loop_merge)
352 })
353 .min();
354
355 // If this node has been chosen as the root of an SCC, complete that SCC.
356 if earliest_scc_root == Some(scc_stack_idx) {
357 let scc_start = scc_stack_idx.0 as usize;
358
359 // It's now possible to find all the loop exits: they're all the
360 // edges from nodes of this SCC (loop) to nodes not in the SCC.
361 let target_is_exit = |target| {
362 match self.scc_state[target] {
363 SccState::Pending(i) => {
364 assert!(i >= scc_stack_idx);
365 false
366 }
367 SccState::Complete(eventual_cfg_exits_of_target) => {
368 let EventualCfgExits { may_return_from_func: loop_may_reconverge } =
369 eventual_cfg_exits;
370 let EventualCfgExits { may_return_from_func: target_may_reconverge } =
371 eventual_cfg_exits_of_target;
372
373 // HACK(eddyb) see comment on `EventualCfgExits` for why
374 // edges leaving the SCC aren't treated as loop exits
375 // when they're "more divergent" than the loop itself,
376 // i.e. if any edges leaving the SCC can reconverge,
377 // (and therefore the loop as a whole can reconverge)
378 // only those edges are kept as loop exits.
379 target_may_reconverge == loop_may_reconverge
380 }
381 }
382 };
383 self.loop_header_to_exit_targets.insert(
384 node,
385 self.scc_stack[scc_start..]
386 .iter()
387 .flat_map(|&scc_node| {
388 self.cfg.control_inst_on_exit_from[scc_node].targets.iter().copied()
389 })
390 .filter(|&target| target_is_exit(target))
391 .collect(),
392 );
393
394 // Find nested loops by marking *only* the loop header as complete,
395 // clearing loop body nodes' state, and recursing on them: all the
396 // nodes outside the loop (otherwise reachable from within), and the
397 // loop header itself, are already marked as complete, meaning that
398 // all exits and backedges will be ignored, and the recursion will
399 // only find more SCCs within the loop body (i.e. nested loops).
400 self.scc_state[node] = SccState::Complete(eventual_cfg_exits);
401 let loop_body_range = scc_start + 1..self.scc_stack.len();
402 for &scc_node in &self.scc_stack[loop_body_range.clone()] {
403 self.scc_state.remove(scc_node);
404 }
405 for i in loop_body_range.clone() {
406 self.find_earliest_scc_root_of(self.scc_stack[i]);
407 }
408 assert_eq!(self.scc_stack.len(), loop_body_range.end);
409
410 // Remove the entire SCC from the accumulation stack all at once.
411 self.scc_stack.truncate(scc_start);
412
413 return (None, eventual_cfg_exits);
414 }
415
416 // Not actually in an SCC at all, just some node outside any CFG cycles.
417 if earliest_scc_root.is_none() {
418 assert!(self.scc_stack.pop() == Some(node));
419 self.scc_state[node] = SccState::Complete(eventual_cfg_exits);
420 }
421
422 (earliest_scc_root, eventual_cfg_exits)
423 }
424}
425
426#[allow(rustdoc::private_intra_doc_links)]
427/// Control-flow "structurizer", which attempts to convert as much of the CFG
428/// as possible into structural control-flow (regions).
429///
430/// See [`StructurizeRegionState`]'s docs for more details on the algorithm.
431//
432// FIXME(eddyb) document this (instead of having it on `StructurizeRegionState`).
433//
434// NOTE(eddyb) CFG structurizer has these stages (per-region):
435// 1. absorb any deferred exits that finally have 100% refcount
436// 2. absorb a single backedge deferred exit to the same region
437//
438// What we could add is a third step, to handle irreducible controlflow:
439// 3. check for groups of exits that have fully satisfied refcounts iff the
440// rest of the exits in the group are all added together - if so, the group
441// is *irreducible* and a single "loop header" can be created, that gets
442// the group of deferred exits, and any other occurrence of the deferred
443// exits (in either the original region, or amongst themselves) can be
444// replaced with the "loop header" with appropriate selector inputs
445//
446// Sadly 3. requires a bunch of tests that are hard to craft (can rustc MIR
447// even end up in the right shape?).
448// OpenCL has `goto` so maybe it can also be used for this worse-than-diamond
449// example: `entry -> a,b,d` `a,b -> c` `a,b,c -> d` `a,b,c,d <-> a,b,c,d`
450// (the goal is avoiding a "flat group", i.e. where there is only one step
451// between every exit in the group and another exit)
452pub struct Structurizer<'a> {
453 cx: &'a Context,
454
455 /// Scrutinee type for [`SelectionKind::BoolCond`].
456 type_bool: Type,
457
458 /// Scrutinee value for [`SelectionKind::BoolCond`], for the "then" case.
459 const_true: Const,
460
461 /// Scrutinee value for [`SelectionKind::BoolCond`], for the "else" case.
462 const_false: Const,
463
464 func_def_body: &'a mut FuncDefBody,
465
466 // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?).
467 loop_header_to_exit_targets: FxIndexMap<ControlRegion, FxIndexSet<ControlRegion>>,
468
469 // HACK(eddyb) this also tracks all of `loop_header_to_exit_targets`, as
470 // "false edges" from every loop header to each exit target of that loop,
471 // which structurizing that loop consumes to "unlock" its own exits.
472 incoming_edge_counts_including_loop_exits:
473 EntityOrientedDenseMap<ControlRegion, IncomingEdgeCount>,
474
475 /// `structurize_region_state[region]` tracks `.structurize_region(region)`
476 /// progress/results (see also [`StructurizeRegionState`]'s docs).
477 //
478 // FIXME(eddyb) use `EntityOrientedDenseMap` (which lacks iteration by design).
479 structurize_region_state: FxIndexMap<ControlRegion, StructurizeRegionState>,
480
481 /// Accumulated rewrites (caused by e.g. `target_inputs`s, but not only),
482 /// i.e.: `Value::ControlRegionInput { region, input_idx }` must be
483 /// rewritten based on `control_region_input_rewrites[region]`, as either
484 /// the original `region` wasn't reused, or its inputs were renumbered.
485 control_region_input_rewrites:
486 EntityOrientedDenseMap<ControlRegion, ControlRegionInputRewrites>,
487}
488
489/// How all `Value::ControlRegionInput { region, input_idx }` for a `region`
490/// must be rewritten (see also `control_region_input_rewrites` docs).
491enum ControlRegionInputRewrites {
492 /// Complete replacement with another value (which can take any form), as
493 /// `region` wasn't kept in its original form in the final structured IR.
494 ///
495 /// **Note**: such replacement can be chained, i.e. a replacement value can
496 /// be `Value::ControlRegionInput { region: other_region, .. }`, and then
497 /// `other_region` itself may have its inputs written.
498 ReplaceWith(SmallVec<[Value; 2]>),
499
500 /// The value may remain an input of the same `region`, only changing its
501 /// `input_idx` (e.g. if indices need compaction after removing some inputs),
502 /// or get replaced anyway, depending on the `Result` for `input_idx`.
503 ///
504 /// **Note**: renumbering can only be the last rewrite step of a value,
505 /// as `region` must've been chosen to be kept in the final structured IR,
506 /// but the `Err` cases are transitive just like `ReplaceWith`.
507 //
508 // FIXME(eddyb) this is a bit silly, maybe try to rely more on hermeticity
509 // to get rid of this?
510 RenumberOrReplaceWith(SmallVec<[Result<u32, Value>; 2]>),
511}
512
513impl ControlRegionInputRewrites {
514 // HACK(eddyb) this is here because it depends on a field of `Structurizer`
515 // and borrowing issues ensue if it's made a method of `Structurizer`.
516 fn rewrite_all(
517 rewrites: &EntityOrientedDenseMap<ControlRegion, Self>,
518 ) -> impl crate::transform::Transformer + '_ {
519 // FIXME(eddyb) maybe this should be provided by `transform`.
520 use crate::transform::*;
521 struct ReplaceValueWith<F>(F);
522 impl<F: Fn(Value) -> Option<Value>> Transformer for ReplaceValueWith<F> {
523 fn transform_value_use(&mut self, v: &Value) -> Transformed<Value> {
524 self.0(*v).map_or(Transformed::Unchanged, Transformed::Changed)
525 }
526 }
527
528 ReplaceValueWith(move |v| {
529 let mut new_v = v;
530 while let Value::ControlRegionInput { region, input_idx } = new_v {
531 match rewrites.get(region) {
532 // NOTE(eddyb) this needs to be able to apply multiple replacements,
533 // due to the input potentially having redundantly chained `OpPhi`s.
534 //
535 // FIXME(eddyb) union-find-style "path compression" could record the
536 // final value inside `rewrites` while replacements are being made,
537 // to avoid going through a chain more than once (and some of these
538 // replacements could also be applied early).
539 Some(ControlRegionInputRewrites::ReplaceWith(replacements)) => {
540 new_v = replacements[input_idx as usize];
541 }
542 Some(ControlRegionInputRewrites::RenumberOrReplaceWith(
543 renumbering_and_replacements,
544 )) => match renumbering_and_replacements[input_idx as usize] {
545 Ok(new_idx) => {
546 new_v = Value::ControlRegionInput { region, input_idx: new_idx };
547 break;
548 }
549 Err(replacement) => new_v = replacement,
550 },
551 None => break,
552 }
553 }
554 (v != new_v).then_some(new_v)
555 })
556 }
557}
558
559/// The state of one `.structurize_region(region)` invocation, and its result.
560///
561/// There is a fourth (or 0th) implicit state, which is where nothing has yet
562/// observed some region, and [`Structurizer`] isn't tracking it at all.
563//
564// FIXME(eddyb) make the 0th state explicit and move `incoming_edge_counts` to it.
565enum StructurizeRegionState {
566 /// Structurization is still running, and observing this is a cycle.
567 InProgress,
568
569 /// Structurization completed, and this region can now be claimed.
570 Ready {
571 /// Cached `region_deferred_edges[region].edge_bundle.accumulated_count`,
572 /// i.e. the total count of backedges (if any exist) pointing to `region`
573 /// from the CFG subgraph that `region` itself dominates.
574 ///
575 /// Claiming a region with backedges can combine them with the bundled
576 /// edges coming into the CFG cycle from outside, and instead of failing
577 /// due to the latter not being enough to claim the region on their own,
578 /// actually perform loop structurization.
579 accumulated_backedge_count: IncomingEdgeCount,
580
581 // HACK(eddyb) the only part of a `ClaimedRegion` that is computed by
582 // `structurize_region` (the rest comes from `try_claim_edge_bundle`).
583 region_deferred_edges: DeferredEdgeBundleSet,
584 },
585
586 /// Region was claimed (by an [`IncomingEdgeBundle`], with the appropriate
587 /// total [`IncomingEdgeCount`], minus `accumulated_backedge_count`), and
588 /// must eventually be incorporated as part of some larger region.
589 Claimed,
590}
591
592/// An "(incoming) edge bundle" is a subset of the edges into a single `target`.
593///
594/// When `accumulated_count` reaches the total [`IncomingEdgeCount`] for `target`,
595/// that [`IncomingEdgeBundle`] is said to "effectively own" its `target` (akin to
596/// the more commonly used CFG domination relation, but more "incremental").
597///
598/// **Note**: `target` has a generic type `T` to reduce redundancy when it's
599/// already implied (e.g. by the key in [`DeferredEdgeBundleSet`]'s map).
600struct IncomingEdgeBundle<T> {
601 target: T,
602 accumulated_count: IncomingEdgeCount,
603
604 /// The [`Value`]s that `Value::ControlRegionInput { region, .. }` will get
605 /// on entry into `region`, through this "edge bundle".
606 target_inputs: SmallVec<[Value; 2]>,
607}
608
609impl<T> IncomingEdgeBundle<T> {
610 fn with_target<U>(self, target: U) -> IncomingEdgeBundle<U> {
611 let IncomingEdgeBundle { target: _, accumulated_count, target_inputs } = self;
612 IncomingEdgeBundle { target, accumulated_count, target_inputs }
613 }
614}
615
616/// A "deferred (incoming) edge bundle" is an [`IncomingEdgeBundle`] that cannot
617/// be structurized immediately, but instead waits for its `accumulated_count`
618/// to reach the full count of its `target`, before it can grafted into some
619/// structured control-flow region.
620///
621/// While in the "deferred" state, its can accumulate a non-trivial `condition`,
622/// every time it's propagated to an "outer" region, e.g. for this pseudocode:
623/// ```text
624/// if a {
625/// branch => label1
626/// } else {
627/// if b {
628/// branch => label1
629/// }
630/// }
631/// ```
632/// the deferral of branches to `label1` will result in:
633/// ```text
634/// label1_condition = if a {
635/// true
636/// } else {
637/// if b {
638/// true
639/// } else {
640/// false
641/// }
642/// }
643/// if label1_condition {
644/// branch => label1
645/// }
646/// ```
647/// which could theoretically be simplified (after the [`Structurizer`]) to:
648/// ```text
649/// label1_condition = a | b
650/// if label1_condition {
651/// branch => label1
652/// }
653/// ```
654///
655/// **Note**: `edge_bundle.target` has a generic type `T` to reduce redundancy
656/// when it's already implied (e.g. by the key in [`DeferredEdgeBundleSet`]'s map).
657struct DeferredEdgeBundle<T = DeferredTarget> {
658 condition: LazyCond,
659 edge_bundle: IncomingEdgeBundle<T>,
660}
661
662impl<T> DeferredEdgeBundle<T> {
663 fn with_target<U>(self, target: U) -> DeferredEdgeBundle<U> {
664 let DeferredEdgeBundle { condition, edge_bundle } = self;
665 DeferredEdgeBundle { condition, edge_bundle: edge_bundle.with_target(target) }
666 }
667}
668
669/// A recipe for computing a control-flow-sensitive (boolean) condition [`Value`],
670/// potentially requiring merging through an arbitrary number of `Select`s
671/// (via per-case outputs and [`Value::ControlNodeOutput`], for each `Select`).
672///
673/// This should largely be equivalent to eagerly generating all region outputs
674/// that might be needed, and then removing the unused ones, but this way we
675/// never generate unused outputs, and can potentially even optimize away some
676/// redundant dataflow (e.g. `if cond { true } else { false }` is just `cond`).
677#[derive(Clone)]
678enum LazyCond {
679 // HACK(eddyb) `Undef` is used when the condition comes from e.g. a `Select`
680 // case that diverges and/or represents `unreachable`.
681 Undef,
682
683 False,
684 True,
685
686 Merge(Rc<LazyCondMerge>),
687}
688
689enum LazyCondMerge {
690 Select {
691 control_node: ControlNode,
692 // FIXME(eddyb) the lowest level of `LazyCond` ends up containing only
693 // `LazyCond::{Undef,False,True}`, and that could more efficiently be
694 // expressed using e.g. bitsets, but the `Rc` in `LazyCond::Merge`
695 // means that this is more compact than it would otherwise be.
696 per_case_conds: SmallVec<[LazyCond; 4]>,
697 },
698}
699
700/// A target for one of the edge bundles in a [`DeferredEdgeBundleSet`], mostly
701/// separate from [`ControlRegion`] to allow expressing returns as well.
702#[derive(Copy, Clone, PartialEq, Eq, Hash)]
703enum DeferredTarget {
704 Region(ControlRegion),
705
706 /// Structured "return" out of the function (with `target_inputs` used for
707 /// the function body `output`s, i.e. inputs of [`ControlInstKind::Return`]).
708 Return,
709}
710
711/// Set of [`DeferredEdgeBundle`]s, uniquely keyed by their `target`s.
712///
713/// Semantically equivalent to an unordered series of conditional branches
714/// to each possible `target`, which corresponds to an unenforced invariant
715/// that exactly one [`DeferredEdgeBundle`] condition must be `true` at any
716/// given time (the only non-trivial case, [`DeferredEdgeBundleSet::Choice`],
717/// satisfies it because it's only used for merging `Select` cases, and so
718/// all the conditions will end up using disjoint [`LazyCond::Merge`]s).
719enum DeferredEdgeBundleSet {
720 Unreachable,
721
722 // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
723 Always {
724 // HACK(eddyb) fields are split here to allow e.g. iteration.
725 target: DeferredTarget,
726 edge_bundle: IncomingEdgeBundle<()>,
727 },
728
729 Choice {
730 target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>,
731 },
732}
733
734impl FromIterator<DeferredEdgeBundle> for DeferredEdgeBundleSet {
735 fn from_iter<T: IntoIterator<Item = DeferredEdgeBundle>>(iter: T) -> Self {
736 let mut iter = iter.into_iter();
737 match iter.next() {
738 None => Self::Unreachable,
739 Some(first) => match iter.next() {
740 // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
741 None => Self::Always {
742 target: first.edge_bundle.target,
743 edge_bundle: first.edge_bundle.with_target(()),
744 },
745 Some(second) => Self::Choice {
746 target_to_deferred: ([first, second].into_iter().chain(iter))
747 .map(|d| (d.edge_bundle.target, d.with_target(())))
748 .collect(),
749 },
750 },
751 }
752 }
753}
754
755impl From<FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>> for DeferredEdgeBundleSet {
756 fn from(target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>) -> Self {
757 if target_to_deferred.len() <= 1 {
758 target_to_deferred
759 .into_iter()
760 .map(|(target, deferred)| deferred.with_target(target))
761 .collect()
762 } else {
763 Self::Choice { target_to_deferred }
764 }
765 }
766}
767
768// HACK(eddyb) this API is a mess, is there an uncompromising way to clean it up?
769impl DeferredEdgeBundleSet {
770 fn get_edge_bundle_by_target(
771 &self,
772 search_target: DeferredTarget,
773 ) -> Option<&IncomingEdgeBundle<()>> {
774 match self {
775 DeferredEdgeBundleSet::Unreachable => None,
776 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
777 (*target == search_target).then_some(edge_bundle)
778 }
779 DeferredEdgeBundleSet::Choice { target_to_deferred } => {
780 Some(&target_to_deferred.get(&search_target)?.edge_bundle)
781 }
782 }
783 }
784
785 fn get_edge_bundle_mut_by_target(
786 &mut self,
787 search_target: DeferredTarget,
788 ) -> Option<&mut IncomingEdgeBundle<()>> {
789 match self {
790 DeferredEdgeBundleSet::Unreachable => None,
791 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
792 (*target == search_target).then_some(edge_bundle)
793 }
794 DeferredEdgeBundleSet::Choice { target_to_deferred } => {
795 Some(&mut target_to_deferred.get_mut(&search_target)?.edge_bundle)
796 }
797 }
798 }
799
800 fn iter_targets_with_edge_bundle(
801 &self,
802 ) -> impl Iterator<Item = (DeferredTarget, &IncomingEdgeBundle<()>)> {
803 match self {
804 DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
805 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
806 Either::Left(Some((*target, edge_bundle)).into_iter())
807 }
808 DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
809 target_to_deferred
810 .iter()
811 .map(|(&target, deferred)| (target, &deferred.edge_bundle)),
812 ),
813 }
814 }
815
816 fn iter_targets_with_edge_bundle_mut(
817 &mut self,
818 ) -> impl Iterator<Item = (DeferredTarget, &mut IncomingEdgeBundle<()>)> {
819 match self {
820 DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
821 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
822 Either::Left(Some((*target, edge_bundle)).into_iter())
823 }
824 DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
825 target_to_deferred
826 .iter_mut()
827 .map(|(&target, deferred)| (target, &mut deferred.edge_bundle)),
828 ),
829 }
830 }
831
832 // HACK(eddyb) this only exists because of `DeferredEdgeBundleSet`'s lossy
833 // representation wrt conditions, so removal from a `DeferredEdgeBundleSet`
834 // cannot be used for e.g. `Select` iterating over per-case deferreds.
835 fn steal_deferred_by_target_without_removal(
836 &mut self,
837 search_target: DeferredTarget,
838 ) -> Option<DeferredEdgeBundle<()>> {
839 let steal_edge_bundle = |edge_bundle: &mut IncomingEdgeBundle<()>| IncomingEdgeBundle {
840 target: (),
841 accumulated_count: edge_bundle.accumulated_count,
842 target_inputs: mem::take(&mut edge_bundle.target_inputs),
843 };
844 match self {
845 DeferredEdgeBundleSet::Unreachable => None,
846 DeferredEdgeBundleSet::Always { target, edge_bundle } => (*target == search_target)
847 .then(|| DeferredEdgeBundle {
848 condition: LazyCond::True,
849 edge_bundle: steal_edge_bundle(edge_bundle),
850 }),
851 DeferredEdgeBundleSet::Choice { target_to_deferred } => {
852 let DeferredEdgeBundle { condition, edge_bundle } =
853 target_to_deferred.get_mut(&search_target)?;
854 Some(DeferredEdgeBundle {
855 condition: mem::replace(condition, LazyCond::False),
856 edge_bundle: steal_edge_bundle(edge_bundle),
857 })
858 }
859 }
860 }
861
862 // NOTE(eddyb) the returned `DeferredEdgeBundleSet` exists under the assumption
863 // that `split_target` is not reachable from it, so this method is not suitable
864 // for e.g. uniformly draining `DeferredEdgeBundleSet` in a way that preserves
865 // conditions (but rather it's almost a kind of control-flow "slicing").
866 fn split_out_target(self, split_target: DeferredTarget) -> (Option<DeferredEdgeBundle>, Self) {
867 match self {
868 DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
869 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
870 if target == split_target {
871 (
872 Some(DeferredEdgeBundle {
873 condition: LazyCond::True,
874 edge_bundle: edge_bundle.with_target(target),
875 }),
876 DeferredEdgeBundleSet::Unreachable,
877 )
878 } else {
879 (None, DeferredEdgeBundleSet::Always { target, edge_bundle })
880 }
881 }
882 DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
883 // FIXME(eddyb) should this use `shift_remove` and/or emulate
884 // extra tombstones, to avoid impacting the order?
885 (
886 target_to_deferred
887 .swap_remove(&split_target)
888 .map(|d| d.with_target(split_target)),
889 Self::from(target_to_deferred),
890 )
891 }
892 }
893 }
894
895 // HACK(eddyb) the strange signature is overfitted to its own callsite.
896 fn split_out_matching<T>(
897 self,
898 mut matches: impl FnMut(DeferredEdgeBundle) -> Result<T, DeferredEdgeBundle>,
899 ) -> (Option<T>, Self) {
900 match self {
901 DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
902 DeferredEdgeBundleSet::Always { target, edge_bundle } => {
903 match matches(DeferredEdgeBundle {
904 condition: LazyCond::True,
905 edge_bundle: edge_bundle.with_target(target),
906 }) {
907 Ok(x) => (Some(x), DeferredEdgeBundleSet::Unreachable),
908 Err(new_deferred) => {
909 assert!(new_deferred.edge_bundle.target == target);
910 assert!(matches!(new_deferred.condition, LazyCond::True));
911 (None, DeferredEdgeBundleSet::Always {
912 target,
913 edge_bundle: new_deferred.edge_bundle.with_target(()),
914 })
915 }
916 }
917 }
918 DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
919 let mut result = None;
920 for (i, (&target, deferred)) in target_to_deferred.iter_mut().enumerate() {
921 // HACK(eddyb) "take" `deferred` so it can be passed to
922 // `matches` (and put back if that returned `Err`).
923 let taken_deferred = mem::replace(deferred, DeferredEdgeBundle {
924 condition: LazyCond::False,
925 edge_bundle: IncomingEdgeBundle {
926 target: Default::default(),
927 accumulated_count: Default::default(),
928 target_inputs: Default::default(),
929 },
930 });
931
932 match matches(taken_deferred.with_target(target)) {
933 Ok(x) => {
934 result = Some(x);
935 // FIXME(eddyb) should this use `swap_remove_index`?
936 target_to_deferred.shift_remove_index(i).unwrap();
937 break;
938 }
939
940 // Put back the `DeferredEdgeBundle` and keep looking.
941 Err(new_deferred) => {
942 assert!(new_deferred.edge_bundle.target == target);
943 *deferred = new_deferred.with_target(());
944 }
945 }
946 }
947 (result, Self::from(target_to_deferred))
948 }
949 }
950 }
951}
952
953/// A successfully "claimed" (via `try_claim_edge_bundle`) partially structurized
954/// CFG subgraph (i.e. set of [`ControlRegion`]s previously connected by CFG edges),
955/// which is effectively owned by the "claimer" and **must** be used for:
956/// - the whole function body (if `deferred_edges` only contains `Return`)
957/// - one of the cases of a `Select` node
958/// - merging into a larger region (i.e. its nearest dominator)
959//
960// FIXME(eddyb) consider never having to claim the function body itself,
961// by wrapping the CFG in a `ControlNode` instead.
962struct ClaimedRegion {
963 // FIXME(eddyb) find a way to clarify that this can differ from the target
964 // of `try_claim_edge_bundle`, and also that `deferred_edges` are from the
965 // perspective of being "inside" `structured_body` (wrt hermeticity).
966 structured_body: ControlRegion,
967
968 /// The [`Value`]s that `Value::ControlRegionInput { region: structured_body, .. }`
969 /// will get on entry into `structured_body`, when this region ends up
970 /// merged into a larger region, or as a child of a new [`ControlNode`].
971 //
972 // FIXME(eddyb) don't replace `Value::ControlRegionInput { region: structured_body, .. }`
973 // with `region_inputs` when `structured_body` ends up a `ControlNode` child,
974 // but instead make all `ControlRegion`s entirely hermetic wrt inputs.
975 structured_body_inputs: SmallVec<[Value; 2]>,
976
977 /// The transitive targets which couldn't be claimed into `structured_body`
978 /// remain as deferred exits, and will block further structurization until
979 /// all other edges to those same targets are gathered together.
980 ///
981 /// **Note**: this will only be empty if the region can never exit,
982 /// i.e. it has divergent control-flow (such as an infinite loop), as any
983 /// control-flow path that can (eventually) return from the function, will
984 /// end up using a deferred target for that (see [`DeferredTarget::Return`]).
985 deferred_edges: DeferredEdgeBundleSet,
986}
987
988impl<'a> Structurizer<'a> {
989 pub fn new(cx: &'a Context, func_def_body: &'a mut FuncDefBody) -> Self {
990 // FIXME(eddyb) SPIR-T should have native booleans itself.
991 let wk = &spv::spec::Spec::get().well_known;
992 let type_bool = cx.intern(TypeKind::SpvInst {
993 spv_inst: wk.OpTypeBool.into(),
994 type_and_const_inputs: [].into_iter().collect(),
995 });
996 let const_true = cx.intern(ConstDef {
997 attrs: AttrSet::default(),
998 ty: type_bool,
999 kind: ConstKind::SpvInst {
1000 spv_inst_and_const_inputs: Rc::new((
1001 wk.OpConstantTrue.into(),
1002 [].into_iter().collect(),
1003 )),
1004 },
1005 });
1006 let const_false = cx.intern(ConstDef {
1007 attrs: AttrSet::default(),
1008 ty: type_bool,
1009 kind: ConstKind::SpvInst {
1010 spv_inst_and_const_inputs: Rc::new((
1011 wk.OpConstantFalse.into(),
1012 [].into_iter().collect(),
1013 )),
1014 },
1015 });
1016
1017 let (loop_header_to_exit_targets, incoming_edge_counts_including_loop_exits) =
1018 func_def_body
1019 .unstructured_cfg
1020 .as_ref()
1021 .map(|cfg| {
1022 let loop_header_to_exit_targets = {
1023 let mut loop_finder = LoopFinder::new(cfg);
1024 loop_finder.find_earliest_scc_root_of(func_def_body.body);
1025 loop_finder.loop_header_to_exit_targets
1026 };
1027
1028 let mut state = TraversalState {
1029 incoming_edge_counts: EntityOrientedDenseMap::new(),
1030
1031 pre_order_visit: |_| {},
1032 post_order_visit: |_| {},
1033 reverse_targets: false,
1034 };
1035 cfg.traverse_whole_func(func_def_body, &mut state);
1036
1037 // HACK(eddyb) treat loop exits as "false edges", that their
1038 // respective loop header "owns", such that structurization
1039 // naturally stops at those loop exits, instead of continuing
1040 // greedily into the loop exterior (producing "maximal loops").
1041 for loop_exit_targets in loop_header_to_exit_targets.values() {
1042 for &exit_target in loop_exit_targets {
1043 *state
1044 .incoming_edge_counts
1045 .entry(exit_target)
1046 .get_or_insert(Default::default()) += IncomingEdgeCount::ONE;
1047 }
1048 }
1049
1050 (loop_header_to_exit_targets, state.incoming_edge_counts)
1051 })
1052 .unwrap_or_default();
1053
1054 Self {
1055 cx,
1056 type_bool,
1057 const_true,
1058 const_false,
1059
1060 func_def_body,
1061
1062 loop_header_to_exit_targets,
1063 incoming_edge_counts_including_loop_exits,
1064
1065 structurize_region_state: FxIndexMap::default(),
1066 control_region_input_rewrites: EntityOrientedDenseMap::new(),
1067 }
1068 }
1069
1070 pub fn structurize_func(mut self) {
1071 // Don't even try to re-structurize functions.
1072 if self.func_def_body.unstructured_cfg.is_none() {
1073 return;
1074 }
1075
1076 // FIXME(eddyb) it might work much better to have the unstructured CFG
1077 // wrapped in a `ControlNode` inside the function body, instead.
1078 let func_body_deferred_edges = {
1079 let func_entry_pseudo_edge = {
1080 let target = self.func_def_body.body;
1081 move || IncomingEdgeBundle {
1082 target,
1083 accumulated_count: IncomingEdgeCount::ONE,
1084 target_inputs: [].into_iter().collect(),
1085 }
1086 };
1087
1088 // HACK(eddyb) it's easier to assume the function never loops back
1089 // to its body, than fix up the broken CFG if that never happens.
1090 if self.incoming_edge_counts_including_loop_exits[func_entry_pseudo_edge().target]
1091 != func_entry_pseudo_edge().accumulated_count
1092 {
1093 // FIXME(eddyb) find a way to attach (diagnostic) attributes
1094 // to a `FuncDefBody`, would be useful to have that here.
1095 return;
1096 }
1097
1098 let ClaimedRegion { structured_body, structured_body_inputs, deferred_edges } =
1099 self.try_claim_edge_bundle(func_entry_pseudo_edge()).ok().unwrap();
1100 assert!(structured_body == func_entry_pseudo_edge().target);
1101 assert!(structured_body_inputs == func_entry_pseudo_edge().target_inputs);
1102 deferred_edges
1103 };
1104
1105 match func_body_deferred_edges {
1106 // FIXME(eddyb) also support structured return when the whole body
1107 // is divergent, by generating undef constants (needs access to the
1108 // whole `FuncDecl`, not just `FuncDefBody`, to get the right types).
1109 DeferredEdgeBundleSet::Unreachable => {
1110 // HACK(eddyb) replace the CFG with one that only contains an
1111 // `Unreachable` terminator for the body, comparable to what
1112 // `rebuild_cfg_from_unclaimed_region_deferred_edges` would do
1113 // in the general case (but special-cased because this is very
1114 // close to being structurizable, just needs a bit of plumbing).
1115 let mut control_inst_on_exit_from = EntityOrientedDenseMap::new();
1116 control_inst_on_exit_from.insert(self.func_def_body.body, ControlInst {
1117 attrs: AttrSet::default(),
1118 kind: ControlInstKind::Unreachable,
1119 inputs: [].into_iter().collect(),
1120 targets: [].into_iter().collect(),
1121 target_inputs: FxIndexMap::default(),
1122 });
1123 self.func_def_body.unstructured_cfg = Some(ControlFlowGraph {
1124 control_inst_on_exit_from,
1125 loop_merge_to_loop_header: Default::default(),
1126 });
1127 }
1128
1129 // Structured return, the function is fully structurized.
1130 DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
1131 let body_def = self.func_def_body.at_mut_body().def();
1132 body_def.outputs = edge_bundle.target_inputs;
1133 self.func_def_body.unstructured_cfg = None;
1134 }
1135
1136 _ => {
1137 // Repair all the regions that remain unclaimed, including the body.
1138 let structurize_region_state = mem::take(&mut self.structurize_region_state)
1139 .into_iter()
1140 .chain([(self.func_def_body.body, StructurizeRegionState::Ready {
1141 accumulated_backedge_count: IncomingEdgeCount::default(),
1142
1143 region_deferred_edges: func_body_deferred_edges,
1144 })]);
1145 for (target, state) in structurize_region_state {
1146 if let StructurizeRegionState::Ready { region_deferred_edges, .. } = state {
1147 self.rebuild_cfg_from_unclaimed_region_deferred_edges(
1148 target,
1149 region_deferred_edges,
1150 );
1151 }
1152 }
1153 }
1154 }
1155
1156 // The last step of structurization is applying rewrites accumulated
1157 // while structurizing (i.e. `control_region_input_rewrites`).
1158 //
1159 // FIXME(eddyb) obsolete this by fully taking advantage of hermeticity,
1160 // and only replacing `Value::ControlRegionInput { region, .. }` within
1161 // `region`'s children, shallowly, whenever `region` gets claimed.
1162 self.func_def_body.inner_in_place_transform_with(
1163 &mut ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites),
1164 );
1165 }
1166
1167 fn try_claim_edge_bundle(
1168 &mut self,
1169 edge_bundle: IncomingEdgeBundle<ControlRegion>,
1170 ) -> Result<ClaimedRegion, IncomingEdgeBundle<ControlRegion>> {
1171 let target = edge_bundle.target;
1172
1173 // Always attempt structurization before checking the `IncomingEdgeCount`,
1174 // to be able to make use of backedges (if any were found).
1175 if self.structurize_region_state.get(&target).is_none() {
1176 self.structurize_region(target);
1177 }
1178
1179 let backedge_count = match self.structurize_region_state[&target] {
1180 // This `try_claim_edge_bundle` call is itself a backedge, and it's
1181 // coherent to not let any of them claim the loop itself, and only
1182 // allow claiming the whole loop (if successfully structurized).
1183 StructurizeRegionState::InProgress => IncomingEdgeCount::default(),
1184
1185 StructurizeRegionState::Ready { accumulated_backedge_count, .. } => {
1186 accumulated_backedge_count
1187 }
1188
1189 StructurizeRegionState::Claimed => {
1190 unreachable!("cfg::Structurizer::try_claim_edge_bundle: already claimed");
1191 }
1192 };
1193
1194 if self.incoming_edge_counts_including_loop_exits[target]
1195 != edge_bundle.accumulated_count + backedge_count
1196 {
1197 return Err(edge_bundle);
1198 }
1199
1200 let state =
1201 self.structurize_region_state.insert(target, StructurizeRegionState::Claimed).unwrap();
1202
1203 let mut deferred_edges = match state {
1204 StructurizeRegionState::InProgress => unreachable!(
1205 "cfg::Structurizer::try_claim_edge_bundle: cyclic calls \
1206 should not get this far"
1207 ),
1208
1209 StructurizeRegionState::Ready { region_deferred_edges, .. } => region_deferred_edges,
1210
1211 StructurizeRegionState::Claimed => {
1212 // Handled above.
1213 unreachable!()
1214 }
1215 };
1216
1217 let mut backedge = None;
1218 if backedge_count != IncomingEdgeCount::default() {
1219 (backedge, deferred_edges) =
1220 deferred_edges.split_out_target(DeferredTarget::Region(target));
1221 }
1222
1223 // If the target contains any backedge to itself, that's a loop, with:
1224 // * entry: `edge_bundle` (unconditional, i.e. `do`-`while`-like)
1225 // * body: `target`
1226 // * repeat ("continue") edge: `backedge` (with its `condition`)
1227 // * exit ("break") edges: `deferred_edges`
1228 let structured_body = if let Some(backedge) = backedge {
1229 let DeferredEdgeBundle { condition: repeat_condition, edge_bundle: backedge } =
1230 backedge;
1231 let body = target;
1232
1233 // HACK(eddyb) due to `Loop` `ControlNode`s not being hermetic on
1234 // the output side yet (i.e. they still have SSA-like semantics),
1235 // it gets wrapped in a `ControlRegion`, which can be as hermetic as
1236 // the loop body itself was originally.
1237 // NOTE(eddyb) both input declarations and the child `Loop` node are
1238 // added later down below, after the `Loop` node is created.
1239 let wrapper_region =
1240 self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default());
1241
1242 // Any loop body region inputs, which must receive values from both
1243 // the loop entry and the backedge, become explicit "loop state",
1244 // starting as `initial_inputs` and being replaced with body outputs
1245 // after every loop iteration.
1246 //
1247 // FIXME(eddyb) `Loop` `ControlNode`s should be changed to be hermetic
1248 // and have the loop state be output from the whole node itself,
1249 // for any outside uses of values defined within the loop body.
1250 let body_def = self.func_def_body.at_mut(body).def();
1251 let original_input_decls = mem::take(&mut body_def.inputs);
1252 assert!(body_def.outputs.is_empty());
1253
1254 // HACK(eddyb) some dataflow through the loop body is redundant,
1255 // and can be lifted out of it, but the worst part is that applying
1256 // the replacement requires leaving alone all the non-redundant
1257 // `body` region inputs at the same time, and it's not really
1258 // feasible to move `body`'s children into a new region without
1259 // wasting it completely (i.e. can't swap with `wrapper_region`).
1260 let mut initial_inputs = SmallVec::<[_; 2]>::new();
1261 let body_input_rewrites = ControlRegionInputRewrites::RenumberOrReplaceWith(
1262 backedge
1263 .target_inputs
1264 .into_iter()
1265 .enumerate()
1266 .map(|(original_idx, mut backedge_value)| {
1267 ControlRegionInputRewrites::rewrite_all(
1268 &self.control_region_input_rewrites,
1269 )
1270 .transform_value_use(&backedge_value)
1271 .apply_to(&mut backedge_value);
1272
1273 let original_idx = u32::try_from(original_idx).unwrap();
1274 if backedge_value
1275 == (Value::ControlRegionInput { region: body, input_idx: original_idx })
1276 {
1277 // FIXME(eddyb) does this have to be general purpose,
1278 // or could this be handled as `None` with a single
1279 // `wrapper_region` per `ControlRegionInputRewrites`?
1280 Err(Value::ControlRegionInput {
1281 region: wrapper_region,
1282 input_idx: original_idx,
1283 })
1284 } else {
1285 let renumbered_idx = u32::try_from(body_def.inputs.len()).unwrap();
1286 initial_inputs.push(Value::ControlRegionInput {
1287 region: wrapper_region,
1288 input_idx: original_idx,
1289 });
1290 body_def.inputs.push(original_input_decls[original_idx as usize]);
1291 body_def.outputs.push(backedge_value);
1292 Ok(renumbered_idx)
1293 }
1294 })
1295 .collect(),
1296 );
1297 self.control_region_input_rewrites.insert(body, body_input_rewrites);
1298
1299 assert_eq!(initial_inputs.len(), body_def.inputs.len());
1300 assert_eq!(body_def.outputs.len(), body_def.inputs.len());
1301
1302 let repeat_condition = self.materialize_lazy_cond(&repeat_condition);
1303 let loop_node = self.func_def_body.control_nodes.define(
1304 self.cx,
1305 ControlNodeDef {
1306 kind: ControlNodeKind::Loop { initial_inputs, body, repeat_condition },
1307 outputs: [].into_iter().collect(),
1308 }
1309 .into(),
1310 );
1311
1312 let wrapper_region_def = &mut self.func_def_body.control_regions[wrapper_region];
1313 wrapper_region_def.inputs = original_input_decls;
1314 wrapper_region_def
1315 .children
1316 .insert_last(loop_node, &mut self.func_def_body.control_nodes);
1317
1318 // HACK(eddyb) we've treated loop exits as extra "false edges", so
1319 // here they have to be added to the loop (potentially unlocking
1320 // structurization to the outside of the loop, in the caller).
1321 if let Some(exit_targets) = self.loop_header_to_exit_targets.get(&target) {
1322 for &exit_target in exit_targets {
1323 // FIXME(eddyb) what if this is `None`, is that impossible?
1324 if let Some(exit_edge_bundle) = deferred_edges
1325 .get_edge_bundle_mut_by_target(DeferredTarget::Region(exit_target))
1326 {
1327 exit_edge_bundle.accumulated_count += IncomingEdgeCount::ONE;
1328 }
1329 }
1330 }
1331
1332 wrapper_region
1333 } else {
1334 target
1335 };
1336 Ok(ClaimedRegion {
1337 structured_body,
1338 structured_body_inputs: edge_bundle.target_inputs,
1339 deferred_edges,
1340 })
1341 }
1342
1343 /// Structurize `region` by absorbing into it the entire CFG subgraph which
1344 /// it dominates (and deferring any other edges to the rest of the CFG).
1345 ///
1346 /// The output of this process is stored in, and any other bookkeeping is
1347 /// done through, `self.structurize_region_state[region]`.
1348 ///
1349 /// See also [`StructurizeRegionState`]'s docs.
1350 fn structurize_region(&mut self, region: ControlRegion) {
1351 {
1352 let old_state =
1353 self.structurize_region_state.insert(region, StructurizeRegionState::InProgress);
1354 if let Some(old_state) = old_state {
1355 unreachable!(
1356 "cfg::Structurizer::structurize_region: \
1357 already {}, when attempting to start structurization",
1358 match old_state {
1359 StructurizeRegionState::InProgress => "in progress (cycle detected)",
1360 StructurizeRegionState::Ready { .. } => "completed",
1361 StructurizeRegionState::Claimed => "claimed",
1362 }
1363 );
1364 }
1365 }
1366
1367 let control_inst_on_exit = self
1368 .func_def_body
1369 .unstructured_cfg
1370 .as_mut()
1371 .unwrap()
1372 .control_inst_on_exit_from
1373 .remove(region)
1374 .expect(
1375 "cfg::Structurizer::structurize_region: missing \
1376 `ControlInst` (CFG wasn't unstructured in the first place?)",
1377 );
1378
1379 // Start with the concatenation of `region` and `control_inst_on_exit`,
1380 // always appending `ControlNode`s (including the children of entire
1381 // `ClaimedRegion`s) to `region`'s definition itself.
1382 let mut deferred_edges = {
1383 let ControlInst { attrs, kind, inputs, targets, target_inputs } = control_inst_on_exit;
1384
1385 // FIXME(eddyb) this loses `attrs`.
1386 let _ = attrs;
1387
1388 let target_regions: SmallVec<[_; 8]> = targets
1389 .iter()
1390 .map(|&target| {
1391 self.try_claim_edge_bundle(IncomingEdgeBundle {
1392 target,
1393 accumulated_count: IncomingEdgeCount::ONE,
1394 target_inputs: target_inputs.get(&target).cloned().unwrap_or_default(),
1395 })
1396 .map_err(|edge_bundle| {
1397 // HACK(eddyb) special-case "shared `unreachable`" to
1398 // always inline it and avoid awkward "merges".
1399 // FIXME(eddyb) should this be in a separate CFG pass?
1400 // (i.e. is there a risk of other logic needing this?)
1401 let target_is_trivial_unreachable =
1402 match self.structurize_region_state.get(&edge_bundle.target) {
1403 Some(StructurizeRegionState::Ready {
1404 region_deferred_edges: DeferredEdgeBundleSet::Unreachable,
1405 ..
1406 }) => {
1407 // FIXME(eddyb) DRY this "is empty region" check.
1408 self.func_def_body
1409 .at(edge_bundle.target)
1410 .at_children()
1411 .into_iter()
1412 .next()
1413 .is_none()
1414 }
1415 _ => false,
1416 };
1417 if target_is_trivial_unreachable {
1418 DeferredEdgeBundleSet::Unreachable
1419 } else {
1420 DeferredEdgeBundleSet::Always {
1421 target: DeferredTarget::Region(edge_bundle.target),
1422 edge_bundle: edge_bundle.with_target(()),
1423 }
1424 }
1425 })
1426 })
1427 .collect();
1428
1429 match kind {
1430 ControlInstKind::Unreachable => {
1431 assert_eq!((inputs.len(), target_regions.len()), (0, 0));
1432
1433 // FIXME(eddyb) this may result in lost optimizations over
1434 // actually encoding it in `ControlNode`/`ControlRegion`
1435 // (e.g. a new `ControlNodeKind`, or replacing region `outputs`),
1436 // but it's simpler to handle it like this.
1437 //
1438 // NOTE(eddyb) actually, this encoding is lossless *during*
1439 // structurization, and a divergent region can only end up as:
1440 // - the function body, where it implies the function can
1441 // never actually return: not fully structurized currently
1442 // (but only for a silly reason, and is entirely fixable)
1443 // - a `Select` case, where it implies that case never merges
1444 // back into the `Select` node, and potentially that the
1445 // case can never be taken: this is where a structured
1446 // encoding can be introduced, by pruning unreachable
1447 // cases, and potentially even introducing `assume`s
1448 // - a `Loop` body is not actually possible when divergent
1449 // (as there can be no backedge to form a cyclic CFG)
1450 DeferredEdgeBundleSet::Unreachable
1451 }
1452
1453 ControlInstKind::ExitInvocation(kind) => {
1454 assert_eq!(target_regions.len(), 0);
1455
1456 let control_node = self.func_def_body.control_nodes.define(
1457 self.cx,
1458 ControlNodeDef {
1459 kind: ControlNodeKind::ExitInvocation { kind, inputs },
1460 outputs: [].into_iter().collect(),
1461 }
1462 .into(),
1463 );
1464 self.func_def_body.control_regions[region]
1465 .children
1466 .insert_last(control_node, &mut self.func_def_body.control_nodes);
1467
1468 DeferredEdgeBundleSet::Unreachable
1469 }
1470
1471 ControlInstKind::Return => {
1472 assert_eq!(target_regions.len(), 0);
1473
1474 DeferredEdgeBundleSet::Always {
1475 target: DeferredTarget::Return,
1476 edge_bundle: IncomingEdgeBundle {
1477 accumulated_count: IncomingEdgeCount::default(),
1478 target: (),
1479 target_inputs: inputs,
1480 },
1481 }
1482 }
1483
1484 ControlInstKind::Branch => {
1485 assert_eq!((inputs.len(), target_regions.len()), (0, 1));
1486
1487 self.append_maybe_claimed_region(
1488 region,
1489 target_regions.into_iter().next().unwrap(),
1490 )
1491 }
1492
1493 ControlInstKind::SelectBranch(kind) => {
1494 assert_eq!(inputs.len(), 1);
1495
1496 let scrutinee = inputs[0];
1497
1498 self.structurize_select_into(region, kind, Ok(scrutinee), target_regions)
1499 }
1500 }
1501 };
1502
1503 // Try to resolve deferred edges that may have accumulated, and keep
1504 // going until there's no more deferred edges that can be claimed.
1505 loop {
1506 // FIXME(eddyb) this should try to take as many edges as possible,
1507 // and incorporate them all at once, potentially with a switch instead
1508 // of N individual branches with their own booleans etc.
1509 let (claimed, else_deferred_edges) = deferred_edges.split_out_matching(|deferred| {
1510 let deferred_target = deferred.edge_bundle.target;
1511 let DeferredEdgeBundle { condition, edge_bundle } = match deferred_target {
1512 DeferredTarget::Region(target) => deferred.with_target(target),
1513 DeferredTarget::Return => return Err(deferred),
1514 };
1515
1516 match self.try_claim_edge_bundle(edge_bundle) {
1517 Ok(claimed_region) => Ok((condition, claimed_region)),
1518
1519 Err(new_edge_bundle) => {
1520 let new_target = DeferredTarget::Region(new_edge_bundle.target);
1521 Err(DeferredEdgeBundle {
1522 condition,
1523 edge_bundle: new_edge_bundle.with_target(new_target),
1524 })
1525 }
1526 }
1527 });
1528 let Some((condition, then_region)) = claimed else {
1529 deferred_edges = else_deferred_edges;
1530 break;
1531 };
1532
1533 deferred_edges = self.structurize_select_into(
1534 region,
1535 SelectionKind::BoolCond,
1536 Err(&condition),
1537 [Ok(then_region), Err(else_deferred_edges)].into_iter().collect(),
1538 );
1539 }
1540
1541 // Cache the edge count for backedges (which later get turned into loops).
1542 let accumulated_backedge_count = deferred_edges
1543 .get_edge_bundle_by_target(DeferredTarget::Region(region))
1544 .map(|backedge| backedge.accumulated_count)
1545 .unwrap_or_default();
1546
1547 let old_state =
1548 self.structurize_region_state.insert(region, StructurizeRegionState::Ready {
1549 accumulated_backedge_count,
1550 region_deferred_edges: deferred_edges,
1551 });
1552 if !matches!(old_state, Some(StructurizeRegionState::InProgress)) {
1553 unreachable!(
1554 "cfg::Structurizer::structurize_region: \
1555 already {}, when attempting to store structurization result",
1556 match old_state {
1557 None => "reverted to missing (removed from the map?)",
1558 Some(StructurizeRegionState::InProgress) => unreachable!(),
1559 Some(StructurizeRegionState::Ready { .. }) => "completed",
1560 Some(StructurizeRegionState::Claimed) => "claimed",
1561 }
1562 );
1563 }
1564 }
1565
1566 /// Append to `parent_region` a new `Select` [`ControlNode`] built from
1567 /// partially structured `cases`, merging all of their `deferred_edges`
1568 /// together into a combined `DeferredEdgeBundleSet` (which gets returned).
1569 //
1570 // FIXME(eddyb) handle `unreachable` cases losslessly.
1571 fn structurize_select_into(
1572 &mut self,
1573 parent_region: ControlRegion,
1574 kind: SelectionKind,
1575 scrutinee: Result<Value, &LazyCond>,
1576 mut cases: SmallVec<[Result<ClaimedRegion, DeferredEdgeBundleSet>; 8]>,
1577 ) -> DeferredEdgeBundleSet {
1578 // HACK(eddyb) don't nest a sole convergent case inside the `Select`,
1579 // and instead prefer early convergence (see also `EventualCfgExits`).
1580 // NOTE(eddyb) this also happens to handle the situation where `Select`
1581 // isn't even needed (i.e. the other cases don't even have side-effects),
1582 // via the `any_non_empty_case` check (after taking `convergent_case`).
1583 // FIXME(eddyb) consider introducing some kind of `assume` for `scrutinee`,
1584 // to preserve its known value (whenever `convergent_case` is reached).
1585 let convergent_cases = cases.iter_mut().filter(|case| match case {
1586 Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => {
1587 !matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable)
1588 }
1589 });
1590 if let Ok(convergent_case) = convergent_cases.exactly_one() {
1591 // HACK(eddyb) this relies on `structurize_select_into`'s behavior
1592 // for `unreachable` cases being largely equivalent to empty cases.
1593 let convergent_case =
1594 mem::replace(convergent_case, Err(DeferredEdgeBundleSet::Unreachable));
1595
1596 // FIXME(eddyb) avoid needing recursion, by instead changing the
1597 // "`Select` node insertion cursor" (into `parent_region`), and
1598 // stashing `convergent_case`'s deferred edges to return later.
1599 let deferred_edges =
1600 self.structurize_select_into(parent_region, kind, scrutinee, cases);
1601 assert!(matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable));
1602
1603 // The sole convergent case goes in the `parent_region`, and its
1604 // relationship with the `Select` (if it was even necessary at all)
1605 // is only at most one of side-effect sequencing.
1606 return self.append_maybe_claimed_region(parent_region, convergent_case);
1607 }
1608
1609 // Support lazily defining the `Select` node, as soon as it's necessary
1610 // (i.e. to plumb per-case dataflow through `Value::ControlNodeOutput`s),
1611 // but also if any of the cases actually have non-empty regions, which
1612 // is checked after the special-cases (which return w/o a `Select` at all).
1613 //
1614 // FIXME(eddyb) some cases may be `unreachable`, and that's erased here.
1615 let mut cached_select_node = None;
1616 let mut non_move_kind = Some(kind);
1617 let mut get_or_define_select_node = |this: &mut Self, cases: &[_]| {
1618 *cached_select_node.get_or_insert_with(|| {
1619 let kind = non_move_kind.take().unwrap();
1620 let cases = cases
1621 .iter()
1622 .map(|case| {
1623 let case_region = match case {
1624 &Ok(ClaimedRegion { structured_body, .. }) => structured_body,
1625 Err(_) => this
1626 .func_def_body
1627 .control_regions
1628 .define(this.cx, ControlRegionDef::default()),
1629 };
1630
1631 // FIXME(eddyb) should these be asserts that it's already empty?
1632 let case_region_def = this.func_def_body.at_mut(case_region).def();
1633 case_region_def.outputs.clear();
1634 case_region
1635 })
1636 .collect();
1637 let scrutinee =
1638 scrutinee.unwrap_or_else(|lazy_cond| this.materialize_lazy_cond(lazy_cond));
1639 let select_node = this.func_def_body.control_nodes.define(
1640 this.cx,
1641 ControlNodeDef {
1642 kind: ControlNodeKind::Select { kind, scrutinee, cases },
1643 outputs: [].into_iter().collect(),
1644 }
1645 .into(),
1646 );
1647 this.func_def_body.control_regions[parent_region]
1648 .children
1649 .insert_last(select_node, &mut this.func_def_body.control_nodes);
1650 select_node
1651 })
1652 };
1653
1654 // Ensure the `Select` exists if needed for any per-case side-effects.
1655 let any_non_empty_case = cases.iter().any(|case| {
1656 case.as_ref().is_ok_and(|&ClaimedRegion { structured_body, .. }| {
1657 self.func_def_body.at(structured_body).at_children().into_iter().next().is_some()
1658 })
1659 });
1660 if any_non_empty_case {
1661 get_or_define_select_node(self, &cases);
1662 }
1663
1664 // Gather the full set of deferred edges (and returns).
1665 struct DeferredTargetSummary {
1666 input_count: usize,
1667 total_edge_count: IncomingEdgeCount,
1668 }
1669 let mut deferred_targets = FxIndexMap::default();
1670 for case in &cases {
1671 let case_deferred_edges = match case {
1672 Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => deferred_edges,
1673 };
1674 for (target, edge_bundle) in case_deferred_edges.iter_targets_with_edge_bundle() {
1675 let input_count = edge_bundle.target_inputs.len();
1676
1677 let summary = deferred_targets.entry(target).or_insert(DeferredTargetSummary {
1678 input_count,
1679 total_edge_count: IncomingEdgeCount::default(),
1680 });
1681 assert_eq!(summary.input_count, input_count);
1682 summary.total_edge_count += edge_bundle.accumulated_count;
1683 }
1684 }
1685
1686 // FIXME(eddyb) `control_region_input_rewrites` mappings, generated
1687 // for every `ClaimedRegion` that has been merged into a larger region,
1688 // only get applied after structurization fully completes, but here it's
1689 // very useful to have the fully resolved values across all `cases`'
1690 // incoming/outgoing edges (note, however, that within outgoing edges,
1691 // i.e. `case_deferred_edges`' `target_inputs`, `Value::ControlRegionInput`
1692 // are not resolved using the contents of `case_structured_body_inputs`,
1693 // which is kept hermetic until just before `structurize_select` returns).
1694 for case in &mut cases {
1695 let (case_structured_body_inputs, case_deferred_edges) = match case {
1696 Ok(ClaimedRegion { structured_body_inputs, deferred_edges, .. }) => {
1697 (&mut structured_body_inputs[..], deferred_edges)
1698 }
1699 Err(deferred_edges) => (&mut [][..], deferred_edges),
1700 };
1701 let all_values = case_structured_body_inputs.iter_mut().chain(
1702 case_deferred_edges
1703 .iter_targets_with_edge_bundle_mut()
1704 .flat_map(|(_, edge_bundle)| &mut edge_bundle.target_inputs),
1705 );
1706 for v in all_values {
1707 ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites)
1708 .transform_value_use(v)
1709 .apply_to(v);
1710 }
1711 }
1712
1713 // Merge all `deferred_edges` by plumbing their per-case `target_input`s
1714 // (into per-case region outputs, and therefore the `Select` outputs)
1715 // out of all cases that can reach them, with undef constants used to
1716 // fill any gaps (i.e. for the targets not reached through each case),
1717 // while deferred conditions are collected separately (for `LazyCond`).
1718 let deferred_edges = deferred_targets.into_iter().map(|(target, target_summary)| {
1719 let DeferredTargetSummary { input_count, total_edge_count } = target_summary;
1720
1721 // HACK(eddyb) `Err` wraps only `LazyCond::{Undef,False}`, which allows
1722 // distinguishing between "not taken" and "not even reachable".
1723 let per_case_deferred: SmallVec<[Result<DeferredEdgeBundle<()>, LazyCond>; 8]> = cases
1724 .iter_mut()
1725 .map(|case| match case {
1726 Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => {
1727 if let DeferredEdgeBundleSet::Unreachable = deferred_edges {
1728 Err(LazyCond::Undef)
1729 } else {
1730 deferred_edges
1731 .steal_deferred_by_target_without_removal(target)
1732 .ok_or(LazyCond::False)
1733 }
1734 }
1735 })
1736 .collect();
1737
1738 let target_inputs = (0..input_count)
1739 .map(|target_input_idx| {
1740 let per_case_target_input = per_case_deferred.iter().map(|per_case_deferred| {
1741 per_case_deferred.as_ref().ok().map(
1742 |DeferredEdgeBundle { edge_bundle, .. }| {
1743 edge_bundle.target_inputs[target_input_idx]
1744 },
1745 )
1746 });
1747
1748 // Avoid introducing dynamic dataflow when the same value is
1749 // used across all cases (which can reach this `target`).
1750 let unique_target_input_value = per_case_target_input
1751 .clone()
1752 .zip_eq(&cases)
1753 .filter_map(|(v, case)| Some((v?, case)))
1754 .map(|(v, case)| {
1755 // If possible, resolve `v` to a `Value` valid in
1756 // `parent_region` (i.e. the `Select` node parent).
1757 match case {
1758 // `case`'s `structured_body` effectively "wraps"
1759 // its `deferred_edges` (where `v` came from),
1760 // so values from `parent_region` can only be
1761 // hermetically used via `structured_body` inputs.
1762 Ok(ClaimedRegion {
1763 structured_body,
1764 structured_body_inputs,
1765 ..
1766 }) => match v {
1767 Value::Const(_) => Ok(v),
1768 Value::ControlRegionInput { region, input_idx }
1769 if region == *structured_body =>
1770 {
1771 Ok(structured_body_inputs[input_idx as usize])
1772 }
1773 _ => Err(()),
1774 },
1775
1776 // `case` has no region of its own, so everything
1777 // it carries is already from within `parent_region`.
1778 Err(_) => Ok(v),
1779 }
1780 })
1781 .dedup()
1782 .exactly_one();
1783 if let Ok(Ok(v)) = unique_target_input_value {
1784 return v;
1785 }
1786
1787 let ty = match target {
1788 DeferredTarget::Region(target) => {
1789 self.func_def_body.at(target).def().inputs[target_input_idx].ty
1790 }
1791 // HACK(eddyb) in the absence of `FuncDecl`, infer the
1792 // type from each returned value (and require they match).
1793 DeferredTarget::Return => per_case_target_input
1794 .clone()
1795 .flatten()
1796 .map(|v| self.func_def_body.at(v).type_of(self.cx))
1797 .dedup()
1798 .exactly_one()
1799 .ok()
1800 .expect("mismatched `return`ed value types"),
1801 };
1802
1803 let select_node = get_or_define_select_node(self, &cases);
1804 let output_decls = &mut self.func_def_body.at_mut(select_node).def().outputs;
1805 let output_idx = output_decls.len();
1806 output_decls.push(ControlNodeOutputDecl { attrs: AttrSet::default(), ty });
1807 for (case_idx, v) in per_case_target_input.enumerate() {
1808 let v = v.unwrap_or_else(|| Value::Const(self.const_undef(ty)));
1809
1810 let case_region = match &self.func_def_body.at(select_node).def().kind {
1811 ControlNodeKind::Select { cases, .. } => cases[case_idx],
1812 _ => unreachable!(),
1813 };
1814 let outputs = &mut self.func_def_body.at_mut(case_region).def().outputs;
1815 assert_eq!(outputs.len(), output_idx);
1816 outputs.push(v);
1817 }
1818 Value::ControlNodeOutput {
1819 control_node: select_node,
1820 output_idx: output_idx.try_into().unwrap(),
1821 }
1822 })
1823 .collect();
1824
1825 // Simplify `LazyCond`s eagerly, to reduce costs later on, or even
1826 // outright avoid defining the `Select` node in the first place.
1827 //
1828 // FIXME(eddyb) move all simplifications from `materialize_lazy_cond`
1829 // to here (allowing e.g. not defining the `Select` in more cases).
1830 let per_case_conds =
1831 per_case_deferred.iter().map(|per_case_deferred| match per_case_deferred {
1832 Ok(DeferredEdgeBundle { condition, .. }) => condition,
1833 Err(undef_or_false) => undef_or_false,
1834 });
1835 let condition = if per_case_conds
1836 .clone()
1837 .all(|cond| matches!(cond, LazyCond::Undef | LazyCond::True))
1838 {
1839 LazyCond::True
1840 } else {
1841 LazyCond::Merge(Rc::new(LazyCondMerge::Select {
1842 control_node: get_or_define_select_node(self, &cases),
1843 per_case_conds: per_case_conds.cloned().collect(),
1844 }))
1845 };
1846
1847 DeferredEdgeBundle {
1848 condition,
1849 edge_bundle: IncomingEdgeBundle {
1850 target,
1851 accumulated_count: total_edge_count,
1852 target_inputs,
1853 },
1854 }
1855 });
1856 let deferred_edges = deferred_edges.collect();
1857
1858 // Only as the very last step, can per-case `region_inputs` be added to
1859 // `control_region_input_rewrites`.
1860 //
1861 // FIXME(eddyb) don't replace `Value::ControlRegionInput { region, .. }`
1862 // with `region_inputs` when the `region` ends up a `ControlNode` child,
1863 // but instead make all `ControlRegion`s entirely hermetic wrt inputs.
1864 #[allow(clippy::manual_flatten)]
1865 for case in cases {
1866 if let Ok(ClaimedRegion { structured_body, structured_body_inputs, .. }) = case {
1867 if !structured_body_inputs.is_empty() {
1868 self.control_region_input_rewrites.insert(
1869 structured_body,
1870 ControlRegionInputRewrites::ReplaceWith(structured_body_inputs),
1871 );
1872 self.func_def_body.at_mut(structured_body).def().inputs.clear();
1873 }
1874 }
1875 }
1876
1877 deferred_edges
1878 }
1879
1880 // FIXME(eddyb) this should try to handle as many `LazyCond` as are available,
1881 // for incorporating them all at once, ideally with a switch instead
1882 // of N individual branches with their own booleans etc.
1883 fn materialize_lazy_cond(&mut self, cond: &LazyCond) -> Value {
1884 match cond {
1885 LazyCond::Undef => Value::Const(self.const_undef(self.type_bool)),
1886 LazyCond::False => Value::Const(self.const_false),
1887 LazyCond::True => Value::Const(self.const_true),
1888
1889 // `LazyCond::Merge` was only created in the first place if a merge
1890 // was actually necessary, so there shouldn't be simplifications to
1891 // do here (i.e. the value provided is if `materialize_lazy_cond`
1892 // never gets called because the target has become unconditional).
1893 //
1894 // FIXME(eddyb) there is still an `if cond { true } else { false }`
1895 // special-case (repalcing with just `cond`), that cannot be expressed
1896 // currently in `LazyCond` itself (but maybe it should be).
1897 LazyCond::Merge(merge) => {
1898 let LazyCondMerge::Select { control_node, ref per_case_conds } = **merge;
1899
1900 // HACK(eddyb) this won't actually allocate most of the time,
1901 // and avoids complications later below, when mutating the cases.
1902 let per_case_conds: SmallVec<[_; 8]> = per_case_conds
1903 .into_iter()
1904 .map(|cond| self.materialize_lazy_cond(cond))
1905 .collect();
1906
1907 let ControlNodeDef { kind, outputs: output_decls } =
1908 &mut *self.func_def_body.control_nodes[control_node];
1909 let cases = match kind {
1910 ControlNodeKind::Select { kind, scrutinee, cases } => {
1911 assert_eq!(cases.len(), per_case_conds.len());
1912
1913 if let SelectionKind::BoolCond = kind {
1914 let [val_false, val_true] =
1915 [self.const_false, self.const_true].map(Value::Const);
1916 if per_case_conds[..] == [val_true, val_false] {
1917 return *scrutinee;
1918 } else if per_case_conds[..] == [val_false, val_true] {
1919 // FIXME(eddyb) this could also be special-cased,
1920 // at least when called from the topmost level,
1921 // where which side is `false`/`true` doesn't
1922 // matter (or we could even generate `!cond`?).
1923 let _not_cond = *scrutinee;
1924 }
1925 }
1926
1927 cases
1928 }
1929 _ => unreachable!(),
1930 };
1931
1932 let output_idx = u32::try_from(output_decls.len()).unwrap();
1933 output_decls
1934 .push(ControlNodeOutputDecl { attrs: AttrSet::default(), ty: self.type_bool });
1935
1936 for (&case, cond) in cases.iter().zip_eq(per_case_conds) {
1937 let ControlRegionDef { outputs, .. } =
1938 &mut self.func_def_body.control_regions[case];
1939 outputs.push(cond);
1940 assert_eq!(outputs.len(), output_decls.len());
1941 }
1942
1943 Value::ControlNodeOutput { control_node, output_idx }
1944 }
1945 }
1946 }
1947
1948 /// Append to `parent_region` the children of `maybe_claimed_region` (if `Ok`),
1949 /// returning the `DeferredEdgeBundleSet` from `maybe_claimed_region`.
1950 //
1951 // FIXME(eddyb) the name isn't great, but e.g. "absorb into" would also be
1952 // weird (and on top of that, the append direction can be tricky to express).
1953 fn append_maybe_claimed_region(
1954 &mut self,
1955 parent_region: ControlRegion,
1956 maybe_claimed_region: Result<ClaimedRegion, DeferredEdgeBundleSet>,
1957 ) -> DeferredEdgeBundleSet {
1958 match maybe_claimed_region {
1959 Ok(ClaimedRegion { structured_body, structured_body_inputs, deferred_edges }) => {
1960 if !structured_body_inputs.is_empty() {
1961 self.control_region_input_rewrites.insert(
1962 structured_body,
1963 ControlRegionInputRewrites::ReplaceWith(structured_body_inputs),
1964 );
1965 }
1966 let new_children =
1967 mem::take(&mut self.func_def_body.at_mut(structured_body).def().children);
1968 self.func_def_body.control_regions[parent_region]
1969 .children
1970 .append(new_children, &mut self.func_def_body.control_nodes);
1971 deferred_edges
1972 }
1973 Err(deferred_edges) => deferred_edges,
1974 }
1975 }
1976
1977 /// When structurization is only partial, and there remain unclaimed regions,
1978 /// they have to be reintegrated into the CFG, putting back [`ControlInst`]s
1979 /// where `structurize_region` has taken them from.
1980 ///
1981 /// This function handles one region at a time to make it more manageable,
1982 /// despite it having a single call site (in a loop in `structurize_func`).
1983 fn rebuild_cfg_from_unclaimed_region_deferred_edges(
1984 &mut self,
1985 region: ControlRegion,
1986 mut deferred_edges: DeferredEdgeBundleSet,
1987 ) {
1988 assert!(
1989 self.structurize_region_state.is_empty(),
1990 "cfg::Structurizer::rebuild_cfg_from_unclaimed_region_deferred_edges:
1991 must only be called from `structurize_func`, \
1992 after it takes `structurize_region_state`"
1993 );
1994
1995 // Build a chain of conditional branches to apply deferred edges.
1996 let mut control_source = Some(region);
1997 loop {
1998 let taken_then;
1999 (taken_then, deferred_edges) =
2000 deferred_edges.split_out_matching(|deferred| match deferred.edge_bundle.target {
2001 DeferredTarget::Region(target) => {
2002 Ok((deferred.condition, (target, deferred.edge_bundle.target_inputs)))
2003 }
2004 DeferredTarget::Return => Err(deferred),
2005 });
2006 let Some((condition, then_target_and_inputs)) = taken_then else {
2007 break;
2008 };
2009 let branch_source = control_source.take().unwrap();
2010 let else_target_and_inputs = match deferred_edges {
2011 // At most one deferral left, so it can be used as the "else"
2012 // case, or the branch left unconditional in its absence.
2013 DeferredEdgeBundleSet::Unreachable => None,
2014 DeferredEdgeBundleSet::Always {
2015 target: DeferredTarget::Region(else_target),
2016 edge_bundle,
2017 } => {
2018 deferred_edges = DeferredEdgeBundleSet::Unreachable;
2019 Some((else_target, edge_bundle.target_inputs))
2020 }
2021
2022 // Either more branches, or a deferred return, are needed, so
2023 // the "else" case must be a `ControlRegion` that itself can
2024 // have a `ControlInst` attached to it later on.
2025 _ => {
2026 let new_empty_region = self
2027 .func_def_body
2028 .control_regions
2029 .define(self.cx, ControlRegionDef::default());
2030 control_source = Some(new_empty_region);
2031 Some((new_empty_region, [].into_iter().collect()))
2032 }
2033 };
2034
2035 let condition = Some(condition)
2036 .filter(|_| else_target_and_inputs.is_some())
2037 .map(|cond| self.materialize_lazy_cond(&cond));
2038 let branch_control_inst = ControlInst {
2039 attrs: AttrSet::default(),
2040 kind: if condition.is_some() {
2041 ControlInstKind::SelectBranch(SelectionKind::BoolCond)
2042 } else {
2043 ControlInstKind::Branch
2044 },
2045 inputs: condition.into_iter().collect(),
2046 targets: [&then_target_and_inputs]
2047 .into_iter()
2048 .chain(&else_target_and_inputs)
2049 .map(|&(target, _)| target)
2050 .collect(),
2051 target_inputs: [then_target_and_inputs]
2052 .into_iter()
2053 .chain(else_target_and_inputs)
2054 .filter(|(_, inputs)| !inputs.is_empty())
2055 .collect(),
2056 };
2057 assert!(
2058 self.func_def_body
2059 .unstructured_cfg
2060 .as_mut()
2061 .unwrap()
2062 .control_inst_on_exit_from
2063 .insert(branch_source, branch_control_inst)
2064 .is_none()
2065 );
2066 }
2067
2068 let deferred_return = match deferred_edges {
2069 DeferredEdgeBundleSet::Unreachable => None,
2070 DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
2071 Some(edge_bundle.target_inputs)
2072 }
2073 _ => unreachable!(),
2074 };
2075
2076 let final_source = match control_source {
2077 Some(region) => region,
2078 None => {
2079 // The loop above handled all the targets, nothing left to do.
2080 assert!(deferred_return.is_none());
2081 return;
2082 }
2083 };
2084
2085 // Final deferral is either a `Return` (if needed), or an `Unreachable`
2086 // (only when truly divergent, i.e. no `deferred_edges`/`deferred_return`).
2087 let final_control_inst = {
2088 let (kind, inputs) = match deferred_return {
2089 Some(return_values) => (ControlInstKind::Return, return_values),
2090 None => (ControlInstKind::Unreachable, [].into_iter().collect()),
2091 };
2092 ControlInst {
2093 attrs: AttrSet::default(),
2094 kind,
2095 inputs,
2096 targets: [].into_iter().collect(),
2097 target_inputs: FxIndexMap::default(),
2098 }
2099 };
2100 assert!(
2101 self.func_def_body
2102 .unstructured_cfg
2103 .as_mut()
2104 .unwrap()
2105 .control_inst_on_exit_from
2106 .insert(final_source, final_control_inst)
2107 .is_none()
2108 );
2109 }
2110
2111 /// Create an undefined constant (as a placeholder where a value needs to be
2112 /// present, but won't actually be used), of type `ty`.
2113 fn const_undef(&self, ty: Type) -> Const {
2114 // FIXME(eddyb) SPIR-T should have native undef itself.
2115 let wk = &spv::spec::Spec::get().well_known;
2116 self.cx.intern(ConstDef {
2117 attrs: AttrSet::default(),
2118 ty,
2119 kind: ConstKind::SpvInst {
2120 spv_inst_and_const_inputs: Rc::new((wk.OpUndef.into(), [].into_iter().collect())),
2121 },
2122 })
2123 }
2124}