spirt/
cfg.rs

1//! Control-flow graph (CFG) abstractions and utilities.
2
3use crate::transform::{InnerInPlaceTransform as _, Transformer};
4use crate::{
5    AttrSet, Const, ConstDef, ConstKind, Context, ControlNode, ControlNodeDef, ControlNodeKind,
6    ControlNodeOutputDecl, ControlRegion, ControlRegionDef, EntityOrientedDenseMap, FuncDefBody,
7    FxIndexMap, FxIndexSet, SelectionKind, Type, TypeKind, Value, spv,
8};
9use itertools::{Either, Itertools};
10use smallvec::SmallVec;
11use std::mem;
12use std::rc::Rc;
13
14/// The control-flow graph (CFG) of a function, as control-flow instructions
15/// ([`ControlInst`]s) attached to [`ControlRegion`]s, as an "action on exit", i.e.
16/// "terminator" (while intra-region control-flow is strictly structured).
17#[derive(Clone, Default)]
18pub struct ControlFlowGraph {
19    pub control_inst_on_exit_from: EntityOrientedDenseMap<ControlRegion, ControlInst>,
20
21    // HACK(eddyb) this currently only comes from `OpLoopMerge`, and cannot be
22    // inferred (because implies too strong of an ownership/uniqueness notion).
23    pub loop_merge_to_loop_header: FxIndexMap<ControlRegion, ControlRegion>,
24}
25
26#[derive(Clone)]
27pub struct ControlInst {
28    pub attrs: AttrSet,
29
30    pub kind: ControlInstKind,
31
32    pub inputs: SmallVec<[Value; 2]>,
33
34    // FIXME(eddyb) change the inline size of this to fit most instructions.
35    pub targets: SmallVec<[ControlRegion; 4]>,
36
37    /// `target_inputs[region][input_idx]` is the [`Value`] that
38    /// `Value::ControlRegionInput { region, input_idx }` will get on entry,
39    /// where `region` must be appear at least once in `targets` - this is a
40    /// separate map instead of being part of `targets` because it reflects the
41    /// limitations of φ ("phi") nodes, which (unlike "basic block arguments")
42    /// cannot tell apart multiple edges with the same source and destination.
43    pub target_inputs: FxIndexMap<ControlRegion, SmallVec<[Value; 2]>>,
44}
45
46#[derive(Clone)]
47pub enum ControlInstKind {
48    /// Reaching this point in the control-flow is undefined behavior, e.g.:
49    /// * a `SelectBranch` case that's known to be impossible
50    /// * after a function call, where the function never returns
51    ///
52    /// Optimizations can take advantage of this information, to assume that any
53    /// necessary preconditions for reaching this point, are never met.
54    Unreachable,
55
56    /// Leave the current function, optionally returning a value.
57    Return,
58
59    /// Leave the current invocation, similar to returning from every function
60    /// call in the stack (up to and including the entry-point), but potentially
61    /// indicating a fatal error as well.
62    ExitInvocation(ExitInvocationKind),
63
64    /// Unconditional branch to a single target.
65    Branch,
66
67    /// Branch to one of several targets, chosen by a single value input.
68    SelectBranch(SelectionKind),
69}
70
71#[derive(Clone)]
72pub enum ExitInvocationKind {
73    SpvInst(spv::Inst),
74}
75
76impl ControlFlowGraph {
77    /// Iterate over all [`ControlRegion`]s making up `func_def_body`'s CFG, in
78    /// reverse post-order (RPO).
79    ///
80    /// RPO iteration over a CFG provides certain guarantees, most importantly
81    /// that dominators are visited before the entire subgraph they dominate.
82    pub fn rev_post_order(
83        &self,
84        func_def_body: &FuncDefBody,
85    ) -> impl DoubleEndedIterator<Item = ControlRegion> {
86        let mut post_order = SmallVec::<[_; 8]>::new();
87        self.traverse_whole_func(func_def_body, &mut TraversalState {
88            incoming_edge_counts: EntityOrientedDenseMap::new(),
89
90            pre_order_visit: |_| {},
91            post_order_visit: |region| post_order.push(region),
92
93            // NOTE(eddyb) this doesn't impact semantics, but combined with
94            // the final reversal, it should keep targets in the original
95            // order in the cases when they didn't get deduplicated.
96            reverse_targets: true,
97        });
98        post_order.into_iter().rev()
99    }
100}
101
102// HACK(eddyb) this only serves to disallow accessing `private_count` field of
103// `IncomingEdgeCount`.
104mod sealed {
105    /// Opaque newtype for the count of incoming edges (into a [`ControlRegion`](crate::ControlRegion)).
106    ///
107    /// The private field prevents direct mutation or construction, forcing the
108    /// use of [`IncomingEdgeCount::ONE`] and addition operations to produce some
109    /// specific count (which would require explicit workarounds for misuse).
110    #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
111    pub(super) struct IncomingEdgeCount(usize);
112
113    impl IncomingEdgeCount {
114        pub(super) const ONE: Self = Self(1);
115    }
116
117    impl std::ops::Add for IncomingEdgeCount {
118        type Output = Self;
119        fn add(self, other: Self) -> Self {
120            Self(self.0 + other.0)
121        }
122    }
123
124    impl std::ops::AddAssign for IncomingEdgeCount {
125        fn add_assign(&mut self, other: Self) {
126            *self = *self + other;
127        }
128    }
129}
130use sealed::IncomingEdgeCount;
131
132struct TraversalState<PreVisit: FnMut(ControlRegion), PostVisit: FnMut(ControlRegion)> {
133    incoming_edge_counts: EntityOrientedDenseMap<ControlRegion, IncomingEdgeCount>,
134    pre_order_visit: PreVisit,
135    post_order_visit: PostVisit,
136
137    // FIXME(eddyb) should this be a generic parameter for "targets iterator"?
138    reverse_targets: bool,
139}
140
141impl ControlFlowGraph {
142    fn traverse_whole_func(
143        &self,
144        func_def_body: &FuncDefBody,
145        state: &mut TraversalState<impl FnMut(ControlRegion), impl FnMut(ControlRegion)>,
146    ) {
147        let func_at_body = func_def_body.at_body();
148
149        // Quick sanity check that this is the right CFG for `func_def_body`.
150        assert!(std::ptr::eq(func_def_body.unstructured_cfg.as_ref().unwrap(), self));
151        assert!(func_at_body.def().outputs.is_empty());
152
153        self.traverse(func_def_body.body, state);
154    }
155
156    fn traverse(
157        &self,
158        region: ControlRegion,
159        state: &mut TraversalState<impl FnMut(ControlRegion), impl FnMut(ControlRegion)>,
160    ) {
161        // FIXME(eddyb) `EntityOrientedDenseMap` should have an `entry` API.
162        if let Some(existing_count) = state.incoming_edge_counts.get_mut(region) {
163            *existing_count += IncomingEdgeCount::ONE;
164            return;
165        }
166        state.incoming_edge_counts.insert(region, IncomingEdgeCount::ONE);
167
168        (state.pre_order_visit)(region);
169
170        let control_inst = self
171            .control_inst_on_exit_from
172            .get(region)
173            .expect("cfg: missing `ControlInst`, despite having left structured control-flow");
174
175        let targets = control_inst.targets.iter().copied();
176        let targets = if state.reverse_targets {
177            Either::Left(targets.rev())
178        } else {
179            Either::Right(targets)
180        };
181        for target in targets {
182            self.traverse(target, state);
183        }
184
185        (state.post_order_visit)(region);
186    }
187}
188
189/// Minimal loop analysis, based on Tarjan's SCC (strongly connected components)
190/// algorithm, applied recursively (for every level of loop nesting).
191///
192/// Here "minimal" means that each loops is the smallest CFG subgraph possible
193/// (excluding any control-flow paths that cannot reach a backedge and cycle),
194/// i.e. each loop is a CFG SCC (strongly connected component).
195///
196/// These "minimal loops" contrast with the "maximal loops" that the greedy
197/// architecture of the structurizer would naively produce, with the main impact
198/// of the difference being where loop exits (`break`s) "merge" (or "reconverge"),
199/// which SPIR-V encodes via `OpLoopMerge`, and is significant for almost anything
200/// where shared memory and/or subgroup ops can allow observing when invocations
201/// "wait for others in the subgroup to exit the loop" (or when they fail to wait).
202///
203/// This analysis was added to because of two observations wrt "reconvergence":
204/// 1. syntactic loops (from some high-level language), when truly structured
205///    (i.e. only using `while`/`do`-`while` exit conditions, not `break` etc.),
206///    *always* map to "minimal loops" on a CFG, as the only loop exit edge is
207///    built-in, and no part of the syntactic "loop body" can be its successor
208/// 2. more pragmatically, compiling shader languages to SPIR-V seems to (almost?)
209///    always *either* fully preserve syntactic loops (via SPIR-V `OpLoopMerge`),
210///    *or* structurize CFGs in a way that produces "minimal loops", which can
211///    be misleading with explicit `break`s (moving user code from just before
212///    the `break` to after the loop), but is less impactful than "maximal loops"
213struct LoopFinder<'a> {
214    cfg: &'a ControlFlowGraph,
215
216    // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?).
217    loop_header_to_exit_targets: FxIndexMap<ControlRegion, FxIndexSet<ControlRegion>>,
218
219    /// SCC accumulation stack, where CFG nodes collect during the depth-first
220    /// traversal, and are only popped when their "SCC root" (loop header) is
221    /// (note that multiple SCCs on the stack does *not* indicate SCC nesting,
222    /// but rather a path between two SCCs, i.e. a loop *following* another).
223    scc_stack: Vec<ControlRegion>,
224    /// Per-CFG-node traversal state (often just pointing to a `scc_stack` slot).
225    scc_state: EntityOrientedDenseMap<ControlRegion, SccState>,
226}
227
228#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
229struct SccStackIdx(u32);
230
231#[derive(PartialEq, Eq)]
232enum SccState {
233    /// CFG node has been reached and ended up somewhere on the `scc_stack`,
234    /// where it will remain until the SCC it's part of will be completed.
235    Pending(SccStackIdx),
236
237    /// CFG node had been reached once, but is no longer on the `scc_stack`, its
238    /// parent SCC having been completed (or it wasn't in an SCC to begin with).
239    Complete(EventualCfgExits),
240}
241
242/// Summary of all the ways in which a CFG node may eventually leave the CFG.
243///
244// HACK(eddyb) a loop can reach a CFG subgraph that happens to always "diverge"
245// (e.g. ending in `unreachable`, `ExitInvocation`, or even infinite loops,
246// though those have other issues) and strictly speaking that would always be
247// an edge leaving the SCC of the loop (as it can't reach a backedge), but it
248// still shouldn't be treated as an exit because it doesn't reconverge to the
249// rest of the function, i.e. it can't reach any `return`s, which is what this
250// tracks in order to later make a more accurate decision wrt loop exits.
251//
252// NOTE(eddyb) only in the case where a loop *also* has non-"diverging" exits,
253// do the "diverging" ones not get treated as exits, as the presence of both
254// disambiguates `break`s from naturally "diverging" sections of the loop body
255// (at least for CFGs built from languages without labelled `break` or `goto`,
256// but even then it would be pretty convoluted to set up `break` to diverge,
257// while `break some_outer_label` to reconverge to the rest of the function).
258#[derive(Copy, Clone, Default, PartialEq, Eq)]
259struct EventualCfgExits {
260    // FIXME(eddyb) do the other situations need their own flags here?
261    may_return_from_func: bool,
262}
263
264impl std::ops::BitOr for EventualCfgExits {
265    type Output = Self;
266    fn bitor(self, other: Self) -> Self {
267        Self { may_return_from_func: self.may_return_from_func | other.may_return_from_func }
268    }
269}
270impl std::ops::BitOrAssign for EventualCfgExits {
271    fn bitor_assign(&mut self, other: Self) {
272        *self = *self | other;
273    }
274}
275
276impl<'a> LoopFinder<'a> {
277    fn new(cfg: &'a ControlFlowGraph) -> Self {
278        Self {
279            cfg,
280            loop_header_to_exit_targets: FxIndexMap::default(),
281            scc_stack: vec![],
282            scc_state: EntityOrientedDenseMap::new(),
283        }
284    }
285
286    /// Tarjan's SCC algorithm works by computing the "earliest" reachable node,
287    /// from every node (often using the name `lowlink`), which will be equal
288    /// to the origin node itself iff that node is an "SCC root" (loop header),
289    /// and always point to an "earlier" node if a cycle (via loop backedge) was
290    /// found from somewhere else in the SCC (i.e. from inside the loop body).
291    ///
292    /// Here we track stack indices (as the stack order is the traversal order),
293    /// and distinguish the acyclic case to avoid treating most nodes as self-loops.
294    //
295    // FIXME(eddyb) name of the function is a bit clunky wrt its return type.
296    fn find_earliest_scc_root_of(
297        &mut self,
298        node: ControlRegion,
299    ) -> (Option<SccStackIdx>, EventualCfgExits) {
300        let state_entry = self.scc_state.entry(node);
301        if let Some(state) = &state_entry {
302            return match *state {
303                SccState::Pending(scc_stack_idx) => {
304                    // HACK(eddyb) this means that `EventualCfgExits`s will be
305                    // inconsistently observed across the `Pending` nodes of a
306                    // loop body, but that is sound as it cannot feed into any
307                    // `Complete` state until the loop header itself is complete,
308                    // and the monotonic nature of `EventualCfgExits` means that
309                    // the loop header will still get to see the complete picture.
310                    (Some(scc_stack_idx), EventualCfgExits::default())
311                }
312                SccState::Complete(eventual_cfg_exits) => (None, eventual_cfg_exits),
313            };
314        }
315        let scc_stack_idx = SccStackIdx(self.scc_stack.len().try_into().unwrap());
316        self.scc_stack.push(node);
317        *state_entry = Some(SccState::Pending(scc_stack_idx));
318
319        let control_inst = self
320            .cfg
321            .control_inst_on_exit_from
322            .get(node)
323            .expect("cfg: missing `ControlInst`, despite having left structured control-flow");
324
325        let mut eventual_cfg_exits = EventualCfgExits::default();
326
327        if let ControlInstKind::Return = control_inst.kind {
328            eventual_cfg_exits.may_return_from_func = true;
329        }
330
331        let earliest_scc_root = control_inst
332            .targets
333            .iter()
334            .flat_map(|&target| {
335                let (earliest_scc_root_of_target, eventual_cfg_exits_of_target) =
336                    self.find_earliest_scc_root_of(target);
337                eventual_cfg_exits |= eventual_cfg_exits_of_target;
338
339                // HACK(eddyb) if one of the edges is already known to be a loop exit
340                // (from `OpLoopMerge` specifically), treat it almost like a backedge,
341                // but with the additional requirement that the loop header is already
342                // on the stack (i.e. this `node` is reachable from that loop header).
343                let root_candidate_from_loop_merge =
344                    self.cfg.loop_merge_to_loop_header.get(&target).and_then(|&loop_header| {
345                        match self.scc_state.get(loop_header) {
346                            Some(&SccState::Pending(scc_stack_idx)) => Some(scc_stack_idx),
347                            _ => None,
348                        }
349                    });
350
351                earliest_scc_root_of_target.into_iter().chain(root_candidate_from_loop_merge)
352            })
353            .min();
354
355        // If this node has been chosen as the root of an SCC, complete that SCC.
356        if earliest_scc_root == Some(scc_stack_idx) {
357            let scc_start = scc_stack_idx.0 as usize;
358
359            // It's now possible to find all the loop exits: they're all the
360            // edges from nodes of this SCC (loop) to nodes not in the SCC.
361            let target_is_exit = |target| {
362                match self.scc_state[target] {
363                    SccState::Pending(i) => {
364                        assert!(i >= scc_stack_idx);
365                        false
366                    }
367                    SccState::Complete(eventual_cfg_exits_of_target) => {
368                        let EventualCfgExits { may_return_from_func: loop_may_reconverge } =
369                            eventual_cfg_exits;
370                        let EventualCfgExits { may_return_from_func: target_may_reconverge } =
371                            eventual_cfg_exits_of_target;
372
373                        // HACK(eddyb) see comment on `EventualCfgExits` for why
374                        // edges leaving the SCC aren't treated as loop exits
375                        // when they're "more divergent" than the loop itself,
376                        // i.e. if any edges leaving the SCC can reconverge,
377                        // (and therefore the loop as a whole can reconverge)
378                        // only those edges are kept as loop exits.
379                        target_may_reconverge == loop_may_reconverge
380                    }
381                }
382            };
383            self.loop_header_to_exit_targets.insert(
384                node,
385                self.scc_stack[scc_start..]
386                    .iter()
387                    .flat_map(|&scc_node| {
388                        self.cfg.control_inst_on_exit_from[scc_node].targets.iter().copied()
389                    })
390                    .filter(|&target| target_is_exit(target))
391                    .collect(),
392            );
393
394            // Find nested loops by marking *only* the loop header as complete,
395            // clearing loop body nodes' state, and recursing on them: all the
396            // nodes outside the loop (otherwise reachable from within), and the
397            // loop header itself, are already marked as complete, meaning that
398            // all exits and backedges will be ignored, and the recursion will
399            // only find more SCCs within the loop body (i.e. nested loops).
400            self.scc_state[node] = SccState::Complete(eventual_cfg_exits);
401            let loop_body_range = scc_start + 1..self.scc_stack.len();
402            for &scc_node in &self.scc_stack[loop_body_range.clone()] {
403                self.scc_state.remove(scc_node);
404            }
405            for i in loop_body_range.clone() {
406                self.find_earliest_scc_root_of(self.scc_stack[i]);
407            }
408            assert_eq!(self.scc_stack.len(), loop_body_range.end);
409
410            // Remove the entire SCC from the accumulation stack all at once.
411            self.scc_stack.truncate(scc_start);
412
413            return (None, eventual_cfg_exits);
414        }
415
416        // Not actually in an SCC at all, just some node outside any CFG cycles.
417        if earliest_scc_root.is_none() {
418            assert!(self.scc_stack.pop() == Some(node));
419            self.scc_state[node] = SccState::Complete(eventual_cfg_exits);
420        }
421
422        (earliest_scc_root, eventual_cfg_exits)
423    }
424}
425
426#[allow(rustdoc::private_intra_doc_links)]
427/// Control-flow "structurizer", which attempts to convert as much of the CFG
428/// as possible into structural control-flow (regions).
429///
430/// See [`StructurizeRegionState`]'s docs for more details on the algorithm.
431//
432// FIXME(eddyb) document this (instead of having it on `StructurizeRegionState`).
433//
434// NOTE(eddyb) CFG structurizer has these stages (per-region):
435//   1. absorb any deferred exits that finally have 100% refcount
436//   2. absorb a single backedge deferred exit to the same region
437//
438//   What we could add is a third step, to handle irreducible controlflow:
439//   3. check for groups of exits that have fully satisfied refcounts iff the
440//     rest of the exits in the group are all added together - if so, the group
441//     is *irreducible* and a single "loop header" can be created, that gets
442//     the group of deferred exits, and any other occurrence of the deferred
443//     exits (in either the original region, or amongst themselves) can be
444//     replaced with the "loop header" with appropriate selector inputs
445//
446//   Sadly 3. requires a bunch of tests that are hard to craft (can rustc MIR
447//   even end up in the right shape?).
448//   OpenCL has `goto` so maybe it can also be used for this worse-than-diamond
449//   example: `entry -> a,b,d` `a,b -> c` `a,b,c -> d` `a,b,c,d <-> a,b,c,d`
450//   (the goal is avoiding a "flat group", i.e. where there is only one step
451//   between every exit in the group and another exit)
452pub struct Structurizer<'a> {
453    cx: &'a Context,
454
455    /// Scrutinee type for [`SelectionKind::BoolCond`].
456    type_bool: Type,
457
458    /// Scrutinee value for [`SelectionKind::BoolCond`], for the "then" case.
459    const_true: Const,
460
461    /// Scrutinee value for [`SelectionKind::BoolCond`], for the "else" case.
462    const_false: Const,
463
464    func_def_body: &'a mut FuncDefBody,
465
466    // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?).
467    loop_header_to_exit_targets: FxIndexMap<ControlRegion, FxIndexSet<ControlRegion>>,
468
469    // HACK(eddyb) this also tracks all of `loop_header_to_exit_targets`, as
470    // "false edges" from every loop header to each exit target of that loop,
471    // which structurizing that loop consumes to "unlock" its own exits.
472    incoming_edge_counts_including_loop_exits:
473        EntityOrientedDenseMap<ControlRegion, IncomingEdgeCount>,
474
475    /// `structurize_region_state[region]` tracks `.structurize_region(region)`
476    /// progress/results (see also [`StructurizeRegionState`]'s docs).
477    //
478    // FIXME(eddyb) use `EntityOrientedDenseMap` (which lacks iteration by design).
479    structurize_region_state: FxIndexMap<ControlRegion, StructurizeRegionState>,
480
481    /// Accumulated rewrites (caused by e.g. `target_inputs`s, but not only),
482    /// i.e.: `Value::ControlRegionInput { region, input_idx }` must be
483    /// rewritten based on `control_region_input_rewrites[region]`, as either
484    /// the original `region` wasn't reused, or its inputs were renumbered.
485    control_region_input_rewrites:
486        EntityOrientedDenseMap<ControlRegion, ControlRegionInputRewrites>,
487}
488
489/// How all `Value::ControlRegionInput { region, input_idx }` for a `region`
490/// must be rewritten (see also `control_region_input_rewrites` docs).
491enum ControlRegionInputRewrites {
492    /// Complete replacement with another value (which can take any form), as
493    /// `region` wasn't kept in its original form in the final structured IR.
494    ///
495    /// **Note**: such replacement can be chained, i.e. a replacement value can
496    /// be `Value::ControlRegionInput { region: other_region, .. }`, and then
497    /// `other_region` itself may have its inputs written.
498    ReplaceWith(SmallVec<[Value; 2]>),
499
500    /// The value may remain an input of the same `region`, only changing its
501    /// `input_idx` (e.g. if indices need compaction after removing some inputs),
502    /// or get replaced anyway, depending on the `Result` for `input_idx`.
503    ///
504    /// **Note**: renumbering can only be the last rewrite step of a value,
505    /// as `region` must've been chosen to be kept in the final structured IR,
506    /// but the `Err` cases are transitive just like `ReplaceWith`.
507    //
508    // FIXME(eddyb) this is a bit silly, maybe try to rely more on hermeticity
509    // to get rid of this?
510    RenumberOrReplaceWith(SmallVec<[Result<u32, Value>; 2]>),
511}
512
513impl ControlRegionInputRewrites {
514    // HACK(eddyb) this is here because it depends on a field of `Structurizer`
515    // and borrowing issues ensue if it's made a method of `Structurizer`.
516    fn rewrite_all(
517        rewrites: &EntityOrientedDenseMap<ControlRegion, Self>,
518    ) -> impl crate::transform::Transformer + '_ {
519        // FIXME(eddyb) maybe this should be provided by `transform`.
520        use crate::transform::*;
521        struct ReplaceValueWith<F>(F);
522        impl<F: Fn(Value) -> Option<Value>> Transformer for ReplaceValueWith<F> {
523            fn transform_value_use(&mut self, v: &Value) -> Transformed<Value> {
524                self.0(*v).map_or(Transformed::Unchanged, Transformed::Changed)
525            }
526        }
527
528        ReplaceValueWith(move |v| {
529            let mut new_v = v;
530            while let Value::ControlRegionInput { region, input_idx } = new_v {
531                match rewrites.get(region) {
532                    // NOTE(eddyb) this needs to be able to apply multiple replacements,
533                    // due to the input potentially having redundantly chained `OpPhi`s.
534                    //
535                    // FIXME(eddyb) union-find-style "path compression" could record the
536                    // final value inside `rewrites` while replacements are being made,
537                    // to avoid going through a chain more than once (and some of these
538                    // replacements could also be applied early).
539                    Some(ControlRegionInputRewrites::ReplaceWith(replacements)) => {
540                        new_v = replacements[input_idx as usize];
541                    }
542                    Some(ControlRegionInputRewrites::RenumberOrReplaceWith(
543                        renumbering_and_replacements,
544                    )) => match renumbering_and_replacements[input_idx as usize] {
545                        Ok(new_idx) => {
546                            new_v = Value::ControlRegionInput { region, input_idx: new_idx };
547                            break;
548                        }
549                        Err(replacement) => new_v = replacement,
550                    },
551                    None => break,
552                }
553            }
554            (v != new_v).then_some(new_v)
555        })
556    }
557}
558
559/// The state of one `.structurize_region(region)` invocation, and its result.
560///
561/// There is a fourth (or 0th) implicit state, which is where nothing has yet
562/// observed some region, and [`Structurizer`] isn't tracking it at all.
563//
564// FIXME(eddyb) make the 0th state explicit and move `incoming_edge_counts` to it.
565enum StructurizeRegionState {
566    /// Structurization is still running, and observing this is a cycle.
567    InProgress,
568
569    /// Structurization completed, and this region can now be claimed.
570    Ready {
571        /// Cached `region_deferred_edges[region].edge_bundle.accumulated_count`,
572        /// i.e. the total count of backedges (if any exist) pointing to `region`
573        /// from the CFG subgraph that `region` itself dominates.
574        ///
575        /// Claiming a region with backedges can combine them with the bundled
576        /// edges coming into the CFG cycle from outside, and instead of failing
577        /// due to the latter not being enough to claim the region on their own,
578        /// actually perform loop structurization.
579        accumulated_backedge_count: IncomingEdgeCount,
580
581        // HACK(eddyb) the only part of a `ClaimedRegion` that is computed by
582        // `structurize_region` (the rest comes from `try_claim_edge_bundle`).
583        region_deferred_edges: DeferredEdgeBundleSet,
584    },
585
586    /// Region was claimed (by an [`IncomingEdgeBundle`], with the appropriate
587    /// total [`IncomingEdgeCount`], minus `accumulated_backedge_count`), and
588    /// must eventually be incorporated as part of some larger region.
589    Claimed,
590}
591
592/// An "(incoming) edge bundle" is a subset of the edges into a single `target`.
593///
594/// When `accumulated_count` reaches the total [`IncomingEdgeCount`] for `target`,
595/// that [`IncomingEdgeBundle`] is said to "effectively own" its `target` (akin to
596/// the more commonly used CFG domination relation, but more "incremental").
597///
598/// **Note**: `target` has a generic type `T` to reduce redundancy when it's
599/// already implied (e.g. by the key in [`DeferredEdgeBundleSet`]'s map).
600struct IncomingEdgeBundle<T> {
601    target: T,
602    accumulated_count: IncomingEdgeCount,
603
604    /// The [`Value`]s that `Value::ControlRegionInput { region, .. }` will get
605    /// on entry into `region`, through this "edge bundle".
606    target_inputs: SmallVec<[Value; 2]>,
607}
608
609impl<T> IncomingEdgeBundle<T> {
610    fn with_target<U>(self, target: U) -> IncomingEdgeBundle<U> {
611        let IncomingEdgeBundle { target: _, accumulated_count, target_inputs } = self;
612        IncomingEdgeBundle { target, accumulated_count, target_inputs }
613    }
614}
615
616/// A "deferred (incoming) edge bundle" is an [`IncomingEdgeBundle`] that cannot
617/// be structurized immediately, but instead waits for its `accumulated_count`
618/// to reach the full count of its `target`, before it can grafted into some
619/// structured control-flow region.
620///
621/// While in the "deferred" state, its can accumulate a non-trivial `condition`,
622/// every time it's propagated to an "outer" region, e.g. for this pseudocode:
623/// ```text
624/// if a {
625///     branch => label1
626/// } else {
627///     if b {
628///         branch => label1
629///     }
630/// }
631/// ```
632/// the deferral of branches to `label1` will result in:
633/// ```text
634/// label1_condition = if a {
635///     true
636/// } else {
637///     if b {
638///         true
639///     } else {
640///         false
641///     }
642/// }
643/// if label1_condition {
644///     branch => label1
645/// }
646/// ```
647/// which could theoretically be simplified (after the [`Structurizer`]) to:
648/// ```text
649/// label1_condition = a | b
650/// if label1_condition {
651///     branch => label1
652/// }
653/// ```
654///
655/// **Note**: `edge_bundle.target` has a generic type `T` to reduce redundancy
656/// when it's already implied (e.g. by the key in [`DeferredEdgeBundleSet`]'s map).
657struct DeferredEdgeBundle<T = DeferredTarget> {
658    condition: LazyCond,
659    edge_bundle: IncomingEdgeBundle<T>,
660}
661
662impl<T> DeferredEdgeBundle<T> {
663    fn with_target<U>(self, target: U) -> DeferredEdgeBundle<U> {
664        let DeferredEdgeBundle { condition, edge_bundle } = self;
665        DeferredEdgeBundle { condition, edge_bundle: edge_bundle.with_target(target) }
666    }
667}
668
669/// A recipe for computing a control-flow-sensitive (boolean) condition [`Value`],
670/// potentially requiring merging through an arbitrary number of `Select`s
671/// (via per-case outputs and [`Value::ControlNodeOutput`], for each `Select`).
672///
673/// This should largely be equivalent to eagerly generating all region outputs
674/// that might be needed, and then removing the unused ones, but this way we
675/// never generate unused outputs, and can potentially even optimize away some
676/// redundant dataflow (e.g. `if cond { true } else { false }` is just `cond`).
677#[derive(Clone)]
678enum LazyCond {
679    // HACK(eddyb) `Undef` is used when the condition comes from e.g. a `Select`
680    // case that diverges and/or represents `unreachable`.
681    Undef,
682
683    False,
684    True,
685
686    Merge(Rc<LazyCondMerge>),
687}
688
689enum LazyCondMerge {
690    Select {
691        control_node: ControlNode,
692        // FIXME(eddyb) the lowest level of `LazyCond` ends up containing only
693        // `LazyCond::{Undef,False,True}`, and that could more efficiently be
694        // expressed using e.g. bitsets, but the `Rc` in `LazyCond::Merge`
695        // means that this is more compact than it would otherwise be.
696        per_case_conds: SmallVec<[LazyCond; 4]>,
697    },
698}
699
700/// A target for one of the edge bundles in a [`DeferredEdgeBundleSet`], mostly
701/// separate from [`ControlRegion`] to allow expressing returns as well.
702#[derive(Copy, Clone, PartialEq, Eq, Hash)]
703enum DeferredTarget {
704    Region(ControlRegion),
705
706    /// Structured "return" out of the function (with `target_inputs` used for
707    /// the function body `output`s, i.e. inputs of [`ControlInstKind::Return`]).
708    Return,
709}
710
711/// Set of [`DeferredEdgeBundle`]s, uniquely keyed by their `target`s.
712///
713/// Semantically equivalent to an unordered series of conditional branches
714/// to each possible `target`, which corresponds to an unenforced invariant
715/// that exactly one [`DeferredEdgeBundle`] condition must be `true` at any
716/// given time (the only non-trivial case, [`DeferredEdgeBundleSet::Choice`],
717/// satisfies it because it's only used for merging `Select` cases, and so
718/// all the conditions will end up using disjoint [`LazyCond::Merge`]s).
719enum DeferredEdgeBundleSet {
720    Unreachable,
721
722    // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
723    Always {
724        // HACK(eddyb) fields are split here to allow e.g. iteration.
725        target: DeferredTarget,
726        edge_bundle: IncomingEdgeBundle<()>,
727    },
728
729    Choice {
730        target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>,
731    },
732}
733
734impl FromIterator<DeferredEdgeBundle> for DeferredEdgeBundleSet {
735    fn from_iter<T: IntoIterator<Item = DeferredEdgeBundle>>(iter: T) -> Self {
736        let mut iter = iter.into_iter();
737        match iter.next() {
738            None => Self::Unreachable,
739            Some(first) => match iter.next() {
740                // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
741                None => Self::Always {
742                    target: first.edge_bundle.target,
743                    edge_bundle: first.edge_bundle.with_target(()),
744                },
745                Some(second) => Self::Choice {
746                    target_to_deferred: ([first, second].into_iter().chain(iter))
747                        .map(|d| (d.edge_bundle.target, d.with_target(())))
748                        .collect(),
749                },
750            },
751        }
752    }
753}
754
755impl From<FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>> for DeferredEdgeBundleSet {
756    fn from(target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>) -> Self {
757        if target_to_deferred.len() <= 1 {
758            target_to_deferred
759                .into_iter()
760                .map(|(target, deferred)| deferred.with_target(target))
761                .collect()
762        } else {
763            Self::Choice { target_to_deferred }
764        }
765    }
766}
767
768// HACK(eddyb) this API is a mess, is there an uncompromising way to clean it up?
769impl DeferredEdgeBundleSet {
770    fn get_edge_bundle_by_target(
771        &self,
772        search_target: DeferredTarget,
773    ) -> Option<&IncomingEdgeBundle<()>> {
774        match self {
775            DeferredEdgeBundleSet::Unreachable => None,
776            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
777                (*target == search_target).then_some(edge_bundle)
778            }
779            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
780                Some(&target_to_deferred.get(&search_target)?.edge_bundle)
781            }
782        }
783    }
784
785    fn get_edge_bundle_mut_by_target(
786        &mut self,
787        search_target: DeferredTarget,
788    ) -> Option<&mut IncomingEdgeBundle<()>> {
789        match self {
790            DeferredEdgeBundleSet::Unreachable => None,
791            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
792                (*target == search_target).then_some(edge_bundle)
793            }
794            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
795                Some(&mut target_to_deferred.get_mut(&search_target)?.edge_bundle)
796            }
797        }
798    }
799
800    fn iter_targets_with_edge_bundle(
801        &self,
802    ) -> impl Iterator<Item = (DeferredTarget, &IncomingEdgeBundle<()>)> {
803        match self {
804            DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
805            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
806                Either::Left(Some((*target, edge_bundle)).into_iter())
807            }
808            DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
809                target_to_deferred
810                    .iter()
811                    .map(|(&target, deferred)| (target, &deferred.edge_bundle)),
812            ),
813        }
814    }
815
816    fn iter_targets_with_edge_bundle_mut(
817        &mut self,
818    ) -> impl Iterator<Item = (DeferredTarget, &mut IncomingEdgeBundle<()>)> {
819        match self {
820            DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
821            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
822                Either::Left(Some((*target, edge_bundle)).into_iter())
823            }
824            DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
825                target_to_deferred
826                    .iter_mut()
827                    .map(|(&target, deferred)| (target, &mut deferred.edge_bundle)),
828            ),
829        }
830    }
831
832    // HACK(eddyb) this only exists because of `DeferredEdgeBundleSet`'s lossy
833    // representation wrt conditions, so removal from a `DeferredEdgeBundleSet`
834    // cannot be used for e.g. `Select` iterating over per-case deferreds.
835    fn steal_deferred_by_target_without_removal(
836        &mut self,
837        search_target: DeferredTarget,
838    ) -> Option<DeferredEdgeBundle<()>> {
839        let steal_edge_bundle = |edge_bundle: &mut IncomingEdgeBundle<()>| IncomingEdgeBundle {
840            target: (),
841            accumulated_count: edge_bundle.accumulated_count,
842            target_inputs: mem::take(&mut edge_bundle.target_inputs),
843        };
844        match self {
845            DeferredEdgeBundleSet::Unreachable => None,
846            DeferredEdgeBundleSet::Always { target, edge_bundle } => (*target == search_target)
847                .then(|| DeferredEdgeBundle {
848                    condition: LazyCond::True,
849                    edge_bundle: steal_edge_bundle(edge_bundle),
850                }),
851            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
852                let DeferredEdgeBundle { condition, edge_bundle } =
853                    target_to_deferred.get_mut(&search_target)?;
854                Some(DeferredEdgeBundle {
855                    condition: mem::replace(condition, LazyCond::False),
856                    edge_bundle: steal_edge_bundle(edge_bundle),
857                })
858            }
859        }
860    }
861
862    // NOTE(eddyb) the returned `DeferredEdgeBundleSet` exists under the assumption
863    // that `split_target` is not reachable from it, so this method is not suitable
864    // for e.g. uniformly draining `DeferredEdgeBundleSet` in a way that preserves
865    // conditions (but rather it's almost a kind of control-flow "slicing").
866    fn split_out_target(self, split_target: DeferredTarget) -> (Option<DeferredEdgeBundle>, Self) {
867        match self {
868            DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
869            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
870                if target == split_target {
871                    (
872                        Some(DeferredEdgeBundle {
873                            condition: LazyCond::True,
874                            edge_bundle: edge_bundle.with_target(target),
875                        }),
876                        DeferredEdgeBundleSet::Unreachable,
877                    )
878                } else {
879                    (None, DeferredEdgeBundleSet::Always { target, edge_bundle })
880                }
881            }
882            DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
883                // FIXME(eddyb) should this use `shift_remove` and/or emulate
884                // extra tombstones, to avoid impacting the order?
885                (
886                    target_to_deferred
887                        .swap_remove(&split_target)
888                        .map(|d| d.with_target(split_target)),
889                    Self::from(target_to_deferred),
890                )
891            }
892        }
893    }
894
895    // HACK(eddyb) the strange signature is overfitted to its own callsite.
896    fn split_out_matching<T>(
897        self,
898        mut matches: impl FnMut(DeferredEdgeBundle) -> Result<T, DeferredEdgeBundle>,
899    ) -> (Option<T>, Self) {
900        match self {
901            DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
902            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
903                match matches(DeferredEdgeBundle {
904                    condition: LazyCond::True,
905                    edge_bundle: edge_bundle.with_target(target),
906                }) {
907                    Ok(x) => (Some(x), DeferredEdgeBundleSet::Unreachable),
908                    Err(new_deferred) => {
909                        assert!(new_deferred.edge_bundle.target == target);
910                        assert!(matches!(new_deferred.condition, LazyCond::True));
911                        (None, DeferredEdgeBundleSet::Always {
912                            target,
913                            edge_bundle: new_deferred.edge_bundle.with_target(()),
914                        })
915                    }
916                }
917            }
918            DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
919                let mut result = None;
920                for (i, (&target, deferred)) in target_to_deferred.iter_mut().enumerate() {
921                    // HACK(eddyb) "take" `deferred` so it can be passed to
922                    // `matches` (and put back if that returned `Err`).
923                    let taken_deferred = mem::replace(deferred, DeferredEdgeBundle {
924                        condition: LazyCond::False,
925                        edge_bundle: IncomingEdgeBundle {
926                            target: Default::default(),
927                            accumulated_count: Default::default(),
928                            target_inputs: Default::default(),
929                        },
930                    });
931
932                    match matches(taken_deferred.with_target(target)) {
933                        Ok(x) => {
934                            result = Some(x);
935                            // FIXME(eddyb) should this use `swap_remove_index`?
936                            target_to_deferred.shift_remove_index(i).unwrap();
937                            break;
938                        }
939
940                        // Put back the `DeferredEdgeBundle` and keep looking.
941                        Err(new_deferred) => {
942                            assert!(new_deferred.edge_bundle.target == target);
943                            *deferred = new_deferred.with_target(());
944                        }
945                    }
946                }
947                (result, Self::from(target_to_deferred))
948            }
949        }
950    }
951}
952
953/// A successfully "claimed" (via `try_claim_edge_bundle`) partially structurized
954/// CFG subgraph (i.e. set of [`ControlRegion`]s previously connected by CFG edges),
955/// which is effectively owned by the "claimer" and **must** be used for:
956/// - the whole function body (if `deferred_edges` only contains `Return`)
957/// - one of the cases of a `Select` node
958/// - merging into a larger region (i.e. its nearest dominator)
959//
960// FIXME(eddyb) consider never having to claim the function body itself,
961// by wrapping the CFG in a `ControlNode` instead.
962struct ClaimedRegion {
963    // FIXME(eddyb) find a way to clarify that this can differ from the target
964    // of `try_claim_edge_bundle`, and also that `deferred_edges` are from the
965    // perspective of being "inside" `structured_body` (wrt hermeticity).
966    structured_body: ControlRegion,
967
968    /// The [`Value`]s that `Value::ControlRegionInput { region: structured_body, .. }`
969    /// will get on entry into `structured_body`, when this region ends up
970    /// merged into a larger region, or as a child of a new [`ControlNode`].
971    //
972    // FIXME(eddyb) don't replace `Value::ControlRegionInput { region: structured_body, .. }`
973    // with `region_inputs` when `structured_body` ends up a `ControlNode` child,
974    // but instead make all `ControlRegion`s entirely hermetic wrt inputs.
975    structured_body_inputs: SmallVec<[Value; 2]>,
976
977    /// The transitive targets which couldn't be claimed into `structured_body`
978    /// remain as deferred exits, and will block further structurization until
979    /// all other edges to those same targets are gathered together.
980    ///
981    /// **Note**: this will only be empty if the region can never exit,
982    /// i.e. it has divergent control-flow (such as an infinite loop), as any
983    /// control-flow path that can (eventually) return from the function, will
984    /// end up using a deferred target for that (see [`DeferredTarget::Return`]).
985    deferred_edges: DeferredEdgeBundleSet,
986}
987
988impl<'a> Structurizer<'a> {
989    pub fn new(cx: &'a Context, func_def_body: &'a mut FuncDefBody) -> Self {
990        // FIXME(eddyb) SPIR-T should have native booleans itself.
991        let wk = &spv::spec::Spec::get().well_known;
992        let type_bool = cx.intern(TypeKind::SpvInst {
993            spv_inst: wk.OpTypeBool.into(),
994            type_and_const_inputs: [].into_iter().collect(),
995        });
996        let const_true = cx.intern(ConstDef {
997            attrs: AttrSet::default(),
998            ty: type_bool,
999            kind: ConstKind::SpvInst {
1000                spv_inst_and_const_inputs: Rc::new((
1001                    wk.OpConstantTrue.into(),
1002                    [].into_iter().collect(),
1003                )),
1004            },
1005        });
1006        let const_false = cx.intern(ConstDef {
1007            attrs: AttrSet::default(),
1008            ty: type_bool,
1009            kind: ConstKind::SpvInst {
1010                spv_inst_and_const_inputs: Rc::new((
1011                    wk.OpConstantFalse.into(),
1012                    [].into_iter().collect(),
1013                )),
1014            },
1015        });
1016
1017        let (loop_header_to_exit_targets, incoming_edge_counts_including_loop_exits) =
1018            func_def_body
1019                .unstructured_cfg
1020                .as_ref()
1021                .map(|cfg| {
1022                    let loop_header_to_exit_targets = {
1023                        let mut loop_finder = LoopFinder::new(cfg);
1024                        loop_finder.find_earliest_scc_root_of(func_def_body.body);
1025                        loop_finder.loop_header_to_exit_targets
1026                    };
1027
1028                    let mut state = TraversalState {
1029                        incoming_edge_counts: EntityOrientedDenseMap::new(),
1030
1031                        pre_order_visit: |_| {},
1032                        post_order_visit: |_| {},
1033                        reverse_targets: false,
1034                    };
1035                    cfg.traverse_whole_func(func_def_body, &mut state);
1036
1037                    // HACK(eddyb) treat loop exits as "false edges", that their
1038                    // respective loop header "owns", such that structurization
1039                    // naturally stops at those loop exits, instead of continuing
1040                    // greedily into the loop exterior (producing "maximal loops").
1041                    for loop_exit_targets in loop_header_to_exit_targets.values() {
1042                        for &exit_target in loop_exit_targets {
1043                            *state
1044                                .incoming_edge_counts
1045                                .entry(exit_target)
1046                                .get_or_insert(Default::default()) += IncomingEdgeCount::ONE;
1047                        }
1048                    }
1049
1050                    (loop_header_to_exit_targets, state.incoming_edge_counts)
1051                })
1052                .unwrap_or_default();
1053
1054        Self {
1055            cx,
1056            type_bool,
1057            const_true,
1058            const_false,
1059
1060            func_def_body,
1061
1062            loop_header_to_exit_targets,
1063            incoming_edge_counts_including_loop_exits,
1064
1065            structurize_region_state: FxIndexMap::default(),
1066            control_region_input_rewrites: EntityOrientedDenseMap::new(),
1067        }
1068    }
1069
1070    pub fn structurize_func(mut self) {
1071        // Don't even try to re-structurize functions.
1072        if self.func_def_body.unstructured_cfg.is_none() {
1073            return;
1074        }
1075
1076        // FIXME(eddyb) it might work much better to have the unstructured CFG
1077        // wrapped in a `ControlNode` inside the function body, instead.
1078        let func_body_deferred_edges = {
1079            let func_entry_pseudo_edge = {
1080                let target = self.func_def_body.body;
1081                move || IncomingEdgeBundle {
1082                    target,
1083                    accumulated_count: IncomingEdgeCount::ONE,
1084                    target_inputs: [].into_iter().collect(),
1085                }
1086            };
1087
1088            // HACK(eddyb) it's easier to assume the function never loops back
1089            // to its body, than fix up the broken CFG if that never happens.
1090            if self.incoming_edge_counts_including_loop_exits[func_entry_pseudo_edge().target]
1091                != func_entry_pseudo_edge().accumulated_count
1092            {
1093                // FIXME(eddyb) find a way to attach (diagnostic) attributes
1094                // to a `FuncDefBody`, would be useful to have that here.
1095                return;
1096            }
1097
1098            let ClaimedRegion { structured_body, structured_body_inputs, deferred_edges } =
1099                self.try_claim_edge_bundle(func_entry_pseudo_edge()).ok().unwrap();
1100            assert!(structured_body == func_entry_pseudo_edge().target);
1101            assert!(structured_body_inputs == func_entry_pseudo_edge().target_inputs);
1102            deferred_edges
1103        };
1104
1105        match func_body_deferred_edges {
1106            // FIXME(eddyb) also support structured return when the whole body
1107            // is divergent, by generating undef constants (needs access to the
1108            // whole `FuncDecl`, not just `FuncDefBody`, to get the right types).
1109            DeferredEdgeBundleSet::Unreachable => {
1110                // HACK(eddyb) replace the CFG with one that only contains an
1111                // `Unreachable` terminator for the body, comparable to what
1112                // `rebuild_cfg_from_unclaimed_region_deferred_edges` would do
1113                // in the general case (but special-cased because this is very
1114                // close to being structurizable, just needs a bit of plumbing).
1115                let mut control_inst_on_exit_from = EntityOrientedDenseMap::new();
1116                control_inst_on_exit_from.insert(self.func_def_body.body, ControlInst {
1117                    attrs: AttrSet::default(),
1118                    kind: ControlInstKind::Unreachable,
1119                    inputs: [].into_iter().collect(),
1120                    targets: [].into_iter().collect(),
1121                    target_inputs: FxIndexMap::default(),
1122                });
1123                self.func_def_body.unstructured_cfg = Some(ControlFlowGraph {
1124                    control_inst_on_exit_from,
1125                    loop_merge_to_loop_header: Default::default(),
1126                });
1127            }
1128
1129            // Structured return, the function is fully structurized.
1130            DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
1131                let body_def = self.func_def_body.at_mut_body().def();
1132                body_def.outputs = edge_bundle.target_inputs;
1133                self.func_def_body.unstructured_cfg = None;
1134            }
1135
1136            _ => {
1137                // Repair all the regions that remain unclaimed, including the body.
1138                let structurize_region_state = mem::take(&mut self.structurize_region_state)
1139                    .into_iter()
1140                    .chain([(self.func_def_body.body, StructurizeRegionState::Ready {
1141                        accumulated_backedge_count: IncomingEdgeCount::default(),
1142
1143                        region_deferred_edges: func_body_deferred_edges,
1144                    })]);
1145                for (target, state) in structurize_region_state {
1146                    if let StructurizeRegionState::Ready { region_deferred_edges, .. } = state {
1147                        self.rebuild_cfg_from_unclaimed_region_deferred_edges(
1148                            target,
1149                            region_deferred_edges,
1150                        );
1151                    }
1152                }
1153            }
1154        }
1155
1156        // The last step of structurization is applying rewrites accumulated
1157        // while structurizing (i.e. `control_region_input_rewrites`).
1158        //
1159        // FIXME(eddyb) obsolete this by fully taking advantage of hermeticity,
1160        // and only replacing `Value::ControlRegionInput { region, .. }` within
1161        // `region`'s children, shallowly, whenever `region` gets claimed.
1162        self.func_def_body.inner_in_place_transform_with(
1163            &mut ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites),
1164        );
1165    }
1166
1167    fn try_claim_edge_bundle(
1168        &mut self,
1169        edge_bundle: IncomingEdgeBundle<ControlRegion>,
1170    ) -> Result<ClaimedRegion, IncomingEdgeBundle<ControlRegion>> {
1171        let target = edge_bundle.target;
1172
1173        // Always attempt structurization before checking the `IncomingEdgeCount`,
1174        // to be able to make use of backedges (if any were found).
1175        if self.structurize_region_state.get(&target).is_none() {
1176            self.structurize_region(target);
1177        }
1178
1179        let backedge_count = match self.structurize_region_state[&target] {
1180            // This `try_claim_edge_bundle` call is itself a backedge, and it's
1181            // coherent to not let any of them claim the loop itself, and only
1182            // allow claiming the whole loop (if successfully structurized).
1183            StructurizeRegionState::InProgress => IncomingEdgeCount::default(),
1184
1185            StructurizeRegionState::Ready { accumulated_backedge_count, .. } => {
1186                accumulated_backedge_count
1187            }
1188
1189            StructurizeRegionState::Claimed => {
1190                unreachable!("cfg::Structurizer::try_claim_edge_bundle: already claimed");
1191            }
1192        };
1193
1194        if self.incoming_edge_counts_including_loop_exits[target]
1195            != edge_bundle.accumulated_count + backedge_count
1196        {
1197            return Err(edge_bundle);
1198        }
1199
1200        let state =
1201            self.structurize_region_state.insert(target, StructurizeRegionState::Claimed).unwrap();
1202
1203        let mut deferred_edges = match state {
1204            StructurizeRegionState::InProgress => unreachable!(
1205                "cfg::Structurizer::try_claim_edge_bundle: cyclic calls \
1206                 should not get this far"
1207            ),
1208
1209            StructurizeRegionState::Ready { region_deferred_edges, .. } => region_deferred_edges,
1210
1211            StructurizeRegionState::Claimed => {
1212                // Handled above.
1213                unreachable!()
1214            }
1215        };
1216
1217        let mut backedge = None;
1218        if backedge_count != IncomingEdgeCount::default() {
1219            (backedge, deferred_edges) =
1220                deferred_edges.split_out_target(DeferredTarget::Region(target));
1221        }
1222
1223        // If the target contains any backedge to itself, that's a loop, with:
1224        // * entry: `edge_bundle` (unconditional, i.e. `do`-`while`-like)
1225        // * body: `target`
1226        // * repeat ("continue") edge: `backedge` (with its `condition`)
1227        // * exit ("break") edges: `deferred_edges`
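        // Roughly, the structured result is `loop { ...body...; if !repeat_condition { break } }`,
        // with each deferred exit edge only getting resolved later (e.g. into a
        // `Select` past the loop), once this claim propagates it to the caller.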
1228        let structured_body = if let Some(backedge) = backedge {
1229            let DeferredEdgeBundle { condition: repeat_condition, edge_bundle: backedge } =
1230                backedge;
1231            let body = target;
1232
1233            // HACK(eddyb) due to `Loop` `ControlNode`s not being hermetic on
1234            // the output side yet (i.e. they still have SSA-like semantics),
1235            // it gets wrapped in a `ControlRegion`, which can be as hermetic as
1236            // the loop body itself was originally.
1237            // NOTE(eddyb) both input declarations and the child `Loop` node are
1238            // added later down below, after the `Loop` node is created.
1239            let wrapper_region =
1240                self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default());
1241
1242            // Any loop body region inputs, which must receive values from both
1243            // the loop entry and the backedge, become explicit "loop state",
1244            // starting as `initial_inputs` and being replaced with body outputs
1245            // after every loop iteration.
1246            //
1247            // FIXME(eddyb) `Loop` `ControlNode`s should be changed to be hermetic
1248            // and have the loop state be output from the whole node itself,
1249            // for any outside uses of values defined within the loop body.
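            // E.g. a CFG φ like `x = φ(x_entry, x_backedge)` becomes loop state:
            // `x_entry` reaches the loop through `wrapper_region`'s inputs
            // (forwarded via `initial_inputs`), while `x_backedge` becomes a `body`
            // output, feeding the matching `body` input on the next iteration.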
1250            let body_def = self.func_def_body.at_mut(body).def();
1251            let original_input_decls = mem::take(&mut body_def.inputs);
1252            assert!(body_def.outputs.is_empty());
1253
1254            // HACK(eddyb) some dataflow through the loop body is redundant,
1255            // and can be lifted out of it, but the worst part is that applying
1256            // the replacement requires leaving alone all the non-redundant
1257            // `body` region inputs at the same time, and it's not really
1258            // feasible to move `body`'s children into a new region without
1259            // wasting it completely (i.e. can't swap with `wrapper_region`).
1260            let mut initial_inputs = SmallVec::<[_; 2]>::new();
1261            let body_input_rewrites = ControlRegionInputRewrites::RenumberOrReplaceWith(
1262                backedge
1263                    .target_inputs
1264                    .into_iter()
1265                    .enumerate()
1266                    .map(|(original_idx, mut backedge_value)| {
1267                        ControlRegionInputRewrites::rewrite_all(
1268                            &self.control_region_input_rewrites,
1269                        )
1270                        .transform_value_use(&backedge_value)
1271                        .apply_to(&mut backedge_value);
1272
1273                        let original_idx = u32::try_from(original_idx).unwrap();
1274                        if backedge_value
1275                            == (Value::ControlRegionInput { region: body, input_idx: original_idx })
1276                        {
1277                            // FIXME(eddyb) does this have to be general purpose,
1278                            // or could this be handled as `None` with a single
1279                            // `wrapper_region` per `ControlRegionInputRewrites`?
1280                            Err(Value::ControlRegionInput {
1281                                region: wrapper_region,
1282                                input_idx: original_idx,
1283                            })
1284                        } else {
1285                            let renumbered_idx = u32::try_from(body_def.inputs.len()).unwrap();
1286                            initial_inputs.push(Value::ControlRegionInput {
1287                                region: wrapper_region,
1288                                input_idx: original_idx,
1289                            });
1290                            body_def.inputs.push(original_input_decls[original_idx as usize]);
1291                            body_def.outputs.push(backedge_value);
1292                            Ok(renumbered_idx)
1293                        }
1294                    })
1295                    .collect(),
1296            );
1297            self.control_region_input_rewrites.insert(body, body_input_rewrites);
1298
1299            assert_eq!(initial_inputs.len(), body_def.inputs.len());
1300            assert_eq!(body_def.outputs.len(), body_def.inputs.len());
1301
1302            let repeat_condition = self.materialize_lazy_cond(&repeat_condition);
1303            let loop_node = self.func_def_body.control_nodes.define(
1304                self.cx,
1305                ControlNodeDef {
1306                    kind: ControlNodeKind::Loop { initial_inputs, body, repeat_condition },
1307                    outputs: [].into_iter().collect(),
1308                }
1309                .into(),
1310            );
1311
1312            let wrapper_region_def = &mut self.func_def_body.control_regions[wrapper_region];
1313            wrapper_region_def.inputs = original_input_decls;
1314            wrapper_region_def
1315                .children
1316                .insert_last(loop_node, &mut self.func_def_body.control_nodes);
1317
1318            // HACK(eddyb) we've treated loop exits as extra "false edges", so
1319            // here they have to be added to the loop (potentially unlocking
1320            // structurization to the outside of the loop, in the caller).
1321            if let Some(exit_targets) = self.loop_header_to_exit_targets.get(&target) {
1322                for &exit_target in exit_targets {
1323                    // FIXME(eddyb) what if this is `None`, is that impossible?
1324                    if let Some(exit_edge_bundle) = deferred_edges
1325                        .get_edge_bundle_mut_by_target(DeferredTarget::Region(exit_target))
1326                    {
1327                        exit_edge_bundle.accumulated_count += IncomingEdgeCount::ONE;
1328                    }
1329                }
1330            }
1331
1332            wrapper_region
1333        } else {
1334            target
1335        };
1336        Ok(ClaimedRegion {
1337            structured_body,
1338            structured_body_inputs: edge_bundle.target_inputs,
1339            deferred_edges,
1340        })
1341    }
1342
1343    /// Structurize `region` by absorbing into it the entire CFG subgraph which
1344    /// it dominates (and deferring any other edges to the rest of the CFG).
1345    ///
1346    /// The output of this process is stored in, and any other bookkeeping is
1347    /// done through, `self.structurize_region_state[region]`.
1348    ///
1349    /// See also [`StructurizeRegionState`]'s docs.
1350    fn structurize_region(&mut self, region: ControlRegion) {
1351        {
1352            let old_state =
1353                self.structurize_region_state.insert(region, StructurizeRegionState::InProgress);
1354            if let Some(old_state) = old_state {
1355                unreachable!(
1356                    "cfg::Structurizer::structurize_region: \
1357                     already {}, when attempting to start structurization",
1358                    match old_state {
1359                        StructurizeRegionState::InProgress => "in progress (cycle detected)",
1360                        StructurizeRegionState::Ready { .. } => "completed",
1361                        StructurizeRegionState::Claimed => "claimed",
1362                    }
1363                );
1364            }
1365        }
1366
1367        let control_inst_on_exit = self
1368            .func_def_body
1369            .unstructured_cfg
1370            .as_mut()
1371            .unwrap()
1372            .control_inst_on_exit_from
1373            .remove(region)
1374            .expect(
1375                "cfg::Structurizer::structurize_region: missing \
1376                   `ControlInst` (CFG wasn't unstructured in the first place?)",
1377            );
1378
1379        // Start with the concatenation of `region` and `control_inst_on_exit`,
1380        // always appending `ControlNode`s (including the children of entire
1381        // `ClaimedRegion`s) to `region`'s definition itself.
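        // The kind of `control_inst_on_exit` decides what gets appended below:
        // `ExitInvocation` becomes a child `ControlNode`, `Branch` splices in its
        // sole (claimed) target, `SelectBranch` becomes a `Select` node over its
        // targets, while `Unreachable`/`Return` only produce deferred edges (if any).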
1382        let mut deferred_edges = {
1383            let ControlInst { attrs, kind, inputs, targets, target_inputs } = control_inst_on_exit;
1384
1385            // FIXME(eddyb) this loses `attrs`.
1386            let _ = attrs;
1387
1388            let target_regions: SmallVec<[_; 8]> = targets
1389                .iter()
1390                .map(|&target| {
1391                    self.try_claim_edge_bundle(IncomingEdgeBundle {
1392                        target,
1393                        accumulated_count: IncomingEdgeCount::ONE,
1394                        target_inputs: target_inputs.get(&target).cloned().unwrap_or_default(),
1395                    })
1396                    .map_err(|edge_bundle| {
1397                        // HACK(eddyb) special-case "shared `unreachable`" to
1398                        // always inline it and avoid awkward "merges".
1399                        // FIXME(eddyb) should this be in a separate CFG pass?
1400                        // (i.e. is there a risk of other logic needing this?)
1401                        let target_is_trivial_unreachable =
1402                            match self.structurize_region_state.get(&edge_bundle.target) {
1403                                Some(StructurizeRegionState::Ready {
1404                                    region_deferred_edges: DeferredEdgeBundleSet::Unreachable,
1405                                    ..
1406                                }) => {
1407                                    // FIXME(eddyb) DRY this "is empty region" check.
1408                                    self.func_def_body
1409                                        .at(edge_bundle.target)
1410                                        .at_children()
1411                                        .into_iter()
1412                                        .next()
1413                                        .is_none()
1414                                }
1415                                _ => false,
1416                            };
1417                        if target_is_trivial_unreachable {
1418                            DeferredEdgeBundleSet::Unreachable
1419                        } else {
1420                            DeferredEdgeBundleSet::Always {
1421                                target: DeferredTarget::Region(edge_bundle.target),
1422                                edge_bundle: edge_bundle.with_target(()),
1423                            }
1424                        }
1425                    })
1426                })
1427                .collect();
1428
1429            match kind {
1430                ControlInstKind::Unreachable => {
1431                    assert_eq!((inputs.len(), target_regions.len()), (0, 0));
1432
1433                    // FIXME(eddyb) this may result in lost optimizations over
1434                    // actually encoding it in `ControlNode`/`ControlRegion`
1435                    // (e.g. a new `ControlNodeKind`, or replacing region `outputs`),
1436                    // but it's simpler to handle it like this.
1437                    //
1438                    // NOTE(eddyb) actually, this encoding is lossless *during*
1439                    // structurization, and a divergent region can only end up as:
1440                    // - the function body, where it implies the function can
1441                    //   never actually return: not fully structurized currently
1442                    //   (but only for a silly reason, and it's entirely fixable)
1443                    // - a `Select` case, where it implies that case never merges
1444                    //   back into the `Select` node, and potentially that the
1445                    //   case can never be taken: this is where a structured
1446                    //   encoding can be introduced, by pruning unreachable
1447                    //   cases, and potentially even introducing `assume`s
1448                    // - a `Loop` body is not actually possible when divergent
1449                    //   (as there can be no backedge to form a cyclic CFG)
1450                    DeferredEdgeBundleSet::Unreachable
1451                }
1452
1453                ControlInstKind::ExitInvocation(kind) => {
1454                    assert_eq!(target_regions.len(), 0);
1455
1456                    let control_node = self.func_def_body.control_nodes.define(
1457                        self.cx,
1458                        ControlNodeDef {
1459                            kind: ControlNodeKind::ExitInvocation { kind, inputs },
1460                            outputs: [].into_iter().collect(),
1461                        }
1462                        .into(),
1463                    );
1464                    self.func_def_body.control_regions[region]
1465                        .children
1466                        .insert_last(control_node, &mut self.func_def_body.control_nodes);
1467
1468                    DeferredEdgeBundleSet::Unreachable
1469                }
1470
1471                ControlInstKind::Return => {
1472                    assert_eq!(target_regions.len(), 0);
1473
1474                    DeferredEdgeBundleSet::Always {
1475                        target: DeferredTarget::Return,
1476                        edge_bundle: IncomingEdgeBundle {
1477                            accumulated_count: IncomingEdgeCount::default(),
1478                            target: (),
1479                            target_inputs: inputs,
1480                        },
1481                    }
1482                }
1483
1484                ControlInstKind::Branch => {
1485                    assert_eq!((inputs.len(), target_regions.len()), (0, 1));
1486
1487                    self.append_maybe_claimed_region(
1488                        region,
1489                        target_regions.into_iter().next().unwrap(),
1490                    )
1491                }
1492
1493                ControlInstKind::SelectBranch(kind) => {
1494                    assert_eq!(inputs.len(), 1);
1495
1496                    let scrutinee = inputs[0];
1497
1498                    self.structurize_select_into(region, kind, Ok(scrutinee), target_regions)
1499                }
1500            }
1501        };
1502
1503        // Try to resolve deferred edges that may have accumulated, and keep
1504        // going until there's no more deferred edges that can be claimed.
1505        loop {
1506            // FIXME(eddyb) this should try to take as many edges as possible,
1507            // and incorporate them all at once, potentially with a switch instead
1508            // of N individual branches with their own booleans etc.
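            // Each successful claim appends a 2-case boolean `Select` to `region`:
            // the newly claimed region becomes the "then" case, while all remaining
            // deferrals are carried through the (initially empty) "else" case.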
1509            let (claimed, else_deferred_edges) = deferred_edges.split_out_matching(|deferred| {
1510                let deferred_target = deferred.edge_bundle.target;
1511                let DeferredEdgeBundle { condition, edge_bundle } = match deferred_target {
1512                    DeferredTarget::Region(target) => deferred.with_target(target),
1513                    DeferredTarget::Return => return Err(deferred),
1514                };
1515
1516                match self.try_claim_edge_bundle(edge_bundle) {
1517                    Ok(claimed_region) => Ok((condition, claimed_region)),
1518
1519                    Err(new_edge_bundle) => {
1520                        let new_target = DeferredTarget::Region(new_edge_bundle.target);
1521                        Err(DeferredEdgeBundle {
1522                            condition,
1523                            edge_bundle: new_edge_bundle.with_target(new_target),
1524                        })
1525                    }
1526                }
1527            });
1528            let Some((condition, then_region)) = claimed else {
1529                deferred_edges = else_deferred_edges;
1530                break;
1531            };
1532
1533            deferred_edges = self.structurize_select_into(
1534                region,
1535                SelectionKind::BoolCond,
1536                Err(&condition),
1537                [Ok(then_region), Err(else_deferred_edges)].into_iter().collect(),
1538            );
1539        }
1540
1541        // Cache the edge count for backedges (which later get turned into loops).
1542        let accumulated_backedge_count = deferred_edges
1543            .get_edge_bundle_by_target(DeferredTarget::Region(region))
1544            .map(|backedge| backedge.accumulated_count)
1545            .unwrap_or_default();
1546
1547        let old_state =
1548            self.structurize_region_state.insert(region, StructurizeRegionState::Ready {
1549                accumulated_backedge_count,
1550                region_deferred_edges: deferred_edges,
1551            });
1552        if !matches!(old_state, Some(StructurizeRegionState::InProgress)) {
1553            unreachable!(
1554                "cfg::Structurizer::structurize_region: \
1555                 already {}, when attempting to store structurization result",
1556                match old_state {
1557                    None => "reverted to missing (removed from the map?)",
1558                    Some(StructurizeRegionState::InProgress) => unreachable!(),
1559                    Some(StructurizeRegionState::Ready { .. }) => "completed",
1560                    Some(StructurizeRegionState::Claimed) => "claimed",
1561                }
1562            );
1563        }
1564    }
1565
1566    /// Append to `parent_region` a new `Select` [`ControlNode`] built from
1567    /// partially structured `cases`, merging all of their `deferred_edges`
1568    /// together into a combined `DeferredEdgeBundleSet` (which gets returned).
1569    //
1570    // FIXME(eddyb) handle `unreachable` cases losslessly.
1571    fn structurize_select_into(
1572        &mut self,
1573        parent_region: ControlRegion,
1574        kind: SelectionKind,
1575        scrutinee: Result<Value, &LazyCond>,
1576        mut cases: SmallVec<[Result<ClaimedRegion, DeferredEdgeBundleSet>; 8]>,
1577    ) -> DeferredEdgeBundleSet {
1578        // HACK(eddyb) don't nest a sole convergent case inside the `Select`,
1579        // and instead prefer early convergence (see also `EventualCfgExits`).
1580        // NOTE(eddyb) this also happens to handle the situation where `Select`
1581        // isn't even needed (i.e. the other cases don't even have side-effects),
1582        // via the `any_non_empty_case` check (after taking `convergent_case`).
1583        // FIXME(eddyb) consider introducing some kind of `assume` for `scrutinee`,
1584        // to preserve its known value (whenever `convergent_case` is reached).
1585        let convergent_cases = cases.iter_mut().filter(|case| match case {
1586            Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => {
1587                !matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable)
1588            }
1589        });
1590        if let Ok(convergent_case) = convergent_cases.exactly_one() {
1591            // HACK(eddyb) this relies on `structurize_select_into`'s behavior
1592            // for `unreachable` cases being largely equivalent to empty cases.
1593            let convergent_case =
1594                mem::replace(convergent_case, Err(DeferredEdgeBundleSet::Unreachable));
1595
1596            // FIXME(eddyb) avoid needing recursion, by instead changing the
1597            // "`Select` node insertion cursor" (into `parent_region`), and
1598            // stashing `convergent_case`'s deferred edges to return later.
1599            let deferred_edges =
1600                self.structurize_select_into(parent_region, kind, scrutinee, cases);
1601            assert!(matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable));
1602
1603            // The sole convergent case goes in the `parent_region`, and its
1604            // relationship with the `Select` (if it was even necessary at all)
1605            // is only at most one of side-effect sequencing.
1606            return self.append_maybe_claimed_region(parent_region, convergent_case);
1607        }
1608
1609        // Support lazily defining the `Select` node, as soon as it's necessary
1610        // (i.e. to plumb per-case dataflow through `Value::ControlNodeOutput`s),
1611        // but also if any of the cases actually have non-empty regions, which
1612        // is checked after the special-cases (which return w/o a `Select` at all).
1613        //
1614        // FIXME(eddyb) some cases may be `unreachable`, and that's erased here.
1615        let mut cached_select_node = None;
1616        let mut non_move_kind = Some(kind);
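        // (`SelectionKind` isn't `Copy`, so `kind` is stashed in an `Option` and
        // `take()`n the first time the `FnMut` closure below defines the node)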
1617        let mut get_or_define_select_node = |this: &mut Self, cases: &[_]| {
1618            *cached_select_node.get_or_insert_with(|| {
1619                let kind = non_move_kind.take().unwrap();
1620                let cases = cases
1621                    .iter()
1622                    .map(|case| {
1623                        let case_region = match case {
1624                            &Ok(ClaimedRegion { structured_body, .. }) => structured_body,
1625                            Err(_) => this
1626                                .func_def_body
1627                                .control_regions
1628                                .define(this.cx, ControlRegionDef::default()),
1629                        };
1630
1631                        // FIXME(eddyb) should these be asserts that it's already empty?
1632                        let case_region_def = this.func_def_body.at_mut(case_region).def();
1633                        case_region_def.outputs.clear();
1634                        case_region
1635                    })
1636                    .collect();
1637                let scrutinee =
1638                    scrutinee.unwrap_or_else(|lazy_cond| this.materialize_lazy_cond(lazy_cond));
1639                let select_node = this.func_def_body.control_nodes.define(
1640                    this.cx,
1641                    ControlNodeDef {
1642                        kind: ControlNodeKind::Select { kind, scrutinee, cases },
1643                        outputs: [].into_iter().collect(),
1644                    }
1645                    .into(),
1646                );
1647                this.func_def_body.control_regions[parent_region]
1648                    .children
1649                    .insert_last(select_node, &mut this.func_def_body.control_nodes);
1650                select_node
1651            })
1652        };
1653
1654        // Ensure the `Select` exists if needed for any per-case side-effects.
1655        let any_non_empty_case = cases.iter().any(|case| {
1656            case.as_ref().is_ok_and(|&ClaimedRegion { structured_body, .. }| {
1657                self.func_def_body.at(structured_body).at_children().into_iter().next().is_some()
1658            })
1659        });
1660        if any_non_empty_case {
1661            get_or_define_select_node(self, &cases);
1662        }
1663
1664        // Gather the full set of deferred edges (and returns).
1665        struct DeferredTargetSummary {
1666            input_count: usize,
1667            total_edge_count: IncomingEdgeCount,
1668        }
1669        let mut deferred_targets = FxIndexMap::default();
1670        for case in &cases {
1671            let case_deferred_edges = match case {
1672                Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => deferred_edges,
1673            };
1674            for (target, edge_bundle) in case_deferred_edges.iter_targets_with_edge_bundle() {
1675                let input_count = edge_bundle.target_inputs.len();
1676
1677                let summary = deferred_targets.entry(target).or_insert(DeferredTargetSummary {
1678                    input_count,
1679                    total_edge_count: IncomingEdgeCount::default(),
1680                });
1681                assert_eq!(summary.input_count, input_count);
1682                summary.total_edge_count += edge_bundle.accumulated_count;
1683            }
1684        }
1685
1686        // FIXME(eddyb) `control_region_input_rewrites` mappings, generated
1687        // for every `ClaimedRegion` that has been merged into a larger region,
1688        // only get applied after structurization fully completes, but here it's
1689        // very useful to have the fully resolved values across all `cases`'
1690        // incoming/outgoing edges (note, however, that within outgoing edges,
1691        // i.e. `case_deferred_edges`' `target_inputs`, `Value::ControlRegionInput`s
1692        // are not resolved using the contents of `case_structured_body_inputs`,
1693        // which is kept hermetic until just before `structurize_select_into` returns).
1694        for case in &mut cases {
1695            let (case_structured_body_inputs, case_deferred_edges) = match case {
1696                Ok(ClaimedRegion { structured_body_inputs, deferred_edges, .. }) => {
1697                    (&mut structured_body_inputs[..], deferred_edges)
1698                }
1699                Err(deferred_edges) => (&mut [][..], deferred_edges),
1700            };
1701            let all_values = case_structured_body_inputs.iter_mut().chain(
1702                case_deferred_edges
1703                    .iter_targets_with_edge_bundle_mut()
1704                    .flat_map(|(_, edge_bundle)| &mut edge_bundle.target_inputs),
1705            );
1706            for v in all_values {
1707                ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites)
1708                    .transform_value_use(v)
1709                    .apply_to(v);
1710            }
1711        }
1712
1713        // Merge all `deferred_edges` by plumbing their per-case `target_input`s
1714        // (into per-case region outputs, and therefore the `Select` outputs)
1715        // out of all cases that can reach them, with undef constants used to
1716        // fill any gaps (i.e. for the targets not reached through each case),
1717        // while deferred conditions are collected separately (for `LazyCond`).
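        // E.g. if cases A and B both defer to some target T, but pass different
        // values for T's first input: each case region gets a new output (A's
        // value, resp. B's), the `Select` node gets a matching output declaration,
        // and the merged deferral uses that `Value::ControlNodeOutput` as T's first
        // input (a case that can't reach T outputs an undef constant instead).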
1718        let deferred_edges = deferred_targets.into_iter().map(|(target, target_summary)| {
1719            let DeferredTargetSummary { input_count, total_edge_count } = target_summary;
1720
1721            // HACK(eddyb) `Err` wraps only `LazyCond::{Undef,False}`, which allows
1722            // distinguishing between "not taken" and "not even reachable".
1723            let per_case_deferred: SmallVec<[Result<DeferredEdgeBundle<()>, LazyCond>; 8]> = cases
1724                .iter_mut()
1725                .map(|case| match case {
1726                    Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => {
1727                        if let DeferredEdgeBundleSet::Unreachable = deferred_edges {
1728                            Err(LazyCond::Undef)
1729                        } else {
1730                            deferred_edges
1731                                .steal_deferred_by_target_without_removal(target)
1732                                .ok_or(LazyCond::False)
1733                        }
1734                    }
1735                })
1736                .collect();
1737
1738            let target_inputs = (0..input_count)
1739                .map(|target_input_idx| {
1740                    let per_case_target_input = per_case_deferred.iter().map(|per_case_deferred| {
1741                        per_case_deferred.as_ref().ok().map(
1742                            |DeferredEdgeBundle { edge_bundle, .. }| {
1743                                edge_bundle.target_inputs[target_input_idx]
1744                            },
1745                        )
1746                    });
1747
1748                    // Avoid introducing dynamic dataflow when the same value is
1749                    // used across all cases (which can reach this `target`).
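                    // (e.g. when every case that can reach `target` passes the
                    // same constant, it's used directly, and no `Select` output
                    // has to be created for this particular input)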
1750                    let unique_target_input_value = per_case_target_input
1751                        .clone()
1752                        .zip_eq(&cases)
1753                        .filter_map(|(v, case)| Some((v?, case)))
1754                        .map(|(v, case)| {
1755                            // If possible, resolve `v` to a `Value` valid in
1756                            // `parent_region` (i.e. the `Select` node parent).
1757                            match case {
1758                                // `case`'s `structured_body` effectively "wraps"
1759                                // its `deferred_edges` (where `v` came from),
1760                                // so values from `parent_region` can only be
1761                                // hermetically used via `structured_body` inputs.
1762                                Ok(ClaimedRegion {
1763                                    structured_body,
1764                                    structured_body_inputs,
1765                                    ..
1766                                }) => match v {
1767                                    Value::Const(_) => Ok(v),
1768                                    Value::ControlRegionInput { region, input_idx }
1769                                        if region == *structured_body =>
1770                                    {
1771                                        Ok(structured_body_inputs[input_idx as usize])
1772                                    }
1773                                    _ => Err(()),
1774                                },
1775
1776                                // `case` has no region of its own, so everything
1777                                // it carries is already from within `parent_region`.
1778                                Err(_) => Ok(v),
1779                            }
1780                        })
1781                        .dedup()
1782                        .exactly_one();
1783                    if let Ok(Ok(v)) = unique_target_input_value {
1784                        return v;
1785                    }
1786
1787                    let ty = match target {
1788                        DeferredTarget::Region(target) => {
1789                            self.func_def_body.at(target).def().inputs[target_input_idx].ty
1790                        }
1791                        // HACK(eddyb) in the absence of `FuncDecl`, infer the
1792                        // type from each returned value (and require they match).
1793                        DeferredTarget::Return => per_case_target_input
1794                            .clone()
1795                            .flatten()
1796                            .map(|v| self.func_def_body.at(v).type_of(self.cx))
1797                            .dedup()
1798                            .exactly_one()
1799                            .ok()
1800                            .expect("mismatched `return`ed value types"),
1801                    };
1802
1803                    let select_node = get_or_define_select_node(self, &cases);
1804                    let output_decls = &mut self.func_def_body.at_mut(select_node).def().outputs;
1805                    let output_idx = output_decls.len();
1806                    output_decls.push(ControlNodeOutputDecl { attrs: AttrSet::default(), ty });
1807                    for (case_idx, v) in per_case_target_input.enumerate() {
1808                        let v = v.unwrap_or_else(|| Value::Const(self.const_undef(ty)));
1809
1810                        let case_region = match &self.func_def_body.at(select_node).def().kind {
1811                            ControlNodeKind::Select { cases, .. } => cases[case_idx],
1812                            _ => unreachable!(),
1813                        };
1814                        let outputs = &mut self.func_def_body.at_mut(case_region).def().outputs;
1815                        assert_eq!(outputs.len(), output_idx);
1816                        outputs.push(v);
1817                    }
1818                    Value::ControlNodeOutput {
1819                        control_node: select_node,
1820                        output_idx: output_idx.try_into().unwrap(),
1821                    }
1822                })
1823                .collect();
1824
1825            // Simplify `LazyCond`s eagerly, to reduce costs later on, or even
1826            // outright avoid defining the `Select` node in the first place.
1827            //
1828            // FIXME(eddyb) move all simplifications from `materialize_lazy_cond`
1829            // to here (allowing e.g. not defining the `Select` in more cases).
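            // (e.g. when every case that can reach this target does so
            // unconditionally, the merged condition is just `LazyCond::True`,
            // and no per-case boolean outputs are needed on the `Select`)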
1830            let per_case_conds =
1831                per_case_deferred.iter().map(|per_case_deferred| match per_case_deferred {
1832                    Ok(DeferredEdgeBundle { condition, .. }) => condition,
1833                    Err(undef_or_false) => undef_or_false,
1834                });
1835            let condition = if per_case_conds
1836                .clone()
1837                .all(|cond| matches!(cond, LazyCond::Undef | LazyCond::True))
1838            {
1839                LazyCond::True
1840            } else {
1841                LazyCond::Merge(Rc::new(LazyCondMerge::Select {
1842                    control_node: get_or_define_select_node(self, &cases),
1843                    per_case_conds: per_case_conds.cloned().collect(),
1844                }))
1845            };
1846
1847            DeferredEdgeBundle {
1848                condition,
1849                edge_bundle: IncomingEdgeBundle {
1850                    target,
1851                    accumulated_count: total_edge_count,
1852                    target_inputs,
1853                },
1854            }
1855        });
1856        let deferred_edges = deferred_edges.collect();
1857
1858        // Only as the very last step can per-case `region_inputs` be added to
1859        // `control_region_input_rewrites`.
1860        //
1861        // FIXME(eddyb) don't replace `Value::ControlRegionInput { region, .. }`
1862        // with `region_inputs` when the `region` ends up a `ControlNode` child,
1863        // but instead make all `ControlRegion`s entirely hermetic wrt inputs.
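        // (i.e. uses of each claimed case's `Value::ControlRegionInput`s will,
        // at the end of structurization, be rewritten to the values recorded in
        // `structured_body_inputs`, which come from outside the `Select` node)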
1864        #[allow(clippy::manual_flatten)]
1865        for case in cases {
1866            if let Ok(ClaimedRegion { structured_body, structured_body_inputs, .. }) = case {
1867                if !structured_body_inputs.is_empty() {
1868                    self.control_region_input_rewrites.insert(
1869                        structured_body,
1870                        ControlRegionInputRewrites::ReplaceWith(structured_body_inputs),
1871                    );
1872                    self.func_def_body.at_mut(structured_body).def().inputs.clear();
1873                }
1874            }
1875        }
1876
1877        deferred_edges
1878    }
1879
1880    // FIXME(eddyb) this should try to handle as many `LazyCond` as are available,
1881    // for incorporating them all at once, ideally with a switch instead
1882    // of N individual branches with their own booleans etc.
1883    fn materialize_lazy_cond(&mut self, cond: &LazyCond) -> Value {
1884        match cond {
1885            LazyCond::Undef => Value::Const(self.const_undef(self.type_bool)),
1886            LazyCond::False => Value::Const(self.const_false),
1887            LazyCond::True => Value::Const(self.const_true),
1888
1889            // `LazyCond::Merge` was only created in the first place if a merge
1890            // was actually necessary, so there shouldn't be simplifications to
1891            // do here (i.e. the only potential saving would be if `materialize_lazy_cond`
1892            // never gets called at all, because the target has become unconditional).
1893            //
1894            // FIXME(eddyb) there is still an `if cond { true } else { false }`
1895            // special-case (replacing it with just `cond`), which cannot be expressed
1896            // currently in `LazyCond` itself (but maybe it should be).
1897            LazyCond::Merge(merge) => {
1898                let LazyCondMerge::Select { control_node, ref per_case_conds } = **merge;
1899
1900                // HACK(eddyb) this won't actually allocate most of the time,
1901                // and avoids complications later below, when mutating the cases.
1902                let per_case_conds: SmallVec<[_; 8]> = per_case_conds
1903                    .iter()
1904                    .map(|cond| self.materialize_lazy_cond(cond))
1905                    .collect();
1906
1907                let ControlNodeDef { kind, outputs: output_decls } =
1908                    &mut *self.func_def_body.control_nodes[control_node];
1909                let cases = match kind {
1910                    ControlNodeKind::Select { kind, scrutinee, cases } => {
1911                        assert_eq!(cases.len(), per_case_conds.len());
1912
1913                        if let SelectionKind::BoolCond = kind {
1914                            let [val_false, val_true] =
1915                                [self.const_false, self.const_true].map(Value::Const);
1916                            if per_case_conds[..] == [val_true, val_false] {
1917                                return *scrutinee;
1918                            } else if per_case_conds[..] == [val_false, val_true] {
1919                                // FIXME(eddyb) this could also be special-cased,
1920                                // at least when called from the topmost level,
1921                                // where which side is `false`/`true` doesn't
1922                                // matter (or we could even generate `!cond`?).
1923                                let _not_cond = *scrutinee;
1924                            }
1925                        }
1926
1927                        cases
1928                    }
1929                    _ => unreachable!(),
1930                };
1931
1932                let output_idx = u32::try_from(output_decls.len()).unwrap();
1933                output_decls
1934                    .push(ControlNodeOutputDecl { attrs: AttrSet::default(), ty: self.type_bool });
1935
1936                for (&case, cond) in cases.iter().zip_eq(per_case_conds) {
1937                    let ControlRegionDef { outputs, .. } =
1938                        &mut self.func_def_body.control_regions[case];
1939                    outputs.push(cond);
1940                    assert_eq!(outputs.len(), output_decls.len());
1941                }
1942
1943                Value::ControlNodeOutput { control_node, output_idx }
1944            }
1945        }
1946    }
1947
1948    /// Append to `parent_region` the children of `maybe_claimed_region` (if `Ok`),
1949    /// returning the `DeferredEdgeBundleSet` from `maybe_claimed_region`.
1950    //
1951    // FIXME(eddyb) the name isn't great, but e.g. "absorb into" would also be
1952    // weird (and on top of that, the append direction can be tricky to express).
1953    fn append_maybe_claimed_region(
1954        &mut self,
1955        parent_region: ControlRegion,
1956        maybe_claimed_region: Result<ClaimedRegion, DeferredEdgeBundleSet>,
1957    ) -> DeferredEdgeBundleSet {
1958        match maybe_claimed_region {
1959            Ok(ClaimedRegion { structured_body, structured_body_inputs, deferred_edges }) => {
1960                if !structured_body_inputs.is_empty() {
1961                    self.control_region_input_rewrites.insert(
1962                        structured_body,
1963                        ControlRegionInputRewrites::ReplaceWith(structured_body_inputs),
1964                    );
1965                }
1966                let new_children =
1967                    mem::take(&mut self.func_def_body.at_mut(structured_body).def().children);
1968                self.func_def_body.control_regions[parent_region]
1969                    .children
1970                    .append(new_children, &mut self.func_def_body.control_nodes);
1971                deferred_edges
1972            }
1973            Err(deferred_edges) => deferred_edges,
1974        }
1975    }
1976
1977    /// When structurization is only partial, and there remain unclaimed regions,
1978    /// they have to be reintegrated into the CFG, putting back [`ControlInst`]s
1979    /// where `structurize_region` has taken them from.
1980    ///
1981    /// This function handles one region at a time to make it more manageable,
1982    /// despite it having a single call site (in a loop in `structurize_func`).
1983    fn rebuild_cfg_from_unclaimed_region_deferred_edges(
1984        &mut self,
1985        region: ControlRegion,
1986        mut deferred_edges: DeferredEdgeBundleSet,
1987    ) {
1988        assert!(
1989            self.structurize_region_state.is_empty(),
1990            "cfg::Structurizer::rebuild_cfg_from_unclaimed_region_deferred_edges: \
1991             must only be called from `structurize_func`, \
1992             after it takes `structurize_region_state`"
1993        );
1994
1995        // Build a chain of conditional branches to apply deferred edges.
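        // Roughly: `region` ends in `if c1 { t1 } else { r1 }`, a freshly created
        // `r1` ends in `if c2 { t2 } else { r2 }`, and so on, until what's left is
        // either nothing (an unconditional final branch), a deferred `Return`, or
        // true divergence (`Unreachable`) - see `final_control_inst` below.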
1996        let mut control_source = Some(region);
1997        loop {
1998            let taken_then;
1999            (taken_then, deferred_edges) =
2000                deferred_edges.split_out_matching(|deferred| match deferred.edge_bundle.target {
2001                    DeferredTarget::Region(target) => {
2002                        Ok((deferred.condition, (target, deferred.edge_bundle.target_inputs)))
2003                    }
2004                    DeferredTarget::Return => Err(deferred),
2005                });
2006            let Some((condition, then_target_and_inputs)) = taken_then else {
2007                break;
2008            };
2009            let branch_source = control_source.take().unwrap();
2010            let else_target_and_inputs = match deferred_edges {
2011                // At most one deferral left, so it can be used as the "else"
2012                // case, or the branch left unconditional in its absence.
2013                DeferredEdgeBundleSet::Unreachable => None,
2014                DeferredEdgeBundleSet::Always {
2015                    target: DeferredTarget::Region(else_target),
2016                    edge_bundle,
2017                } => {
2018                    deferred_edges = DeferredEdgeBundleSet::Unreachable;
2019                    Some((else_target, edge_bundle.target_inputs))
2020                }
2021
2022                // Either more branches, or a deferred return, are needed, so
2023                // the "else" case must be a `ControlRegion` that itself can
2024                // have a `ControlInst` attached to it later on.
2025                _ => {
2026                    let new_empty_region = self
2027                        .func_def_body
2028                        .control_regions
2029                        .define(self.cx, ControlRegionDef::default());
2030                    control_source = Some(new_empty_region);
2031                    Some((new_empty_region, [].into_iter().collect()))
2032                }
2033            };
2034
2035            let condition = Some(condition)
2036                .filter(|_| else_target_and_inputs.is_some())
2037                .map(|cond| self.materialize_lazy_cond(&cond));
2038            let branch_control_inst = ControlInst {
2039                attrs: AttrSet::default(),
2040                kind: if condition.is_some() {
2041                    ControlInstKind::SelectBranch(SelectionKind::BoolCond)
2042                } else {
2043                    ControlInstKind::Branch
2044                },
2045                inputs: condition.into_iter().collect(),
2046                targets: [&then_target_and_inputs]
2047                    .into_iter()
2048                    .chain(&else_target_and_inputs)
2049                    .map(|&(target, _)| target)
2050                    .collect(),
2051                target_inputs: [then_target_and_inputs]
2052                    .into_iter()
2053                    .chain(else_target_and_inputs)
2054                    .filter(|(_, inputs)| !inputs.is_empty())
2055                    .collect(),
2056            };
2057            assert!(
2058                self.func_def_body
2059                    .unstructured_cfg
2060                    .as_mut()
2061                    .unwrap()
2062                    .control_inst_on_exit_from
2063                    .insert(branch_source, branch_control_inst)
2064                    .is_none()
2065            );
2066        }
2067
2068        let deferred_return = match deferred_edges {
2069            DeferredEdgeBundleSet::Unreachable => None,
2070            DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
2071                Some(edge_bundle.target_inputs)
2072            }
2073            _ => unreachable!(),
2074        };
2075
2076        let final_source = match control_source {
2077            Some(region) => region,
2078            None => {
2079                // The loop above handled all the targets, nothing left to do.
2080                assert!(deferred_return.is_none());
2081                return;
2082            }
2083        };
2084
2085        // Final deferral is either a `Return` (if needed), or an `Unreachable`
2086        // (only when truly divergent, i.e. no `deferred_edges`/`deferred_return`).
2087        let final_control_inst = {
2088            let (kind, inputs) = match deferred_return {
2089                Some(return_values) => (ControlInstKind::Return, return_values),
2090                None => (ControlInstKind::Unreachable, [].into_iter().collect()),
2091            };
2092            ControlInst {
2093                attrs: AttrSet::default(),
2094                kind,
2095                inputs,
2096                targets: [].into_iter().collect(),
2097                target_inputs: FxIndexMap::default(),
2098            }
2099        };
2100        assert!(
2101            self.func_def_body
2102                .unstructured_cfg
2103                .as_mut()
2104                .unwrap()
2105                .control_inst_on_exit_from
2106                .insert(final_source, final_control_inst)
2107                .is_none()
2108        );
2109    }
2110
2111    /// Create an undefined constant (as a placeholder where a value needs to be
2112    /// present, but won't actually be used), of type `ty`.
2113    fn const_undef(&self, ty: Type) -> Const {
2114        // FIXME(eddyb) SPIR-T should have native undef itself.
2115        let wk = &spv::spec::Spec::get().well_known;
2116        self.cx.intern(ConstDef {
2117            attrs: AttrSet::default(),
2118            ty,
2119            kind: ConstKind::SpvInst {
2120                spv_inst_and_const_inputs: Rc::new((wk.OpUndef.into(), [].into_iter().collect())),
2121            },
2122        })
2123    }
2124}