spirt/cfgssa.rs

//! Tools for working with control-flow graphs that contain SSA dataflow
//! (often abbreviated to `CFG<SSA>` or similar).
//!
//! The defining characteristic of SSA dataflow in a control-flow graph is that
//! SSA definitions (of values, e.g. the result of an instruction) are "visible"
//! from all the CFG locations they dominate (i.e. the locations that can only
//! be reached by passing through the definition first), and can therefore be
//! directly used arbitrarily far away in the CFG with no annotations required
//! anywhere in the CFG between the definition and its uses.
//!
//! While "def dominates use" is sufficient to ensure the value can traverse
//! the necessary paths (between def and use) in the CFG, a lot of care must
//! be taken to preserve the correctness of such implicit dataflow across all
//! transformations, and it's overall far more fragile than the local dataflow
//! of e.g. phi nodes (or their alternative "block arguments"), or in SPIR-T's
//! case, `ControlRegion` inputs and `ControlNode` outputs (inspired by RVSDG,
//! which has even stricter isolation/locality in its regions).
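//!
//! A rough usage sketch of this module's two phases (the `u32` block/def IDs
//! and the tiny straight-line CFG below are only illustrative assumptions):
//! an immutable [`DefMap`] is built first, then a [`UseAccumulator`] is fed
//! uses and CFG edges, and finally drained:
//!
//! ```ignore
//! // Blocks `0 -> 1 -> 2`, with one def (`100`) in block `0`, used in block `2`.
//! let mut def_map = DefMap::new();
//! def_map.add_block(0u32);
//! def_map.add_def(0u32, 100u32, ());
//! def_map.add_block(1u32);
//! def_map.add_block(2u32);
//!
//! let mut acc = UseAccumulator::new(&def_map);
//! acc.add_edge(0, 1);
//! acc.add_edge(1, 2);
//! acc.add_use(2, 100);
//!
//! // Blocks `1` and `2` end up with `100` in their accumulated ("live") sets,
//! // while block `0` (which defines it) does not.
//! for (block_id, live_set) in acc.into_inter_block_uses() {
//!     // ...
//! }
//! ```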

use crate::{FxIndexMap, FxIndexSet};
use itertools::Either;
use rustc_hash::FxHashMap;
use std::collections::VecDeque;
use std::hash::Hash;

// HACK(eddyb) to be able to propagate many uses at once while avoiding expensive
// hierarchical indexing (and because the parent block of a def is significant),
// each block's defs get chunked, with chunks being the size of `FixedBitSet`
// (i.e. each bit tracks one def in the chunk, and can be propagated together).
// FIXME(eddyb) in theory, a sparse bitset could expose some of its sparseness
// to allow chunked addressing/iteration/etc. (but that requires more API design).
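// For example, with `CHUNK_SIZE == 64`, `DefIdx(130)` belongs to `ChunkIdx(2)`,
// as bit `130 % 64 == 2` of that chunk's `FixedBitSet`.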
const CHUNK_SIZE: usize = data::FixedBitSet::SIZE;

#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
struct BlockIdx(usize);
#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
struct ChunkIdx(usize);
#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
struct DefIdx(usize);

impl DefIdx {
    fn chunk(self) -> ChunkIdx {
        ChunkIdx(self.0 / CHUNK_SIZE)
    }
}

/// All blocks and the definitions they contain, which have to be computed first,
/// and remain immutable, because where a value is defined (or whether it's at
/// all part of the function itself) can have non-monotonic effects elsewhere.
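///
/// Note that every block is expected to be registered via [`DefMap::add_block`]
/// before any of its definitions are recorded via [`DefMap::add_def`].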
pub struct DefMap<BlockId, DefId, DefType> {
    blocks_by_id: FxIndexMap<BlockId, BlockDef>,
    // FIXME(eddyb) should this contain `BlockIdx` instead?
    chunk_to_block_id: data::KeyedVec<ChunkIdx, BlockId>,
    chunk_defs: data::KeyedVec<ChunkIdx, [Option<(DefId, DefType)>; CHUNK_SIZE]>,
    def_id_to_def_idx: FxHashMap<DefId, DefIdx>,
}

struct BlockDef {
    last_def_idx: DefIdx,
}

impl<BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy> Default
    for DefMap<BlockId, DefId, DefType>
{
    fn default() -> Self {
        Self::new()
    }
}

impl<BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy>
    DefMap<BlockId, DefId, DefType>
{
    pub fn new() -> Self {
        Self {
            blocks_by_id: Default::default(),
            chunk_to_block_id: Default::default(),
            chunk_defs: Default::default(),
            def_id_to_def_idx: Default::default(),
        }
    }

    pub fn add_block(&mut self, block_id: BlockId) {
        // FIXME(eddyb) disallow accidental re-insertion.
        self.blocks_by_id.insert(block_id, BlockDef { last_def_idx: DefIdx(!0) });
    }

    pub fn add_def(&mut self, block_id: BlockId, def_id: DefId, def_type: DefType) {
        // HACK(eddyb) optimize for repeated definitions in the same block.
        let block = match self.blocks_by_id.last_mut() {
            Some((&last_block_id, last_block)) if last_block_id == block_id => last_block,
            _ => &mut self.blocks_by_id[&block_id],
        };
        let def_idx = Some(DefIdx(block.last_def_idx.0.wrapping_add(1)))
            .filter(|def_idx| def_idx.chunk() == block.last_def_idx.chunk())
            .unwrap_or_else(|| {
                let chunk_idx = self.chunk_to_block_id.push(block_id);
                assert!(chunk_idx == self.chunk_defs.push([None; CHUNK_SIZE]));
                DefIdx(chunk_idx.0 * CHUNK_SIZE)
            });
        block.last_def_idx = def_idx;

        self.chunk_defs[def_idx.chunk()][def_idx.0 % CHUNK_SIZE] = Some((def_id, def_type));

        // FIXME(eddyb) disallow accidental re-insertion.
        self.def_id_to_def_idx.insert(def_id, def_idx);
    }

    fn block_id_from_idx(&self, block_idx: BlockIdx) -> BlockId {
        *self.blocks_by_id.get_index(block_idx.0).unwrap().0
    }
}

/// Incremental tracker for definition uses (and CFG edges between blocks),
/// accumulating the complete set of transitive uses for each block, also known
/// as the SSA "live set" (corresponding to the starting position of each block).
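///
/// For example (purely illustrative): with blocks `A -> B -> C` and a value `v`
/// defined in `A` but only used in `C`, calling `add_edge(A, B)`,
/// `add_edge(B, C)` and `add_use(C, v)` results in both `B` and `C`
/// accumulating `v` (it's "live" on entry to them), while `A` accumulates
/// nothing (use tracking stops at the defining block).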
pub struct UseAccumulator<'a, BlockId, DefId, DefType> {
    def_map: &'a DefMap<BlockId, DefId, DefType>,

    blocks: data::KeyedVec<BlockIdx, BlockAcc>,

    // HACK(eddyb) optimize for repeated uses from the same block.
    most_recent_block_idx: BlockIdx,

    /// Every `block_idx` with non-empty `blocks[block_idx].dirty_chunks`,
    /// and used for breadth-first propagation through predecessors.
    //
    // FIXME(eddyb) some traversal orders might be more effective, but also
    // the "chunk" granularity might itself be enough to paper over that?
    propagate_queue: VecDeque<BlockIdx>,
}

#[derive(Default)]
struct BlockAcc {
    // FIXME(eddyb) should this use a bitset? seems likely to be inefficient
    preds: FxIndexSet<BlockIdx>,

    /// All definitions used in this block (or any other block reachable from it),
    /// excluding its own definitions, and represented as a sparse bitset.
    uses: data::SparseMap<ChunkIdx, data::FixedBitSet<usize>>,

    /// All chunks `c` where `uses[c]` has changed since this block has last
    /// propagated any of its `uses` to its predecessors.
    dirty_chunks: data::BitSet<ChunkIdx>,
}

enum AddUsesSource {
    New(DefIdx),
    PropagateBackwardsAcrossEdge { target: BlockIdx, only_dirty: bool },
}

impl<'a, BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy>
    UseAccumulator<'a, BlockId, DefId, DefType>
{
    pub fn new(def_map: &'a DefMap<BlockId, DefId, DefType>) -> Self {
        Self {
            def_map,

            blocks: data::KeyedVec::from_fn(..BlockIdx(def_map.blocks_by_id.len()), |_| {
                Default::default()
            }),

            most_recent_block_idx: BlockIdx(0),

            propagate_queue: VecDeque::new(),
        }
    }

    // FIXME(eddyb) how inefficient is `FxIndexMap<DefId, DefType>`?
    // (vs e.g. a bitset combined with not duplicating `DefType`s per-block?)
    // FIXME(eddyb) naming might not be enough to clarify the semantics,
    // might be useful to use the liveness (e.g. "live set") jargon?
    pub fn into_inter_block_uses(
        mut self,
    ) -> impl Iterator<Item = (BlockId, FxIndexMap<DefId, DefType>)> + 'a {
        self.propagate();

        assert!(self.propagate_queue.is_empty());

        self.blocks.into_iter().map(move |(block_idx, block_acc)| {
            assert!(block_acc.dirty_chunks.is_empty());

            (
                self.def_map.block_id_from_idx(block_idx),
                block_acc
                    .uses
                    .iter()
                    .flat_map(|(chunk_idx, chunk_uses)| {
                        let chunk_defs = &self.def_map.chunk_defs[chunk_idx];
                        chunk_uses.keys().map(move |i| chunk_defs[i].unwrap())
                    })
                    .collect(),
            )
        })
    }

    pub fn add_use(&mut self, block_id: BlockId, used_def_id: DefId) {
        // FIXME(eddyb) use `let ... else`?
        let &used_def_idx = match self.def_map.def_id_to_def_idx.get(&used_def_id) {
            // HACK(eddyb) silently ignoring unrecognized defs.
            None => return,
            Some(def_idx) => def_idx,
        };

        // Intra-block uses are not tracked.
        if self.def_map.chunk_to_block_id[used_def_idx.chunk()] == block_id {
            return;
        }

        let block_idx = Some(self.most_recent_block_idx)
            .filter(|&block_idx| self.def_map.block_id_from_idx(block_idx) == block_id)
            .unwrap_or_else(|| {
                BlockIdx(self.def_map.blocks_by_id.get_index_of(&block_id).unwrap())
            });
        // Keep the cache current, so repeated uses from the same block hit it.
        self.most_recent_block_idx = block_idx;

        self.add_uses_to(block_idx, AddUsesSource::New(used_def_idx));
    }

    pub fn add_edge(&mut self, source_block_id: BlockId, target_block_id: BlockId) {
        // Self-loops require no tracking (could never introduce more uses).
        if source_block_id == target_block_id {
            return;
        }

        // FIXME(eddyb) is this necessary? (the concern is that dirty-tracking
        // might get confused, but it shouldn't actually be an issue)
        self.propagate();

        let [source_block_idx, target_block_idx] = [source_block_id, target_block_id]
            .map(|block_id| BlockIdx(self.def_map.blocks_by_id.get_index_of(&block_id).unwrap()));

        if self.blocks[target_block_idx].preds.insert(source_block_idx) {
            self.add_uses_to(source_block_idx, AddUsesSource::PropagateBackwardsAcrossEdge {
                target: target_block_idx,
                only_dirty: false,
            });
        }
    }

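    // Drain the worklist: every dirty block pushes its newly-changed chunks of
    // uses backwards into each of its predecessors (via `add_uses_to`, which
    // may in turn mark those predecessors dirty and re-enqueue them), until a
    // fixpoint is reached.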
    fn propagate(&mut self) {
        while let Some(block_idx) = self.propagate_queue.pop_front() {
            for i in 0..self.blocks[block_idx].preds.len() {
                let pred_block_idx = self.blocks[block_idx].preds[i];

                self.add_uses_to(pred_block_idx, AddUsesSource::PropagateBackwardsAcrossEdge {
                    target: block_idx,
                    only_dirty: true,
                });
            }
            self.blocks[block_idx].dirty_chunks.clear();
        }
    }

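    // Merge `uses` (either a single new def, or another block's accumulated
    // use chunks, optionally restricted to its dirty ones) into `block_idx`'s
    // own use set, marking any changed chunks as dirty, and enqueueing
    // `block_idx` for propagation if this made it newly dirty.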
    fn add_uses_to(&mut self, block_idx: BlockIdx, uses: AddUsesSource) {
        // FIXME(eddyb) make this unnecessary for a comparison later, perhaps?
        let block_id = self.def_map.block_id_from_idx(block_idx);

        let mut new_uses;
        let (block_acc, chunked_uses) = match uses {
            AddUsesSource::New(def_idx) => {
                new_uses = data::FixedBitSet::new();
                new_uses.insert(def_idx.0 % CHUNK_SIZE, ());
                (
                    &mut self.blocks[block_idx],
                    Either::Left([(def_idx.chunk(), &new_uses)].into_iter()),
                )
            }
            AddUsesSource::PropagateBackwardsAcrossEdge { target, only_dirty } => {
                let [block_acc, target_block_acc] =
                    self.blocks.get_mut2([block_idx, target]).unwrap();

                (
                    block_acc,
                    Either::Right(if only_dirty {
                        Either::Left(target_block_acc.dirty_chunks.iter().map(|(chunk_idx, _)| {
                            (chunk_idx, target_block_acc.uses.get(chunk_idx).unwrap())
                        }))
                    } else {
                        Either::Right(target_block_acc.uses.iter())
                    }),
                )
            }
        };

        let block_was_dirty = !block_acc.dirty_chunks.is_empty();
        for (chunk_idx, new_uses) in chunked_uses {
            // Use tracking terminates in the defining block.
            if self.def_map.chunk_to_block_id[chunk_idx] == block_id {
                continue;
            }

            let uses = block_acc.uses.entry(chunk_idx).or_default();

            let old_and_new_uses = uses.union(new_uses);
            if *uses != old_and_new_uses {
                *uses = old_and_new_uses;
                block_acc.dirty_chunks.entry(chunk_idx).insert(());
            }
        }
        if !block_was_dirty && !block_acc.dirty_chunks.is_empty() {
            self.propagate_queue.push_back(block_idx);
        }
    }
}

// HACK(eddyb) the hierarchy/breadth/sparsity/etc. of these data structures is
// somewhat arbitrary, but they should do better than naive non-sparse solutions.
// FIXME(eddyb) attempt to fine-tune this for realistic workloads.
// FIXME(eddyb) move this out of here.
mod data {
    use smallvec::SmallVec;
    use std::marker::PhantomData;
    use std::{iter, mem, ops};

    // FIXME(eddyb) should this be `FlatVec`? also, does it belong here?
    pub struct KeyedVec<K, T> {
        vec: Vec<T>,
        _marker: PhantomData<K>,
    }

    impl<K: Copy + Into<usize> + From<usize>, T> Default for KeyedVec<K, T> {
        fn default() -> Self {
            Self::new()
        }
    }
    impl<K: Copy + Into<usize> + From<usize>, T> KeyedVec<K, T> {
        pub fn new() -> Self {
            Self { vec: vec![], _marker: PhantomData }
        }
        pub fn from_fn(keys: ops::RangeTo<K>, mut f: impl FnMut(K) -> T) -> Self {
            KeyedVec {
                vec: (0..keys.end.into()).map(|i| f(K::from(i))).collect(),
                _marker: PhantomData,
            }
        }
        pub fn push(&mut self, x: T) -> K {
            let k = K::from(self.vec.len());
            self.vec.push(x);
            k
        }
        pub fn into_iter(self) -> impl Iterator<Item = (K, T)> {
            self.vec.into_iter().enumerate().map(|(i, x)| (K::from(i), x))
        }

        // FIXME(eddyb) replace this when `get_many_mut` gets stabilized.
        pub fn get_mut2(&mut self, keys: [K; 2]) -> Option<[&mut T; 2]> {
            let [k_i, k_j] = keys;
            let (i, j) = (k_i.into(), k_j.into());
            if i > j {
                let [y, x] = self.get_mut2([k_j, k_i])?;
                return Some([x, y]);
            }
            if i == j || j >= self.vec.len() {
                return None;
            }

            let (xs, ys) = self.vec.split_at_mut(j);
            Some([&mut xs[i], &mut ys[0]])
        }
    }
    impl<K: Copy + Into<usize> + From<usize>, T> ops::Index<K> for KeyedVec<K, T> {
        type Output = T;
        fn index(&self, k: K) -> &T {
            &self.vec[k.into()]
        }
    }
    impl<K: Copy + Into<usize> + From<usize>, T> ops::IndexMut<K> for KeyedVec<K, T> {
        fn index_mut(&mut self, k: K) -> &mut T {
            &mut self.vec[k.into()]
        }
    }

    // HACK(eddyb) abstraction to enable code sharing between maps and sets.
    pub trait ValueStorage<V> {
        // HACK(eddyb) most of the need for this arises from avoidance of
        // `unsafe` code (i.e. `MaybeUninit<V>` could suffice in most cases).
        type Slot: Default;
        fn slot_unwrap(slot: Self::Slot) -> V;
        fn slot_unwrap_ref(slot: &Self::Slot) -> &V;
        fn slot_unwrap_mut(slot: &mut Self::Slot) -> &mut V;
        fn slot_insert(slot: &mut Self::Slot, v: V) -> &mut V;

        // FIXME(eddyb) ideally whether this allocates would be size-based.
        // FIXME(eddyb) the name and APIs probably don't make it clear this is
        // for holding some number of `Self::Slot`s specifically.
        type LazyBox<T>;
        fn lazy_box_default<T>(default: impl Fn() -> T) -> Self::LazyBox<T>;
        fn lazy_box_unwrap_ref<T>(lb: &Self::LazyBox<T>) -> &T;
        fn lazy_box_unwrap_mut_or_alloc<T>(
            lb: &mut Self::LazyBox<T>,
            default: impl Fn() -> T,
        ) -> &mut T;
    }

    pub enum IgnoreValue {}
    impl ValueStorage<()> for IgnoreValue {
        type Slot = ();
        fn slot_unwrap(_: ()) {}
        fn slot_unwrap_ref(_: &()) -> &() {
            &()
        }
        fn slot_unwrap_mut(slot: &mut ()) -> &mut () {
            slot
        }
        fn slot_insert(slot: &mut (), _: ()) -> &mut () {
            slot
        }

        type LazyBox<T> = T;
        fn lazy_box_default<T>(default: impl Fn() -> T) -> T {
            default()
        }
        fn lazy_box_unwrap_ref<T>(lb: &T) -> &T {
            lb
        }
        fn lazy_box_unwrap_mut_or_alloc<T>(lb: &mut T, _: impl Fn() -> T) -> &mut T {
            lb
        }
    }

    pub enum EfficientValue {}
    impl<V: Default> ValueStorage<V> for EfficientValue {
        type Slot = V;
        fn slot_unwrap(slot: V) -> V {
            slot
        }
        fn slot_unwrap_ref(slot: &V) -> &V {
            slot
        }
        fn slot_unwrap_mut(slot: &mut V) -> &mut V {
            slot
        }
        fn slot_insert(slot: &mut V, v: V) -> &mut V {
            *slot = v;
            slot
        }

        // FIXME(eddyb) this is far from "efficient", maybe this part belongs
        // in another `trait`, or some better automation could be found?
        type LazyBox<T> = Option<Box<T>>;
        fn lazy_box_default<T>(_: impl Fn() -> T) -> Option<Box<T>> {
            None
        }
        fn lazy_box_unwrap_ref<T>(lb: &Option<Box<T>>) -> &T {
            lb.as_deref().unwrap()
        }
        fn lazy_box_unwrap_mut_or_alloc<T>(
            lb: &mut Option<Box<T>>,
            default: impl Fn() -> T,
        ) -> &mut T {
            lb.get_or_insert_with(|| Box::new(default()))
        }
    }

    // HACK(eddyb) most of the need for this arises from avoidance of
    // `unsafe` code (i.e. `MaybeUninit<V>` could suffice in most cases).
    pub enum WrapNonDefaultValueInOption {}
    impl<V> ValueStorage<V> for WrapNonDefaultValueInOption {
        type Slot = Option<V>;
        fn slot_unwrap(slot: Option<V>) -> V {
            slot.unwrap()
        }
        fn slot_unwrap_ref(slot: &Option<V>) -> &V {
            slot.as_ref().unwrap()
        }
        fn slot_unwrap_mut(slot: &mut Option<V>) -> &mut V {
            slot.as_mut().unwrap()
        }
        fn slot_insert(slot: &mut Option<V>, v: V) -> &mut V {
            slot.insert(v)
        }

        type LazyBox<T> = Option<Box<T>>;
        fn lazy_box_default<T>(_: impl Fn() -> T) -> Option<Box<T>> {
            None
        }
        fn lazy_box_unwrap_ref<T>(lb: &Option<Box<T>>) -> &T {
            lb.as_deref().unwrap()
        }
        fn lazy_box_unwrap_mut_or_alloc<T>(
            lb: &mut Option<Box<T>>,
            default: impl Fn() -> T,
        ) -> &mut T {
            lb.get_or_insert_with(|| Box::new(default()))
        }
    }

    // FIXME(eddyb) maybe make this parameterizable?
    type FixedBitSetUint = u64;

    pub type FixedBitSet<K> = FixedFlatMap<K, (), IgnoreValue>;
    const _: () =
        assert!(mem::size_of::<FixedBitSet<usize>>() == mem::size_of::<FixedBitSetUint>());

    pub struct FixedFlatMap<K, V, VS: ValueStorage<V> = EfficientValue> {
        occupied: FixedBitSetUint,
        slots: VS::LazyBox<[VS::Slot; FixedBitSet::SIZE]>,
        _marker: PhantomData<K>,
    }

    impl FixedBitSet<usize> {
        pub const SIZE: usize = {
            let bit_width = mem::size_of::<FixedBitSetUint>() * 8;
            assert!(FixedBitSetUint::count_ones(!0) == bit_width as u32);
            bit_width
        };
    }
    impl<K> PartialEq for FixedBitSet<K> {
        fn eq(&self, other: &Self) -> bool {
            self.occupied == other.occupied
        }
    }
    impl<K: Copy + Into<usize> + From<usize>> FixedBitSet<K> {
        pub fn union(&self, other: &Self) -> Self {
            Self { occupied: self.occupied | other.occupied, ..Self::default() }
        }
    }

    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> Default
        for FixedFlatMap<K, V, VS>
    {
        fn default() -> Self {
            Self::new()
        }
    }
    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> FixedFlatMap<K, V, VS> {
        pub fn new() -> Self {
            Self {
                occupied: 0,
                slots: VS::lazy_box_default(|| std::array::from_fn(|_| Default::default())),
                _marker: PhantomData,
            }
        }
        pub fn contains(&self, k: K) -> bool {
            u32::try_from(k.into()).ok().and_then(|k| Some(self.occupied.checked_shr(k)? & 1))
                == Some(1)
        }
        pub fn get(&self, k: K) -> Option<&V> {
            self.contains(k)
                .then(|| VS::slot_unwrap_ref(&VS::lazy_box_unwrap_ref(&self.slots)[k.into()]))
        }
        pub fn entry(&mut self, k: K) -> FixedFlatMapEntry<'_, V, VS> {
            let k = k.into();
            let key_mask = FixedBitSetUint::checked_shl(1, u32::try_from(k).unwrap()).unwrap();
            FixedFlatMapEntry {
                key_mask,
                occupied: &mut self.occupied,
                slot: &mut VS::lazy_box_unwrap_mut_or_alloc(&mut self.slots, || {
                    std::array::from_fn(|_| Default::default())
                })[k],
            }
        }

        pub fn insert(&mut self, k: K, v: V) {
            self.entry(k).insert(v);
        }
        fn remove(&mut self, k: K) -> Option<V> {
            self.contains(k).then(|| self.entry(k).remove().unwrap())
        }

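        /// Iterates over all present keys, in increasing order, by repeatedly
        /// skipping over `trailing_zeros` of the remaining `occupied` bits
        /// (e.g. `occupied == 0b1010` yields the keys `1` and `3`).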
        pub fn keys(&self) -> impl Iterator<Item = K> {
            let mut i = 0;
            let mut remaining = self.occupied;
            iter::from_fn(move || {
                (remaining != 0).then(|| {
                    let gap = remaining.trailing_zeros() as usize;
                    i += gap;
                    remaining >>= gap;

                    let k = K::from(i);

                    // Skip the lowest bit (which should always be `1` here).
                    i += 1;
                    remaining >>= 1;

                    k
                })
            })
        }
        pub fn iter(&self) -> impl Iterator<Item = (K, &V)> + '_ {
            self.keys().map(|k| (k, self.get(k).unwrap()))
        }
        pub fn drain(&mut self) -> impl Iterator<Item = (K, V)> + '_ {
            self.keys().map(|k| (k, self.remove(k).unwrap()))
        }

        // FIXME(eddyb) does this fully replace `drain`?
        pub fn clear(&mut self) {
            // FIXME(eddyb) theoretically this could be more efficient, but
            // it doesn't seem worth it wrt `VS` abstraction complexity.
            for _ in self.drain() {}
        }
    }

    pub struct FixedFlatMapEntry<'a, V, VS: ValueStorage<V> = EfficientValue> {
        key_mask: FixedBitSetUint,
        occupied: &'a mut FixedBitSetUint,
        // FIXME(eddyb) in theory, this forces the `Box` to be allocated even
        // when it might not be needed, so it optimizes for e.g. insertion.
        slot: &'a mut VS::Slot,
    }

    impl<'a, V, VS: ValueStorage<V>> FixedFlatMapEntry<'a, V, VS> {
        pub fn occupied(&self) -> bool {
            (*self.occupied & self.key_mask) != 0
        }
        fn into_mut(self) -> Option<&'a mut V> {
            self.occupied().then(|| VS::slot_unwrap_mut(self.slot))
        }
        pub fn or_insert_with(self, f: impl FnOnce() -> V) -> &'a mut V {
            if self.occupied() { self.into_mut().unwrap() } else { self.insert(f()) }
        }
        pub fn insert(self, v: V) -> &'a mut V {
            *self.occupied |= self.key_mask;
            VS::slot_insert(self.slot, v)
        }
        pub fn remove(&mut self) -> Option<V> {
            self.occupied().then(|| {
                *self.occupied &= !self.key_mask;
                VS::slot_unwrap(mem::take(self.slot))
            })
        }
    }

    // FIXME(eddyb) not a sparse bitset because of how `SparseMap` is only sparse
    // wrt not allocating space to store values until needed, but `BitSet` has no
    // real values (and uses `IgnoreValue` to completely remove that allocation).
    pub type BitSet<K> = SparseMap<K, (), IgnoreValue>;

    pub struct SparseMap<K, V, VS: ValueStorage<V> = EfficientValue> {
        // NOTE(eddyb) this is really efficient when the keys don't exceed
        // `FixedBitSet::SIZE`, and this can be further amplified by using
        // e.g. `SparseMap<_, FixedFlatMap<_, V>>`, which adds another layer
        // of sparseness (more concretely, if `FixedBitSet::SIZE == 64`, then
        // that combination can effectively handle up to 64*64 = 4096 entries
        // without causing the outermost `SmallVec` to allocate a `Vec`).
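        // For example, with `FixedBitSet::SIZE == 64`, key `130` decomposes
        // into `outer_map[130 / 64 == 2]` and inner key `130 % 64 == 2`.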
        outer_map: SmallVec<[FixedFlatMap<usize, V, VS>; 1]>,
        count: usize,
        _marker: PhantomData<K>,
    }

    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> Default for SparseMap<K, V, VS> {
        fn default() -> Self {
            Self::new()
        }
    }
    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> SparseMap<K, V, VS> {
        pub fn new() -> Self {
            Self { outer_map: SmallVec::new(), count: 0, _marker: PhantomData }
        }
        pub fn is_empty(&self) -> bool {
            self.count == 0
        }
        pub fn get(&self, k: K) -> Option<&V> {
            let k = k.into();
            let (outer_key, inner_key) = (k / FixedBitSet::SIZE, k % FixedBitSet::SIZE);
            self.outer_map.get(outer_key)?.get(inner_key)
        }
        pub fn entry(&mut self, k: K) -> SparseMapEntry<'_, V, VS> {
            let k = k.into();
            let (outer_key, inner_key) = (k / FixedBitSet::SIZE, k % FixedBitSet::SIZE);
            let needed_outer_len = outer_key + 1;
            if self.outer_map.len() < needed_outer_len {
                self.outer_map.resize_with(needed_outer_len, Default::default);
            }
            SparseMapEntry {
                inner: self.outer_map[outer_key].entry(inner_key),
                count: &mut self.count,
            }
        }

        pub fn iter(&self) -> impl Iterator<Item = (K, &V)> + '_ {
            (!self.is_empty())
                .then(|| {
                    self.outer_map.iter().enumerate().flat_map(|(outer_key, inner_map)| {
                        inner_map.iter().map(move |(inner_key, v)| {
                            (K::from(outer_key * FixedBitSet::SIZE + inner_key), v)
                        })
                    })
                })
                .into_iter()
                .flatten()
        }

        pub fn clear(&mut self) {
            for inner_map in &mut self.outer_map {
                inner_map.clear();
            }
            self.count = 0;
        }
    }

    pub struct SparseMapEntry<'a, V, VS: ValueStorage<V> = EfficientValue> {
        inner: FixedFlatMapEntry<'a, V, VS>,
        count: &'a mut usize,
    }

    impl<'a, V, VS: ValueStorage<V>> SparseMapEntry<'a, V, VS> {
        pub fn or_insert_with(self, f: impl FnOnce() -> V) -> &'a mut V {
            self.inner.or_insert_with(|| {
                *self.count += 1;
                f()
            })
        }
        #[allow(clippy::unwrap_or_default)]
        pub fn or_default(self) -> &'a mut V
        where
            V: Default,
        {
            self.or_insert_with(V::default)
        }
        pub fn insert(self, v: V) {
            if !self.inner.occupied() {
                *self.count += 1;
            }
            self.inner.insert(v);
        }
    }
}