rustc_codegen_spirv/linker/
duplicates.rs

1use crate::custom_insts::{self, CustomOp};
2use rspirv::binary::Assemble;
3use rspirv::dr::{Instruction, Module, Operand};
4use rspirv::spirv::{Op, Word};
5use rustc_data_structures::fx::{FxHashMap, FxHashSet};
6use rustc_middle::bug;
7use smallvec::SmallVec;
8use std::collections::hash_map;
9use std::mem;
10
11// FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for
12// file-level debuginfo (but using SPIR-T for linking might be better?).
13
14pub fn remove_duplicate_extensions(module: &mut Module) {
15    let mut set = FxHashSet::default();
16
17    module.extensions.retain(|inst| {
18        inst.class.opcode != Op::Extension
19            || set.insert(inst.operands[0].unwrap_literal_string().to_string())
20    });
21}
22
23pub fn remove_duplicate_capabilities(module: &mut Module) {
24    let mut set = FxHashSet::default();
25    module.capabilities.retain(|inst| {
26        inst.class.opcode != Op::Capability || set.insert(inst.operands[0].unwrap_capability())
27    });
28}
29
30pub fn remove_duplicate_ext_inst_imports(module: &mut Module) {
31    // This is a simpler version of remove_duplicate_types, see that for comments
32    let mut ext_to_id = FxHashMap::default();
33    let mut rewrite_rules = FxHashMap::default();
34
35    // First deduplicate the imports
36    for inst in &mut module.ext_inst_imports {
37        if let Operand::LiteralString(ext_inst_import) = &inst.operands[0] {
38            match ext_to_id.entry(ext_inst_import.clone()) {
39                hash_map::Entry::Vacant(entry) => {
40                    entry.insert(inst.result_id.unwrap());
41                }
42                hash_map::Entry::Occupied(entry) => {
43                    let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
44                    assert!(old_value.is_none());
45                    // We're iterating through the vec, so removing items is hard - nop it out.
46                    *inst = Instruction::new(Op::Nop, None, None, vec![]);
47                }
48            }
49        }
50    }
51
52    // Delete the nops we inserted
53    module
54        .ext_inst_imports
55        .retain(|op| op.class.opcode != Op::Nop);
56
57    // Then rewrite all OpExtInst referencing the rewritten IDs
58    for inst in module.all_inst_iter_mut() {
59        if inst.class.opcode == Op::ExtInst
60            && let Operand::IdRef(ref mut id) = inst.operands[0]
61        {
62            *id = rewrite_rules.get(id).copied().unwrap_or(*id);
63        }
64    }
65}
66
67fn make_annotation_key(inst: &Instruction) -> Vec<u32> {
68    let mut data = vec![inst.class.opcode as u32];
69
70    // skip over the target ID
71    for op in inst.operands.iter().skip(1) {
72        op.assemble_into(&mut data);
73    }
74
75    data
76}
77
78fn gather_annotations(annotations: &[Instruction]) -> FxHashMap<Word, Vec<u32>> {
79    let mut map = FxHashMap::default();
80    for inst in annotations {
81        match inst.class.opcode {
82            Op::Decorate
83            | Op::DecorateId
84            | Op::DecorateString
85            | Op::MemberDecorate
86            | Op::MemberDecorateString => match map.entry(inst.operands[0].id_ref_any().unwrap()) {
87                hash_map::Entry::Vacant(entry) => {
88                    entry.insert(vec![make_annotation_key(inst)]);
89                }
90                hash_map::Entry::Occupied(mut entry) => {
91                    entry.get_mut().push(make_annotation_key(inst));
92                }
93            },
94            _ => {}
95        }
96    }
97    map.into_iter()
98        .map(|(key, mut value)| {
99            (key, {
100                value.sort();
101                value.concat()
102            })
103        })
104        .collect()
105}
106
107fn gather_names(debug_names: &[Instruction]) -> FxHashMap<Word, String> {
108    debug_names
109        .iter()
110        .filter(|inst| inst.class.opcode == Op::Name)
111        .map(|inst| {
112            (
113                inst.operands[0].unwrap_id_ref(),
114                inst.operands[1].unwrap_literal_string().to_owned(),
115            )
116        })
117        .collect()
118}
119
120fn make_dedupe_key(
121    inst: &Instruction,
122    unresolved_forward_pointers: &FxHashSet<Word>,
123    annotations: &FxHashMap<Word, Vec<u32>>,
124    names: &FxHashMap<Word, String>,
125) -> Vec<u32> {
126    let mut data = vec![inst.class.opcode as u32];
127
128    if let Some(id) = inst.result_type {
129        // We're not only deduplicating types here, but constants as well. Those contain result_types, and so we
130        // need to include those here. For example, OpConstant can have the same arg, but different result_type,
131        // and it should not be deduplicated (e.g. the constants 1u8 and 1u16).
132        data.push(id);
133    }
134    for op in &inst.operands {
135        if let Operand::IdRef(id) = op {
136            if unresolved_forward_pointers.contains(id) {
137                // TODO: This is implementing forward pointers incorrectly. All unresolved forward pointers will
138                // compare equal.
139                Operand::IdRef(0).assemble_into(&mut data);
140            } else {
141                op.assemble_into(&mut data);
142            }
143        } else {
144            op.assemble_into(&mut data);
145        }
146    }
147    if let Some(id) = inst.result_id {
148        if let Some(annos) = annotations.get(&id) {
149            data.extend_from_slice(annos);
150        }
151        if inst.class.opcode == Op::Variable {
152            // Names only matter for OpVariable.
153            if let Some(name) = names.get(&id) {
154                // Jump through some hoops to shove a String into a Vec<u32>.
155                //
156                // FIXME(eddyb) this should `.assemble_into(&mut data)` the
157                // `Operand::LiteralString(...)` from the original `Op::Name`.
158                for chunk in name.as_bytes().chunks(4) {
159                    let slice = match *chunk {
160                        [a] => [a, 0, 0, 0],
161                        [a, b] => [a, b, 0, 0],
162                        [a, b, c] => [a, b, c, 0],
163                        [a, b, c, d] => [a, b, c, d],
164                        _ => bug!(),
165                    };
166                    data.push(u32::from_le_bytes(slice));
167                }
168            }
169        }
170    }
171
172    data
173}
174
175fn rewrite_inst_with_rules(inst: &mut Instruction, rules: &FxHashMap<u32, u32>) {
176    if let Some(ref mut id) = inst.result_type {
177        // If the rewrite rules contain this ID, replace with the mapped value, otherwise don't touch it.
178        *id = rules.get(id).copied().unwrap_or(*id);
179    }
180    for op in &mut inst.operands {
181        if let Some(id) = op.id_ref_any_mut() {
182            *id = rules.get(id).copied().unwrap_or(*id);
183        }
184    }
185}
186
187pub fn remove_duplicate_types(module: &mut Module) {
188    // Keep in mind, this algorithm requires forward type references to not exist - i.e. it's a valid spir-v module.
189
190    // When a duplicate type is encountered, then this is a map from the deleted ID, to the new, deduplicated ID.
191    let mut rewrite_rules = FxHashMap::default();
192    // Instructions are encoded into "keys": their opcode, followed by arguments, then annotations.
193    // Importantly, result_id is left out. This means that any instruction that declares the same
194    // type, but with different result_id, will result in the same key.
195    let mut key_to_result_id = FxHashMap::default();
196    // TODO: This is implementing forward pointers incorrectly.
197    let mut unresolved_forward_pointers = FxHashSet::default();
198
199    // Collect a map from type ID to an annotation "key blob" (to append to the type key)
200    let annotations = gather_annotations(&module.annotations);
201    let names = gather_names(&module.debug_names);
202
203    for inst in &mut module.types_global_values {
204        if inst.class.opcode == Op::TypeForwardPointer
205            && let Operand::IdRef(id) = inst.operands[0]
206        {
207            unresolved_forward_pointers.insert(id);
208            continue;
209        }
210        if inst.class.opcode == Op::TypePointer
211            && unresolved_forward_pointers.contains(&inst.result_id.unwrap())
212        {
213            unresolved_forward_pointers.remove(&inst.result_id.unwrap());
214        }
215        // This is an important spot: Say that we come upon a duplicated aggregate type (one that references
216        // other types). Its arguments may be duplicated themselves, and so building the key directly will fail
217        // to match up with the first type. However, **because forward references are not allowed**, we're
218        // guaranteed to have already found and deduplicated the argument types! So that means the deduplication
219        // translation is already in rewrite_rules, and we merely need to apply the mapping before generating
220        // the key.
221        // Nit: Overwriting the instruction isn't technically necessary, as it will get handled by the final
222        // all_inst_iter_mut pass below. However, the code is a lil bit cleaner this way I guess.
223        rewrite_inst_with_rules(inst, &rewrite_rules);
224
225        let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations, &names);
226
227        match key_to_result_id.entry(key) {
228            hash_map::Entry::Vacant(entry) => {
229                // This is the first time we've seen this key. Insert the key into the map, registering this type as
230                // something other types can deduplicate to.
231                entry.insert(inst.result_id.unwrap());
232            }
233            hash_map::Entry::Occupied(entry) => {
234                // We've already seen this key. We need to do two things:
235                // 1) Add a rewrite rule from this type to the type that we saw before.
236                let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
237                // 2) Erase this instruction. Because we're iterating over this vec, removing an element is hard, so
238                // clear it with OpNop, and then remove it in the retain() call below.
239                assert!(old_value.is_none());
240                *inst = Instruction::new(Op::Nop, None, None, vec![]);
241            }
242        }
243    }
244
245    // We rewrote instructions we wanted to remove with OpNop. Remove them properly.
246    module
247        .types_global_values
248        .retain(|op| op.class.opcode != Op::Nop);
249
250    // Apply the rewrite rules to the whole module
251    for inst in module.all_inst_iter_mut() {
252        rewrite_inst_with_rules(inst, &rewrite_rules);
253    }
254
255    // The same decorations for duplicated types will cause those different types to merge
256    // together. So, we need to deduplicate the annotations as well. (Note we *do* care about the
257    // ID of the type being applied to here, unlike `gather_annotations`)
258    let mut anno_set = FxHashSet::default();
259    module
260        .annotations
261        .retain(|inst| anno_set.insert(inst.assemble()));
262    // Same thing with OpName
263    let mut name_ids = FxHashSet::default();
264    let mut member_name_ids = FxHashSet::default();
265    module.debug_names.retain(|inst| {
266        (inst.class.opcode != Op::Name || name_ids.insert(inst.operands[0].unwrap_id_ref()))
267            && (inst.class.opcode != Op::MemberName
268                || member_name_ids.insert((
269                    inst.operands[0].unwrap_id_ref(),
270                    inst.operands[1].unwrap_literal_bit32(),
271                )))
272    });
273}
274
275pub fn remove_duplicate_debuginfo(module: &mut Module) {
276    // FIXME(eddyb) avoid repeating this across different passes/helpers.
277    let custom_ext_inst_set_import = module
278        .ext_inst_imports
279        .iter()
280        .find(|inst| {
281            assert_eq!(inst.class.opcode, Op::ExtInstImport);
282            inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..]
283        })
284        .map(|inst| inst.result_id.unwrap());
285
286    for func in &mut module.functions {
287        for block in &mut func.blocks {
288            // Ignore the terminator, it's effectively "outside" debuginfo.
289            let (_, insts) = block.instructions.split_last_mut().unwrap();
290
291            // HACK(eddyb) to make random access easier, we first replace unused
292            // instructions with `OpNop`, and then remove all the `OpNop`s.
293
294            #[derive(Clone)]
295            struct DbgLocInst {
296                inst_idx: usize,
297                used: bool,
298            }
299
300            fn nop() -> Instruction {
301                Instruction::new(Op::Nop, None, None, vec![])
302            }
303            impl DbgLocInst {
304                fn nop_if_unused(&self, insts: &mut [Instruction]) {
305                    if !self.used {
306                        insts[self.inst_idx] = nop();
307                    }
308                }
309            }
310
311            #[derive(Clone, Default)]
312            struct DbgState {
313                loc: Option<DbgLocInst>,
314                has_semantic_insts: bool,
315            }
316            let mut dbg = DbgState::default();
317
318            struct Frame {
319                call_dbg: DbgState,
320                push_inst_idx: usize,
321            }
322            let mut inlined_frames = SmallVec::<[Frame; 8]>::new();
323
324            // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()`
325            // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of
326            // N pops will reverse the N last entries of `inlined_frames` into
327            // this vector (and go from outside-in, to inside-out), which allows
328            // *fusing* a pop with a push (of an identical inlined frame), when
329            // no interverning instructions prevent it (such instructions will
330            // clear this vector to indicate the pops are "fully committed").
331            struct PoppedFrame {
332                frame: Frame,
333                callee_has_semantic_insts: bool,
334                pop_inst_idx: usize,
335            }
336            let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new();
337
338            for inst_idx in 0..insts.len() {
339                let inst = &insts[inst_idx];
340                let custom_op = match inst.class.opcode {
341                    Op::ExtInst
342                        if Some(inst.operands[0].unwrap_id_ref()) == custom_ext_inst_set_import =>
343                    {
344                        Some(CustomOp::decode_from_ext_inst(inst))
345                    }
346                    _ => None,
347                };
348
349                fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ {
350                    (inst.class.opcode, &inst.operands)
351                }
352
353                // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving
354                // cases must all use `if can_continue { continue; }` to skip the
355                // draining logic (`can_continue` is only `false` at the very end).
356                let can_continue = inst_idx < insts.len() - 1;
357                let prev_dbg_loc_snapshot = dbg.loc.clone();
358                match (inst.class.opcode, custom_op) {
359                    (Op::Line | Op::NoLine, _)
360                    | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => {
361                        // HACK(eddyb) prefer keeping older active `DbgLocInst`s,
362                        // if all the details are the same (it helps with fusion).
363                        if dbg.loc.as_ref().is_some_and(|old_dbg_loc| {
364                            inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx])
365                        }) {
366                            insts[inst_idx] = nop();
367                            if can_continue {
368                                continue;
369                            }
370                        } else {
371                            dbg.loc = Some(DbgLocInst {
372                                inst_idx,
373                                used: false,
374                            });
375                        }
376                    }
377                    (_, Some(CustomOp::PushInlinedCallFrame)) => {
378                        // HACK(eddyb) attempt fusing this push with the last pop.
379                        let fuse_with_last_pop = fusable_freshly_popped_inlined_frames
380                            .last()
381                            .is_some_and(|last_popped| {
382                                // HACK(eddyb) updating `dbg.loc` deduplicates eagerly,
383                                // so here it suffices to check the (deduped) indices.
384                                let dbg_loc_inst_idx =
385                                    |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx);
386                                dbg_loc_inst_idx(&last_popped.frame.call_dbg)
387                                    == dbg_loc_inst_idx(&dbg)
388                                    && inst_eq_key(inst)
389                                        == inst_eq_key(&insts[last_popped.frame.push_inst_idx])
390                            });
391                        if fuse_with_last_pop {
392                            let PoppedFrame {
393                                frame,
394                                callee_has_semantic_insts,
395                                pop_inst_idx,
396                            } = fusable_freshly_popped_inlined_frames.pop().unwrap();
397
398                            insts[pop_inst_idx] = nop();
399
400                            // Can't make entering an inlined function a nop,
401                            // as it needs to reset callee-side `DbgLocInst`,
402                            // but we can replace it in-place and hope later
403                            // it get nop'd out by some real `DbgLocInst`.
404                            insts[inst_idx].operands.splice(
405                                1..,
406                                [Operand::LiteralExtInstInteger(
407                                    CustomOp::ClearDebugSrcLoc as u32,
408                                )],
409                            );
410                            dbg = DbgState {
411                                loc: Some(DbgLocInst {
412                                    inst_idx,
413                                    used: false,
414                                }),
415                                has_semantic_insts: callee_has_semantic_insts,
416                            };
417
418                            inlined_frames.push(frame);
419
420                            // Allow further fusing to occur.
421                            if can_continue {
422                                continue;
423                            }
424                        } else {
425                            // HACK(eddyb) the actual push to `inlined_frames` is
426                            // done at the very end of the loop body, to be able
427                            // to process any pending updates on the previous state.
428                        }
429                    }
430                    (_, Some(CustomOp::PopInlinedCallFrame)) => {
431                        // Leaving an inlined function doesn't use `DbgLocInst`.
432                        if let Some(dbg_loc) = dbg.loc.take() {
433                            // HACK(eddyb) only treat as "definitely unused"
434                            // instructions that are too "recent" to have been
435                            // used by a `PushInlinedCallFrame` with a still
436                            // uncommitted `PopInlinedCallFrame`.
437                            let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames
438                                .last()
439                                .map_or(0, |last_popped| last_popped.pop_inst_idx);
440                            if dbg_loc.inst_idx > min_safe_inst_idx_to_nop {
441                                dbg_loc.nop_if_unused(insts);
442                            }
443                        }
444                        if let Some(frame) = inlined_frames.pop() {
445                            let callee_has_semantic_insts = dbg.has_semantic_insts;
446                            dbg = frame.call_dbg.clone();
447                            dbg.has_semantic_insts |= callee_has_semantic_insts;
448
449                            // HACK(eddyb) inform future `PushInlinedCallFrame`s
450                            // of potential fusion, by saving a copy of the frame.
451                            fusable_freshly_popped_inlined_frames.push(PoppedFrame {
452                                frame,
453                                callee_has_semantic_insts,
454                                pop_inst_idx: inst_idx,
455                            });
456                        } else {
457                            // FIXME(eddyb) this may indicate a bug elsewhere.
458                            insts[inst_idx] = nop();
459                        }
460                        if can_continue {
461                            continue;
462                        }
463                    }
464                    _ => {
465                        if let Some(dbg_loc) = &mut dbg.loc {
466                            dbg_loc.used = true;
467                        }
468                        dbg.has_semantic_insts = true;
469                    }
470                }
471
472                // NOTE(eddyb) mutable so that it may be marked as used below.
473                let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| {
474                    dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx)
475                });
476
477                // NOTE(eddyb) the iteration order doesn't matter, as this is
478                // effectively a set of `PopInlinedCallFrame`s which have had
479                // all their other side-effects processed, and didn't get a
480                // chance to be fused away, so they're getting committed.
481                for popped in fusable_freshly_popped_inlined_frames.drain(..) {
482                    let PoppedFrame {
483                        mut frame,
484                        callee_has_semantic_insts,
485                        pop_inst_idx,
486                    } = popped;
487
488                    // HACK(eddyb) this popped frame's `call_dbg.loc` may still
489                    // be used elsewhere, in which case that use takes precedence,
490                    // and is effectively the new "owner" of the `DbgLocInst`.
491                    let call_dbg_loc_used_elsewhere =
492                        frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| {
493                            [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()]
494                                .into_iter()
495                                .flatten()
496                                .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx)
497                        });
498                    if call_dbg_loc_used_elsewhere.is_some() {
499                        frame.call_dbg.loc = None;
500                    }
501
502                    if callee_has_semantic_insts {
503                        // The `PushInlinedCallFrame` being kept requires its
504                        // original `DbgLocInst` to also be kept around.
505                        if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere {
506                            call_dbg_loc.used = true;
507                        }
508                    } else {
509                        // If the entire inlined call is all `OpNop`s now,
510                        // entering/leaving it can also become `OpNop`s.
511                        if let Some(call_dbg_loc) = &mut frame.call_dbg.loc {
512                            call_dbg_loc.nop_if_unused(insts);
513                        }
514                        insts[frame.push_inst_idx] = nop();
515                        insts[pop_inst_idx] = nop();
516                    }
517                }
518
519                // Only remove a replaced `DbgLocInst` after it had a chance to
520                // be marked as used above (for e.g. a `PushInlinedCallFrame`).
521                if let Some(old_dbg_loc) = freshly_replaced_dbg_loc {
522                    old_dbg_loc.nop_if_unused(insts);
523                }
524
525                // HACK(eddyb) the actual push to `inlined_frames` is
526                // done at the very end of the loop body, to be able
527                // to process any pending updates on the previous state.
528                if custom_op == Some(CustomOp::PushInlinedCallFrame) {
529                    inlined_frames.push(Frame {
530                        call_dbg: mem::take(&mut dbg),
531                        push_inst_idx: inst_idx,
532                    });
533                }
534            }
535
536            assert!(fusable_freshly_popped_inlined_frames.is_empty());
537
538            block
539                .instructions
540                .retain(|inst| inst.class.opcode != Op::Nop);
541        }
542    }
543}