Skip to main content

rustc_codegen_spirv/linker/
duplicates.rs

1use crate::custom_insts::{self, CustomOp};
2use rspirv::binary::Assemble;
3use rspirv::dr::{Instruction, Module, Operand};
4use rspirv::spirv::{Op, Word};
5use rustc_data_structures::fx::{FxHashMap, FxHashSet};
6use rustc_middle::bug;
7use smallvec::SmallVec;
8use std::collections::hash_map;
9use std::mem;
10
11// FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for
12// file-level debuginfo (but using SPIR-T for linking might be better?).
13
14pub fn remove_duplicate_extensions(module: &mut Module) {
15    let mut set = FxHashSet::default();
16
17    module.extensions.retain(|inst| {
18        inst.class.opcode != Op::Extension
19            || set.insert(inst.operands[0].unwrap_literal_string().to_string())
20    });
21}
22
23pub fn remove_duplicate_capabilities(module: &mut Module) {
24    let mut set = FxHashSet::default();
25    module.capabilities.retain(|inst| {
26        inst.class.opcode != Op::Capability || set.insert(inst.operands[0].unwrap_capability())
27    });
28}
29
30pub fn remove_duplicate_ext_inst_imports(module: &mut Module) {
31    // This is a simpler version of remove_duplicate_types, see that for comments
32    let mut ext_to_id = FxHashMap::default();
33    let mut rewrite_rules = FxHashMap::default();
34
35    // First deduplicate the imports
36    for inst in &mut module.ext_inst_imports {
37        if let Operand::LiteralString(ext_inst_import) = &inst.operands[0] {
38            match ext_to_id.entry(ext_inst_import.clone()) {
39                hash_map::Entry::Vacant(entry) => {
40                    entry.insert(inst.result_id.unwrap());
41                }
42                hash_map::Entry::Occupied(entry) => {
43                    let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
44                    assert!(old_value.is_none());
45                    // We're iterating through the vec, so removing items is hard - nop it out.
46                    *inst = Instruction::new(Op::Nop, None, None, vec![]);
47                }
48            }
49        }
50    }
51
52    // Delete the nops we inserted
53    module
54        .ext_inst_imports
55        .retain(|op| op.class.opcode != Op::Nop);
56
57    // Then rewrite all OpExtInst referencing the rewritten IDs
58    for inst in module.all_inst_iter_mut() {
59        if inst.class.opcode == Op::ExtInst
60            && let Operand::IdRef(ref mut id) = inst.operands[0]
61        {
62            *id = rewrite_rules.get(id).copied().unwrap_or(*id);
63        }
64    }
65}
66
67fn make_annotation_key(inst: &Instruction) -> Vec<u32> {
68    let mut data = vec![inst.class.opcode as u32];
69
70    // skip over the target ID
71    for op in inst.operands.iter().skip(1) {
72        op.assemble_into(&mut data);
73    }
74
75    data
76}
77
78fn gather_annotations(annotations: &[Instruction]) -> FxHashMap<Word, Vec<u32>> {
79    let mut map = FxHashMap::default();
80    for inst in annotations {
81        match inst.class.opcode {
82            Op::Decorate
83            | Op::DecorateId
84            | Op::DecorateString
85            | Op::MemberDecorate
86            | Op::MemberDecorateString => match map.entry(inst.operands[0].id_ref_any().unwrap()) {
87                hash_map::Entry::Vacant(entry) => {
88                    entry.insert(vec![make_annotation_key(inst)]);
89                }
90                hash_map::Entry::Occupied(mut entry) => {
91                    entry.get_mut().push(make_annotation_key(inst));
92                }
93            },
94            _ => {}
95        }
96    }
97    map.into_iter()
98        .map(|(key, mut value)| {
99            (key, {
100                value.sort();
101                value.concat()
102            })
103        })
104        .collect()
105}
106
107fn gather_names(debug_names: &[Instruction]) -> FxHashMap<Word, String> {
108    debug_names
109        .iter()
110        .filter(|inst| inst.class.opcode == Op::Name)
111        .map(|inst| {
112            (
113                inst.operands[0].unwrap_id_ref(),
114                inst.operands[1].unwrap_literal_string().to_owned(),
115            )
116        })
117        .collect()
118}
119
120fn make_dedupe_key(
121    inst: &Instruction,
122    unresolved_forward_pointers: &FxHashSet<Word>,
123    annotations: &FxHashMap<Word, Vec<u32>>,
124    names: &FxHashMap<Word, String>,
125) -> Vec<u32> {
126    let mut data = vec![inst.class.opcode as u32];
127
128    if let Some(id) = inst.result_type {
129        // We're not only deduplicating types here, but constants as well. Those contain result_types, and so we
130        // need to include those here. For example, OpConstant can have the same arg, but different result_type,
131        // and it should not be deduplicated (e.g. the constants 1u8 and 1u16).
132        data.push(id);
133    }
134    for op in &inst.operands {
135        if let Operand::IdRef(id) = op {
136            if unresolved_forward_pointers.contains(id) {
137                // TODO: This is implementing forward pointers incorrectly. All unresolved forward pointers will
138                // compare equal.
139                Operand::IdRef(0).assemble_into(&mut data);
140            } else {
141                op.assemble_into(&mut data);
142            }
143        } else {
144            op.assemble_into(&mut data);
145        }
146    }
147    if let Some(id) = inst.result_id {
148        if let Some(annos) = annotations.get(&id) {
149            data.extend_from_slice(annos);
150        }
151        if inst.class.opcode == Op::Variable {
152            // Names only matter for OpVariable.
153            if let Some(name) = names.get(&id) {
154                // Jump through some hoops to shove a String into a Vec<u32>.
155                //
156                // FIXME(eddyb) this should `.assemble_into(&mut data)` the
157                // `Operand::LiteralString(...)` from the original `Op::Name`.
158                for chunk in name.as_bytes().chunks(4) {
159                    let slice = match *chunk {
160                        [a] => [a, 0, 0, 0],
161                        [a, b] => [a, b, 0, 0],
162                        [a, b, c] => [a, b, c, 0],
163                        [a, b, c, d] => [a, b, c, d],
164                        _ => bug!(),
165                    };
166                    data.push(u32::from_le_bytes(slice));
167                }
168            }
169        }
170    }
171
172    data
173}
174
175fn rewrite_inst_with_rules(inst: &mut Instruction, rules: &FxHashMap<u32, u32>) {
176    if let Some(ref mut id) = inst.result_type {
177        // If the rewrite rules contain this ID, replace with the mapped value, otherwise don't touch it.
178        *id = rules.get(id).copied().unwrap_or(*id);
179    }
180    for op in &mut inst.operands {
181        if let Some(id) = op.id_ref_any_mut() {
182            *id = rules.get(id).copied().unwrap_or(*id);
183        }
184    }
185}
186
187pub fn remove_duplicate_types(module: &mut Module) {
188    // Keep in mind, this algorithm requires forward type references to not exist - i.e. it's a valid spir-v module.
189
190    // When a duplicate type is encountered, then this is a map from the deleted ID, to the new, deduplicated ID.
191    let mut rewrite_rules = FxHashMap::default();
192    // Instructions are encoded into "keys": their opcode, followed by arguments, then annotations.
193    // Importantly, result_id is left out. This means that any instruction that declares the same
194    // type, but with different result_id, will result in the same key.
195    let mut key_to_result_id = FxHashMap::default();
196    // TODO: This is implementing forward pointers incorrectly.
197    let mut unresolved_forward_pointers = FxHashSet::default();
198
199    // Collect a map from type ID to an annotation "key blob" (to append to the type key)
200    let annotations = gather_annotations(&module.annotations);
201    let names = gather_names(&module.debug_names);
202
203    for inst in &mut module.types_global_values {
204        if inst.class.opcode == Op::TypeForwardPointer
205            && let Operand::IdRef(id) = inst.operands[0]
206        {
207            unresolved_forward_pointers.insert(id);
208            continue;
209        }
210        if inst.class.opcode == Op::TypePointer
211            && unresolved_forward_pointers.contains(&inst.result_id.unwrap())
212        {
213            unresolved_forward_pointers.remove(&inst.result_id.unwrap());
214        }
215        // This is an important spot: Say that we come upon a duplicated aggregate type (one that references
216        // other types). Its arguments may be duplicated themselves, and so building the key directly will fail
217        // to match up with the first type. However, **because forward references are not allowed**, we're
218        // guaranteed to have already found and deduplicated the argument types! So that means the deduplication
219        // translation is already in rewrite_rules, and we merely need to apply the mapping before generating
220        // the key.
221        // Nit: Overwriting the instruction isn't technically necessary, as it will get handled by the final
222        // all_inst_iter_mut pass below. However, the code is a lil bit cleaner this way I guess.
223        rewrite_inst_with_rules(inst, &rewrite_rules);
224
225        let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations, &names);
226
227        match key_to_result_id.entry(key) {
228            hash_map::Entry::Vacant(entry) => {
229                // This is the first time we've seen this key. Insert the key into the map, registering this type as
230                // something other types can deduplicate to.
231                entry.insert(inst.result_id.unwrap());
232            }
233            hash_map::Entry::Occupied(entry) => {
234                // We've already seen this key. We need to do two things:
235                // 1) Add a rewrite rule from this type to the type that we saw before.
236                let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
237                // 2) Erase this instruction. Because we're iterating over this vec, removing an element is hard, so
238                // clear it with OpNop, and then remove it in the retain() call below.
239                assert!(old_value.is_none());
240                *inst = Instruction::new(Op::Nop, None, None, vec![]);
241            }
242        }
243    }
244
245    // We rewrote instructions we wanted to remove with OpNop. Remove them properly.
246    module
247        .types_global_values
248        .retain(|op| op.class.opcode != Op::Nop);
249
250    // Apply the rewrite rules to the whole module
251    for inst in module.all_inst_iter_mut() {
252        rewrite_inst_with_rules(inst, &rewrite_rules);
253    }
254
255    // The same decorations for duplicated types will cause those different types to merge
256    // together. So, we need to deduplicate the annotations as well. (Note we *do* care about the
257    // ID of the type being applied to here, unlike `gather_annotations`)
258    let mut anno_set = FxHashSet::default();
259    module
260        .annotations
261        .retain(|inst| anno_set.insert(inst.assemble()));
262    // Same thing with OpName
263    let mut name_ids = FxHashSet::default();
264    let mut member_name_ids = FxHashSet::default();
265    module.debug_names.retain(|inst| {
266        (inst.class.opcode != Op::Name || name_ids.insert(inst.operands[0].unwrap_id_ref()))
267            && (inst.class.opcode != Op::MemberName
268                || member_name_ids.insert((
269                    inst.operands[0].unwrap_id_ref(),
270                    inst.operands[1].unwrap_literal_bit32(),
271                )))
272    });
273}
274
275pub fn remove_duplicate_debuginfo(module: &mut Module) {
276    // FIXME(eddyb) avoid repeating this across different passes/helpers.
277    let custom_ext_inst_set_import = module
278        .ext_inst_imports
279        .iter()
280        .find(|inst| {
281            assert_eq!(inst.class.opcode, Op::ExtInstImport);
282            inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..]
283        })
284        .map(|inst| inst.result_id.unwrap());
285
286    let deduper = DebuginfoDeduplicator {
287        custom_ext_inst_set_import,
288    };
289    for func in &mut module.functions {
290        deduper.remove_duplicate_debuginfo_in_function(func);
291    }
292}
293
294pub struct DebuginfoDeduplicator {
295    pub custom_ext_inst_set_import: Option<Word>,
296}
297
298impl DebuginfoDeduplicator {
299    pub fn remove_duplicate_debuginfo_in_function(&self, func: &mut rspirv::dr::Function) {
300        for block in &mut func.blocks {
301            // Ignore the terminator, it's effectively "outside" debuginfo.
302            let (_, insts) = block.instructions.split_last_mut().unwrap();
303
304            // HACK(eddyb) to make random access easier, we first replace unused
305            // instructions with `OpNop`, and then remove all the `OpNop`s.
306
307            #[derive(Clone)]
308            struct DbgLocInst {
309                inst_idx: usize,
310                used: bool,
311            }
312
313            fn nop() -> Instruction {
314                Instruction::new(Op::Nop, None, None, vec![])
315            }
316            impl DbgLocInst {
317                fn nop_if_unused(&self, insts: &mut [Instruction]) {
318                    if !self.used {
319                        insts[self.inst_idx] = nop();
320                    }
321                }
322            }
323
324            #[derive(Clone, Default)]
325            struct DbgState {
326                loc: Option<DbgLocInst>,
327                has_semantic_insts: bool,
328            }
329            let mut dbg = DbgState::default();
330
331            struct Frame {
332                call_dbg: DbgState,
333                push_inst_idx: usize,
334            }
335            let mut inlined_frames = SmallVec::<[Frame; 8]>::new();
336
337            // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()`
338            // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of
339            // N pops will reverse the N last entries of `inlined_frames` into
340            // this vector (and go from outside-in, to inside-out), which allows
341            // *fusing* a pop with a push (of an identical inlined frame), when
342            // no interverning instructions prevent it (such instructions will
343            // clear this vector to indicate the pops are "fully committed").
344            struct PoppedFrame {
345                frame: Frame,
346                callee_has_semantic_insts: bool,
347                pop_inst_idx: usize,
348            }
349            let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new();
350
351            for inst_idx in 0..insts.len() {
352                let inst = &insts[inst_idx];
353                let custom_op = match inst.class.opcode {
354                    Op::ExtInst
355                        if Some(inst.operands[0].unwrap_id_ref())
356                            == self.custom_ext_inst_set_import =>
357                    {
358                        Some(CustomOp::decode_from_ext_inst(inst))
359                    }
360                    _ => None,
361                };
362
363                fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ {
364                    (inst.class.opcode, &inst.operands)
365                }
366
367                // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving
368                // cases must all use `if can_continue { continue; }` to skip the
369                // draining logic (`can_continue` is only `false` at the very end).
370                let can_continue = inst_idx < insts.len() - 1;
371                let prev_dbg_loc_snapshot = dbg.loc.clone();
372                match (inst.class.opcode, custom_op) {
373                    (Op::Line | Op::NoLine, _)
374                    | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => {
375                        // HACK(eddyb) prefer keeping older active `DbgLocInst`s,
376                        // if all the details are the same (it helps with fusion).
377                        if dbg.loc.as_ref().is_some_and(|old_dbg_loc| {
378                            inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx])
379                        }) {
380                            insts[inst_idx] = nop();
381                            if can_continue {
382                                continue;
383                            }
384                        } else {
385                            dbg.loc = Some(DbgLocInst {
386                                inst_idx,
387                                used: false,
388                            });
389                        }
390                    }
391                    (_, Some(CustomOp::PushInlinedCallFrame)) => {
392                        // HACK(eddyb) attempt fusing this push with the last pop.
393                        let fuse_with_last_pop = fusable_freshly_popped_inlined_frames
394                            .last()
395                            .is_some_and(|last_popped| {
396                                // HACK(eddyb) updating `dbg.loc` deduplicates eagerly,
397                                // so here it suffices to check the (deduped) indices.
398                                let dbg_loc_inst_idx =
399                                    |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx);
400                                dbg_loc_inst_idx(&last_popped.frame.call_dbg)
401                                    == dbg_loc_inst_idx(&dbg)
402                                    && inst_eq_key(inst)
403                                        == inst_eq_key(&insts[last_popped.frame.push_inst_idx])
404                            });
405                        if fuse_with_last_pop {
406                            let PoppedFrame {
407                                frame,
408                                callee_has_semantic_insts,
409                                pop_inst_idx,
410                            } = fusable_freshly_popped_inlined_frames.pop().unwrap();
411
412                            insts[pop_inst_idx] = nop();
413
414                            // Can't make entering an inlined function a nop,
415                            // as it needs to reset callee-side `DbgLocInst`,
416                            // but we can replace it in-place and hope later
417                            // it get nop'd out by some real `DbgLocInst`.
418                            insts[inst_idx].operands.splice(
419                                1..,
420                                [Operand::LiteralExtInstInteger(
421                                    CustomOp::ClearDebugSrcLoc as u32,
422                                )],
423                            );
424                            dbg = DbgState {
425                                loc: Some(DbgLocInst {
426                                    inst_idx,
427                                    used: false,
428                                }),
429                                has_semantic_insts: callee_has_semantic_insts,
430                            };
431
432                            inlined_frames.push(frame);
433
434                            // Allow further fusing to occur.
435                            if can_continue {
436                                continue;
437                            }
438                        } else {
439                            // HACK(eddyb) the actual push to `inlined_frames` is
440                            // done at the very end of the loop body, to be able
441                            // to process any pending updates on the previous state.
442                        }
443                    }
444                    (_, Some(CustomOp::PopInlinedCallFrame)) => {
445                        // Leaving an inlined function doesn't use `DbgLocInst`.
446                        if let Some(dbg_loc) = dbg.loc.take() {
447                            // HACK(eddyb) only treat as "definitely unused"
448                            // instructions that are too "recent" to have been
449                            // used by a `PushInlinedCallFrame` with a still
450                            // uncommitted `PopInlinedCallFrame`.
451                            let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames
452                                .last()
453                                .map_or(0, |last_popped| last_popped.pop_inst_idx);
454                            if dbg_loc.inst_idx > min_safe_inst_idx_to_nop {
455                                dbg_loc.nop_if_unused(insts);
456                            }
457                        }
458                        if let Some(frame) = inlined_frames.pop() {
459                            let callee_has_semantic_insts = dbg.has_semantic_insts;
460                            dbg = frame.call_dbg.clone();
461                            dbg.has_semantic_insts |= callee_has_semantic_insts;
462
463                            // HACK(eddyb) inform future `PushInlinedCallFrame`s
464                            // of potential fusion, by saving a copy of the frame.
465                            fusable_freshly_popped_inlined_frames.push(PoppedFrame {
466                                frame,
467                                callee_has_semantic_insts,
468                                pop_inst_idx: inst_idx,
469                            });
470                        } else {
471                            // FIXME(eddyb) this may indicate a bug elsewhere.
472                            insts[inst_idx] = nop();
473                        }
474                        if can_continue {
475                            continue;
476                        }
477                    }
478                    _ => {
479                        if let Some(dbg_loc) = &mut dbg.loc {
480                            dbg_loc.used = true;
481                        }
482                        dbg.has_semantic_insts = true;
483                    }
484                }
485
486                // NOTE(eddyb) mutable so that it may be marked as used below.
487                let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| {
488                    dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx)
489                });
490
491                // NOTE(eddyb) the iteration order doesn't matter, as this is
492                // effectively a set of `PopInlinedCallFrame`s which have had
493                // all their other side-effects processed, and didn't get a
494                // chance to be fused away, so they're getting committed.
495                for popped in fusable_freshly_popped_inlined_frames.drain(..) {
496                    let PoppedFrame {
497                        mut frame,
498                        callee_has_semantic_insts,
499                        pop_inst_idx,
500                    } = popped;
501
502                    // HACK(eddyb) this popped frame's `call_dbg.loc` may still
503                    // be used elsewhere, in which case that use takes precedence,
504                    // and is effectively the new "owner" of the `DbgLocInst`.
505                    let call_dbg_loc_used_elsewhere =
506                        frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| {
507                            [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()]
508                                .into_iter()
509                                .flatten()
510                                .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx)
511                        });
512                    if call_dbg_loc_used_elsewhere.is_some() {
513                        frame.call_dbg.loc = None;
514                    }
515
516                    if callee_has_semantic_insts {
517                        // The `PushInlinedCallFrame` being kept requires its
518                        // original `DbgLocInst` to also be kept around.
519                        if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere {
520                            call_dbg_loc.used = true;
521                        }
522                    } else {
523                        // If the entire inlined call is all `OpNop`s now,
524                        // entering/leaving it can also become `OpNop`s.
525                        if let Some(call_dbg_loc) = &mut frame.call_dbg.loc {
526                            call_dbg_loc.nop_if_unused(insts);
527                        }
528                        insts[frame.push_inst_idx] = nop();
529                        insts[pop_inst_idx] = nop();
530                    }
531                }
532
533                // Only remove a replaced `DbgLocInst` after it had a chance to
534                // be marked as used above (for e.g. a `PushInlinedCallFrame`).
535                if let Some(old_dbg_loc) = freshly_replaced_dbg_loc {
536                    old_dbg_loc.nop_if_unused(insts);
537                }
538
539                // HACK(eddyb) the actual push to `inlined_frames` is
540                // done at the very end of the loop body, to be able
541                // to process any pending updates on the previous state.
542                if custom_op == Some(CustomOp::PushInlinedCallFrame) {
543                    inlined_frames.push(Frame {
544                        call_dbg: mem::take(&mut dbg),
545                        push_inst_idx: inst_idx,
546                    });
547                }
548            }
549
550            assert!(fusable_freshly_popped_inlined_frames.is_empty());
551
552            block
553                .instructions
554                .retain(|inst| inst.class.opcode != Op::Nop);
555        }
556    }
557}