rustc_codegen_spirv/linker/duplicates.rs
1use crate::custom_insts::{self, CustomOp};
2use rspirv::binary::Assemble;
3use rspirv::dr::{Instruction, Module, Operand};
4use rspirv::spirv::{Op, Word};
5use rustc_data_structures::fx::{FxHashMap, FxHashSet};
6use rustc_middle::bug;
7use smallvec::SmallVec;
8use std::collections::hash_map;
9use std::mem;
10
11// FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for
12// file-level debuginfo (but using SPIR-T for linking might be better?).
13
14pub fn remove_duplicate_extensions(module: &mut Module) {
15 let mut set = FxHashSet::default();
16
17 module.extensions.retain(|inst| {
18 inst.class.opcode != Op::Extension
19 || set.insert(inst.operands[0].unwrap_literal_string().to_string())
20 });
21}
22
23pub fn remove_duplicate_capabilities(module: &mut Module) {
24 let mut set = FxHashSet::default();
25 module.capabilities.retain(|inst| {
26 inst.class.opcode != Op::Capability || set.insert(inst.operands[0].unwrap_capability())
27 });
28}
29
30pub fn remove_duplicate_ext_inst_imports(module: &mut Module) {
31 // This is a simpler version of remove_duplicate_types, see that for comments
32 let mut ext_to_id = FxHashMap::default();
33 let mut rewrite_rules = FxHashMap::default();
34
35 // First deduplicate the imports
36 for inst in &mut module.ext_inst_imports {
37 if let Operand::LiteralString(ext_inst_import) = &inst.operands[0] {
38 match ext_to_id.entry(ext_inst_import.clone()) {
39 hash_map::Entry::Vacant(entry) => {
40 entry.insert(inst.result_id.unwrap());
41 }
42 hash_map::Entry::Occupied(entry) => {
43 let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
44 assert!(old_value.is_none());
45 // We're iterating through the vec, so removing items is hard - nop it out.
46 *inst = Instruction::new(Op::Nop, None, None, vec![]);
47 }
48 }
49 }
50 }
51
52 // Delete the nops we inserted
53 module
54 .ext_inst_imports
55 .retain(|op| op.class.opcode != Op::Nop);
56
57 // Then rewrite all OpExtInst referencing the rewritten IDs
58 for inst in module.all_inst_iter_mut() {
59 if inst.class.opcode == Op::ExtInst
60 && let Operand::IdRef(ref mut id) = inst.operands[0]
61 {
62 *id = rewrite_rules.get(id).copied().unwrap_or(*id);
63 }
64 }
65}
66
67fn make_annotation_key(inst: &Instruction) -> Vec<u32> {
68 let mut data = vec![inst.class.opcode as u32];
69
70 // skip over the target ID
71 for op in inst.operands.iter().skip(1) {
72 op.assemble_into(&mut data);
73 }
74
75 data
76}
77
78fn gather_annotations(annotations: &[Instruction]) -> FxHashMap<Word, Vec<u32>> {
79 let mut map = FxHashMap::default();
80 for inst in annotations {
81 match inst.class.opcode {
82 Op::Decorate
83 | Op::DecorateId
84 | Op::DecorateString
85 | Op::MemberDecorate
86 | Op::MemberDecorateString => match map.entry(inst.operands[0].id_ref_any().unwrap()) {
87 hash_map::Entry::Vacant(entry) => {
88 entry.insert(vec![make_annotation_key(inst)]);
89 }
90 hash_map::Entry::Occupied(mut entry) => {
91 entry.get_mut().push(make_annotation_key(inst));
92 }
93 },
94 _ => {}
95 }
96 }
97 map.into_iter()
98 .map(|(key, mut value)| {
99 (key, {
100 value.sort();
101 value.concat()
102 })
103 })
104 .collect()
105}
106
107fn gather_names(debug_names: &[Instruction]) -> FxHashMap<Word, String> {
108 debug_names
109 .iter()
110 .filter(|inst| inst.class.opcode == Op::Name)
111 .map(|inst| {
112 (
113 inst.operands[0].unwrap_id_ref(),
114 inst.operands[1].unwrap_literal_string().to_owned(),
115 )
116 })
117 .collect()
118}
119
120fn make_dedupe_key(
121 inst: &Instruction,
122 unresolved_forward_pointers: &FxHashSet<Word>,
123 annotations: &FxHashMap<Word, Vec<u32>>,
124 names: &FxHashMap<Word, String>,
125) -> Vec<u32> {
126 let mut data = vec![inst.class.opcode as u32];
127
128 if let Some(id) = inst.result_type {
129 // We're not only deduplicating types here, but constants as well. Those contain result_types, and so we
130 // need to include those here. For example, OpConstant can have the same arg, but different result_type,
131 // and it should not be deduplicated (e.g. the constants 1u8 and 1u16).
132 data.push(id);
133 }
134 for op in &inst.operands {
135 if let Operand::IdRef(id) = op {
136 if unresolved_forward_pointers.contains(id) {
137 // TODO: This is implementing forward pointers incorrectly. All unresolved forward pointers will
138 // compare equal.
139 Operand::IdRef(0).assemble_into(&mut data);
140 } else {
141 op.assemble_into(&mut data);
142 }
143 } else {
144 op.assemble_into(&mut data);
145 }
146 }
147 if let Some(id) = inst.result_id {
148 if let Some(annos) = annotations.get(&id) {
149 data.extend_from_slice(annos);
150 }
151 if inst.class.opcode == Op::Variable {
152 // Names only matter for OpVariable.
153 if let Some(name) = names.get(&id) {
154 // Jump through some hoops to shove a String into a Vec<u32>.
155 //
156 // FIXME(eddyb) this should `.assemble_into(&mut data)` the
157 // `Operand::LiteralString(...)` from the original `Op::Name`.
158 for chunk in name.as_bytes().chunks(4) {
159 let slice = match *chunk {
160 [a] => [a, 0, 0, 0],
161 [a, b] => [a, b, 0, 0],
162 [a, b, c] => [a, b, c, 0],
163 [a, b, c, d] => [a, b, c, d],
164 _ => bug!(),
165 };
166 data.push(u32::from_le_bytes(slice));
167 }
168 }
169 }
170 }
171
172 data
173}
174
175fn rewrite_inst_with_rules(inst: &mut Instruction, rules: &FxHashMap<u32, u32>) {
176 if let Some(ref mut id) = inst.result_type {
177 // If the rewrite rules contain this ID, replace with the mapped value, otherwise don't touch it.
178 *id = rules.get(id).copied().unwrap_or(*id);
179 }
180 for op in &mut inst.operands {
181 if let Some(id) = op.id_ref_any_mut() {
182 *id = rules.get(id).copied().unwrap_or(*id);
183 }
184 }
185}
186
187pub fn remove_duplicate_types(module: &mut Module) {
188 // Keep in mind, this algorithm requires forward type references to not exist - i.e. it's a valid spir-v module.
189
190 // When a duplicate type is encountered, then this is a map from the deleted ID, to the new, deduplicated ID.
191 let mut rewrite_rules = FxHashMap::default();
192 // Instructions are encoded into "keys": their opcode, followed by arguments, then annotations.
193 // Importantly, result_id is left out. This means that any instruction that declares the same
194 // type, but with different result_id, will result in the same key.
195 let mut key_to_result_id = FxHashMap::default();
196 // TODO: This is implementing forward pointers incorrectly.
197 let mut unresolved_forward_pointers = FxHashSet::default();
198
199 // Collect a map from type ID to an annotation "key blob" (to append to the type key)
200 let annotations = gather_annotations(&module.annotations);
201 let names = gather_names(&module.debug_names);
202
203 for inst in &mut module.types_global_values {
204 if inst.class.opcode == Op::TypeForwardPointer
205 && let Operand::IdRef(id) = inst.operands[0]
206 {
207 unresolved_forward_pointers.insert(id);
208 continue;
209 }
210 if inst.class.opcode == Op::TypePointer
211 && unresolved_forward_pointers.contains(&inst.result_id.unwrap())
212 {
213 unresolved_forward_pointers.remove(&inst.result_id.unwrap());
214 }
215 // This is an important spot: Say that we come upon a duplicated aggregate type (one that references
216 // other types). Its arguments may be duplicated themselves, and so building the key directly will fail
217 // to match up with the first type. However, **because forward references are not allowed**, we're
218 // guaranteed to have already found and deduplicated the argument types! So that means the deduplication
219 // translation is already in rewrite_rules, and we merely need to apply the mapping before generating
220 // the key.
221 // Nit: Overwriting the instruction isn't technically necessary, as it will get handled by the final
222 // all_inst_iter_mut pass below. However, the code is a lil bit cleaner this way I guess.
223 rewrite_inst_with_rules(inst, &rewrite_rules);
224
225 let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations, &names);
226
227 match key_to_result_id.entry(key) {
228 hash_map::Entry::Vacant(entry) => {
229 // This is the first time we've seen this key. Insert the key into the map, registering this type as
230 // something other types can deduplicate to.
231 entry.insert(inst.result_id.unwrap());
232 }
233 hash_map::Entry::Occupied(entry) => {
234 // We've already seen this key. We need to do two things:
235 // 1) Add a rewrite rule from this type to the type that we saw before.
236 let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
237 // 2) Erase this instruction. Because we're iterating over this vec, removing an element is hard, so
238 // clear it with OpNop, and then remove it in the retain() call below.
239 assert!(old_value.is_none());
240 *inst = Instruction::new(Op::Nop, None, None, vec![]);
241 }
242 }
243 }
244
245 // We rewrote instructions we wanted to remove with OpNop. Remove them properly.
246 module
247 .types_global_values
248 .retain(|op| op.class.opcode != Op::Nop);
249
250 // Apply the rewrite rules to the whole module
251 for inst in module.all_inst_iter_mut() {
252 rewrite_inst_with_rules(inst, &rewrite_rules);
253 }
254
255 // The same decorations for duplicated types will cause those different types to merge
256 // together. So, we need to deduplicate the annotations as well. (Note we *do* care about the
257 // ID of the type being applied to here, unlike `gather_annotations`)
258 let mut anno_set = FxHashSet::default();
259 module
260 .annotations
261 .retain(|inst| anno_set.insert(inst.assemble()));
262 // Same thing with OpName
263 let mut name_ids = FxHashSet::default();
264 let mut member_name_ids = FxHashSet::default();
265 module.debug_names.retain(|inst| {
266 (inst.class.opcode != Op::Name || name_ids.insert(inst.operands[0].unwrap_id_ref()))
267 && (inst.class.opcode != Op::MemberName
268 || member_name_ids.insert((
269 inst.operands[0].unwrap_id_ref(),
270 inst.operands[1].unwrap_literal_bit32(),
271 )))
272 });
273}
274
275pub fn remove_duplicate_debuginfo(module: &mut Module) {
276 // FIXME(eddyb) avoid repeating this across different passes/helpers.
277 let custom_ext_inst_set_import = module
278 .ext_inst_imports
279 .iter()
280 .find(|inst| {
281 assert_eq!(inst.class.opcode, Op::ExtInstImport);
282 inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..]
283 })
284 .map(|inst| inst.result_id.unwrap());
285
286 let deduper = DebuginfoDeduplicator {
287 custom_ext_inst_set_import,
288 };
289 for func in &mut module.functions {
290 deduper.remove_duplicate_debuginfo_in_function(func);
291 }
292}
293
294pub struct DebuginfoDeduplicator {
295 pub custom_ext_inst_set_import: Option<Word>,
296}
297
298impl DebuginfoDeduplicator {
299 pub fn remove_duplicate_debuginfo_in_function(&self, func: &mut rspirv::dr::Function) {
300 for block in &mut func.blocks {
301 // Ignore the terminator, it's effectively "outside" debuginfo.
302 let (_, insts) = block.instructions.split_last_mut().unwrap();
303
304 // HACK(eddyb) to make random access easier, we first replace unused
305 // instructions with `OpNop`, and then remove all the `OpNop`s.
306
307 #[derive(Clone)]
308 struct DbgLocInst {
309 inst_idx: usize,
310 used: bool,
311 }
312
313 fn nop() -> Instruction {
314 Instruction::new(Op::Nop, None, None, vec![])
315 }
316 impl DbgLocInst {
317 fn nop_if_unused(&self, insts: &mut [Instruction]) {
318 if !self.used {
319 insts[self.inst_idx] = nop();
320 }
321 }
322 }
323
324 #[derive(Clone, Default)]
325 struct DbgState {
326 loc: Option<DbgLocInst>,
327 has_semantic_insts: bool,
328 }
329 let mut dbg = DbgState::default();
330
331 struct Frame {
332 call_dbg: DbgState,
333 push_inst_idx: usize,
334 }
335 let mut inlined_frames = SmallVec::<[Frame; 8]>::new();
336
337 // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()`
338 // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of
339 // N pops will reverse the N last entries of `inlined_frames` into
340 // this vector (and go from outside-in, to inside-out), which allows
341 // *fusing* a pop with a push (of an identical inlined frame), when
342 // no interverning instructions prevent it (such instructions will
343 // clear this vector to indicate the pops are "fully committed").
344 struct PoppedFrame {
345 frame: Frame,
346 callee_has_semantic_insts: bool,
347 pop_inst_idx: usize,
348 }
349 let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new();
350
351 for inst_idx in 0..insts.len() {
352 let inst = &insts[inst_idx];
353 let custom_op = match inst.class.opcode {
354 Op::ExtInst
355 if Some(inst.operands[0].unwrap_id_ref())
356 == self.custom_ext_inst_set_import =>
357 {
358 Some(CustomOp::decode_from_ext_inst(inst))
359 }
360 _ => None,
361 };
362
363 fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ {
364 (inst.class.opcode, &inst.operands)
365 }
366
367 // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving
368 // cases must all use `if can_continue { continue; }` to skip the
369 // draining logic (`can_continue` is only `false` at the very end).
370 let can_continue = inst_idx < insts.len() - 1;
371 let prev_dbg_loc_snapshot = dbg.loc.clone();
372 match (inst.class.opcode, custom_op) {
373 (Op::Line | Op::NoLine, _)
374 | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => {
375 // HACK(eddyb) prefer keeping older active `DbgLocInst`s,
376 // if all the details are the same (it helps with fusion).
377 if dbg.loc.as_ref().is_some_and(|old_dbg_loc| {
378 inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx])
379 }) {
380 insts[inst_idx] = nop();
381 if can_continue {
382 continue;
383 }
384 } else {
385 dbg.loc = Some(DbgLocInst {
386 inst_idx,
387 used: false,
388 });
389 }
390 }
391 (_, Some(CustomOp::PushInlinedCallFrame)) => {
392 // HACK(eddyb) attempt fusing this push with the last pop.
393 let fuse_with_last_pop = fusable_freshly_popped_inlined_frames
394 .last()
395 .is_some_and(|last_popped| {
396 // HACK(eddyb) updating `dbg.loc` deduplicates eagerly,
397 // so here it suffices to check the (deduped) indices.
398 let dbg_loc_inst_idx =
399 |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx);
400 dbg_loc_inst_idx(&last_popped.frame.call_dbg)
401 == dbg_loc_inst_idx(&dbg)
402 && inst_eq_key(inst)
403 == inst_eq_key(&insts[last_popped.frame.push_inst_idx])
404 });
405 if fuse_with_last_pop {
406 let PoppedFrame {
407 frame,
408 callee_has_semantic_insts,
409 pop_inst_idx,
410 } = fusable_freshly_popped_inlined_frames.pop().unwrap();
411
412 insts[pop_inst_idx] = nop();
413
414 // Can't make entering an inlined function a nop,
415 // as it needs to reset callee-side `DbgLocInst`,
416 // but we can replace it in-place and hope later
417 // it get nop'd out by some real `DbgLocInst`.
418 insts[inst_idx].operands.splice(
419 1..,
420 [Operand::LiteralExtInstInteger(
421 CustomOp::ClearDebugSrcLoc as u32,
422 )],
423 );
424 dbg = DbgState {
425 loc: Some(DbgLocInst {
426 inst_idx,
427 used: false,
428 }),
429 has_semantic_insts: callee_has_semantic_insts,
430 };
431
432 inlined_frames.push(frame);
433
434 // Allow further fusing to occur.
435 if can_continue {
436 continue;
437 }
438 } else {
439 // HACK(eddyb) the actual push to `inlined_frames` is
440 // done at the very end of the loop body, to be able
441 // to process any pending updates on the previous state.
442 }
443 }
444 (_, Some(CustomOp::PopInlinedCallFrame)) => {
445 // Leaving an inlined function doesn't use `DbgLocInst`.
446 if let Some(dbg_loc) = dbg.loc.take() {
447 // HACK(eddyb) only treat as "definitely unused"
448 // instructions that are too "recent" to have been
449 // used by a `PushInlinedCallFrame` with a still
450 // uncommitted `PopInlinedCallFrame`.
451 let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames
452 .last()
453 .map_or(0, |last_popped| last_popped.pop_inst_idx);
454 if dbg_loc.inst_idx > min_safe_inst_idx_to_nop {
455 dbg_loc.nop_if_unused(insts);
456 }
457 }
458 if let Some(frame) = inlined_frames.pop() {
459 let callee_has_semantic_insts = dbg.has_semantic_insts;
460 dbg = frame.call_dbg.clone();
461 dbg.has_semantic_insts |= callee_has_semantic_insts;
462
463 // HACK(eddyb) inform future `PushInlinedCallFrame`s
464 // of potential fusion, by saving a copy of the frame.
465 fusable_freshly_popped_inlined_frames.push(PoppedFrame {
466 frame,
467 callee_has_semantic_insts,
468 pop_inst_idx: inst_idx,
469 });
470 } else {
471 // FIXME(eddyb) this may indicate a bug elsewhere.
472 insts[inst_idx] = nop();
473 }
474 if can_continue {
475 continue;
476 }
477 }
478 _ => {
479 if let Some(dbg_loc) = &mut dbg.loc {
480 dbg_loc.used = true;
481 }
482 dbg.has_semantic_insts = true;
483 }
484 }
485
486 // NOTE(eddyb) mutable so that it may be marked as used below.
487 let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| {
488 dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx)
489 });
490
491 // NOTE(eddyb) the iteration order doesn't matter, as this is
492 // effectively a set of `PopInlinedCallFrame`s which have had
493 // all their other side-effects processed, and didn't get a
494 // chance to be fused away, so they're getting committed.
495 for popped in fusable_freshly_popped_inlined_frames.drain(..) {
496 let PoppedFrame {
497 mut frame,
498 callee_has_semantic_insts,
499 pop_inst_idx,
500 } = popped;
501
502 // HACK(eddyb) this popped frame's `call_dbg.loc` may still
503 // be used elsewhere, in which case that use takes precedence,
504 // and is effectively the new "owner" of the `DbgLocInst`.
505 let call_dbg_loc_used_elsewhere =
506 frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| {
507 [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()]
508 .into_iter()
509 .flatten()
510 .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx)
511 });
512 if call_dbg_loc_used_elsewhere.is_some() {
513 frame.call_dbg.loc = None;
514 }
515
516 if callee_has_semantic_insts {
517 // The `PushInlinedCallFrame` being kept requires its
518 // original `DbgLocInst` to also be kept around.
519 if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere {
520 call_dbg_loc.used = true;
521 }
522 } else {
523 // If the entire inlined call is all `OpNop`s now,
524 // entering/leaving it can also become `OpNop`s.
525 if let Some(call_dbg_loc) = &mut frame.call_dbg.loc {
526 call_dbg_loc.nop_if_unused(insts);
527 }
528 insts[frame.push_inst_idx] = nop();
529 insts[pop_inst_idx] = nop();
530 }
531 }
532
533 // Only remove a replaced `DbgLocInst` after it had a chance to
534 // be marked as used above (for e.g. a `PushInlinedCallFrame`).
535 if let Some(old_dbg_loc) = freshly_replaced_dbg_loc {
536 old_dbg_loc.nop_if_unused(insts);
537 }
538
539 // HACK(eddyb) the actual push to `inlined_frames` is
540 // done at the very end of the loop body, to be able
541 // to process any pending updates on the previous state.
542 if custom_op == Some(CustomOp::PushInlinedCallFrame) {
543 inlined_frames.push(Frame {
544 call_dbg: mem::take(&mut dbg),
545 push_inst_idx: inst_idx,
546 });
547 }
548 }
549
550 assert!(fusable_freshly_popped_inlined_frames.is_empty());
551
552 block
553 .instructions
554 .retain(|inst| inst.class.opcode != Op::Nop);
555 }
556 }
557}