rustc_codegen_spirv/linker/duplicates.rs
1use crate::custom_insts::{self, CustomOp};
2use rspirv::binary::Assemble;
3use rspirv::dr::{Instruction, Module, Operand};
4use rspirv::spirv::{Op, Word};
5use rustc_data_structures::fx::{FxHashMap, FxHashSet};
6use rustc_middle::bug;
7use smallvec::SmallVec;
8use std::collections::hash_map;
9use std::mem;
10
11// FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for
12// file-level debuginfo (but using SPIR-T for linking might be better?).
13
14pub fn remove_duplicate_extensions(module: &mut Module) {
15 let mut set = FxHashSet::default();
16
17 module.extensions.retain(|inst| {
18 inst.class.opcode != Op::Extension
19 || set.insert(inst.operands[0].unwrap_literal_string().to_string())
20 });
21}
22
23pub fn remove_duplicate_capabilities(module: &mut Module) {
24 let mut set = FxHashSet::default();
25 module.capabilities.retain(|inst| {
26 inst.class.opcode != Op::Capability || set.insert(inst.operands[0].unwrap_capability())
27 });
28}
29
30pub fn remove_duplicate_ext_inst_imports(module: &mut Module) {
31 // This is a simpler version of remove_duplicate_types, see that for comments
32 let mut ext_to_id = FxHashMap::default();
33 let mut rewrite_rules = FxHashMap::default();
34
35 // First deduplicate the imports
36 for inst in &mut module.ext_inst_imports {
37 if let Operand::LiteralString(ext_inst_import) = &inst.operands[0] {
38 match ext_to_id.entry(ext_inst_import.clone()) {
39 hash_map::Entry::Vacant(entry) => {
40 entry.insert(inst.result_id.unwrap());
41 }
42 hash_map::Entry::Occupied(entry) => {
43 let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
44 assert!(old_value.is_none());
45 // We're iterating through the vec, so removing items is hard - nop it out.
46 *inst = Instruction::new(Op::Nop, None, None, vec![]);
47 }
48 }
49 }
50 }
51
52 // Delete the nops we inserted
53 module
54 .ext_inst_imports
55 .retain(|op| op.class.opcode != Op::Nop);
56
57 // Then rewrite all OpExtInst referencing the rewritten IDs
58 for inst in module.all_inst_iter_mut() {
59 if inst.class.opcode == Op::ExtInst
60 && let Operand::IdRef(ref mut id) = inst.operands[0]
61 {
62 *id = rewrite_rules.get(id).copied().unwrap_or(*id);
63 }
64 }
65}
66
67fn make_annotation_key(inst: &Instruction) -> Vec<u32> {
68 let mut data = vec![inst.class.opcode as u32];
69
70 // skip over the target ID
71 for op in inst.operands.iter().skip(1) {
72 op.assemble_into(&mut data);
73 }
74
75 data
76}
77
78fn gather_annotations(annotations: &[Instruction]) -> FxHashMap<Word, Vec<u32>> {
79 let mut map = FxHashMap::default();
80 for inst in annotations {
81 match inst.class.opcode {
82 Op::Decorate
83 | Op::DecorateId
84 | Op::DecorateString
85 | Op::MemberDecorate
86 | Op::MemberDecorateString => match map.entry(inst.operands[0].id_ref_any().unwrap()) {
87 hash_map::Entry::Vacant(entry) => {
88 entry.insert(vec![make_annotation_key(inst)]);
89 }
90 hash_map::Entry::Occupied(mut entry) => {
91 entry.get_mut().push(make_annotation_key(inst));
92 }
93 },
94 _ => {}
95 }
96 }
97 map.into_iter()
98 .map(|(key, mut value)| {
99 (key, {
100 value.sort();
101 value.concat()
102 })
103 })
104 .collect()
105}
106
107fn gather_names(debug_names: &[Instruction]) -> FxHashMap<Word, String> {
108 debug_names
109 .iter()
110 .filter(|inst| inst.class.opcode == Op::Name)
111 .map(|inst| {
112 (
113 inst.operands[0].unwrap_id_ref(),
114 inst.operands[1].unwrap_literal_string().to_owned(),
115 )
116 })
117 .collect()
118}
119
120fn make_dedupe_key(
121 inst: &Instruction,
122 unresolved_forward_pointers: &FxHashSet<Word>,
123 annotations: &FxHashMap<Word, Vec<u32>>,
124 names: &FxHashMap<Word, String>,
125) -> Vec<u32> {
126 let mut data = vec![inst.class.opcode as u32];
127
128 if let Some(id) = inst.result_type {
129 // We're not only deduplicating types here, but constants as well. Those contain result_types, and so we
130 // need to include those here. For example, OpConstant can have the same arg, but different result_type,
131 // and it should not be deduplicated (e.g. the constants 1u8 and 1u16).
132 data.push(id);
133 }
134 for op in &inst.operands {
135 if let Operand::IdRef(id) = op {
136 if unresolved_forward_pointers.contains(id) {
137 // TODO: This is implementing forward pointers incorrectly. All unresolved forward pointers will
138 // compare equal.
139 Operand::IdRef(0).assemble_into(&mut data);
140 } else {
141 op.assemble_into(&mut data);
142 }
143 } else {
144 op.assemble_into(&mut data);
145 }
146 }
147 if let Some(id) = inst.result_id {
148 if let Some(annos) = annotations.get(&id) {
149 data.extend_from_slice(annos);
150 }
151 if inst.class.opcode == Op::Variable {
152 // Names only matter for OpVariable.
153 if let Some(name) = names.get(&id) {
154 // Jump through some hoops to shove a String into a Vec<u32>.
155 //
156 // FIXME(eddyb) this should `.assemble_into(&mut data)` the
157 // `Operand::LiteralString(...)` from the original `Op::Name`.
158 for chunk in name.as_bytes().chunks(4) {
159 let slice = match *chunk {
160 [a] => [a, 0, 0, 0],
161 [a, b] => [a, b, 0, 0],
162 [a, b, c] => [a, b, c, 0],
163 [a, b, c, d] => [a, b, c, d],
164 _ => bug!(),
165 };
166 data.push(u32::from_le_bytes(slice));
167 }
168 }
169 }
170 }
171
172 data
173}
174
175fn rewrite_inst_with_rules(inst: &mut Instruction, rules: &FxHashMap<u32, u32>) {
176 if let Some(ref mut id) = inst.result_type {
177 // If the rewrite rules contain this ID, replace with the mapped value, otherwise don't touch it.
178 *id = rules.get(id).copied().unwrap_or(*id);
179 }
180 for op in &mut inst.operands {
181 if let Some(id) = op.id_ref_any_mut() {
182 *id = rules.get(id).copied().unwrap_or(*id);
183 }
184 }
185}
186
187pub fn remove_duplicate_types(module: &mut Module) {
188 // Keep in mind, this algorithm requires forward type references to not exist - i.e. it's a valid spir-v module.
189
190 // When a duplicate type is encountered, then this is a map from the deleted ID, to the new, deduplicated ID.
191 let mut rewrite_rules = FxHashMap::default();
192 // Instructions are encoded into "keys": their opcode, followed by arguments, then annotations.
193 // Importantly, result_id is left out. This means that any instruction that declares the same
194 // type, but with different result_id, will result in the same key.
195 let mut key_to_result_id = FxHashMap::default();
196 // TODO: This is implementing forward pointers incorrectly.
197 let mut unresolved_forward_pointers = FxHashSet::default();
198
199 // Collect a map from type ID to an annotation "key blob" (to append to the type key)
200 let annotations = gather_annotations(&module.annotations);
201 let names = gather_names(&module.debug_names);
202
203 for inst in &mut module.types_global_values {
204 if inst.class.opcode == Op::TypeForwardPointer
205 && let Operand::IdRef(id) = inst.operands[0]
206 {
207 unresolved_forward_pointers.insert(id);
208 continue;
209 }
210 if inst.class.opcode == Op::TypePointer
211 && unresolved_forward_pointers.contains(&inst.result_id.unwrap())
212 {
213 unresolved_forward_pointers.remove(&inst.result_id.unwrap());
214 }
215 // This is an important spot: Say that we come upon a duplicated aggregate type (one that references
216 // other types). Its arguments may be duplicated themselves, and so building the key directly will fail
217 // to match up with the first type. However, **because forward references are not allowed**, we're
218 // guaranteed to have already found and deduplicated the argument types! So that means the deduplication
219 // translation is already in rewrite_rules, and we merely need to apply the mapping before generating
220 // the key.
221 // Nit: Overwriting the instruction isn't technically necessary, as it will get handled by the final
222 // all_inst_iter_mut pass below. However, the code is a lil bit cleaner this way I guess.
223 rewrite_inst_with_rules(inst, &rewrite_rules);
224
225 let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations, &names);
226
227 match key_to_result_id.entry(key) {
228 hash_map::Entry::Vacant(entry) => {
229 // This is the first time we've seen this key. Insert the key into the map, registering this type as
230 // something other types can deduplicate to.
231 entry.insert(inst.result_id.unwrap());
232 }
233 hash_map::Entry::Occupied(entry) => {
234 // We've already seen this key. We need to do two things:
235 // 1) Add a rewrite rule from this type to the type that we saw before.
236 let old_value = rewrite_rules.insert(inst.result_id.unwrap(), *entry.get());
237 // 2) Erase this instruction. Because we're iterating over this vec, removing an element is hard, so
238 // clear it with OpNop, and then remove it in the retain() call below.
239 assert!(old_value.is_none());
240 *inst = Instruction::new(Op::Nop, None, None, vec![]);
241 }
242 }
243 }
244
245 // We rewrote instructions we wanted to remove with OpNop. Remove them properly.
246 module
247 .types_global_values
248 .retain(|op| op.class.opcode != Op::Nop);
249
250 // Apply the rewrite rules to the whole module
251 for inst in module.all_inst_iter_mut() {
252 rewrite_inst_with_rules(inst, &rewrite_rules);
253 }
254
255 // The same decorations for duplicated types will cause those different types to merge
256 // together. So, we need to deduplicate the annotations as well. (Note we *do* care about the
257 // ID of the type being applied to here, unlike `gather_annotations`)
258 let mut anno_set = FxHashSet::default();
259 module
260 .annotations
261 .retain(|inst| anno_set.insert(inst.assemble()));
262 // Same thing with OpName
263 let mut name_ids = FxHashSet::default();
264 let mut member_name_ids = FxHashSet::default();
265 module.debug_names.retain(|inst| {
266 (inst.class.opcode != Op::Name || name_ids.insert(inst.operands[0].unwrap_id_ref()))
267 && (inst.class.opcode != Op::MemberName
268 || member_name_ids.insert((
269 inst.operands[0].unwrap_id_ref(),
270 inst.operands[1].unwrap_literal_bit32(),
271 )))
272 });
273}
274
275pub fn remove_duplicate_debuginfo(module: &mut Module) {
276 // FIXME(eddyb) avoid repeating this across different passes/helpers.
277 let custom_ext_inst_set_import = module
278 .ext_inst_imports
279 .iter()
280 .find(|inst| {
281 assert_eq!(inst.class.opcode, Op::ExtInstImport);
282 inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..]
283 })
284 .map(|inst| inst.result_id.unwrap());
285
286 for func in &mut module.functions {
287 for block in &mut func.blocks {
288 // Ignore the terminator, it's effectively "outside" debuginfo.
289 let (_, insts) = block.instructions.split_last_mut().unwrap();
290
291 // HACK(eddyb) to make random access easier, we first replace unused
292 // instructions with `OpNop`, and then remove all the `OpNop`s.
293
294 #[derive(Clone)]
295 struct DbgLocInst {
296 inst_idx: usize,
297 used: bool,
298 }
299
300 fn nop() -> Instruction {
301 Instruction::new(Op::Nop, None, None, vec![])
302 }
303 impl DbgLocInst {
304 fn nop_if_unused(&self, insts: &mut [Instruction]) {
305 if !self.used {
306 insts[self.inst_idx] = nop();
307 }
308 }
309 }
310
311 #[derive(Clone, Default)]
312 struct DbgState {
313 loc: Option<DbgLocInst>,
314 has_semantic_insts: bool,
315 }
316 let mut dbg = DbgState::default();
317
318 struct Frame {
319 call_dbg: DbgState,
320 push_inst_idx: usize,
321 }
322 let mut inlined_frames = SmallVec::<[Frame; 8]>::new();
323
324 // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()`
325 // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of
326 // N pops will reverse the N last entries of `inlined_frames` into
327 // this vector (and go from outside-in, to inside-out), which allows
328 // *fusing* a pop with a push (of an identical inlined frame), when
329 // no interverning instructions prevent it (such instructions will
330 // clear this vector to indicate the pops are "fully committed").
331 struct PoppedFrame {
332 frame: Frame,
333 callee_has_semantic_insts: bool,
334 pop_inst_idx: usize,
335 }
336 let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new();
337
338 for inst_idx in 0..insts.len() {
339 let inst = &insts[inst_idx];
340 let custom_op = match inst.class.opcode {
341 Op::ExtInst
342 if Some(inst.operands[0].unwrap_id_ref()) == custom_ext_inst_set_import =>
343 {
344 Some(CustomOp::decode_from_ext_inst(inst))
345 }
346 _ => None,
347 };
348
349 fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ {
350 (inst.class.opcode, &inst.operands)
351 }
352
353 // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving
354 // cases must all use `if can_continue { continue; }` to skip the
355 // draining logic (`can_continue` is only `false` at the very end).
356 let can_continue = inst_idx < insts.len() - 1;
357 let prev_dbg_loc_snapshot = dbg.loc.clone();
358 match (inst.class.opcode, custom_op) {
359 (Op::Line | Op::NoLine, _)
360 | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => {
361 // HACK(eddyb) prefer keeping older active `DbgLocInst`s,
362 // if all the details are the same (it helps with fusion).
363 if dbg.loc.as_ref().is_some_and(|old_dbg_loc| {
364 inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx])
365 }) {
366 insts[inst_idx] = nop();
367 if can_continue {
368 continue;
369 }
370 } else {
371 dbg.loc = Some(DbgLocInst {
372 inst_idx,
373 used: false,
374 });
375 }
376 }
377 (_, Some(CustomOp::PushInlinedCallFrame)) => {
378 // HACK(eddyb) attempt fusing this push with the last pop.
379 let fuse_with_last_pop = fusable_freshly_popped_inlined_frames
380 .last()
381 .is_some_and(|last_popped| {
382 // HACK(eddyb) updating `dbg.loc` deduplicates eagerly,
383 // so here it suffices to check the (deduped) indices.
384 let dbg_loc_inst_idx =
385 |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx);
386 dbg_loc_inst_idx(&last_popped.frame.call_dbg)
387 == dbg_loc_inst_idx(&dbg)
388 && inst_eq_key(inst)
389 == inst_eq_key(&insts[last_popped.frame.push_inst_idx])
390 });
391 if fuse_with_last_pop {
392 let PoppedFrame {
393 frame,
394 callee_has_semantic_insts,
395 pop_inst_idx,
396 } = fusable_freshly_popped_inlined_frames.pop().unwrap();
397
398 insts[pop_inst_idx] = nop();
399
400 // Can't make entering an inlined function a nop,
401 // as it needs to reset callee-side `DbgLocInst`,
402 // but we can replace it in-place and hope later
403 // it get nop'd out by some real `DbgLocInst`.
404 insts[inst_idx].operands.splice(
405 1..,
406 [Operand::LiteralExtInstInteger(
407 CustomOp::ClearDebugSrcLoc as u32,
408 )],
409 );
410 dbg = DbgState {
411 loc: Some(DbgLocInst {
412 inst_idx,
413 used: false,
414 }),
415 has_semantic_insts: callee_has_semantic_insts,
416 };
417
418 inlined_frames.push(frame);
419
420 // Allow further fusing to occur.
421 if can_continue {
422 continue;
423 }
424 } else {
425 // HACK(eddyb) the actual push to `inlined_frames` is
426 // done at the very end of the loop body, to be able
427 // to process any pending updates on the previous state.
428 }
429 }
430 (_, Some(CustomOp::PopInlinedCallFrame)) => {
431 // Leaving an inlined function doesn't use `DbgLocInst`.
432 if let Some(dbg_loc) = dbg.loc.take() {
433 // HACK(eddyb) only treat as "definitely unused"
434 // instructions that are too "recent" to have been
435 // used by a `PushInlinedCallFrame` with a still
436 // uncommitted `PopInlinedCallFrame`.
437 let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames
438 .last()
439 .map_or(0, |last_popped| last_popped.pop_inst_idx);
440 if dbg_loc.inst_idx > min_safe_inst_idx_to_nop {
441 dbg_loc.nop_if_unused(insts);
442 }
443 }
444 if let Some(frame) = inlined_frames.pop() {
445 let callee_has_semantic_insts = dbg.has_semantic_insts;
446 dbg = frame.call_dbg.clone();
447 dbg.has_semantic_insts |= callee_has_semantic_insts;
448
449 // HACK(eddyb) inform future `PushInlinedCallFrame`s
450 // of potential fusion, by saving a copy of the frame.
451 fusable_freshly_popped_inlined_frames.push(PoppedFrame {
452 frame,
453 callee_has_semantic_insts,
454 pop_inst_idx: inst_idx,
455 });
456 } else {
457 // FIXME(eddyb) this may indicate a bug elsewhere.
458 insts[inst_idx] = nop();
459 }
460 if can_continue {
461 continue;
462 }
463 }
464 _ => {
465 if let Some(dbg_loc) = &mut dbg.loc {
466 dbg_loc.used = true;
467 }
468 dbg.has_semantic_insts = true;
469 }
470 }
471
472 // NOTE(eddyb) mutable so that it may be marked as used below.
473 let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| {
474 dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx)
475 });
476
477 // NOTE(eddyb) the iteration order doesn't matter, as this is
478 // effectively a set of `PopInlinedCallFrame`s which have had
479 // all their other side-effects processed, and didn't get a
480 // chance to be fused away, so they're getting committed.
481 for popped in fusable_freshly_popped_inlined_frames.drain(..) {
482 let PoppedFrame {
483 mut frame,
484 callee_has_semantic_insts,
485 pop_inst_idx,
486 } = popped;
487
488 // HACK(eddyb) this popped frame's `call_dbg.loc` may still
489 // be used elsewhere, in which case that use takes precedence,
490 // and is effectively the new "owner" of the `DbgLocInst`.
491 let call_dbg_loc_used_elsewhere =
492 frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| {
493 [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()]
494 .into_iter()
495 .flatten()
496 .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx)
497 });
498 if call_dbg_loc_used_elsewhere.is_some() {
499 frame.call_dbg.loc = None;
500 }
501
502 if callee_has_semantic_insts {
503 // The `PushInlinedCallFrame` being kept requires its
504 // original `DbgLocInst` to also be kept around.
505 if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere {
506 call_dbg_loc.used = true;
507 }
508 } else {
509 // If the entire inlined call is all `OpNop`s now,
510 // entering/leaving it can also become `OpNop`s.
511 if let Some(call_dbg_loc) = &mut frame.call_dbg.loc {
512 call_dbg_loc.nop_if_unused(insts);
513 }
514 insts[frame.push_inst_idx] = nop();
515 insts[pop_inst_idx] = nop();
516 }
517 }
518
519 // Only remove a replaced `DbgLocInst` after it had a chance to
520 // be marked as used above (for e.g. a `PushInlinedCallFrame`).
521 if let Some(old_dbg_loc) = freshly_replaced_dbg_loc {
522 old_dbg_loc.nop_if_unused(insts);
523 }
524
525 // HACK(eddyb) the actual push to `inlined_frames` is
526 // done at the very end of the loop body, to be able
527 // to process any pending updates on the previous state.
528 if custom_op == Some(CustomOp::PushInlinedCallFrame) {
529 inlined_frames.push(Frame {
530 call_dbg: mem::take(&mut dbg),
531 push_inst_idx: inst_idx,
532 });
533 }
534 }
535
536 assert!(fusable_freshly_popped_inlined_frames.is_empty());
537
538 block
539 .instructions
540 .retain(|inst| inst.class.opcode != Op::Nop);
541 }
542 }
543}