spirv_std/arch/
subgroup.rs

1#[cfg(target_arch = "spirv")]
2use crate::arch::barrier;
3use crate::float::Float;
4use crate::integer::{Integer, SignedInteger, UnsignedInteger};
5#[cfg(target_arch = "spirv")]
6use crate::memory::{Scope, Semantics};
7use crate::vector::VectorOrScalar;
8#[cfg(target_arch = "spirv")]
9use core::arch::asm;
10
/// [`Scope::Subgroup`] as a raw `u32`, usable as a const generic argument and as an
/// `asm!` const operand.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
13
14/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup.
15/// Mostly used in group ballot operations.
16#[repr(transparent)]
17#[derive(Copy, Clone, Default, Eq, PartialEq)]
18#[cfg_attr(feature = "bytemuck", derive(bytemuck::Zeroable, bytemuck::Pod))]
19pub struct SubgroupMask(pub glam::UVec4);
20
/// Defines the class of group operation.
///
/// The discriminant of each variant is the value that is cast to `u32` and handed to
/// the group instructions as their group-operation operand.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GroupOperation {
    /// A reduction operation for all values of a specific value X specified by invocations within a workgroup.
    Reduce = 0,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)]
    InclusiveScan = 1,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)].
    ExclusiveScan = 2,
    /// The [`GroupOperation`] `ClusteredReduce`.
    ///
    /// All instructions with a [`GroupOperation`] require an additional `ClusterSize` operand when the
    /// [`GroupOperation`] is `ClusteredReduce`. To map this requirement into Rust, the `_clustered`
    /// function variants are fixed to `ClusteredReduce` and take the additional `ClusterSize` parameter
    /// as a const generic.
    ClusteredReduce = 3,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedReduceNV = 6,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedInclusiveScanNV = 7,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedExclusiveScanNV = 8,
}
54
55/// The function `subgroupBarrier()` enforces that all active invocations within a
56/// subgroup must execute this function before any are allowed to continue their
57/// execution, and the results of any memory stores performed using coherent
58/// variables performed prior to the call will be visible to any future
59/// coherent access to the same memory performed by any other shader invocation
60/// within the same subgroup.
61///
62/// Requires Capability `GroupNonUniform`.
63#[spirv_std_macros::gpu_only]
64#[doc(alias = "subgroupBarrier")]
65#[inline]
66pub fn subgroup_barrier() {
67    barrier::control_barrier::<
68        SUBGROUP,
69        SUBGROUP,
70        {
71            Semantics::ACQUIRE_RELEASE.bits()
72                | Semantics::UNIFORM_MEMORY.bits()
73                | Semantics::WORKGROUP_MEMORY.bits()
74                | Semantics::IMAGE_MEMORY.bits()
75        },
76    >();
77}
78
79/// The function `subgroupMemoryBarrier()` enforces the ordering of all memory
80/// transactions issued within a single shader invocation, as viewed by other
81/// invocations in the same subgroup.
82///
83/// Requires Capability `GroupNonUniform`.
84#[spirv_std_macros::gpu_only]
85#[doc(alias = "subgroupMemoryBarrier")]
86#[inline]
87pub fn subgroup_memory_barrier() {
88    barrier::memory_barrier::<
89        SUBGROUP,
90        {
91            Semantics::ACQUIRE_RELEASE.bits()
92                | Semantics::UNIFORM_MEMORY.bits()
93                | Semantics::WORKGROUP_MEMORY.bits()
94                | Semantics::IMAGE_MEMORY.bits()
95        },
96    >();
97}
98
99/// The function `subgroupMemoryBarrierBuffer()` enforces the ordering of all
100/// memory transactions to buffer variables issued within a single shader
101/// invocation, as viewed by other invocations in the same subgroup.
102///
103/// Requires Capability `GroupNonUniform`.
104#[spirv_std_macros::gpu_only]
105#[doc(alias = "subgroupMemoryBarrierBuffer")]
106#[inline]
107pub fn subgroup_memory_barrier_buffer() {
108    barrier::memory_barrier::<
109        SUBGROUP,
110        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() },
111    >();
112}
113
114/// The function `subgroupMemoryBarrierShared()` enforces the ordering of all
115/// memory transactions to shared variables issued within a single shader
116/// invocation, as viewed by other invocations in the same subgroup.
117///
118/// Only available in compute shaders.
119///
120/// Requires Capability `GroupNonUniform`.
121#[spirv_std_macros::gpu_only]
122#[doc(alias = "subgroupMemoryBarrierShared")]
123#[inline]
124pub fn subgroup_memory_barrier_shared() {
125    barrier::memory_barrier::<
126        SUBGROUP,
127        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() },
128    >();
129}
130
131/// The function `subgroupMemoryBarrierImage()` enforces the ordering of all
132/// memory transactions to images issued within a single shader invocation, as
133/// viewed by other invocations in the same subgroup.
134///
135/// Requires Capability `GroupNonUniform`.
136#[spirv_std_macros::gpu_only]
137#[doc(alias = "subgroupMemoryBarrierImage")]
138#[inline]
139pub fn subgroup_memory_barrier_image() {
140    barrier::memory_barrier::<
141        SUBGROUP,
142        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() },
143    >();
144}
145
146/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false.
147///
148/// Result Type must be a Boolean type.
149///
150/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
151///
152/// Requires Capability `GroupNonUniform`.
153#[spirv_std_macros::gpu_only]
154#[doc(alias = "OpGroupNonUniformElect")]
155#[inline]
156pub fn subgroup_elect() -> bool {
157    let mut result = false;
158
159    unsafe {
160        asm! {
161            "%bool = OpTypeBool",
162            "%u32 = OpTypeInt 32 0",
163            "%subgroup = OpConstant %u32 {subgroup}",
164            "%result = OpGroupNonUniformElect %bool %subgroup",
165            "OpStore {result} %result",
166            subgroup = const SUBGROUP,
167            result = in(reg) &mut result,
168        }
169    }
170
171    result
172}
173
174/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for all active invocations in the group, otherwise the result is false.
175///
176/// Result Type must be a Boolean type.
177///
178/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
179///
180/// `predicate` must be a Boolean type.
181///
182/// Requires Capability `GroupNonUniformVote`.
183#[spirv_std_macros::gpu_only]
184#[doc(alias = "OpGroupNonUniformAll")]
185#[inline]
186pub fn subgroup_all(predicate: bool) -> bool {
187    let mut result = false;
188
189    unsafe {
190        asm! {
191            "%bool = OpTypeBool",
192            "%u32 = OpTypeInt 32 0",
193            "%subgroup = OpConstant %u32 {subgroup}",
194            "%predicate = OpLoad _ {predicate}",
195            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
196            "OpStore {result} %result",
197            subgroup = const SUBGROUP,
198            predicate = in(reg) &predicate,
199            result = in(reg) &mut result,
200        }
201    }
202
203    result
204}
205
206/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for any active invocation in the group, otherwise the result is false.
207///
208/// Result Type must be a Boolean type.
209///
210/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
211///
212/// `predicate` must be a Boolean type.
213///
214/// Requires Capability `GroupNonUniformVote`.
215#[spirv_std_macros::gpu_only]
216#[doc(alias = "OpGroupNonUniformAny")]
217#[inline]
218pub fn subgroup_any(predicate: bool) -> bool {
219    let mut result = false;
220
221    unsafe {
222        asm! {
223            "%bool = OpTypeBool",
224            "%u32 = OpTypeInt 32 0",
225            "%subgroup = OpConstant %u32 {subgroup}",
226            "%predicate = OpLoad _ {predicate}",
227            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
228            "OpStore {result} %result",
229            subgroup = const SUBGROUP,
230            predicate = in(reg) &predicate,
231            result = in(reg) &mut result,
232        }
233    }
234
235    result
236}
237
238/// Evaluates a `value` for all active invocations in the group. The result is true if `value` is equal for all active invocations in the group. Otherwise, the result is false.
239///
240/// Result Type must be a Boolean type.
241///
242/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
243///
244/// `value` must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used.
245///
246/// Requires Capability `GroupNonUniformVote`.
247#[spirv_std_macros::gpu_only]
248#[doc(alias = "OpGroupNonUniformAllEqual")]
249#[inline]
250pub fn subgroup_all_equal<T: VectorOrScalar>(value: T) -> bool {
251    let mut result = false;
252
253    unsafe {
254        asm! {
255            "%bool = OpTypeBool",
256            "%u32 = OpTypeInt 32 0",
257            "%subgroup = OpConstant %u32 {subgroup}",
258            "%value = OpLoad _ {value}",
259            "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
260            "OpStore {result} %result",
261            subgroup = const SUBGROUP,
262            value = in(reg) &value,
263            result = in(reg) &mut result,
264        }
265    }
266
267    result
268}
269
270/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
271///
272/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
273///
274/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
275///
276/// The type of `value` must be the same as Result Type.
277///
278/// `id` must be a scalar of integer type, whose Signedness operand is 0.
279///
280/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
281///
282/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
283///
284/// Requires Capability `GroupNonUniformBallot`.
285///
286/// # Safety
287/// * `id` must not be dynamically uniform
288/// * before 1.5: `id` must be constant
289/// * Result is undefined if `id` is an inactive invocation or out of bounds
290#[spirv_std_macros::gpu_only]
291#[doc(alias = "OpGroupNonUniformBroadcast")]
292#[inline]
293pub unsafe fn subgroup_broadcast<T: VectorOrScalar>(value: T, id: u32) -> T {
294    let mut result = T::default();
295
296    unsafe {
297        asm! {
298            "%u32 = OpTypeInt 32 0",
299            "%subgroup = OpConstant %u32 {subgroup}",
300            "%value = OpLoad _ {value}",
301            "%id = OpLoad _ {id}",
302            "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
303            "OpStore {result} %result",
304            subgroup = const SUBGROUP,
305            value = in(reg) &value,
306            id = in(reg) &id,
307            result = in(reg) &mut result,
308        }
309    }
310
311    result
312}
313
314/// Result is the `value` of the invocation from the active invocation with the lowest id in the group to all active invocations in the group.
315///
316/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
317///
318/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
319///
320/// The type of `value` must be the same as Result Type.
321///
322/// Requires Capability `GroupNonUniformBallot`.
323#[spirv_std_macros::gpu_only]
324#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
325#[inline]
326pub fn subgroup_broadcast_first<T: VectorOrScalar>(value: T) -> T {
327    let mut result = T::default();
328
329    unsafe {
330        asm! {
331            "%u32 = OpTypeInt 32 0",
332            "%subgroup = OpConstant %u32 {subgroup}",
333            "%value = OpLoad _ {value}",
334            "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
335            "OpStore {result} %result",
336            subgroup = const SUBGROUP,
337            value = in(reg) &value,
338            result = in(reg) &mut result,
339        }
340    }
341
342    result
343}
344
345/// Result is a bitfield value combining the `predicate` value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the `predicate` for that invocation evaluated to true; otherwise, it is set to zero.
346///
347/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
348///
349/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
350///
351/// Execution is a Scope that identifies the group of invocations affected by this command.
352///
353/// `predicate` must be a Boolean type.
354///
355/// Requires Capability `GroupNonUniformBallot`.
356#[spirv_std_macros::gpu_only]
357#[doc(alias = "OpGroupNonUniformBallot")]
358#[inline]
359pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
360    let mut result = SubgroupMask::default();
361
362    unsafe {
363        asm! {
364            "%u32 = OpTypeInt 32 0",
365            "%subgroup = OpConstant %u32 {subgroup}",
366            "%predicate = OpLoad _ {predicate}",
367            "%result = OpGroupNonUniformBallot typeof*{result} %subgroup %predicate",
368            "OpStore {result} %result",
369            subgroup = const SUBGROUP,
370            predicate = in(reg) &predicate,
371            result = in(reg) &mut result,
372        }
373    }
374
375    result
376}
377
378/// Evaluates a `value` for all active invocations in the group, resulting in true if the bit in `value` for the corresponding invocation is set to one, otherwise the result is false.
379///
380/// Result Type must be a Boolean type.
381///
382/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
383///
384/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
385///
386/// Behavior is undefined unless `value` is the same for all invocations that execute the same dynamic instance of this instruction.
387///
388/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
389///
390/// Requires Capability `GroupNonUniformBallot`.
391///
392/// # Safety
393/// * `value` must be the same for all dynamic instances of this instruction
394#[spirv_std_macros::gpu_only]
395#[doc(alias = "OpGroupNonUniformInverseBallot")]
396#[inline]
397pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
398    let mut result = false;
399
400    unsafe {
401        asm! {
402            "%bool = OpTypeBool",
403            "%u32 = OpTypeInt 32 0",
404            "%subgroup = OpConstant %u32 {subgroup}",
405            "%value = OpLoad _ {value}",
406            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
407            "OpStore {result} %result",
408            subgroup = const SUBGROUP,
409            value = in(reg) &value,
410            result = in(reg) &mut result,
411        }
412    }
413
414    result
415}
416
417/// Evaluates a value for all active invocations in the group, resulting in true if the bit in `value` that corresponds to `index` is set to one, otherwise the result is false.
418///
419/// Result Type must be a Boolean type.
420///
421/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
422///
423/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
424///
425/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
426///
427/// `index` must be a scalar of integer type, whose Signedness operand is 0.
428///
429/// The resulting value is undefined if `index` is greater than or equal to the size of the group.
430///
431/// Requires Capability `GroupNonUniformBallot`.
432///
433/// # Safety
434/// * This function is safe
435/// * Result is undefined if `id` is out of bounds
436#[spirv_std_macros::gpu_only]
437#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
438#[inline]
439pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
440    let mut result = false;
441
442    unsafe {
443        asm! {
444            "%bool = OpTypeBool",
445            "%u32 = OpTypeInt 32 0",
446            "%subgroup = OpConstant %u32 {subgroup}",
447            "%value = OpLoad _ {value}",
448            "%index = OpLoad _ {index}",
449            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
450            "OpStore {result} %result",
451            subgroup = const SUBGROUP,
452            value = in(reg) &value,
453            index = in(reg) &index,
454            result = in(reg) &mut result,
455        }
456    }
457
458    result
459}
460
461macro_rules! macro_subgroup_ballot_bit_count {
462    ($name:ident, $group_op:expr) => {
463        /// Result is the number of bits that are set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations.
464        ///
465        /// Result Type must be a scalar of integer type, whose Signedness operand is 0.
466        ///
467        /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
468        ///
469        /// The identity I for Operation is 0.
470        ///
471        /// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
472        ///
473        /// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
474        ///
475        /// Requires Capability `GroupNonUniformBallot`.
476        #[spirv_std_macros::gpu_only]
477        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
478        #[inline]
479        pub fn $name(value: SubgroupMask) -> u32 {
480            let mut result = 0;
481
482            unsafe {
483                asm! {
484                    "%u32 = OpTypeInt 32 0",
485                    "%subgroup = OpConstant %u32 {subgroup}",
486                    "%value = OpLoad _ {value}",
487                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
488                    "OpStore {result} %result",
489                    subgroup = const SUBGROUP,
490                    groupop = const ($group_op as u32),
491                    value = in(reg) &value,
492                    result = in(reg) &mut result,
493                }
494            }
495
496            result
497        }
498    };
499}
500
501macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
502macro_subgroup_ballot_bit_count!(
503    subgroup_ballot_inclusive_bit_count,
504    GroupOperation::InclusiveScan
505);
506macro_subgroup_ballot_bit_count!(
507    subgroup_ballot_exclusive_bit_count,
508    GroupOperation::ExclusiveScan
509);
510
511/// Find the least significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
512///
513/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
514///
515/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
516///
517/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
518///
519/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
520///
521/// Requires Capability `GroupNonUniformBallot`.
522///
523/// # Safety
524/// * This function is safe
525/// * Result is undefined if `id` is an inactive invocation or out of bounds
526#[spirv_std_macros::gpu_only]
527#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
528#[inline]
529pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
530    let mut result = 0;
531
532    unsafe {
533        asm! {
534            "%u32 = OpTypeInt 32 0",
535            "%subgroup = OpConstant %u32 {subgroup}",
536            "%value = OpLoad _ {value}",
537            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
538            "OpStore {result} %result",
539            subgroup = const SUBGROUP,
540            value = in(reg) &value,
541            result = in(reg) &mut result,
542        }
543    }
544
545    result
546}
547
548/// Find the most significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
549///
550/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
551///
552/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
553///
554/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
555///
556/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
557///
558/// Requires Capability `GroupNonUniformBallot`.
559#[spirv_std_macros::gpu_only]
560#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
561#[inline]
562pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
563    let mut result = 0;
564
565    unsafe {
566        asm! {
567            "%u32 = OpTypeInt 32 0",
568            "%subgroup = OpConstant %u32 {subgroup}",
569            "%value = OpLoad _ {value}",
570            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
571            "OpStore {result} %result",
572            subgroup = const SUBGROUP,
573            value = in(reg) &value,
574            result = in(reg) &mut result,
575        }
576    }
577
578    result
579}
580
581/// Result is the `value` of the invocation identified by the id `id`.
582///
583/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
584///
585/// Execution is a Scope that identifies the group of invocations affected by this command.
586///
587/// The type of `value` must be the same as Result Type.
588///
589/// `id` must be a scalar of integer type, whose Signedness operand is 0.
590///
591/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
592///
593/// Requires Capability `GroupNonUniformShuffle`.
594///
595/// # Safety
596/// * This function is safe
597/// * Result is undefined if `id` is an inactive invocation or out of bounds
598#[spirv_std_macros::gpu_only]
599#[doc(alias = "OpGroupNonUniformShuffle")]
600#[inline]
601pub fn subgroup_shuffle<T: VectorOrScalar>(value: T, id: u32) -> T {
602    let mut result = T::default();
603
604    unsafe {
605        asm! {
606            "%u32 = OpTypeInt 32 0",
607            "%subgroup = OpConstant %u32 {subgroup}",
608            "%value = OpLoad _ {value}",
609            "%id = OpLoad _ {id}",
610            "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
611            "OpStore {result} %result",
612            subgroup = const SUBGROUP,
613            value = in(reg) &value,
614            id = in(reg) &id,
615            result = in(reg) &mut result,
616        }
617    }
618
619    result
620}
621
622/// Result is the `value` of the invocation identified by the current invocation’s id within the group xor’ed with Mask.
623///
624/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
625///
626/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
627///
628/// The type of `value` must be the same as Result Type.
629///
630/// Mask must be a scalar of integer type, whose Signedness operand is 0.
631///
632/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group.
633///
634/// Requires Capability `GroupNonUniformShuffle`.
635///
636/// # Safety
637/// * This function is safe
638/// * Result is undefined if current invocation’s id within the group xor’ed with `mask` is an inactive invocation or out of bounds
639#[spirv_std_macros::gpu_only]
640#[doc(alias = "OpGroupNonUniformShuffleXor")]
641#[inline]
642pub fn subgroup_shuffle_xor<T: VectorOrScalar>(value: T, mask: u32) -> T {
643    let mut result = T::default();
644
645    unsafe {
646        asm! {
647            "%u32 = OpTypeInt 32 0",
648            "%subgroup = OpConstant %u32 {subgroup}",
649            "%value = OpLoad _ {value}",
650            "%mask = OpLoad _ {mask}",
651            "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
652            "OpStore {result} %result",
653            subgroup = const SUBGROUP,
654            value = in(reg) &value,
655            mask = in(reg) &mask,
656            result = in(reg) &mut result,
657        }
658    }
659
660    result
661}
662
663/// Result is the `value` of the invocation identified by the current invocation’s id within the group - Delta.
664///
665/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
666///
667/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
668///
669/// The type of `value` must be the same as Result Type.
670///
671/// Delta must be a scalar of integer type, whose Signedness operand is 0.
672///
673/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive.
674///
675/// Requires Capability `GroupNonUniformShuffleRelative`.
676///
677/// # Safety
678/// * This function is safe
679/// * Result is undefined if `delta` is greater than the current invocation’s id within the group or if the selected lane is inactive
680#[spirv_std_macros::gpu_only]
681#[doc(alias = "OpGroupNonUniformShuffleUp")]
682#[inline]
683pub fn subgroup_shuffle_up<T: VectorOrScalar>(value: T, delta: u32) -> T {
684    let mut result = T::default();
685
686    unsafe {
687        asm! {
688            "%u32 = OpTypeInt 32 0",
689            "%subgroup = OpConstant %u32 {subgroup}",
690            "%value = OpLoad _ {value}",
691            "%delta = OpLoad _ {delta}",
692            "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
693            "OpStore {result} %result",
694            subgroup = const SUBGROUP,
695            value = in(reg) &value,
696            delta = in(reg) &delta,
697            result = in(reg) &mut result,
698        }
699    }
700
701    result
702}
703
704/// Result is the `value` of the invocation identified by the current invocation’s id within the group + Delta.
705///
706/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
707///
708/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
709///
710/// The type of `value` must be the same as Result Type.
711///
712/// Delta must be a scalar of integer type, whose Signedness operand is 0.
713///
714/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group.
715///
716/// Requires Capability `GroupNonUniformShuffleRelative`.
717///
718/// # Safety
719/// * This function is safe
720/// * Result is undefined if `delta` is greater than or equal to the size of the group, or if the current invocation’s id within the group + `delta` is either an inactive invocation or greater than or equal to the size of the group.
721#[spirv_std_macros::gpu_only]
722#[doc(alias = "OpGroupNonUniformShuffleDown")]
723#[inline]
724pub fn subgroup_shuffle_down<T: VectorOrScalar>(value: T, delta: u32) -> T {
725    let mut result = T::default();
726
727    unsafe {
728        asm! {
729            "%u32 = OpTypeInt 32 0",
730            "%subgroup = OpConstant %u32 {subgroup}",
731            "%value = OpLoad _ {value}",
732            "%delta = OpLoad _ {delta}",
733            "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
734            "OpStore {result} %result",
735            subgroup = const SUBGROUP,
736            value = in(reg) &value,
737            delta = in(reg) &delta,
738            result = in(reg) &mut result,
739        }
740    }
741
742    result
743}
744
// Generates one wrapper function per `$name, $group_op` pair around the SPIR-V
// instruction `$asm_op`.
//
// * `$scalar`   - type required for `VectorOrScalar::Scalar` of the operand
// * `$asm_op`   - instruction mnemonic, also registered as a doc alias
// * `$group_op` - expression cast to `u32` and passed as the group operation operand
// * `$docs`     - documentation attached to every generated function
macro_rules! macro_subgroup_op {
    ($scalar:ty, $asm_op:literal, $($name:ident, $group_op:expr),+; $docs:literal) => { $(
        #[doc = $docs]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $asm_op)]
        #[inline]
        pub fn $name<I: VectorOrScalar<Scalar = $scalar>>(value: I) -> I {
            let mut combined = I::default();

            // SAFETY: the assembly reads the local `value` and writes only to the local `combined`.
            unsafe {
                asm! {
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    concat!("%result = ", $asm_op, " _ %subgroup {groupop} %value"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($group_op as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut combined,
                }
            }

            combined
        }
    )+ };
}
772
// Generates a single `unsafe` wrapper that runs the SPIR-V instruction `$op` with the group
// operation fixed to `GroupOperation::ClusteredReduce`. The cluster size is supplied through
// the const generic `CLUSTER_SIZE` and passed to the instruction as an `OpConstant`.
macro_rules! macro_subgroup_op_clustered {
    ($elem:ty, $op:literal, $func:ident; $doc:literal) => {
        #[doc = $doc]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $op)]
        #[inline]
        pub unsafe fn $func<const CLUSTER_SIZE: u32, I: VectorOrScalar<Scalar = $elem>>(
            value: I,
        ) -> I {
            // Compile-time validation of the cluster size. The remaining requirement, that
            // `ClusterSize` must not be greater than the size of the group, cannot be verified
            // with static assertions and is left to the caller.
            const {
                assert!(CLUSTER_SIZE >= 1, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
            }

            // Output slot; the default value is replaced by the `OpStore` below.
            let mut combined = I::default();

            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $op, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut combined,
                );
            }

            combined
        }
    };
}
814
815// add
816macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
817An integer add group operation of all `value` operands contributed by active invocations in the group.
818
819Result Type must be a scalar or vector of integer type.
820
821Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
822
823The identity I for Operation is 0.
824
825The type of `value` must be the same as Result Type.
826
827Requires Capability `GroupNonUniformArithmetic`.
828");
829macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
830An integer add group operation of all `value` operands contributed by active invocations in the group.
831
832Result Type must be a scalar or vector of integer type.
833
834Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
835
836The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
837
838The type of `value` must be the same as Result Type.
839
840`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
841
842Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
843
844# Safety
845* `ClusterSize` must not be greater than the size of the group
846");
847macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
848A floating point add group operation of all `value` operands contributed by active invocations in the group.
849
850Result Type must be a scalar or vector of floating-point type.
851
852Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
853
854The identity I for Operation is 0.
855
856The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
857
858Requires Capability `GroupNonUniformArithmetic`.
859");
860macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
861A floating point add group operation of all `value` operands contributed by active invocations in the group.
862
863Result Type must be a scalar or vector of floating-point type.
864
865Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
866
867The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
868
869The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
870
871`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
872
873Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
874
875# Safety
876* `ClusterSize` must not be greater than the size of the group
877");
878
879// mul
880macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
881An integer multiply group operation of all `value` operands contributed by active invocations in the group.
882
883Result Type must be a scalar or vector of integer type.
884
885Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
886
887The identity I for Operation is 1.
888
889The type of `value` must be the same as Result Type.
890
891Requires Capability `GroupNonUniformArithmetic`.
892");
893macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
894An integer multiply group operation of all `value` operands contributed by active invocations in the group.
895
896Result Type must be a scalar or vector of integer type.
897
898Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
899
900The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
901
902The type of `value` must be the same as Result Type.
903
904`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
905
906Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
907
908# Safety
909* `ClusterSize` must not be greater than the size of the group
910");
911macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
912A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
913
914Result Type must be a scalar or vector of floating-point type.
915
916Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
917
918The identity I for Operation is 1.
919
920The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
921
922Requires Capability `GroupNonUniformArithmetic`.
923");
924macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
925A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
926
927Result Type must be a scalar or vector of floating-point type.
928
929Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
930
931The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
932
933The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
934
935`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
936
937Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
938
939# Safety
940* `ClusterSize` must not be greater than the size of the group
941");
942
943// min
944macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
945A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
946
947Result Type must be a scalar or vector of integer type.
948
949Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
950
951The identity I for Operation is `INT_MAX`.
952
953The type of `value` must be the same as Result Type.
954
955Requires Capability `GroupNonUniformArithmetic`.
956");
957macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
958A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
959
960Result Type must be a scalar or vector of integer type.
961
962Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
963
964The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
965
966The type of `value` must be the same as Result Type.
967
968`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
969
970Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
971
972# Safety
973* `ClusterSize` must not be greater than the size of the group
974");
975macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
976An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
977
978Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
979
980Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
981
982The identity I for Operation is `UINT_MAX`.
983
984The type of `value` must be the same as Result Type.
985
986Requires Capability `GroupNonUniformArithmetic`.
987");
988macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
989An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
990
991Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
992
993Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
994
995The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
996
997The type of `value` must be the same as Result Type.
998
999`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1000
1001Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1002
1003# Safety
1004* `ClusterSize` must not be greater than the size of the group
1005");
1006macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
1007A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1008
1009Result Type must be a scalar or vector of floating-point type.
1010
1011Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1012
1013The identity I for Operation is +INF.
1014
1015The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1016
1017Requires Capability `GroupNonUniformArithmetic`.
1018");
1019macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
1020A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1021
1022Result Type must be a scalar or vector of floating-point type.
1023
1024Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1025
1026The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1027
1028The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1029
1030`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1031
1032Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1033
1034# Safety
1035* `ClusterSize` must not be greater than the size of the group
1036");
1037
1038// max
1039macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
1040A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1041
1042Result Type must be a scalar or vector of integer type.
1043
1044Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1045
1046The identity I for Operation is `INT_MIN`.
1047
1048The type of `value` must be the same as Result Type.
1049
1050Requires Capability `GroupNonUniformArithmetic`.
1051");
1052macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
1053A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1054
1055Result Type must be a scalar or vector of integer type.
1056
1057Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1058
1059The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1060
1061The type of `value` must be the same as Result Type.
1062
1063`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1064
1065Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1066
1067# Safety
1068* `ClusterSize` must not be greater than the size of the group
1069");
1070macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
1071An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1072
1073Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1074
1075Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1076
1077The identity I for Operation is 0.
1078
1079The type of `value` must be the same as Result Type.
1080
1081Requires Capability `GroupNonUniformArithmetic`.
1082");
1083macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
1084An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1085
1086Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1087
1088Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1089
1090The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1091
1092The type of `value` must be the same as Result Type.
1093
1094`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1095
1096Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1097
1098# Safety
1099* `ClusterSize` must not be greater than the size of the group
1100");
1101macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
1102A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1103
1104Result Type must be a scalar or vector of floating-point type.
1105
1106Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1107
1108The identity I for Operation is -INF.
1109
1110The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1111
1112Requires Capability `GroupNonUniformArithmetic`.
1113");
1114macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
1115A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1116
1117Result Type must be a scalar or vector of floating-point type.
1118
1119Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1120
1121The identity I for Operation is -INF.
1122
1123The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1124
1125Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1126
1127# Safety
1128* `ClusterSize` must not be greater than the size of the group
1129");
1130
1131// and
1132macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
1133A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1134
1135Result Type must be a scalar or vector of integer type.
1136
1137Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1138
1139The identity I for Operation is ~0.
1140
1141The type of `value` must be the same as Result Type.
1142
1143Requires Capability `GroupNonUniformArithmetic`.
1144");
1145macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
1146A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1147
1148Result Type must be a scalar or vector of integer type.
1149
1150Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1151
1152The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1153
1154The type of `value` must be the same as Result Type.
1155
1156`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1157
1158Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1159
1160# Safety
1161* `ClusterSize` must not be greater than the size of the group
1162");
1163
1164// or
1165macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
1166A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1167
1168Result Type must be a scalar or vector of integer type.
1169
1170Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1171
1172The identity I for Operation is 0.
1173
1174The type of `value` must be the same as Result Type.
1175
1176Requires Capability `GroupNonUniformArithmetic`.
1177");
1178macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
1179A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1180
1181Result Type must be a scalar or vector of integer type.
1182
1183Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1184
1185The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1186
1187The type of `value` must be the same as Result Type.
1188
1189`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1190
1191Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1192
1193# Safety
1194* `ClusterSize` must not be greater than the size of the group
1195");
1196
1197// xor
1198macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
1199A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1200
1201Result Type must be a scalar or vector of integer type.
1202
1203Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1204
1205The identity I for Operation is 0.
1206
1207The type of `value` must be the same as Result Type.
1208
1209Requires Capability `GroupNonUniformArithmetic`.
1210");
1211macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
1212A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1213
1214Result Type must be a scalar or vector of integer type.
1215
1216Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1217
1218The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1219
1220The type of `value` must be the same as Result Type.
1221
1222`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1223
1224Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1225
1226# Safety
1227* `ClusterSize` must not be greater than the size of the group
1228");
1229
1230// logical and
1231macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
1232A logical and group operation of all `value` operands contributed by active invocations in the group.
1233
1234Result Type must be a scalar or vector of Boolean type.
1235
1236Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1237
1238The identity I for Operation is ~0.
1239
1240The type of `value` must be the same as Result Type.
1241
1242Requires Capability `GroupNonUniformArithmetic`.
1243");
1244macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
1245A logical and group operation of all `value` operands contributed by active invocations in the group.
1246
1247Result Type must be a scalar or vector of Boolean type.
1248
1249Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1250
1251The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1252
1253The type of `value` must be the same as Result Type.
1254
1255`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1256
1257Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1258
1259# Safety
1260* `ClusterSize` must not be greater than the size of the group
1261");
1262
1263// logical or
1264macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
1265A logical or group operation of all `value` operands contributed by active invocations in the group.
1266
1267Result Type must be a scalar or vector of Boolean type.
1268
1269Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1270
1271The identity I for Operation is 0.
1272
1273The type of `value` must be the same as Result Type.
1274
1275Requires Capability `GroupNonUniformArithmetic`.
1276");
1277macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
1278A logical or group operation of all `value` operands contributed by active invocations in the group.
1279
1280Result Type must be a scalar or vector of Boolean type.
1281
1282Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1283
1284The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1285
1286The type of `value` must be the same as Result Type.
1287
1288`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1289
1290Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1291
1292# Safety
1293* `ClusterSize` must not be greater than the size of the group
1294");
1295
1296// logical xor
1297macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
1298A logical xor group operation of all `value` operands contributed by active invocations in the group.
1299
1300Result Type must be a scalar or vector of Boolean type.
1301
1302Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1303
1304The identity I for Operation is 0.
1305
1306The type of `value` must be the same as Result Type.
1307
1308Requires Capability `GroupNonUniformArithmetic`.
1309");
1310macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
1311A logical xor group operation of all `value` operands contributed by active invocations in the group.
1312
1313Result Type must be a scalar or vector of Boolean type.
1314
1315Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1316
1317The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1318
1319The type of `value` must be the same as Result Type.
1320
1321`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1322
1323Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1324
1325# Safety
1326* `ClusterSize` must not be greater than the size of the group
1327");
1328
1329/// Result is the `value` of the invocation within the quad with a quad index equal to `index`.
1330///
1331/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1332///
1333/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1334///
1335/// The type of `value` must be the same as Result Type.
1336///
1337/// `index` must be a scalar of integer type, whose Signedness operand is 0.
1338///
1339/// Before version 1.5, `index` must come from a constant instruction. Starting with version 1.5, `index` must be dynamically uniform.
1340///
1341/// If the value of `index` is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
1342///
1343/// Requires Capability `GroupNonUniformQuad`.
1344///
1345/// # Safety
1346/// * This function is safe
1347/// * Result is undefined if the value of `index` is greater than or equal to 4, or refers to an inactive invocation
1348#[spirv_std_macros::gpu_only]
1349#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
1350#[inline]
1351pub fn subgroup_quad_broadcast<T: VectorOrScalar>(value: T, index: u32) -> T {
1352    let mut result = T::default();
1353
1354    unsafe {
1355        asm! {
1356            "%u32 = OpTypeInt 32 0",
1357            "%subgroup = OpConstant %u32 {subgroup}",
1358            "%value = OpLoad _ {value}",
1359            "%index = OpLoad _ {index}",
1360            "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
1361            "OpStore {result} %result",
1362            subgroup = const SUBGROUP,
1363            value = in(reg) &value,
1364            index = in(reg) &index,
1365            result = in(reg) &mut result,
1366        }
1367    }
1368
1369    result
1370}
1371
/// The kind of swap performed by [`subgroup_quad_swap`] (`OpGroupNonUniformQuadSwap`).
///
/// Each variant's discriminant is the SPIR-V Direction value it stands for. Direction must be a scalar of
/// integer type, whose Signedness operand is 0, and must come from a constant instruction, which is why
/// [`subgroup_quad_swap`] takes it as a `const DIRECTION: u32` generic. Cast a variant to pass it:
/// `subgroup_quad_swap::<{ QuadDirection::Horizontal as u32 }, _>(value)`.
///
/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope
/// instance. The invocation providing this `value` is determined according to Direction.
///
/// Requires Capability `GroupNonUniformQuad`.
// Derives mirror the other public value types in this file so directions can be copied, compared and printed.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum QuadDirection {
    /// A Direction of 0 indicates a horizontal swap;
    /// - Invocations with quad indices of 0 and 1 swap values
    /// - Invocations with quad indices of 2 and 3 swap values
    Horizontal = 0,
    /// A Direction of 1 indicates a vertical swap;
    /// - Invocations with quad indices of 0 and 2 swap values
    /// - Invocations with quad indices of 1 and 3 swap values
    Vertical = 1,
    /// A Direction of 2 indicates a diagonal swap;
    /// - Invocations with quad indices of 0 and 3 swap values
    /// - Invocations with quad indices of 1 and 2 swap values
    Diagonal = 2,
}
1395
1396/// Swap the `value` of the invocation within the quad with another invocation in the quad using Direction.
1397///
1398/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1399///
1400/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1401///
1402/// The type of `value` must be the same as Result Type.
1403///
1404/// Direction is the kind of swap to perform.
1405///
1406/// Direction must be a scalar of integer type, whose Signedness operand is 0.
1407///
1408/// Direction must come from a constant instruction.
1409///
1410/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
1411///
1412/// A Direction of 0 indicates a horizontal swap;
1413/// - Invocations with quad indices of 0 and 1 swap values
1414/// - Invocations with quad indices of 2 and 3 swap values
1415/// A Direction of 1 indicates a vertical swap;
1416/// - Invocations with quad indices of 0 and 2 swap values
1417/// - Invocations with quad indices of 1 and 3 swap values
1418/// A Direction of 2 indicates a diagonal swap;
1419/// - Invocations with quad indices of 0 and 3 swap values
1420/// - Invocations with quad indices of 1 and 2 swap values
1421///
1422/// Direction must be one of the above values.
1423///
1424/// If an active invocation reads `value` from an inactive invocation, the resulting value is undefined.
1425///
1426/// Requires Capability `GroupNonUniformQuad`.
1427///
1428/// # Safety
1429/// * This function is safe
1430/// * Result is undefined if an active invocation reads `value` from an inactive invocation
1431#[spirv_std_macros::gpu_only]
1432#[doc(alias = "OpGroupNonUniformQuadSwap")]
1433#[inline]
1434pub fn subgroup_quad_swap<const DIRECTION: u32, T: VectorOrScalar>(value: T) -> T {
1435    let mut result = T::default();
1436
1437    unsafe {
1438        asm! {
1439            "%u32 = OpTypeInt 32 0",
1440            "%subgroup = OpConstant %u32 {subgroup}",
1441            "%direction = OpConstant %u32 {direction}",
1442            "%value = OpLoad _ {value}",
1443            "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
1444            "OpStore {result} %result",
1445            subgroup = const SUBGROUP,
1446            direction = const DIRECTION,
1447            value = in(reg) &value,
1448            result = in(reg) &mut result,
1449        }
1450    }
1451
1452    result
1453}