spirv_std/arch/
subgroup.rs

1#[cfg(target_arch = "spirv")]
2use crate::arch::barrier;
3use crate::float::Float;
4use crate::integer::{Integer, SignedInteger, UnsignedInteger};
5#[cfg(target_arch = "spirv")]
6use crate::memory::{Scope, Semantics};
7use crate::vector::VectorOrScalar;
8#[cfg(target_arch = "spirv")]
9use core::arch::asm;
10
/// `Scope::Subgroup` as the raw `u32` that the functions in this module pass
/// as their scope operand.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
13
14/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup.
15/// Mostly used in group ballot operations.
16pub type SubgroupMask = glam::UVec4;
17
/// Defines the class of group operation.
///
/// The discriminants are the raw values passed as the group-operation operand
/// of the SPIR-V instructions emitted by this module (`GroupOperation::X as u32`).
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GroupOperation {
    /// A reduction operation for all values of a specific value X specified by invocations within a workgroup.
    Reduce = 0,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)]
    InclusiveScan = 1,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)].
    ExclusiveScan = 2,
    /// The [`GroupOperation`] `ClusteredReduce`.
    ///
    /// Instructions taking a [`GroupOperation`] require an additional `ClusterSize` operand when the operation is
    /// `ClusteredReduce`. To map this requirement into Rust, the functions of this module come in two flavours: base
    /// variants, each fixed to one of the other operations, and `_clustered` variants that are fixed to
    /// `ClusteredReduce` and take the additional `ClusterSize` operand as a const generic. This keeps
    /// `ClusteredReduce` from being used accidentally without a cluster size.
    ClusteredReduce = 3,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedReduceNV = 6,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedInclusiveScanNV = 7,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedExclusiveScanNV = 8,
}
51
52/// The function `subgroupBarrier()` enforces that all active invocations within a
53/// subgroup must execute this function before any are allowed to continue their
54/// execution, and the results of any memory stores performed using coherent
55/// variables performed prior to the call will be visible to any future
56/// coherent access to the same memory performed by any other shader invocation
57/// within the same subgroup.
58///
59/// Requires Capability `GroupNonUniform`.
60#[spirv_std_macros::gpu_only]
61#[doc(alias = "subgroupBarrier")]
62#[inline]
63pub fn subgroup_barrier() {
64    barrier::control_barrier::<
65        SUBGROUP,
66        SUBGROUP,
67        {
68            Semantics::ACQUIRE_RELEASE.bits()
69                | Semantics::UNIFORM_MEMORY.bits()
70                | Semantics::WORKGROUP_MEMORY.bits()
71                | Semantics::IMAGE_MEMORY.bits()
72        },
73    >();
74}
75
76/// The function `subgroupMemoryBarrier()` enforces the ordering of all memory
77/// transactions issued within a single shader invocation, as viewed by other
78/// invocations in the same subgroup.
79///
80/// Requires Capability `GroupNonUniform`.
81#[spirv_std_macros::gpu_only]
82#[doc(alias = "subgroupMemoryBarrier")]
83#[inline]
84pub fn subgroup_memory_barrier() {
85    barrier::memory_barrier::<
86        SUBGROUP,
87        {
88            Semantics::ACQUIRE_RELEASE.bits()
89                | Semantics::UNIFORM_MEMORY.bits()
90                | Semantics::WORKGROUP_MEMORY.bits()
91                | Semantics::IMAGE_MEMORY.bits()
92        },
93    >();
94}
95
96/// The function `subgroupMemoryBarrierBuffer()` enforces the ordering of all
97/// memory transactions to buffer variables issued within a single shader
98/// invocation, as viewed by other invocations in the same subgroup.
99///
100/// Requires Capability `GroupNonUniform`.
101#[spirv_std_macros::gpu_only]
102#[doc(alias = "subgroupMemoryBarrierBuffer")]
103#[inline]
104pub fn subgroup_memory_barrier_buffer() {
105    barrier::memory_barrier::<
106        SUBGROUP,
107        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() },
108    >();
109}
110
111/// The function `subgroupMemoryBarrierShared()` enforces the ordering of all
112/// memory transactions to shared variables issued within a single shader
113/// invocation, as viewed by other invocations in the same subgroup.
114///
115/// Only available in compute shaders.
116///
117/// Requires Capability `GroupNonUniform`.
118#[spirv_std_macros::gpu_only]
119#[doc(alias = "subgroupMemoryBarrierShared")]
120#[inline]
121pub fn subgroup_memory_barrier_shared() {
122    barrier::memory_barrier::<
123        SUBGROUP,
124        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() },
125    >();
126}
127
128/// The function `subgroupMemoryBarrierImage()` enforces the ordering of all
129/// memory transactions to images issued within a single shader invocation, as
130/// viewed by other invocations in the same subgroup.
131///
132/// Requires Capability `GroupNonUniform`.
133#[spirv_std_macros::gpu_only]
134#[doc(alias = "subgroupMemoryBarrierImage")]
135#[inline]
136pub fn subgroup_memory_barrier_image() {
137    barrier::memory_barrier::<
138        SUBGROUP,
139        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() },
140    >();
141}
142
143/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false.
144///
145/// Result Type must be a Boolean type.
146///
147/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
148///
149/// Requires Capability `GroupNonUniform`.
150#[spirv_std_macros::gpu_only]
151#[doc(alias = "OpGroupNonUniformElect")]
152#[inline]
153pub fn subgroup_elect() -> bool {
154    let mut result = false;
155
156    unsafe {
157        asm! {
158            "%bool = OpTypeBool",
159            "%u32 = OpTypeInt 32 0",
160            "%subgroup = OpConstant %u32 {subgroup}",
161            "%result = OpGroupNonUniformElect %bool %subgroup",
162            "OpStore {result} %result",
163            subgroup = const SUBGROUP,
164            result = in(reg) &mut result,
165        }
166    }
167
168    result
169}
170
171/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for all active invocations in the group, otherwise the result is false.
172///
173/// Result Type must be a Boolean type.
174///
175/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
176///
177/// `predicate` must be a Boolean type.
178///
179/// Requires Capability `GroupNonUniformVote`.
180#[spirv_std_macros::gpu_only]
181#[doc(alias = "OpGroupNonUniformAll")]
182#[inline]
183pub fn subgroup_all(predicate: bool) -> bool {
184    let mut result = false;
185
186    unsafe {
187        asm! {
188            "%bool = OpTypeBool",
189            "%u32 = OpTypeInt 32 0",
190            "%subgroup = OpConstant %u32 {subgroup}",
191            "%predicate = OpLoad _ {predicate}",
192            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
193            "OpStore {result} %result",
194            subgroup = const SUBGROUP,
195            predicate = in(reg) &predicate,
196            result = in(reg) &mut result,
197        }
198    }
199
200    result
201}
202
203/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for any active invocation in the group, otherwise the result is false.
204///
205/// Result Type must be a Boolean type.
206///
207/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
208///
209/// `predicate` must be a Boolean type.
210///
211/// Requires Capability `GroupNonUniformVote`.
212#[spirv_std_macros::gpu_only]
213#[doc(alias = "OpGroupNonUniformAny")]
214#[inline]
215pub fn subgroup_any(predicate: bool) -> bool {
216    let mut result = false;
217
218    unsafe {
219        asm! {
220            "%bool = OpTypeBool",
221            "%u32 = OpTypeInt 32 0",
222            "%subgroup = OpConstant %u32 {subgroup}",
223            "%predicate = OpLoad _ {predicate}",
224            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
225            "OpStore {result} %result",
226            subgroup = const SUBGROUP,
227            predicate = in(reg) &predicate,
228            result = in(reg) &mut result,
229        }
230    }
231
232    result
233}
234
235/// Evaluates a `value` for all active invocations in the group. The result is true if `value` is equal for all active invocations in the group. Otherwise, the result is false.
236///
237/// Result Type must be a Boolean type.
238///
239/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
240///
241/// `value` must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used.
242///
243/// Requires Capability `GroupNonUniformVote`.
244#[spirv_std_macros::gpu_only]
245#[doc(alias = "OpGroupNonUniformAllEqual")]
246#[inline]
247pub fn subgroup_all_equal<T: VectorOrScalar>(value: T) -> bool {
248    let mut result = false;
249
250    unsafe {
251        asm! {
252            "%bool = OpTypeBool",
253            "%u32 = OpTypeInt 32 0",
254            "%subgroup = OpConstant %u32 {subgroup}",
255            "%value = OpLoad _ {value}",
256            "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
257            "OpStore {result} %result",
258            subgroup = const SUBGROUP,
259            value = in(reg) &value,
260            result = in(reg) &mut result,
261        }
262    }
263
264    result
265}
266
267/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
268///
269/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
270///
271/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
272///
273/// The type of `value` must be the same as Result Type.
274///
275/// `id` must be a scalar of integer type, whose Signedness operand is 0.
276///
277/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
278///
279/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
280///
281/// Requires Capability `GroupNonUniformBallot`.
282///
283/// # Safety
284/// * `id` must not be dynamically uniform
285/// * before 1.5: `id` must be constant
286/// * Result is undefined if `id` is an inactive invocation or out of bounds
287#[spirv_std_macros::gpu_only]
288#[doc(alias = "OpGroupNonUniformBroadcast")]
289#[inline]
290pub unsafe fn subgroup_broadcast<T: VectorOrScalar>(value: T, id: u32) -> T {
291    let mut result = T::default();
292
293    unsafe {
294        asm! {
295            "%u32 = OpTypeInt 32 0",
296            "%subgroup = OpConstant %u32 {subgroup}",
297            "%value = OpLoad _ {value}",
298            "%id = OpLoad _ {id}",
299            "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
300            "OpStore {result} %result",
301            subgroup = const SUBGROUP,
302            value = in(reg) &value,
303            id = in(reg) &id,
304            result = in(reg) &mut result,
305        }
306    }
307
308    result
309}
310
311/// Result is the `value` of the invocation from the active invocation with the lowest id in the group to all active invocations in the group.
312///
313/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
314///
315/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
316///
317/// The type of `value` must be the same as Result Type.
318///
319/// Requires Capability `GroupNonUniformBallot`.
320#[spirv_std_macros::gpu_only]
321#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
322#[inline]
323pub fn subgroup_broadcast_first<T: VectorOrScalar>(value: T) -> T {
324    let mut result = T::default();
325
326    unsafe {
327        asm! {
328            "%u32 = OpTypeInt 32 0",
329            "%subgroup = OpConstant %u32 {subgroup}",
330            "%value = OpLoad _ {value}",
331            "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
332            "OpStore {result} %result",
333            subgroup = const SUBGROUP,
334            value = in(reg) &value,
335            result = in(reg) &mut result,
336        }
337    }
338
339    result
340}
341
342/// Result is a bitfield value combining the `predicate` value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the `predicate` for that invocation evaluated to true; otherwise, it is set to zero.
343///
344/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
345///
346/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
347///
348/// Execution is a Scope that identifies the group of invocations affected by this command.
349///
350/// `predicate` must be a Boolean type.
351///
352/// Requires Capability `GroupNonUniformBallot`.
353#[spirv_std_macros::gpu_only]
354#[doc(alias = "OpGroupNonUniformBallot")]
355#[inline]
356pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
357    let mut result = SubgroupMask::default();
358
359    unsafe {
360        asm! {
361            "%u32 = OpTypeInt 32 0",
362            "%subgroup = OpConstant %u32 {subgroup}",
363            "%predicate = OpLoad _ {predicate}",
364            "%result = OpGroupNonUniformBallot typeof*{result} %subgroup %predicate",
365            "OpStore {result} %result",
366            subgroup = const SUBGROUP,
367            predicate = in(reg) &predicate,
368            result = in(reg) &mut result,
369        }
370    }
371
372    result
373}
374
375/// Evaluates a `value` for all active invocations in the group, resulting in true if the bit in `value` for the corresponding invocation is set to one, otherwise the result is false.
376///
377/// Result Type must be a Boolean type.
378///
379/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
380///
381/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
382///
383/// Behavior is undefined unless `value` is the same for all invocations that execute the same dynamic instance of this instruction.
384///
385/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
386///
387/// Requires Capability `GroupNonUniformBallot`.
388///
389/// # Safety
390/// * `value` must be the same for all dynamic instances of this instruction
391#[spirv_std_macros::gpu_only]
392#[doc(alias = "OpGroupNonUniformInverseBallot")]
393#[inline]
394pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
395    let mut result = false;
396
397    unsafe {
398        asm! {
399            "%bool = OpTypeBool",
400            "%u32 = OpTypeInt 32 0",
401            "%subgroup = OpConstant %u32 {subgroup}",
402            "%value = OpLoad _ {value}",
403            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
404            "OpStore {result} %result",
405            subgroup = const SUBGROUP,
406            value = in(reg) &value,
407            result = in(reg) &mut result,
408        }
409    }
410
411    result
412}
413
414/// Evaluates a value for all active invocations in the group, resulting in true if the bit in `value` that corresponds to `index` is set to one, otherwise the result is false.
415///
416/// Result Type must be a Boolean type.
417///
418/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
419///
420/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
421///
422/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
423///
424/// `index` must be a scalar of integer type, whose Signedness operand is 0.
425///
426/// The resulting value is undefined if `index` is greater than or equal to the size of the group.
427///
428/// Requires Capability `GroupNonUniformBallot`.
429///
430/// # Safety
431/// * This function is safe
432/// * Result is undefined if `id` is out of bounds
433#[spirv_std_macros::gpu_only]
434#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
435#[inline]
436pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
437    let mut result = false;
438
439    unsafe {
440        asm! {
441            "%bool = OpTypeBool",
442            "%u32 = OpTypeInt 32 0",
443            "%subgroup = OpConstant %u32 {subgroup}",
444            "%value = OpLoad _ {value}",
445            "%index = OpLoad _ {index}",
446            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
447            "OpStore {result} %result",
448            subgroup = const SUBGROUP,
449            value = in(reg) &value,
450            index = in(reg) &index,
451            result = in(reg) &mut result,
452        }
453    }
454
455    result
456}
457
458macro_rules! macro_subgroup_ballot_bit_count {
459    ($name:ident, $group_op:expr) => {
460        /// Result is the number of bits that are set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations.
461        ///
462        /// Result Type must be a scalar of integer type, whose Signedness operand is 0.
463        ///
464        /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
465        ///
466        /// The identity I for Operation is 0.
467        ///
468        /// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
469        ///
470        /// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
471        ///
472        /// Requires Capability `GroupNonUniformBallot`.
473        #[spirv_std_macros::gpu_only]
474        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
475        #[inline]
476        pub fn $name(value: SubgroupMask) -> u32 {
477            let mut result = 0;
478
479            unsafe {
480                asm! {
481                    "%u32 = OpTypeInt 32 0",
482                    "%subgroup = OpConstant %u32 {subgroup}",
483                    "%value = OpLoad _ {value}",
484                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
485                    "OpStore {result} %result",
486                    subgroup = const SUBGROUP,
487                    groupop = const ($group_op as u32),
488                    value = in(reg) &value,
489                    result = in(reg) &mut result,
490                }
491            }
492
493            result
494        }
495    };
496}
497
498macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
499macro_subgroup_ballot_bit_count!(
500    subgroup_ballot_inclusive_bit_count,
501    GroupOperation::InclusiveScan
502);
503macro_subgroup_ballot_bit_count!(
504    subgroup_ballot_exclusive_bit_count,
505    GroupOperation::ExclusiveScan
506);
507
508/// Find the least significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
509///
510/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
511///
512/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
513///
514/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
515///
516/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
517///
518/// Requires Capability `GroupNonUniformBallot`.
519///
520/// # Safety
521/// * This function is safe
522/// * Result is undefined if `id` is an inactive invocation or out of bounds
523#[spirv_std_macros::gpu_only]
524#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
525#[inline]
526pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
527    let mut result = 0;
528
529    unsafe {
530        asm! {
531            "%u32 = OpTypeInt 32 0",
532            "%subgroup = OpConstant %u32 {subgroup}",
533            "%value = OpLoad _ {value}",
534            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
535            "OpStore {result} %result",
536            subgroup = const SUBGROUP,
537            value = in(reg) &value,
538            result = in(reg) &mut result,
539        }
540    }
541
542    result
543}
544
545/// Find the most significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
546///
547/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
548///
549/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
550///
551/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
552///
553/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
554///
555/// Requires Capability `GroupNonUniformBallot`.
556#[spirv_std_macros::gpu_only]
557#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
558#[inline]
559pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
560    let mut result = 0;
561
562    unsafe {
563        asm! {
564            "%u32 = OpTypeInt 32 0",
565            "%subgroup = OpConstant %u32 {subgroup}",
566            "%value = OpLoad _ {value}",
567            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
568            "OpStore {result} %result",
569            subgroup = const SUBGROUP,
570            value = in(reg) &value,
571            result = in(reg) &mut result,
572        }
573    }
574
575    result
576}
577
578/// Result is the `value` of the invocation identified by the id `id`.
579///
580/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
581///
582/// Execution is a Scope that identifies the group of invocations affected by this command.
583///
584/// The type of `value` must be the same as Result Type.
585///
586/// `id` must be a scalar of integer type, whose Signedness operand is 0.
587///
588/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
589///
590/// Requires Capability `GroupNonUniformShuffle`.
591///
592/// # Safety
593/// * This function is safe
594/// * Result is undefined if `id` is an inactive invocation or out of bounds
595#[spirv_std_macros::gpu_only]
596#[doc(alias = "OpGroupNonUniformShuffle")]
597#[inline]
598pub fn subgroup_shuffle<T: VectorOrScalar>(value: T, id: u32) -> T {
599    let mut result = T::default();
600
601    unsafe {
602        asm! {
603            "%u32 = OpTypeInt 32 0",
604            "%subgroup = OpConstant %u32 {subgroup}",
605            "%value = OpLoad _ {value}",
606            "%id = OpLoad _ {id}",
607            "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
608            "OpStore {result} %result",
609            subgroup = const SUBGROUP,
610            value = in(reg) &value,
611            id = in(reg) &id,
612            result = in(reg) &mut result,
613        }
614    }
615
616    result
617}
618
619/// Result is the `value` of the invocation identified by the current invocation’s id within the group xor’ed with Mask.
620///
621/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
622///
623/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
624///
625/// The type of `value` must be the same as Result Type.
626///
627/// Mask must be a scalar of integer type, whose Signedness operand is 0.
628///
629/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group.
630///
631/// Requires Capability `GroupNonUniformShuffle`.
632///
633/// # Safety
634/// * This function is safe
635/// * Result is undefined if current invocation’s id within the group xor’ed with `mask` is an inactive invocation or out of bounds
636#[spirv_std_macros::gpu_only]
637#[doc(alias = "OpGroupNonUniformShuffleXor")]
638#[inline]
639pub fn subgroup_shuffle_xor<T: VectorOrScalar>(value: T, mask: u32) -> T {
640    let mut result = T::default();
641
642    unsafe {
643        asm! {
644            "%u32 = OpTypeInt 32 0",
645            "%subgroup = OpConstant %u32 {subgroup}",
646            "%value = OpLoad _ {value}",
647            "%mask = OpLoad _ {mask}",
648            "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
649            "OpStore {result} %result",
650            subgroup = const SUBGROUP,
651            value = in(reg) &value,
652            mask = in(reg) &mask,
653            result = in(reg) &mut result,
654        }
655    }
656
657    result
658}
659
660/// Result is the `value` of the invocation identified by the current invocation’s id within the group - Delta.
661///
662/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
663///
664/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
665///
666/// The type of `value` must be the same as Result Type.
667///
668/// Delta must be a scalar of integer type, whose Signedness operand is 0.
669///
670/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive.
671///
672/// Requires Capability `GroupNonUniformShuffleRelative`.
673///
674/// # Safety
675/// * This function is safe
676/// * Result is undefined if `delta` is greater than the current invocation’s id within the group or if the selected lane is inactive
677#[spirv_std_macros::gpu_only]
678#[doc(alias = "OpGroupNonUniformShuffleUp")]
679#[inline]
680pub fn subgroup_shuffle_up<T: VectorOrScalar>(value: T, delta: u32) -> T {
681    let mut result = T::default();
682
683    unsafe {
684        asm! {
685            "%u32 = OpTypeInt 32 0",
686            "%subgroup = OpConstant %u32 {subgroup}",
687            "%value = OpLoad _ {value}",
688            "%delta = OpLoad _ {delta}",
689            "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
690            "OpStore {result} %result",
691            subgroup = const SUBGROUP,
692            value = in(reg) &value,
693            delta = in(reg) &delta,
694            result = in(reg) &mut result,
695        }
696    }
697
698    result
699}
700
701/// Result is the `value` of the invocation identified by the current invocation’s id within the group + Delta.
702///
703/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
704///
705/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
706///
707/// The type of `value` must be the same as Result Type.
708///
709/// Delta must be a scalar of integer type, whose Signedness operand is 0.
710///
711/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group.
712///
713/// Requires Capability `GroupNonUniformShuffleRelative`.
714///
715/// # Safety
716/// * This function is safe
717/// * Result is undefined if `delta` is greater than or equal to the size of the group, or if the current invocation’s id within the group + `delta` is either an inactive invocation or greater than or equal to the size of the group.
718#[spirv_std_macros::gpu_only]
719#[doc(alias = "OpGroupNonUniformShuffleDown")]
720#[inline]
721pub fn subgroup_shuffle_down<T: VectorOrScalar>(value: T, delta: u32) -> T {
722    let mut result = T::default();
723
724    unsafe {
725        asm! {
726            "%u32 = OpTypeInt 32 0",
727            "%subgroup = OpConstant %u32 {subgroup}",
728            "%value = OpLoad _ {value}",
729            "%delta = OpLoad _ {delta}",
730            "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
731            "OpStore {result} %result",
732            subgroup = const SUBGROUP,
733            value = in(reg) &value,
734            delta = in(reg) &delta,
735            result = in(reg) &mut result,
736        }
737    }
738
739    result
740}
741
// Generates one public function per `$name, $group_op` pair, all sharing the
// same instruction and documentation:
// * `$scalar`   - required `Scalar` associated type of the `VectorOrScalar` argument
// * `$asm_op`   - SPIR-V instruction name; also used as the rustdoc alias
// * `$name`     - name of the generated function
// * `$group_op` - `GroupOperation` whose `u32` value is passed as the
//                 group-operation operand
// * `$docs`     - doc string attached to every generated function
macro_rules! macro_subgroup_op {
    ($scalar:ty, $asm_op:literal, $($name:ident, $group_op:expr),+; $docs:literal) => { $(
        #[doc = $docs]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $asm_op)]
        #[inline]
        pub fn $name<I: VectorOrScalar<Scalar = $scalar>>(
            value: I,
        ) -> I {
            let mut combined = I::default();

            // SAFETY: the asm only loads from the local `value` and stores to
            // the local `combined`, both through pointers taken right here.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    concat!("%result = ", $asm_op, " _ %subgroup {groupop} %value"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($group_op as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut combined,
                );
            }

            combined
        }
    )+ };
}
769
// Generates the `ClusteredReduce` variant of a subgroup arithmetic wrapper.
//
// Arguments:
// * `$scalar` - required `Scalar` associated type of the `VectorOrScalar` operand
// * `$asm_op` - name of the SPIR-V instruction to emit; also used as a doc alias
// * `$name`   - name of the generated function
// * `$docs`   - documentation string attached to the generated function
//
// The generated function is `unsafe` and takes the cluster size as the const
// generic `CLUSTER_SIZE`, which is emitted as an `OpConstant`.
macro_rules! macro_subgroup_op_clustered {
    ($scalar:ty, $asm_op:literal, $name:ident; $docs:literal) => {
        #[doc = $docs]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $asm_op)]
        #[inline]
        pub unsafe fn $name<const CLUSTER_SIZE: u32, I: VectorOrScalar<Scalar = $scalar>>(
            value: I,
        ) -> I {
            // Compile-time validation of the cluster size. The remaining requirement,
            // that `ClusterSize` must not be greater than the size of the group,
            // cannot be verified with static assertions and is left to the caller.
            const {
                assert!(CLUSTER_SIZE >= 1, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
            }

            // Output slot, overwritten by the trailing `OpStore`.
            let mut output = I::default();

            // SAFETY: the assembly only loads from the `value` reference and stores
            // the instruction's result through the `output` reference; both point
            // at locals that are alive for the whole block.
            unsafe {
                asm! {
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $asm_op, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut output,
                }
            }

            output
        }
    };
}
811
812// add
813macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
814An integer add group operation of all `value` operands contributed by active invocations in the group.
815
816Result Type must be a scalar or vector of integer type.
817
818Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
819
820The identity I for Operation is 0.
821
822The type of `value` must be the same as Result Type.
823
824Requires Capability `GroupNonUniformArithmetic`.
825");
826macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
827An integer add group operation of all `value` operands contributed by active invocations in the group.
828
829Result Type must be a scalar or vector of integer type.
830
831Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
832
833The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
834
835The type of `value` must be the same as Result Type.
836
837`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
838
839Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
840
841# Safety
842* `ClusterSize` must not be greater than the size of the group
843");
844macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
845A floating point add group operation of all `value` operands contributed by active invocations in the group.
846
847Result Type must be a scalar or vector of floating-point type.
848
849Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
850
851The identity I for Operation is 0.
852
853The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
854
855Requires Capability `GroupNonUniformArithmetic`.
856");
857macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
858A floating point add group operation of all `value` operands contributed by active invocations in the group.
859
860Result Type must be a scalar or vector of floating-point type.
861
862Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
863
864The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
865
866The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
867
868`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
869
870Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
871
872# Safety
873* `ClusterSize` must not be greater than the size of the group
874");
875
876// mul
877macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
878An integer multiply group operation of all `value` operands contributed by active invocations in the group.
879
880Result Type must be a scalar or vector of integer type.
881
882Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
883
884The identity I for Operation is 1.
885
886The type of `value` must be the same as Result Type.
887
888Requires Capability `GroupNonUniformArithmetic`.
889");
890macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
891An integer multiply group operation of all `value` operands contributed by active invocations in the group.
892
893Result Type must be a scalar or vector of integer type.
894
895Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
896
897The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
898
899The type of `value` must be the same as Result Type.
900
901`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
902
903Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
904
905# Safety
906* `ClusterSize` must not be greater than the size of the group
907");
908macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
909A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
910
911Result Type must be a scalar or vector of floating-point type.
912
913Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
914
915The identity I for Operation is 1.
916
917The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
918
919Requires Capability `GroupNonUniformArithmetic`.
920");
921macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
922A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
923
924Result Type must be a scalar or vector of floating-point type.
925
926Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
927
928The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
929
930The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
931
932`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
933
934Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
935
936# Safety
937* `ClusterSize` must not be greater than the size of the group
938");
939
940// min
941macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
942A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
943
944Result Type must be a scalar or vector of integer type.
945
946Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
947
948The identity I for Operation is `INT_MAX`.
949
950The type of `value` must be the same as Result Type.
951
952Requires Capability `GroupNonUniformArithmetic`.
953");
954macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
955A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
956
957Result Type must be a scalar or vector of integer type.
958
959Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
960
961The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
962
963The type of `value` must be the same as Result Type.
964
965`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
966
967Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
968
969# Safety
970* `ClusterSize` must not be greater than the size of the group
971");
972macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
973An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
974
975Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
976
977Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
978
979The identity I for Operation is `UINT_MAX`.
980
981The type of `value` must be the same as Result Type.
982
983Requires Capability `GroupNonUniformArithmetic`.
984");
985macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
986An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
987
988Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
989
990Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
991
992The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
993
994The type of `value` must be the same as Result Type.
995
996`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
997
998Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
999
1000# Safety
1001* `ClusterSize` must not be greater than the size of the group
1002");
1003macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
1004A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1005
1006Result Type must be a scalar or vector of floating-point type.
1007
1008Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1009
1010The identity I for Operation is +INF.
1011
1012The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1013
1014Requires Capability `GroupNonUniformArithmetic`.
1015");
1016macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
1017A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1018
1019Result Type must be a scalar or vector of floating-point type.
1020
1021Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1022
1023The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1024
1025The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1026
1027`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1028
1029Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1030
1031# Safety
1032* `ClusterSize` must not be greater than the size of the group
1033");
1034
1035// max
1036macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
1037A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1038
1039Result Type must be a scalar or vector of integer type.
1040
1041Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1042
1043The identity I for Operation is `INT_MIN`.
1044
1045The type of `value` must be the same as Result Type.
1046
1047Requires Capability `GroupNonUniformArithmetic`.
1048");
1049macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
1050A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1051
1052Result Type must be a scalar or vector of integer type.
1053
1054Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1055
1056The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1057
1058The type of `value` must be the same as Result Type.
1059
1060`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1061
1062Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1063
1064# Safety
1065* `ClusterSize` must not be greater than the size of the group
1066");
1067macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
1068An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1069
1070Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1071
1072Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1073
1074The identity I for Operation is 0.
1075
1076The type of `value` must be the same as Result Type.
1077
1078Requires Capability `GroupNonUniformArithmetic`.
1079");
1080macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
1081An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1082
1083Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1084
1085Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1086
1087The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1088
1089The type of `value` must be the same as Result Type.
1090
1091`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1092
1093Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1094
1095# Safety
1096* `ClusterSize` must not be greater than the size of the group
1097");
1098macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
A floating point maximum group operation of all `value` operands contributed by active invocations in the group.
1100
1101Result Type must be a scalar or vector of floating-point type.
1102
1103Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1104
1105The identity I for Operation is -INF.
1106
1107The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1108
1109Requires Capability `GroupNonUniformArithmetic`.
1110");
1111macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
1112A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1113
1114Result Type must be a scalar or vector of floating-point type.
1115
1116Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1117
1118The identity I for Operation is -INF.
1119
1120The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1121
1122Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1123
1124# Safety
1125* `ClusterSize` must not be greater than the size of the group
1126");
1127
1128// and
1129macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
1130A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1131
1132Result Type must be a scalar or vector of integer type.
1133
1134Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1135
1136The identity I for Operation is ~0.
1137
1138The type of `value` must be the same as Result Type.
1139
1140Requires Capability `GroupNonUniformArithmetic`.
1141");
1142macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
1143A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1144
1145Result Type must be a scalar or vector of integer type.
1146
1147Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1148
1149The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1150
1151The type of `value` must be the same as Result Type.
1152
1153`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1154
1155Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1156
1157# Safety
1158* `ClusterSize` must not be greater than the size of the group
1159");
1160
1161// or
1162macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
1163A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1164
1165Result Type must be a scalar or vector of integer type.
1166
1167Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1168
1169The identity I for Operation is 0.
1170
1171The type of `value` must be the same as Result Type.
1172
1173Requires Capability `GroupNonUniformArithmetic`.
1174");
1175macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
1176A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1177
1178Result Type must be a scalar or vector of integer type.
1179
1180Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1181
1182The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1183
1184The type of `value` must be the same as Result Type.
1185
1186`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1187
1188Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1189
1190# Safety
1191* `ClusterSize` must not be greater than the size of the group
1192");
1193
1194// xor
1195macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
1196A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1197
1198Result Type must be a scalar or vector of integer type.
1199
1200Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1201
1202The identity I for Operation is 0.
1203
1204The type of `value` must be the same as Result Type.
1205
1206Requires Capability `GroupNonUniformArithmetic`.
1207");
1208macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
1209A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1210
1211Result Type must be a scalar or vector of integer type.
1212
1213Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1214
1215The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1216
1217The type of `value` must be the same as Result Type.
1218
1219`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1220
1221Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1222
1223# Safety
1224* `ClusterSize` must not be greater than the size of the group
1225");
1226
1227// logical and
1228macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
1229A logical and group operation of all `value` operands contributed by active invocations in the group.
1230
1231Result Type must be a scalar or vector of Boolean type.
1232
1233Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1234
1235The identity I for Operation is ~0.
1236
1237The type of `value` must be the same as Result Type.
1238
1239Requires Capability `GroupNonUniformArithmetic`.
1240");
1241macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
1242A logical and group operation of all `value` operands contributed by active invocations in the group.
1243
1244Result Type must be a scalar or vector of Boolean type.
1245
1246Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1247
1248The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1249
1250The type of `value` must be the same as Result Type.
1251
1252`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1253
1254Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1255
1256# Safety
1257* `ClusterSize` must not be greater than the size of the group
1258");
1259
1260// logical or
1261macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
1262A logical or group operation of all `value` operands contributed by active invocations in the group.
1263
1264Result Type must be a scalar or vector of Boolean type.
1265
1266Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1267
1268The identity I for Operation is 0.
1269
1270The type of `value` must be the same as Result Type.
1271
1272Requires Capability `GroupNonUniformArithmetic`.
1273");
1274macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
1275A logical or group operation of all `value` operands contributed by active invocations in the group.
1276
1277Result Type must be a scalar or vector of Boolean type.
1278
1279Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1280
1281The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1282
1283The type of `value` must be the same as Result Type.
1284
1285`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1286
1287Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1288
1289# Safety
1290* `ClusterSize` must not be greater than the size of the group
1291");
1292
1293// logical xor
1294macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
1295A logical xor group operation of all `value` operands contributed by active invocations in the group.
1296
1297Result Type must be a scalar or vector of Boolean type.
1298
1299Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1300
1301The identity I for Operation is 0.
1302
1303The type of `value` must be the same as Result Type.
1304
1305Requires Capability `GroupNonUniformArithmetic`.
1306");
1307macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
1308A logical xor group operation of all `value` operands contributed by active invocations in the group.
1309
1310Result Type must be a scalar or vector of Boolean type.
1311
1312Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1313
1314The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1315
1316The type of `value` must be the same as Result Type.
1317
1318`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1319
1320Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1321
1322# Safety
1323* `ClusterSize` must not be greater than the size of the group
1324");
1325
1326/// Result is the `value` of the invocation within the quad with a quad index equal to `index`.
1327///
1328/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1329///
1330/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1331///
1332/// The type of `value` must be the same as Result Type.
1333///
1334/// `index` must be a scalar of integer type, whose Signedness operand is 0.
1335///
1336/// Before version 1.5, `index` must come from a constant instruction. Starting with version 1.5, `index` must be dynamically uniform.
1337///
1338/// If the value of `index` is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
1339///
1340/// Requires Capability `GroupNonUniformQuad`.
1341///
1342/// # Safety
1343/// * This function is safe
1344/// * Result is undefined if the value of `index` is greater than or equal to 4, or refers to an inactive invocation
1345#[spirv_std_macros::gpu_only]
1346#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
1347#[inline]
1348pub fn subgroup_quad_broadcast<T: VectorOrScalar>(value: T, index: u32) -> T {
1349    let mut result = T::default();
1350
1351    unsafe {
1352        asm! {
1353            "%u32 = OpTypeInt 32 0",
1354            "%subgroup = OpConstant %u32 {subgroup}",
1355            "%value = OpLoad _ {value}",
1356            "%index = OpLoad _ {index}",
1357            "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
1358            "OpStore {result} %result",
1359            subgroup = const SUBGROUP,
1360            value = in(reg) &value,
1361            index = in(reg) &index,
1362            result = in(reg) &mut result,
1363        }
1364    }
1365
1366    result
1367}
1368
/// Direction is the kind of swap to perform by [`subgroup_quad_swap`].
///
/// Direction must be a scalar of integer type, whose Signedness operand is 0.
///
/// Direction must come from a constant instruction. [`subgroup_quad_swap`] takes it as the
/// `DIRECTION: u32` const generic, so a variant is passed as e.g.
/// `{ QuadDirection::Horizontal as u32 }`.
///
/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
///
/// Requires Capability `GroupNonUniformQuad`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadDirection {
    /// A Direction of 0 indicates a horizontal swap;
    /// - Invocations with quad indices of 0 and 1 swap values
    /// - Invocations with quad indices of 2 and 3 swap values
    Horizontal = 0,
    /// A Direction of 1 indicates a vertical swap;
    /// - Invocations with quad indices of 0 and 2 swap values
    /// - Invocations with quad indices of 1 and 3 swap values
    Vertical = 1,
    /// A Direction of 2 indicates a diagonal swap;
    /// - Invocations with quad indices of 0 and 3 swap values
    /// - Invocations with quad indices of 1 and 2 swap values
    Diagonal = 2,
}
1392
1393/// Swap the `value` of the invocation within the quad with another invocation in the quad using Direction.
1394///
1395/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1396///
1397/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1398///
1399/// The type of `value` must be the same as Result Type.
1400///
1401/// Direction is the kind of swap to perform.
1402///
1403/// Direction must be a scalar of integer type, whose Signedness operand is 0.
1404///
1405/// Direction must come from a constant instruction.
1406///
1407/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
1408///
1409/// A Direction of 0 indicates a horizontal swap;
1410/// - Invocations with quad indices of 0 and 1 swap values
1411/// - Invocations with quad indices of 2 and 3 swap values
1412/// A Direction of 1 indicates a vertical swap;
1413/// - Invocations with quad indices of 0 and 2 swap values
1414/// - Invocations with quad indices of 1 and 3 swap values
1415/// A Direction of 2 indicates a diagonal swap;
1416/// - Invocations with quad indices of 0 and 3 swap values
1417/// - Invocations with quad indices of 1 and 2 swap values
1418///
1419/// Direction must be one of the above values.
1420///
1421/// If an active invocation reads `value` from an inactive invocation, the resulting value is undefined.
1422///
1423/// Requires Capability `GroupNonUniformQuad`.
1424///
1425/// # Safety
1426/// * This function is safe
1427/// * Result is undefined if an active invocation reads `value` from an inactive invocation
1428#[spirv_std_macros::gpu_only]
1429#[doc(alias = "OpGroupNonUniformQuadSwap")]
1430#[inline]
1431pub fn subgroup_quad_swap<const DIRECTION: u32, T: VectorOrScalar>(value: T) -> T {
1432    let mut result = T::default();
1433
1434    unsafe {
1435        asm! {
1436            "%u32 = OpTypeInt 32 0",
1437            "%subgroup = OpConstant %u32 {subgroup}",
1438            "%direction = OpConstant %u32 {direction}",
1439            "%value = OpLoad _ {value}",
1440            "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
1441            "OpStore {result} %result",
1442            subgroup = const SUBGROUP,
1443            direction = const DIRECTION,
1444            value = in(reg) &value,
1445            result = in(reg) &mut result,
1446        }
1447    }
1448
1449    result
1450}