spirv_std/arch/
subgroup.rs

1#[cfg(target_arch = "spirv")]
2use crate::arch::barrier;
3use crate::float::Float;
4use crate::integer::{Integer, SignedInteger, UnsignedInteger};
5#[cfg(target_arch = "spirv")]
6use crate::memory::{Scope, Semantics};
7use crate::vector::VectorOrScalar;
8#[cfg(target_arch = "spirv")]
9use core::arch::asm;
10
/// Numeric value of [`Scope::Subgroup`], passed as the scope operand by the
/// barrier wrappers and the `asm!` blocks in this module.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
13
14/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup.
15/// Mostly used in group ballot operations.
16#[repr(transparent)]
17#[derive(Copy, Clone, Default, Eq, PartialEq)]
18#[cfg_attr(feature = "bytemuck", derive(bytemuck::Zeroable, bytemuck::Pod))]
19pub struct SubgroupMask(pub glam::UVec4);
20
/// Defines the class of group operation.
///
/// The explicit discriminants are the numeric values that end up as the operation
/// operand of the group instructions: the macros in this module cast a variant
/// with `as u32` before handing it to `asm!`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GroupOperation {
    /// A reduction operation for all values of a specific value X specified by invocations within a workgroup.
    Reduce = 0,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)]
    InclusiveScan = 1,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)].
    ExclusiveScan = 2,
    /// The [`GroupOperation`] `ClusteredReduce`.
    ///
    /// All instructions with a [`GroupOperation`] require an additional `ClusterSize` parameter when the
    /// [`GroupOperation`] is `ClusteredReduce`. To map this requirement into Rust, the clustered functions of this
    /// module are generated separately from the base functions and take the additional `ClusterSize` parameter as a
    /// const generic.
    ///
    /// Do not use this variant with an operation that does not also supply a `ClusterSize`.
    ClusteredReduce = 3,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedReduceNV = 6,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedInclusiveScanNV = 7,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedExclusiveScanNV = 8,
}
54
55/// The function `subgroupBarrier()` enforces that all active invocations within a
56/// subgroup must execute this function before any are allowed to continue their
57/// execution, and the results of any memory stores performed using coherent
58/// variables performed prior to the call will be visible to any future
59/// coherent access to the same memory performed by any other shader invocation
60/// within the same subgroup.
61///
62/// Requires Capability `GroupNonUniform`.
63#[spirv_std_macros::gpu_only]
64#[doc(alias = "subgroupBarrier")]
65#[inline]
66pub fn subgroup_barrier() {
67    unsafe {
68        barrier::control_barrier::<
69            SUBGROUP,
70            SUBGROUP,
71            {
72                Semantics::ACQUIRE_RELEASE.bits()
73                    | Semantics::UNIFORM_MEMORY.bits()
74                    | Semantics::WORKGROUP_MEMORY.bits()
75                    | Semantics::IMAGE_MEMORY.bits()
76            },
77        >();
78    }
79}
80
81/// The function `subgroupMemoryBarrier()` enforces the ordering of all memory
82/// transactions issued within a single shader invocation, as viewed by other
83/// invocations in the same subgroup.
84///
85/// Requires Capability `GroupNonUniform`.
86#[spirv_std_macros::gpu_only]
87#[doc(alias = "subgroupMemoryBarrier")]
88#[inline]
89pub fn subgroup_memory_barrier() {
90    unsafe {
91        barrier::memory_barrier::<
92            SUBGROUP,
93            {
94                Semantics::ACQUIRE_RELEASE.bits()
95                    | Semantics::UNIFORM_MEMORY.bits()
96                    | Semantics::WORKGROUP_MEMORY.bits()
97                    | Semantics::IMAGE_MEMORY.bits()
98            },
99        >();
100    }
101}
102
103/// The function `subgroupMemoryBarrierBuffer()` enforces the ordering of all
104/// memory transactions to buffer variables issued within a single shader
105/// invocation, as viewed by other invocations in the same subgroup.
106///
107/// Requires Capability `GroupNonUniform`.
108#[spirv_std_macros::gpu_only]
109#[doc(alias = "subgroupMemoryBarrierBuffer")]
110#[inline]
111pub fn subgroup_memory_barrier_buffer() {
112    unsafe {
113        barrier::memory_barrier::<
114            SUBGROUP,
115            { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() },
116        >();
117    }
118}
119
120/// The function `subgroupMemoryBarrierShared()` enforces the ordering of all
121/// memory transactions to shared variables issued within a single shader
122/// invocation, as viewed by other invocations in the same subgroup.
123///
124/// Only available in compute shaders.
125///
126/// Requires Capability `GroupNonUniform`.
127#[spirv_std_macros::gpu_only]
128#[doc(alias = "subgroupMemoryBarrierShared")]
129#[inline]
130pub fn subgroup_memory_barrier_shared() {
131    unsafe {
132        barrier::memory_barrier::<
133            SUBGROUP,
134            { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() },
135        >();
136    }
137}
138
139/// The function `subgroupMemoryBarrierImage()` enforces the ordering of all
140/// memory transactions to images issued within a single shader invocation, as
141/// viewed by other invocations in the same subgroup.
142///
143/// Requires Capability `GroupNonUniform`.
144#[spirv_std_macros::gpu_only]
145#[doc(alias = "subgroupMemoryBarrierImage")]
146#[inline]
147pub fn subgroup_memory_barrier_image() {
148    unsafe {
149        barrier::memory_barrier::<
150            SUBGROUP,
151            { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() },
152        >();
153    }
154}
155
156/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false.
157///
158/// Result Type must be a Boolean type.
159///
160/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
161///
162/// Requires Capability `GroupNonUniform`.
163#[spirv_std_macros::gpu_only]
164#[doc(alias = "OpGroupNonUniformElect")]
165#[inline]
166pub fn subgroup_elect() -> bool {
167    let mut result = false;
168
169    unsafe {
170        asm! {
171            "%bool = OpTypeBool",
172            "%u32 = OpTypeInt 32 0",
173            "%subgroup = OpConstant %u32 {subgroup}",
174            "%result = OpGroupNonUniformElect %bool %subgroup",
175            "OpStore {result} %result",
176            subgroup = const SUBGROUP,
177            result = in(reg) &mut result,
178        }
179    }
180
181    result
182}
183
184/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for all active invocations in the group, otherwise the result is false.
185///
186/// Result Type must be a Boolean type.
187///
188/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
189///
190/// `predicate` must be a Boolean type.
191///
192/// Requires Capability `GroupNonUniformVote`.
193#[spirv_std_macros::gpu_only]
194#[doc(alias = "OpGroupNonUniformAll")]
195#[inline]
196pub fn subgroup_all(predicate: bool) -> bool {
197    let mut result = false;
198
199    unsafe {
200        asm! {
201            "%bool = OpTypeBool",
202            "%u32 = OpTypeInt 32 0",
203            "%subgroup = OpConstant %u32 {subgroup}",
204            "%predicate = OpLoad _ {predicate}",
205            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
206            "OpStore {result} %result",
207            subgroup = const SUBGROUP,
208            predicate = in(reg) &predicate,
209            result = in(reg) &mut result,
210        }
211    }
212
213    result
214}
215
216/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for any active invocation in the group, otherwise the result is false.
217///
218/// Result Type must be a Boolean type.
219///
220/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
221///
222/// `predicate` must be a Boolean type.
223///
224/// Requires Capability `GroupNonUniformVote`.
225#[spirv_std_macros::gpu_only]
226#[doc(alias = "OpGroupNonUniformAny")]
227#[inline]
228pub fn subgroup_any(predicate: bool) -> bool {
229    let mut result = false;
230
231    unsafe {
232        asm! {
233            "%bool = OpTypeBool",
234            "%u32 = OpTypeInt 32 0",
235            "%subgroup = OpConstant %u32 {subgroup}",
236            "%predicate = OpLoad _ {predicate}",
237            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
238            "OpStore {result} %result",
239            subgroup = const SUBGROUP,
240            predicate = in(reg) &predicate,
241            result = in(reg) &mut result,
242        }
243    }
244
245    result
246}
247
248/// Evaluates a `value` for all active invocations in the group. The result is true if `value` is equal for all active invocations in the group. Otherwise, the result is false.
249///
250/// Result Type must be a Boolean type.
251///
252/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
253///
254/// `value` must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used.
255///
256/// Requires Capability `GroupNonUniformVote`.
257#[spirv_std_macros::gpu_only]
258#[doc(alias = "OpGroupNonUniformAllEqual")]
259#[inline]
260pub fn subgroup_all_equal<T: VectorOrScalar>(value: T) -> bool {
261    let mut result = false;
262
263    unsafe {
264        asm! {
265            "%bool = OpTypeBool",
266            "%u32 = OpTypeInt 32 0",
267            "%subgroup = OpConstant %u32 {subgroup}",
268            "%value = OpLoad _ {value}",
269            "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
270            "OpStore {result} %result",
271            subgroup = const SUBGROUP,
272            value = in(reg) &value,
273            result = in(reg) &mut result,
274        }
275    }
276
277    result
278}
279
280/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
281///
282/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
283///
284/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
285///
286/// The type of `value` must be the same as Result Type.
287///
288/// `id` must be a scalar of integer type, whose Signedness operand is 0.
289///
290/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
291///
292/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
293///
294/// Requires Capability `GroupNonUniformBallot`.
295///
296/// # Safety
297/// * `id` must not be dynamically uniform
298/// * before 1.5: `id` must be constant
299/// * Result is undefined if `id` is an inactive invocation or out of bounds
300#[spirv_std_macros::gpu_only]
301#[doc(alias = "OpGroupNonUniformBroadcast")]
302#[inline]
303pub unsafe fn subgroup_broadcast<T: VectorOrScalar>(value: T, id: u32) -> T {
304    let mut result = T::default();
305
306    unsafe {
307        asm! {
308            "%u32 = OpTypeInt 32 0",
309            "%subgroup = OpConstant %u32 {subgroup}",
310            "%value = OpLoad _ {value}",
311            "%id = OpLoad _ {id}",
312            "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
313            "OpStore {result} %result",
314            subgroup = const SUBGROUP,
315            value = in(reg) &value,
316            id = in(reg) &id,
317            result = in(reg) &mut result,
318        }
319    }
320
321    result
322}
323
324/// Result is the `value` of the invocation from the active invocation with the lowest id in the group to all active invocations in the group.
325///
326/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
327///
328/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
329///
330/// The type of `value` must be the same as Result Type.
331///
332/// Requires Capability `GroupNonUniformBallot`.
333#[spirv_std_macros::gpu_only]
334#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
335#[inline]
336pub fn subgroup_broadcast_first<T: VectorOrScalar>(value: T) -> T {
337    let mut result = T::default();
338
339    unsafe {
340        asm! {
341            "%u32 = OpTypeInt 32 0",
342            "%subgroup = OpConstant %u32 {subgroup}",
343            "%value = OpLoad _ {value}",
344            "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
345            "OpStore {result} %result",
346            subgroup = const SUBGROUP,
347            value = in(reg) &value,
348            result = in(reg) &mut result,
349        }
350    }
351
352    result
353}
354
355/// Result is a bitfield value combining the `predicate` value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the `predicate` for that invocation evaluated to true; otherwise, it is set to zero.
356///
357/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
358///
359/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
360///
361/// Execution is a Scope that identifies the group of invocations affected by this command.
362///
363/// `predicate` must be a Boolean type.
364///
365/// Requires Capability `GroupNonUniformBallot`.
366#[spirv_std_macros::gpu_only]
367#[doc(alias = "OpGroupNonUniformBallot")]
368#[inline]
369pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
370    let mut result = SubgroupMask::default();
371
372    unsafe {
373        asm! {
374            "%u32 = OpTypeInt 32 0",
375            "%groupmask = OpTypeVector %u32 4",
376            "%subgroup = OpConstant %u32 {subgroup}",
377            "%predicate = OpLoad _ {predicate}",
378            "%result = OpGroupNonUniformBallot %groupmask %subgroup %predicate",
379            "OpStore {result} %result",
380            subgroup = const SUBGROUP,
381            predicate = in(reg) &predicate,
382            result = in(reg) &mut result,
383        }
384    }
385
386    result
387}
388
389/// Evaluates a `value` for all active invocations in the group, resulting in true if the bit in `value` for the corresponding invocation is set to one, otherwise the result is false.
390///
391/// Result Type must be a Boolean type.
392///
393/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
394///
395/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
396///
397/// Behavior is undefined unless `value` is the same for all invocations that execute the same dynamic instance of this instruction.
398///
399/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
400///
401/// Requires Capability `GroupNonUniformBallot`.
402///
403/// # Safety
404/// * `value` must be the same for all dynamic instances of this instruction
405#[spirv_std_macros::gpu_only]
406#[doc(alias = "OpGroupNonUniformInverseBallot")]
407#[inline]
408pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
409    let mut result = false;
410
411    unsafe {
412        asm! {
413            "%bool = OpTypeBool",
414            "%u32 = OpTypeInt 32 0",
415            "%subgroup = OpConstant %u32 {subgroup}",
416            "%value = OpLoad _ {value}",
417            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
418            "OpStore {result} %result",
419            subgroup = const SUBGROUP,
420            value = in(reg) &value,
421            result = in(reg) &mut result,
422        }
423    }
424
425    result
426}
427
428/// Evaluates a value for all active invocations in the group, resulting in true if the bit in `value` that corresponds to `index` is set to one, otherwise the result is false.
429///
430/// Result Type must be a Boolean type.
431///
432/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
433///
434/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
435///
436/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
437///
438/// `index` must be a scalar of integer type, whose Signedness operand is 0.
439///
440/// The resulting value is undefined if `index` is greater than or equal to the size of the group.
441///
442/// Requires Capability `GroupNonUniformBallot`.
443///
444/// # Safety
445/// * This function is safe
446/// * Result is undefined if `id` is out of bounds
447#[spirv_std_macros::gpu_only]
448#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
449#[inline]
450pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
451    let mut result = false;
452
453    unsafe {
454        asm! {
455            "%bool = OpTypeBool",
456            "%u32 = OpTypeInt 32 0",
457            "%subgroup = OpConstant %u32 {subgroup}",
458            "%value = OpLoad _ {value}",
459            "%index = OpLoad _ {index}",
460            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
461            "OpStore {result} %result",
462            subgroup = const SUBGROUP,
463            value = in(reg) &value,
464            index = in(reg) &index,
465            result = in(reg) &mut result,
466        }
467    }
468
469    result
470}
471
// Generates one ballot bit-count function named `$fn_name` whose group operation
// operand is `$operation` cast to `u32`.
macro_rules! macro_subgroup_ballot_bit_count {
    ($fn_name:ident, $operation:expr) => {
        /// Counts the bits that are set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations.
        ///
        /// The result is an unsigned integer scalar. The execution scope is fixed to Subgroup.
        ///
        /// The identity I for Operation is 0.
        ///
        /// `value` must be a vector of four unsigned 32-bit integer components. The first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
        ///
        /// Requires Capability `GroupNonUniformBallot`.
        #[spirv_std_macros::gpu_only]
        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
        #[inline]
        pub fn $fn_name(value: SubgroupMask) -> u32 {
            let mut count = 0;

            // SAFETY: the asm block loads from the local `value` and stores into
            // the local `count`; both pointers refer to live stack values.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($operation as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut count,
                );
            }

            count
        }
    };
}
511
512macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
513macro_subgroup_ballot_bit_count!(
514    subgroup_ballot_inclusive_bit_count,
515    GroupOperation::InclusiveScan
516);
517macro_subgroup_ballot_bit_count!(
518    subgroup_ballot_exclusive_bit_count,
519    GroupOperation::ExclusiveScan
520);
521
522/// Find the least significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
523///
524/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
525///
526/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
527///
528/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
529///
530/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
531///
532/// Requires Capability `GroupNonUniformBallot`.
533///
534/// # Safety
535/// * This function is safe
536/// * Result is undefined if `id` is an inactive invocation or out of bounds
537#[spirv_std_macros::gpu_only]
538#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
539#[inline]
540pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
541    let mut result = 0;
542
543    unsafe {
544        asm! {
545            "%u32 = OpTypeInt 32 0",
546            "%subgroup = OpConstant %u32 {subgroup}",
547            "%value = OpLoad _ {value}",
548            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
549            "OpStore {result} %result",
550            subgroup = const SUBGROUP,
551            value = in(reg) &value,
552            result = in(reg) &mut result,
553        }
554    }
555
556    result
557}
558
559/// Find the most significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
560///
561/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
562///
563/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
564///
565/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
566///
567/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
568///
569/// Requires Capability `GroupNonUniformBallot`.
570#[spirv_std_macros::gpu_only]
571#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
572#[inline]
573pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
574    let mut result = 0;
575
576    unsafe {
577        asm! {
578            "%u32 = OpTypeInt 32 0",
579            "%subgroup = OpConstant %u32 {subgroup}",
580            "%value = OpLoad _ {value}",
581            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
582            "OpStore {result} %result",
583            subgroup = const SUBGROUP,
584            value = in(reg) &value,
585            result = in(reg) &mut result,
586        }
587    }
588
589    result
590}
591
592/// Result is the `value` of the invocation identified by the id `id`.
593///
594/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
595///
596/// Execution is a Scope that identifies the group of invocations affected by this command.
597///
598/// The type of `value` must be the same as Result Type.
599///
600/// `id` must be a scalar of integer type, whose Signedness operand is 0.
601///
602/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
603///
604/// Requires Capability `GroupNonUniformShuffle`.
605///
606/// # Safety
607/// * This function is safe
608/// * Result is undefined if `id` is an inactive invocation or out of bounds
609#[spirv_std_macros::gpu_only]
610#[doc(alias = "OpGroupNonUniformShuffle")]
611#[inline]
612pub fn subgroup_shuffle<T: VectorOrScalar>(value: T, id: u32) -> T {
613    let mut result = T::default();
614
615    unsafe {
616        asm! {
617            "%u32 = OpTypeInt 32 0",
618            "%subgroup = OpConstant %u32 {subgroup}",
619            "%value = OpLoad _ {value}",
620            "%id = OpLoad _ {id}",
621            "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
622            "OpStore {result} %result",
623            subgroup = const SUBGROUP,
624            value = in(reg) &value,
625            id = in(reg) &id,
626            result = in(reg) &mut result,
627        }
628    }
629
630    result
631}
632
633/// Result is the `value` of the invocation identified by the current invocation’s id within the group xor’ed with Mask.
634///
635/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
636///
637/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
638///
639/// The type of `value` must be the same as Result Type.
640///
641/// Mask must be a scalar of integer type, whose Signedness operand is 0.
642///
643/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group.
644///
645/// Requires Capability `GroupNonUniformShuffle`.
646///
647/// # Safety
648/// * This function is safe
649/// * Result is undefined if current invocation’s id within the group xor’ed with `mask` is an inactive invocation or out of bounds
650#[spirv_std_macros::gpu_only]
651#[doc(alias = "OpGroupNonUniformShuffleXor")]
652#[inline]
653pub fn subgroup_shuffle_xor<T: VectorOrScalar>(value: T, mask: u32) -> T {
654    let mut result = T::default();
655
656    unsafe {
657        asm! {
658            "%u32 = OpTypeInt 32 0",
659            "%subgroup = OpConstant %u32 {subgroup}",
660            "%value = OpLoad _ {value}",
661            "%mask = OpLoad _ {mask}",
662            "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
663            "OpStore {result} %result",
664            subgroup = const SUBGROUP,
665            value = in(reg) &value,
666            mask = in(reg) &mask,
667            result = in(reg) &mut result,
668        }
669    }
670
671    result
672}
673
674/// Result is the `value` of the invocation identified by the current invocation’s id within the group - Delta.
675///
676/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
677///
678/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
679///
680/// The type of `value` must be the same as Result Type.
681///
682/// Delta must be a scalar of integer type, whose Signedness operand is 0.
683///
684/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive.
685///
686/// Requires Capability `GroupNonUniformShuffleRelative`.
687///
688/// # Safety
689/// * This function is safe
690/// * Result is undefined if `delta` is greater than the current invocation’s id within the group or if the selected lane is inactive
691#[spirv_std_macros::gpu_only]
692#[doc(alias = "OpGroupNonUniformShuffleUp")]
693#[inline]
694pub fn subgroup_shuffle_up<T: VectorOrScalar>(value: T, delta: u32) -> T {
695    let mut result = T::default();
696
697    unsafe {
698        asm! {
699            "%u32 = OpTypeInt 32 0",
700            "%subgroup = OpConstant %u32 {subgroup}",
701            "%value = OpLoad _ {value}",
702            "%delta = OpLoad _ {delta}",
703            "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
704            "OpStore {result} %result",
705            subgroup = const SUBGROUP,
706            value = in(reg) &value,
707            delta = in(reg) &delta,
708            result = in(reg) &mut result,
709        }
710    }
711
712    result
713}
714
715/// Result is the `value` of the invocation identified by the current invocation’s id within the group + Delta.
716///
717/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
718///
719/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
720///
721/// The type of `value` must be the same as Result Type.
722///
723/// Delta must be a scalar of integer type, whose Signedness operand is 0.
724///
725/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group.
726///
727/// Requires Capability `GroupNonUniformShuffleRelative`.
728///
729/// # Safety
730/// * This function is safe
731/// * Result is undefined if `delta` is greater than or equal to the size of the group, or if the current invocation’s id within the group + `delta` is either an inactive invocation or greater than or equal to the size of the group.
732#[spirv_std_macros::gpu_only]
733#[doc(alias = "OpGroupNonUniformShuffleDown")]
734#[inline]
735pub fn subgroup_shuffle_down<T: VectorOrScalar>(value: T, delta: u32) -> T {
736    let mut result = T::default();
737
738    unsafe {
739        asm! {
740            "%u32 = OpTypeInt 32 0",
741            "%subgroup = OpConstant %u32 {subgroup}",
742            "%value = OpLoad _ {value}",
743            "%delta = OpLoad _ {delta}",
744            "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
745            "OpStore {result} %result",
746            subgroup = const SUBGROUP,
747            value = in(reg) &value,
748            delta = in(reg) &delta,
749            result = in(reg) &mut result,
750        }
751    }
752
753    result
754}
755
// Generates one public function per (`$fn_name`, `$operation`) pair. Every generated
// function emits the instruction `$instruction` with `$operation as u32` as its group
// operation operand, accepts any `VectorOrScalar` whose `Scalar` is `$elem`, and
// carries `$doc_text` as its documentation.
macro_rules! macro_subgroup_op {
    ($elem:ty, $instruction:literal, $($fn_name:ident, $operation:expr),+; $doc_text:literal) => { $(
        #[doc = $doc_text]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $instruction)]
        #[inline]
        pub fn $fn_name<I: VectorOrScalar<Scalar = $elem>>(value: I) -> I {
            // Placeholder that the asm block overwrites with the instruction result.
            let mut out = I::default();

            // SAFETY: the asm block loads from the local `value` and stores into
            // the local `out`; both pointers refer to live stack values.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    concat!("%result = ", $instruction, " _ %subgroup {groupop} %value"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($operation as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut out,
                );
            }

            out
        }
    )+ };
}
783
// Generates a single `unsafe` wrapper function around the SPIR-V instruction named by `$opcode`,
// fixed to `GroupOperation::ClusteredReduce`, with the cluster size supplied by the caller as the
// `CLUSTER_SIZE` const generic.
//
// Arguments, in order: the type used for the `Scalar` bound of the generic argument, the
// instruction mnemonic, the function name, then `;` and the doc text for the generated function.
macro_rules! macro_subgroup_op_clustered {
    ($elem:ty, $opcode:literal, $func:ident; $doc_text:literal) => {
        #[doc = $doc_text]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $opcode)]
        #[inline]
        pub unsafe fn $func<const CLUSTER_SIZE: u32, I: VectorOrScalar<Scalar = $elem>>(
            value: I,
        ) -> I {
            // Compile-time validation of the cluster size.
            const {
                assert!(CLUSTER_SIZE >= 1, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
                // Cannot be verified with static assertions:
                // `ClusterSize` must not be greater than the size of the group
            }

            // Output slot; the `OpStore` at the end of the asm block overwrites it.
            let mut reduced = I::default();

            // SAFETY: the asm block only loads through the pointer to the local `value` and
            // stores through the pointer to the local `reduced`, both of which are live here.
            // The group-size requirement on `CLUSTER_SIZE` is left to the caller of this
            // `unsafe fn`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $opcode, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut reduced,
                );
            }

            reduced
        }
    };
}
825
826// add
827macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
828An integer add group operation of all `value` operands contributed by active invocations in the group.
829
830Result Type must be a scalar or vector of integer type.
831
832Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
833
834The identity I for Operation is 0.
835
836The type of `value` must be the same as Result Type.
837
838Requires Capability `GroupNonUniformArithmetic`.
839");
840macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
841An integer add group operation of all `value` operands contributed by active invocations in the group.
842
843Result Type must be a scalar or vector of integer type.
844
845Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
846
847The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
848
849The type of `value` must be the same as Result Type.
850
851`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
852
853Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
854
855# Safety
856* `ClusterSize` must not be greater than the size of the group
857");
858macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
859A floating point add group operation of all `value` operands contributed by active invocations in the group.
860
861Result Type must be a scalar or vector of floating-point type.
862
863Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
864
865The identity I for Operation is 0.
866
867The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
868
869Requires Capability `GroupNonUniformArithmetic`.
870");
871macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
872A floating point add group operation of all `value` operands contributed by active invocations in the group.
873
874Result Type must be a scalar or vector of floating-point type.
875
876Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
877
878The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
879
880The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
881
882`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
883
884Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
885
886# Safety
887* `ClusterSize` must not be greater than the size of the group
888");
889
890// mul
891macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
892An integer multiply group operation of all `value` operands contributed by active invocations in the group.
893
894Result Type must be a scalar or vector of integer type.
895
896Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
897
898The identity I for Operation is 1.
899
900The type of `value` must be the same as Result Type.
901
902Requires Capability `GroupNonUniformArithmetic`.
903");
904macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
905An integer multiply group operation of all `value` operands contributed by active invocations in the group.
906
907Result Type must be a scalar or vector of integer type.
908
909Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
910
911The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
912
913The type of `value` must be the same as Result Type.
914
915`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
916
917Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
918
919# Safety
920* `ClusterSize` must not be greater than the size of the group
921");
922macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
923A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
924
925Result Type must be a scalar or vector of floating-point type.
926
927Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
928
929The identity I for Operation is 1.
930
931The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
932
933Requires Capability `GroupNonUniformArithmetic`.
934");
935macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
936A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
937
938Result Type must be a scalar or vector of floating-point type.
939
940Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
941
942The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
943
944The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
945
946`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
947
948Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
949
950# Safety
951* `ClusterSize` must not be greater than the size of the group
952");
953
954// min
955macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
956A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
957
958Result Type must be a scalar or vector of integer type.
959
960Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
961
962The identity I for Operation is `INT_MAX`.
963
964The type of `value` must be the same as Result Type.
965
966Requires Capability `GroupNonUniformArithmetic`.
967");
968macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
969A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
970
971Result Type must be a scalar or vector of integer type.
972
973Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
974
975The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
976
977The type of `value` must be the same as Result Type.
978
979`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
980
981Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
982
983# Safety
984* `ClusterSize` must not be greater than the size of the group
985");
986macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
987An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
988
989Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
990
991Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
992
993The identity I for Operation is `UINT_MAX`.
994
995The type of `value` must be the same as Result Type.
996
997Requires Capability `GroupNonUniformArithmetic`.
998");
999macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
1000An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
1001
1002Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1003
1004Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1005
1006The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1007
1008The type of `value` must be the same as Result Type.
1009
1010`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1011
1012Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1013
1014# Safety
1015* `ClusterSize` must not be greater than the size of the group
1016");
1017macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
1018A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1019
1020Result Type must be a scalar or vector of floating-point type.
1021
1022Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1023
1024The identity I for Operation is +INF.
1025
1026The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1027
1028Requires Capability `GroupNonUniformArithmetic`.
1029");
1030macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
1031A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1032
1033Result Type must be a scalar or vector of floating-point type.
1034
1035Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1036
1037The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1038
1039The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1040
1041`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1042
1043Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1044
1045# Safety
1046* `ClusterSize` must not be greater than the size of the group
1047");
1048
1049// max
1050macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
1051A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1052
1053Result Type must be a scalar or vector of integer type.
1054
1055Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1056
1057The identity I for Operation is `INT_MIN`.
1058
1059The type of `value` must be the same as Result Type.
1060
1061Requires Capability `GroupNonUniformArithmetic`.
1062");
1063macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
1064A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1065
1066Result Type must be a scalar or vector of integer type.
1067
1068Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1069
1070The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1071
1072The type of `value` must be the same as Result Type.
1073
1074`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1075
1076Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1077
1078# Safety
1079* `ClusterSize` must not be greater than the size of the group
1080");
1081macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
1082An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1083
1084Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1085
1086Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1087
1088The identity I for Operation is 0.
1089
1090The type of `value` must be the same as Result Type.
1091
1092Requires Capability `GroupNonUniformArithmetic`.
1093");
1094macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
1095An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1096
1097Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1098
1099Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1100
1101The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1102
1103The type of `value` must be the same as Result Type.
1104
1105`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1106
1107Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1108
1109# Safety
1110* `ClusterSize` must not be greater than the size of the group
1111");
1112macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
1113A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1114
1115Result Type must be a scalar or vector of floating-point type.
1116
1117Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1118
1119The identity I for Operation is -INF.
1120
1121The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1122
1123Requires Capability `GroupNonUniformArithmetic`.
1124");
1125macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
1126A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1127
1128Result Type must be a scalar or vector of floating-point type.
1129
1130Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1131
1132The identity I for Operation is -INF.
1133
1134The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1135
1136Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1137
1138# Safety
1139* `ClusterSize` must not be greater than the size of the group
1140");
1141
1142// and
1143macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
1144A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1145
1146Result Type must be a scalar or vector of integer type.
1147
1148Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1149
1150The identity I for Operation is ~0.
1151
1152The type of `value` must be the same as Result Type.
1153
1154Requires Capability `GroupNonUniformArithmetic`.
1155");
1156macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
1157A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1158
1159Result Type must be a scalar or vector of integer type.
1160
1161Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1162
1163The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1164
1165The type of `value` must be the same as Result Type.
1166
1167`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1168
1169Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1170
1171# Safety
1172* `ClusterSize` must not be greater than the size of the group
1173");
1174
1175// or
1176macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
1177A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1178
1179Result Type must be a scalar or vector of integer type.
1180
1181Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1182
1183The identity I for Operation is 0.
1184
1185The type of `value` must be the same as Result Type.
1186
1187Requires Capability `GroupNonUniformArithmetic`.
1188");
1189macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
1190A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1191
1192Result Type must be a scalar or vector of integer type.
1193
1194Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1195
1196The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1197
1198The type of `value` must be the same as Result Type.
1199
1200`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1201
1202Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1203
1204# Safety
1205* `ClusterSize` must not be greater than the size of the group
1206");
1207
1208// xor
1209macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
1210A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1211
1212Result Type must be a scalar or vector of integer type.
1213
1214Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1215
1216The identity I for Operation is 0.
1217
1218The type of `value` must be the same as Result Type.
1219
1220Requires Capability `GroupNonUniformArithmetic`.
1221");
1222macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
1223A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1224
1225Result Type must be a scalar or vector of integer type.
1226
1227Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1228
1229The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1230
1231The type of `value` must be the same as Result Type.
1232
1233`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1234
1235Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1236
1237# Safety
1238* `ClusterSize` must not be greater than the size of the group
1239");
1240
1241// logical and
1242macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
1243A logical and group operation of all `value` operands contributed by active invocations in the group.
1244
1245Result Type must be a scalar or vector of Boolean type.
1246
1247Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1248
1249The identity I for Operation is ~0.
1250
1251The type of `value` must be the same as Result Type.
1252
1253Requires Capability `GroupNonUniformArithmetic`.
1254");
1255macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
1256A logical and group operation of all `value` operands contributed by active invocations in the group.
1257
1258Result Type must be a scalar or vector of Boolean type.
1259
1260Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1261
1262The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1263
1264The type of `value` must be the same as Result Type.
1265
1266`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1267
1268Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1269
1270# Safety
1271* `ClusterSize` must not be greater than the size of the group
1272");
1273
1274// logical or
1275macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
1276A logical or group operation of all `value` operands contributed by active invocations in the group.
1277
1278Result Type must be a scalar or vector of Boolean type.
1279
1280Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1281
1282The identity I for Operation is 0.
1283
1284The type of `value` must be the same as Result Type.
1285
1286Requires Capability `GroupNonUniformArithmetic`.
1287");
1288macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
1289A logical or group operation of all `value` operands contributed by active invocations in the group.
1290
1291Result Type must be a scalar or vector of Boolean type.
1292
1293Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1294
1295The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1296
1297The type of `value` must be the same as Result Type.
1298
1299`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1300
1301Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1302
1303# Safety
1304* `ClusterSize` must not be greater than the size of the group
1305");
1306
1307// logical xor
1308macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
1309A logical xor group operation of all `value` operands contributed by active invocations in the group.
1310
1311Result Type must be a scalar or vector of Boolean type.
1312
1313Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1314
1315The identity I for Operation is 0.
1316
1317The type of `value` must be the same as Result Type.
1318
1319Requires Capability `GroupNonUniformArithmetic`.
1320");
1321macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
1322A logical xor group operation of all `value` operands contributed by active invocations in the group.
1323
1324Result Type must be a scalar or vector of Boolean type.
1325
1326Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1327
1328The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1329
1330The type of `value` must be the same as Result Type.
1331
1332`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1333
1334Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1335
1336# Safety
1337* `ClusterSize` must not be greater than the size of the group
1338");
1339
1340/// Result is the `value` of the invocation within the quad with a quad index equal to `index`.
1341///
1342/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1343///
1344/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1345///
1346/// The type of `value` must be the same as Result Type.
1347///
1348/// `index` must be a scalar of integer type, whose Signedness operand is 0.
1349///
1350/// Before version 1.5, `index` must come from a constant instruction. Starting with version 1.5, `index` must be dynamically uniform.
1351///
1352/// If the value of `index` is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
1353///
1354/// Requires Capability `GroupNonUniformQuad`.
1355///
1356/// # Safety
1357/// * This function is safe
1358/// * Result is undefined if the value of `index` is greater than or equal to 4, or refers to an inactive invocation
1359#[spirv_std_macros::gpu_only]
1360#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
1361#[inline]
1362pub fn subgroup_quad_broadcast<T: VectorOrScalar>(value: T, index: u32) -> T {
1363    let mut result = T::default();
1364
1365    unsafe {
1366        asm! {
1367            "%u32 = OpTypeInt 32 0",
1368            "%subgroup = OpConstant %u32 {subgroup}",
1369            "%value = OpLoad _ {value}",
1370            "%index = OpLoad _ {index}",
1371            "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
1372            "OpStore {result} %result",
1373            subgroup = const SUBGROUP,
1374            value = in(reg) &value,
1375            index = in(reg) &index,
1376            result = in(reg) &mut result,
1377        }
1378    }
1379
1380    result
1381}
1382
/// The kind of swap a quad swap performs, i.e. the `Direction` operand of `OpGroupNonUniformQuadSwap`.
///
/// Direction must be a scalar of integer type, whose Signedness operand is 0.
///
/// Direction must come from a constant instruction.
///
/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope
/// instance. The invocation providing this `value` is determined according to Direction.
///
/// [`subgroup_quad_swap`] takes the direction as a `u32` const generic, so cast the variant when calling it,
/// e.g. `subgroup_quad_swap::<{ QuadDirection::Horizontal as u32 }, _>(value)`.
///
/// Requires Capability `GroupNonUniformQuad`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadDirection {
    /// A Direction of 0 indicates a horizontal swap:
    ///
    /// - Invocations with quad indices of 0 and 1 swap values
    /// - Invocations with quad indices of 2 and 3 swap values
    Horizontal = 0,
    /// A Direction of 1 indicates a vertical swap:
    ///
    /// - Invocations with quad indices of 0 and 2 swap values
    /// - Invocations with quad indices of 1 and 3 swap values
    Vertical = 1,
    /// A Direction of 2 indicates a diagonal swap:
    ///
    /// - Invocations with quad indices of 0 and 3 swap values
    /// - Invocations with quad indices of 1 and 2 swap values
    Diagonal = 2,
}
1406
1407/// Swap the `value` of the invocation within the quad with another invocation in the quad using Direction.
1408///
1409/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1410///
1411/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1412///
1413/// The type of `value` must be the same as Result Type.
1414///
1415/// Direction is the kind of swap to perform.
1416///
1417/// Direction must be a scalar of integer type, whose Signedness operand is 0.
1418///
1419/// Direction must come from a constant instruction.
1420///
1421/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
1422///
1423/// A Direction of 0 indicates a horizontal swap;
1424/// - Invocations with quad indices of 0 and 1 swap values
1425/// - Invocations with quad indices of 2 and 3 swap values
1426/// A Direction of 1 indicates a vertical swap;
1427/// - Invocations with quad indices of 0 and 2 swap values
1428/// - Invocations with quad indices of 1 and 3 swap values
1429/// A Direction of 2 indicates a diagonal swap;
1430/// - Invocations with quad indices of 0 and 3 swap values
1431/// - Invocations with quad indices of 1 and 2 swap values
1432///
1433/// Direction must be one of the above values.
1434///
1435/// If an active invocation reads `value` from an inactive invocation, the resulting value is undefined.
1436///
1437/// Requires Capability `GroupNonUniformQuad`.
1438///
1439/// # Safety
1440/// * This function is safe
1441/// * Result is undefined if an active invocation reads `value` from an inactive invocation
1442#[spirv_std_macros::gpu_only]
1443#[doc(alias = "OpGroupNonUniformQuadSwap")]
1444#[inline]
1445pub fn subgroup_quad_swap<const DIRECTION: u32, T: VectorOrScalar>(value: T) -> T {
1446    let mut result = T::default();
1447
1448    unsafe {
1449        asm! {
1450            "%u32 = OpTypeInt 32 0",
1451            "%subgroup = OpConstant %u32 {subgroup}",
1452            "%direction = OpConstant %u32 {direction}",
1453            "%value = OpLoad _ {value}",
1454            "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
1455            "OpStore {result} %result",
1456            subgroup = const SUBGROUP,
1457            direction = const DIRECTION,
1458            value = in(reg) &value,
1459            result = in(reg) &mut result,
1460        }
1461    }
1462
1463    result
1464}