spirv_std/arch/
subgroup.rs

1use crate::ScalarOrVector;
2#[cfg(target_arch = "spirv")]
3use crate::arch::barrier;
4#[cfg(target_arch = "spirv")]
5use crate::memory::{Scope, Semantics};
6use crate::{Float, Integer, SignedInteger, UnsignedInteger};
7#[cfg(target_arch = "spirv")]
8use core::arch::asm;
9
/// Numeric value of [`Scope::Subgroup`]. It is spliced into the inline assembly in this module
/// as the `%subgroup` scope constant and passed as a const generic argument to the barrier helpers.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
12
/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup.
/// Mostly used in group ballot operations.
///
/// The first invocation is represented by the lowest bit of the first vector component, and the last
/// invocation (up to the size of the group) by the highest bit number of the last component needed to
/// represent all invocations of the group.
pub type SubgroupMask = glam::UVec4;
16
/// Defines the class of group operation.
///
/// The explicit discriminant of each variant is the value that is passed (via `as u32`) as the
/// group-operation operand of the subgroup instructions in this module.
///
/// The enum is a plain fieldless tag, so it is `Copy` and can be passed around by value freely.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GroupOperation {
    /// A reduction operation for all values of a specific value X specified by invocations within a workgroup.
    Reduce = 0,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)]
    InclusiveScan = 1,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)].
    ExclusiveScan = 2,
    /// The [`GroupOperation`] `ClusteredReduce`.
    ///
    /// All instructions with a [`GroupOperation`] require an additional `ClusterSize` parameter when the
    /// [`GroupOperation`] is `ClusteredReduce`. To map this requirement into Rust, all functions have a base version
    /// for the non-clustered operations and a `_clustered` variant that is fixed to `ClusteredReduce` and takes the
    /// additional `ClusterSize` parameter. Prefer the `_clustered` variant of a function over combining
    /// `ClusteredReduce` with its base variant, so that the required `ClusterSize` is always supplied.
    ClusteredReduce = 3,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedReduceNV = 6,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedInclusiveScanNV = 7,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedExclusiveScanNV = 8,
}
50
51/// The function `subgroupBarrier()` enforces that all active invocations within a
52/// subgroup must execute this function before any are allowed to continue their
53/// execution, and the results of any memory stores performed using coherent
54/// variables performed prior to the call will be visible to any future
55/// coherent access to the same memory performed by any other shader invocation
56/// within the same subgroup.
57///
58/// Requires Capability `GroupNonUniform`.
59#[spirv_std_macros::gpu_only]
60#[doc(alias = "subgroupBarrier")]
61#[inline]
62pub fn subgroup_barrier() {
63    barrier::control_barrier::<
64        SUBGROUP,
65        SUBGROUP,
66        {
67            Semantics::ACQUIRE_RELEASE.bits()
68                | Semantics::UNIFORM_MEMORY.bits()
69                | Semantics::WORKGROUP_MEMORY.bits()
70                | Semantics::IMAGE_MEMORY.bits()
71        },
72    >();
73}
74
75/// The function `subgroupMemoryBarrier()` enforces the ordering of all memory
76/// transactions issued within a single shader invocation, as viewed by other
77/// invocations in the same subgroup.
78///
79/// Requires Capability `GroupNonUniform`.
80#[spirv_std_macros::gpu_only]
81#[doc(alias = "subgroupMemoryBarrier")]
82#[inline]
83pub fn subgroup_memory_barrier() {
84    barrier::memory_barrier::<
85        SUBGROUP,
86        {
87            Semantics::ACQUIRE_RELEASE.bits()
88                | Semantics::UNIFORM_MEMORY.bits()
89                | Semantics::WORKGROUP_MEMORY.bits()
90                | Semantics::IMAGE_MEMORY.bits()
91        },
92    >();
93}
94
95/// The function `subgroupMemoryBarrierBuffer()` enforces the ordering of all
96/// memory transactions to buffer variables issued within a single shader
97/// invocation, as viewed by other invocations in the same subgroup.
98///
99/// Requires Capability `GroupNonUniform`.
100#[spirv_std_macros::gpu_only]
101#[doc(alias = "subgroupMemoryBarrierBuffer")]
102#[inline]
103pub fn subgroup_memory_barrier_buffer() {
104    barrier::memory_barrier::<
105        SUBGROUP,
106        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() },
107    >();
108}
109
110/// The function `subgroupMemoryBarrierShared()` enforces the ordering of all
111/// memory transactions to shared variables issued within a single shader
112/// invocation, as viewed by other invocations in the same subgroup.
113///
114/// Only available in compute shaders.
115///
116/// Requires Capability `GroupNonUniform`.
117#[spirv_std_macros::gpu_only]
118#[doc(alias = "subgroupMemoryBarrierShared")]
119#[inline]
120pub fn subgroup_memory_barrier_shared() {
121    barrier::memory_barrier::<
122        SUBGROUP,
123        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() },
124    >();
125}
126
127/// The function `subgroupMemoryBarrierImage()` enforces the ordering of all
128/// memory transactions to images issued within a single shader invocation, as
129/// viewed by other invocations in the same subgroup.
130///
131/// Requires Capability `GroupNonUniform`.
132#[spirv_std_macros::gpu_only]
133#[doc(alias = "subgroupMemoryBarrierImage")]
134#[inline]
135pub fn subgroup_memory_barrier_image() {
136    barrier::memory_barrier::<
137        SUBGROUP,
138        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() },
139    >();
140}
141
142/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false.
143///
144/// Result Type must be a Boolean type.
145///
146/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
147///
148/// Requires Capability `GroupNonUniform`.
149#[spirv_std_macros::gpu_only]
150#[doc(alias = "OpGroupNonUniformElect")]
151#[inline]
152pub fn subgroup_elect() -> bool {
153    let mut result = false;
154
155    unsafe {
156        asm! {
157            "%bool = OpTypeBool",
158            "%u32 = OpTypeInt 32 0",
159            "%subgroup = OpConstant %u32 {subgroup}",
160            "%result = OpGroupNonUniformElect %bool %subgroup",
161            "OpStore {result} %result",
162            subgroup = const SUBGROUP,
163            result = in(reg) &mut result,
164        }
165    }
166
167    result
168}
169
170/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for all active invocations in the group, otherwise the result is false.
171///
172/// Result Type must be a Boolean type.
173///
174/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
175///
176/// `predicate` must be a Boolean type.
177///
178/// Requires Capability `GroupNonUniformVote`.
179#[spirv_std_macros::gpu_only]
180#[doc(alias = "OpGroupNonUniformAll")]
181#[inline]
182pub fn subgroup_all(predicate: bool) -> bool {
183    let mut result = false;
184
185    unsafe {
186        asm! {
187            "%bool = OpTypeBool",
188            "%u32 = OpTypeInt 32 0",
189            "%subgroup = OpConstant %u32 {subgroup}",
190            "%predicate = OpLoad _ {predicate}",
191            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
192            "OpStore {result} %result",
193            subgroup = const SUBGROUP,
194            predicate = in(reg) &predicate,
195            result = in(reg) &mut result,
196        }
197    }
198
199    result
200}
201
202/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for any active invocation in the group, otherwise the result is false.
203///
204/// Result Type must be a Boolean type.
205///
206/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
207///
208/// `predicate` must be a Boolean type.
209///
210/// Requires Capability `GroupNonUniformVote`.
211#[spirv_std_macros::gpu_only]
212#[doc(alias = "OpGroupNonUniformAny")]
213#[inline]
214pub fn subgroup_any(predicate: bool) -> bool {
215    let mut result = false;
216
217    unsafe {
218        asm! {
219            "%bool = OpTypeBool",
220            "%u32 = OpTypeInt 32 0",
221            "%subgroup = OpConstant %u32 {subgroup}",
222            "%predicate = OpLoad _ {predicate}",
223            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
224            "OpStore {result} %result",
225            subgroup = const SUBGROUP,
226            predicate = in(reg) &predicate,
227            result = in(reg) &mut result,
228        }
229    }
230
231    result
232}
233
234/// Evaluates a `value` for all active invocations in the group. The result is true if `value` is equal for all active invocations in the group. Otherwise, the result is false.
235///
236/// Result Type must be a Boolean type.
237///
238/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
239///
240/// `value` must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used.
241///
242/// Requires Capability `GroupNonUniformVote`.
243#[spirv_std_macros::gpu_only]
244#[doc(alias = "OpGroupNonUniformAllEqual")]
245#[inline]
246pub fn subgroup_all_equal<T: ScalarOrVector>(value: T) -> bool {
247    let mut result = false;
248
249    unsafe {
250        asm! {
251            "%bool = OpTypeBool",
252            "%u32 = OpTypeInt 32 0",
253            "%subgroup = OpConstant %u32 {subgroup}",
254            "%value = OpLoad _ {value}",
255            "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
256            "OpStore {result} %result",
257            subgroup = const SUBGROUP,
258            value = in(reg) &value,
259            result = in(reg) &mut result,
260        }
261    }
262
263    result
264}
265
266/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
267///
268/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
269///
270/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
271///
272/// The type of `value` must be the same as Result Type.
273///
274/// `id` must be a scalar of integer type, whose Signedness operand is 0.
275///
276/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
277///
278/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
279///
280/// Requires Capability `GroupNonUniformBallot`.
281///
282/// # Safety
283/// * `id` must be dynamically uniform
284/// * Result is undefined if `id` is an inactive invocation or out of bounds
285/// * This variant with a dynamic `id` requires at least `spv1.5` or `vulkan1.2`. Alternatively, you can use
286/// [`subgroup_broadcast_const`] with a constant `id`.
287#[spirv_std_macros::gpu_only]
288#[doc(alias = "OpGroupNonUniformBroadcast")]
289#[inline]
290pub unsafe fn subgroup_broadcast<T: ScalarOrVector>(value: T, id: u32) -> T {
291    let mut result = T::default();
292
293    unsafe {
294        asm! {
295            "%u32 = OpTypeInt 32 0",
296            "%subgroup = OpConstant %u32 {subgroup}",
297            "%value = OpLoad _ {value}",
298            "%id = OpLoad _ {id}",
299            "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
300            "OpStore {result} %result",
301            subgroup = const SUBGROUP,
302            value = in(reg) &value,
303            id = in(reg) &id,
304            result = in(reg) &mut result,
305        }
306    }
307
308    result
309}
310
311/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
312///
313/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
314///
315/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
316///
317/// The type of `value` must be the same as Result Type.
318///
319/// `id` must be a scalar of integer type, whose Signedness operand is 0.
320///
321/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
322///
323/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
324///
325/// Requires Capability `GroupNonUniformBallot`.
326///
327/// # Safety
328/// * Result is undefined if `id` is an inactive invocation or out of bounds
329#[spirv_std_macros::gpu_only]
330#[doc(alias = "OpGroupNonUniformBroadcast")]
331#[inline]
332pub unsafe fn subgroup_broadcast_const<T: ScalarOrVector, const ID: u32>(value: T) -> T {
333    let mut result = T::default();
334
335    unsafe {
336        asm! {
337            "%u32 = OpTypeInt 32 0",
338            "%subgroup = OpConstant %u32 {subgroup}",
339            "%id = OpConstant %u32 {id}",
340            "%value = OpLoad _ {value}",
341            "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
342            "OpStore {result} %result",
343            subgroup = const SUBGROUP,
344            value = in(reg) &value,
345            id = const ID,
346            result = in(reg) &mut result,
347        }
348    }
349
350    result
351}
352
353/// Result is the `value` of the invocation from the active invocation with the lowest id in the group to all active invocations in the group.
354///
355/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
356///
357/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
358///
359/// The type of `value` must be the same as Result Type.
360///
361/// Requires Capability `GroupNonUniformBallot`.
362#[spirv_std_macros::gpu_only]
363#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
364#[inline]
365pub fn subgroup_broadcast_first<T: ScalarOrVector>(value: T) -> T {
366    let mut result = T::default();
367
368    unsafe {
369        asm! {
370            "%u32 = OpTypeInt 32 0",
371            "%subgroup = OpConstant %u32 {subgroup}",
372            "%value = OpLoad _ {value}",
373            "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
374            "OpStore {result} %result",
375            subgroup = const SUBGROUP,
376            value = in(reg) &value,
377            result = in(reg) &mut result,
378        }
379    }
380
381    result
382}
383
384/// Result is a bitfield value combining the `predicate` value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the `predicate` for that invocation evaluated to true; otherwise, it is set to zero.
385///
386/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
387///
388/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
389///
390/// Execution is a Scope that identifies the group of invocations affected by this command.
391///
392/// `predicate` must be a Boolean type.
393///
394/// Requires Capability `GroupNonUniformBallot`.
395#[spirv_std_macros::gpu_only]
396#[doc(alias = "OpGroupNonUniformBallot")]
397#[inline]
398pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
399    let mut result = SubgroupMask::default();
400
401    unsafe {
402        asm! {
403            "%u32 = OpTypeInt 32 0",
404            "%subgroup = OpConstant %u32 {subgroup}",
405            "%predicate = OpLoad _ {predicate}",
406            "%result = OpGroupNonUniformBallot typeof*{result} %subgroup %predicate",
407            "OpStore {result} %result",
408            subgroup = const SUBGROUP,
409            predicate = in(reg) &predicate,
410            result = in(reg) &mut result,
411        }
412    }
413
414    result
415}
416
417/// Evaluates a `value` for all active invocations in the group, resulting in true if the bit in `value` for the corresponding invocation is set to one, otherwise the result is false.
418///
419/// Result Type must be a Boolean type.
420///
421/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
422///
423/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
424///
425/// Behavior is undefined unless `value` is the same for all invocations that execute the same dynamic instance of this instruction.
426///
427/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
428///
429/// Requires Capability `GroupNonUniformBallot`.
430///
431/// # Safety
432/// * `value` must be the same for all dynamic instances of this instruction
433#[spirv_std_macros::gpu_only]
434#[doc(alias = "OpGroupNonUniformInverseBallot")]
435#[inline]
436pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
437    let mut result = false;
438
439    unsafe {
440        asm! {
441            "%bool = OpTypeBool",
442            "%u32 = OpTypeInt 32 0",
443            "%subgroup = OpConstant %u32 {subgroup}",
444            "%value = OpLoad _ {value}",
445            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
446            "OpStore {result} %result",
447            subgroup = const SUBGROUP,
448            value = in(reg) &value,
449            result = in(reg) &mut result,
450        }
451    }
452
453    result
454}
455
456/// Evaluates a value for all active invocations in the group, resulting in true if the bit in `value` that corresponds to `index` is set to one, otherwise the result is false.
457///
458/// Result Type must be a Boolean type.
459///
460/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
461///
462/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
463///
464/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
465///
466/// `index` must be a scalar of integer type, whose Signedness operand is 0.
467///
468/// The resulting value is undefined if `index` is greater than or equal to the size of the group.
469///
470/// Requires Capability `GroupNonUniformBallot`.
471///
472/// # Safety
473/// * This function is safe
474/// * Result is undefined if `id` is out of bounds
475#[spirv_std_macros::gpu_only]
476#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
477#[inline]
478pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
479    let mut result = false;
480
481    unsafe {
482        asm! {
483            "%bool = OpTypeBool",
484            "%u32 = OpTypeInt 32 0",
485            "%subgroup = OpConstant %u32 {subgroup}",
486            "%value = OpLoad _ {value}",
487            "%index = OpLoad _ {index}",
488            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
489            "OpStore {result} %result",
490            subgroup = const SUBGROUP,
491            value = in(reg) &value,
492            index = in(reg) &index,
493            result = in(reg) &mut result,
494        }
495    }
496
497    result
498}
499
// Generates one public ballot bit-count function.
//
// `$fn_name` is the name of the generated function and `$operation` is the `GroupOperation`
// whose discriminant is emitted as the group-operation operand of
// `OpGroupNonUniformBallotBitCount`.
macro_rules! macro_subgroup_ballot_bit_count {
    ($fn_name:ident, $operation:expr) => {
        /// Counts the bits that are set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations.
        ///
        /// Result Type is a scalar of integer type, whose Signedness operand is 0.
        ///
        /// Execution is a Scope that identifies the group of invocations affected by this command. It is always Subgroup.
        ///
        /// The identity I for Operation is 0.
        ///
        /// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
        ///
        /// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
        ///
        /// Requires Capability `GroupNonUniformBallot`.
        #[spirv_std_macros::gpu_only]
        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
        #[inline]
        pub fn $fn_name(value: SubgroupMask) -> u32 {
            // Overwritten by the `OpStore` at the end of the assembly.
            let mut count = 0u32;

            // SAFETY: the assembly only reads `value` and writes `count` through the pointers
            // it is given.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($operation as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut count,
                );
            }

            count
        }
    };
}
539
// Instantiate one public bit-count function per group operation; each one passes its
// `GroupOperation` as the group-operation operand of `OpGroupNonUniformBallotBitCount`.
macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
macro_subgroup_ballot_bit_count!(
    subgroup_ballot_inclusive_bit_count,
    GroupOperation::InclusiveScan
);
macro_subgroup_ballot_bit_count!(
    subgroup_ballot_exclusive_bit_count,
    GroupOperation::ExclusiveScan
);
549
550/// Find the least significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
551///
552/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
553///
554/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
555///
556/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
557///
558/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
559///
560/// Requires Capability `GroupNonUniformBallot`.
561///
562/// # Safety
563/// * This function is safe
564/// * Result is undefined if `id` is an inactive invocation or out of bounds
565#[spirv_std_macros::gpu_only]
566#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
567#[inline]
568pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
569    let mut result = 0;
570
571    unsafe {
572        asm! {
573            "%u32 = OpTypeInt 32 0",
574            "%subgroup = OpConstant %u32 {subgroup}",
575            "%value = OpLoad _ {value}",
576            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
577            "OpStore {result} %result",
578            subgroup = const SUBGROUP,
579            value = in(reg) &value,
580            result = in(reg) &mut result,
581        }
582    }
583
584    result
585}
586
587/// Find the most significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
588///
589/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
590///
591/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
592///
593/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
594///
595/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
596///
597/// Requires Capability `GroupNonUniformBallot`.
598#[spirv_std_macros::gpu_only]
599#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
600#[inline]
601pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
602    let mut result = 0;
603
604    unsafe {
605        asm! {
606            "%u32 = OpTypeInt 32 0",
607            "%subgroup = OpConstant %u32 {subgroup}",
608            "%value = OpLoad _ {value}",
609            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
610            "OpStore {result} %result",
611            subgroup = const SUBGROUP,
612            value = in(reg) &value,
613            result = in(reg) &mut result,
614        }
615    }
616
617    result
618}
619
620/// Result is the `value` of the invocation identified by the id `id`.
621///
622/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
623///
624/// Execution is a Scope that identifies the group of invocations affected by this command.
625///
626/// The type of `value` must be the same as Result Type.
627///
628/// `id` must be a scalar of integer type, whose Signedness operand is 0.
629///
630/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
631///
632/// Requires Capability `GroupNonUniformShuffle`.
633///
634/// # Safety
635/// * This function is safe
636/// * Result is undefined if `id` is an inactive invocation or out of bounds
637#[spirv_std_macros::gpu_only]
638#[doc(alias = "OpGroupNonUniformShuffle")]
639#[inline]
640pub fn subgroup_shuffle<T: ScalarOrVector>(value: T, id: u32) -> T {
641    let mut result = T::default();
642
643    unsafe {
644        asm! {
645            "%u32 = OpTypeInt 32 0",
646            "%subgroup = OpConstant %u32 {subgroup}",
647            "%value = OpLoad _ {value}",
648            "%id = OpLoad _ {id}",
649            "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
650            "OpStore {result} %result",
651            subgroup = const SUBGROUP,
652            value = in(reg) &value,
653            id = in(reg) &id,
654            result = in(reg) &mut result,
655        }
656    }
657
658    result
659}
660
661/// Result is the `value` of the invocation identified by the current invocation’s id within the group xor’ed with Mask.
662///
663/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
664///
665/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
666///
667/// The type of `value` must be the same as Result Type.
668///
669/// Mask must be a scalar of integer type, whose Signedness operand is 0.
670///
671/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group.
672///
673/// Requires Capability `GroupNonUniformShuffle`.
674///
675/// # Safety
676/// * This function is safe
677/// * Result is undefined if current invocation’s id within the group xor’ed with `mask` is an inactive invocation or out of bounds
678#[spirv_std_macros::gpu_only]
679#[doc(alias = "OpGroupNonUniformShuffleXor")]
680#[inline]
681pub fn subgroup_shuffle_xor<T: ScalarOrVector>(value: T, mask: u32) -> T {
682    let mut result = T::default();
683
684    unsafe {
685        asm! {
686            "%u32 = OpTypeInt 32 0",
687            "%subgroup = OpConstant %u32 {subgroup}",
688            "%value = OpLoad _ {value}",
689            "%mask = OpLoad _ {mask}",
690            "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
691            "OpStore {result} %result",
692            subgroup = const SUBGROUP,
693            value = in(reg) &value,
694            mask = in(reg) &mask,
695            result = in(reg) &mut result,
696        }
697    }
698
699    result
700}
701
702/// Result is the `value` of the invocation identified by the current invocation’s id within the group - Delta.
703///
704/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
705///
706/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
707///
708/// The type of `value` must be the same as Result Type.
709///
710/// Delta must be a scalar of integer type, whose Signedness operand is 0.
711///
712/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive.
713///
714/// Requires Capability `GroupNonUniformShuffleRelative`.
715///
716/// # Safety
717/// * This function is safe
718/// * Result is undefined if `delta` is greater than the current invocation’s id within the group or if the selected lane is inactive
719#[spirv_std_macros::gpu_only]
720#[doc(alias = "OpGroupNonUniformShuffleUp")]
721#[inline]
722pub fn subgroup_shuffle_up<T: ScalarOrVector>(value: T, delta: u32) -> T {
723    let mut result = T::default();
724
725    unsafe {
726        asm! {
727            "%u32 = OpTypeInt 32 0",
728            "%subgroup = OpConstant %u32 {subgroup}",
729            "%value = OpLoad _ {value}",
730            "%delta = OpLoad _ {delta}",
731            "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
732            "OpStore {result} %result",
733            subgroup = const SUBGROUP,
734            value = in(reg) &value,
735            delta = in(reg) &delta,
736            result = in(reg) &mut result,
737        }
738    }
739
740    result
741}
742
743/// Result is the `value` of the invocation identified by the current invocation’s id within the group + Delta.
744///
745/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
746///
747/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
748///
749/// The type of `value` must be the same as Result Type.
750///
751/// Delta must be a scalar of integer type, whose Signedness operand is 0.
752///
753/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group.
754///
755/// Requires Capability `GroupNonUniformShuffleRelative`.
756///
757/// # Safety
758/// * This function is safe
759/// * Result is undefined if `delta` is greater than or equal to the size of the group, or if the current invocation’s id within the group + `delta` is either an inactive invocation or greater than or equal to the size of the group.
760#[spirv_std_macros::gpu_only]
761#[doc(alias = "OpGroupNonUniformShuffleDown")]
762#[inline]
763pub fn subgroup_shuffle_down<T: ScalarOrVector>(value: T, delta: u32) -> T {
764    let mut result = T::default();
765
766    unsafe {
767        asm! {
768            "%u32 = OpTypeInt 32 0",
769            "%subgroup = OpConstant %u32 {subgroup}",
770            "%value = OpLoad _ {value}",
771            "%delta = OpLoad _ {delta}",
772            "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
773            "OpStore {result} %result",
774            subgroup = const SUBGROUP,
775            value = in(reg) &value,
776            delta = in(reg) &delta,
777            result = in(reg) &mut result,
778        }
779    }
780
781    result
782}
783
// Generates one safe subgroup function per `$fn_name, $operation` pair.
//
// * `$elem`: the type required for `ScalarOrVector::Scalar` of the generic argument.
// * `$opcode`: the SPIR-V instruction name; spliced into the asm template and used as a doc alias.
// * `$fn_name, $operation`: name of the generated function and the group operation it passes on.
// * `$doc_text`: doc string attached to every generated function.
macro_rules! macro_subgroup_op {
    ($elem:ty, $opcode:literal, $($fn_name:ident, $operation:expr),+; $doc_text:literal) => { $(
        #[doc = $doc_text]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $opcode)]
        #[inline]
        pub fn $fn_name<I: ScalarOrVector<Scalar = $elem>>(value: I) -> I {
            // Output slot for the asm block; the `OpStore` below overwrites this default.
            let mut output = I::default();
            // SAFETY: the asm block only loads through the reference to `value` and stores
            // through the reference to `output`, both of which are live locals.
            unsafe {
                asm! {
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    concat!("%result = ", $opcode, " _ %subgroup {groupop} %value"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($operation as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut output,
                }
            }
            output
        }
    )+ };
}
811
// Generates a single `unsafe` subgroup function fixed to `GroupOperation::ClusteredReduce`.
//
// * `$elem`: the type required for `ScalarOrVector::Scalar` of the generic argument.
// * `$opcode`: the SPIR-V instruction name; spliced into the asm template and used as a doc alias.
// * `$fn_name`: name of the generated function.
// * `$doc_text`: doc string attached to the generated function.
//
// The generated function takes the cluster size as the const generic `CLUSTER_SIZE`.
macro_rules! macro_subgroup_op_clustered {
    ($elem:ty, $opcode:literal, $fn_name:ident; $doc_text:literal) => {
        #[doc = $doc_text]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $opcode)]
        #[inline]
        pub unsafe fn $fn_name<const CLUSTER_SIZE: u32, I: ScalarOrVector<Scalar = $elem>>(
            value: I,
        ) -> I {
            // Compile-time validation of the cluster size. The remaining requirement,
            // `ClusterSize` must not be greater than the size of the group, cannot be
            // verified with static assertions and is left to the caller.
            const {
                assert!(CLUSTER_SIZE > 0, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
            }

            // Output slot for the asm block; the `OpStore` below overwrites this default.
            let mut output = I::default();

            // SAFETY: the asm block only loads through the reference to `value` and stores
            // through the reference to `output`, both of which are live locals.
            unsafe {
                asm! {
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $opcode, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut output,
                }
            }

            output
        }
    };
}
853
854// add
855macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
856An integer add group operation of all `value` operands contributed by active invocations in the group.
857
858Result Type must be a scalar or vector of integer type.
859
860Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
861
862The identity I for Operation is 0.
863
864The type of `value` must be the same as Result Type.
865
866Requires Capability `GroupNonUniformArithmetic`.
867");
868macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
869An integer add group operation of all `value` operands contributed by active invocations in the group.
870
871Result Type must be a scalar or vector of integer type.
872
873Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
874
875The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
876
877The type of `value` must be the same as Result Type.
878
879`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
880
881Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
882
883# Safety
884* `ClusterSize` must not be greater than the size of the group
885");
886macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
887A floating point add group operation of all `value` operands contributed by active invocations in the group.
888
889Result Type must be a scalar or vector of floating-point type.
890
891Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
892
893The identity I for Operation is 0.
894
895The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
896
897Requires Capability `GroupNonUniformArithmetic`.
898");
899macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
900A floating point add group operation of all `value` operands contributed by active invocations in the group.
901
902Result Type must be a scalar or vector of floating-point type.
903
904Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
905
906The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
907
908The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
909
910`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
911
912Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
913
914# Safety
915* `ClusterSize` must not be greater than the size of the group
916");
917
918// mul
919macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
920An integer multiply group operation of all `value` operands contributed by active invocations in the group.
921
922Result Type must be a scalar or vector of integer type.
923
924Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
925
926The identity I for Operation is 1.
927
928The type of `value` must be the same as Result Type.
929
930Requires Capability `GroupNonUniformArithmetic`.
931");
932macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
933An integer multiply group operation of all `value` operands contributed by active invocations in the group.
934
935Result Type must be a scalar or vector of integer type.
936
937Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
938
939The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
940
941The type of `value` must be the same as Result Type.
942
943`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
944
945Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
946
947# Safety
948* `ClusterSize` must not be greater than the size of the group
949");
950macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
951A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
952
953Result Type must be a scalar or vector of floating-point type.
954
955Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
956
957The identity I for Operation is 1.
958
959The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
960
961Requires Capability `GroupNonUniformArithmetic`.
962");
963macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
964A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
965
966Result Type must be a scalar or vector of floating-point type.
967
968Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
969
970The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
971
972The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
973
974`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
975
976Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
977
978# Safety
979* `ClusterSize` must not be greater than the size of the group
980");
981
982// min
983macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
984A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
985
986Result Type must be a scalar or vector of integer type.
987
988Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
989
990The identity I for Operation is `INT_MAX`.
991
992The type of `value` must be the same as Result Type.
993
994Requires Capability `GroupNonUniformArithmetic`.
995");
996macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
997A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
998
999Result Type must be a scalar or vector of integer type.
1000
1001Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1002
1003The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1004
1005The type of `value` must be the same as Result Type.
1006
1007`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1008
1009Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1010
1011# Safety
1012* `ClusterSize` must not be greater than the size of the group
1013");
1014macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
1015An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
1016
1017Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1018
1019Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1020
1021The identity I for Operation is `UINT_MAX`.
1022
1023The type of `value` must be the same as Result Type.
1024
1025Requires Capability `GroupNonUniformArithmetic`.
1026");
1027macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
1028An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
1029
1030Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1031
1032Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1033
1034The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1035
1036The type of `value` must be the same as Result Type.
1037
1038`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1039
1040Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1041
1042# Safety
1043* `ClusterSize` must not be greater than the size of the group
1044");
1045macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
1046A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1047
1048Result Type must be a scalar or vector of floating-point type.
1049
1050Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1051
1052The identity I for Operation is +INF.
1053
1054The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1055
1056Requires Capability `GroupNonUniformArithmetic`.
1057");
1058macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
1059A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1060
1061Result Type must be a scalar or vector of floating-point type.
1062
1063Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1064
1065The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1066
1067The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1068
1069`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1070
1071Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1072
1073# Safety
1074* `ClusterSize` must not be greater than the size of the group
1075");
1076
1077// max
1078macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
1079A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1080
1081Result Type must be a scalar or vector of integer type.
1082
1083Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1084
1085The identity I for Operation is `INT_MIN`.
1086
1087The type of `value` must be the same as Result Type.
1088
1089Requires Capability `GroupNonUniformArithmetic`.
1090");
1091macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
1092A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1093
1094Result Type must be a scalar or vector of integer type.
1095
1096Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1097
1098The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1099
1100The type of `value` must be the same as Result Type.
1101
1102`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1103
1104Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1105
1106# Safety
1107* `ClusterSize` must not be greater than the size of the group
1108");
1109macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
1110An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1111
1112Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1113
1114Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1115
1116The identity I for Operation is 0.
1117
1118The type of `value` must be the same as Result Type.
1119
1120Requires Capability `GroupNonUniformArithmetic`.
1121");
1122macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
1123An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1124
1125Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1126
1127Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1128
1129The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1130
1131The type of `value` must be the same as Result Type.
1132
1133`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1134
1135Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1136
1137# Safety
1138* `ClusterSize` must not be greater than the size of the group
1139");
1140macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
1141A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1142
1143Result Type must be a scalar or vector of floating-point type.
1144
1145Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1146
1147The identity I for Operation is -INF.
1148
1149The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1150
1151Requires Capability `GroupNonUniformArithmetic`.
1152");
1153macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
1154A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1155
1156Result Type must be a scalar or vector of floating-point type.
1157
1158Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1159
1160The identity I for Operation is -INF.
1161
1162The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1163
1164Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1165
1166# Safety
1167* `ClusterSize` must not be greater than the size of the group
1168");
1169
1170// and
1171macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
1172A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1173
1174Result Type must be a scalar or vector of integer type.
1175
1176Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1177
1178The identity I for Operation is ~0.
1179
1180The type of `value` must be the same as Result Type.
1181
1182Requires Capability `GroupNonUniformArithmetic`.
1183");
1184macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
1185A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1186
1187Result Type must be a scalar or vector of integer type.
1188
1189Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1190
1191The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1192
1193The type of `value` must be the same as Result Type.
1194
1195`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1196
1197Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1198
1199# Safety
1200* `ClusterSize` must not be greater than the size of the group
1201");
1202
1203// or
1204macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
1205A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1206
1207Result Type must be a scalar or vector of integer type.
1208
1209Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1210
1211The identity I for Operation is 0.
1212
1213The type of `value` must be the same as Result Type.
1214
1215Requires Capability `GroupNonUniformArithmetic`.
1216");
1217macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
1218A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1219
1220Result Type must be a scalar or vector of integer type.
1221
1222Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1223
1224The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1225
1226The type of `value` must be the same as Result Type.
1227
1228`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1229
1230Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1231
1232# Safety
1233* `ClusterSize` must not be greater than the size of the group
1234");
1235
1236// xor
1237macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
1238A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1239
1240Result Type must be a scalar or vector of integer type.
1241
1242Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1243
1244The identity I for Operation is 0.
1245
1246The type of `value` must be the same as Result Type.
1247
1248Requires Capability `GroupNonUniformArithmetic`.
1249");
1250macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
1251A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1252
1253Result Type must be a scalar or vector of integer type.
1254
1255Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1256
1257The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1258
1259The type of `value` must be the same as Result Type.
1260
1261`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1262
1263Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1264
1265# Safety
1266* `ClusterSize` must not be greater than the size of the group
1267");
1268
1269// logical and
1270macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
1271A logical and group operation of all `value` operands contributed by active invocations in the group.
1272
1273Result Type must be a scalar or vector of Boolean type.
1274
1275Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1276
1277The identity I for Operation is ~0.
1278
1279The type of `value` must be the same as Result Type.
1280
1281Requires Capability `GroupNonUniformArithmetic`.
1282");
1283macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
1284A logical and group operation of all `value` operands contributed by active invocations in the group.
1285
1286Result Type must be a scalar or vector of Boolean type.
1287
1288Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1289
1290The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1291
1292The type of `value` must be the same as Result Type.
1293
1294`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1295
1296Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1297
1298# Safety
1299* `ClusterSize` must not be greater than the size of the group
1300");
1301
1302// logical or
1303macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
1304A logical or group operation of all `value` operands contributed by active invocations in the group.
1305
1306Result Type must be a scalar or vector of Boolean type.
1307
1308Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1309
1310The identity I for Operation is 0.
1311
1312The type of `value` must be the same as Result Type.
1313
1314Requires Capability `GroupNonUniformArithmetic`.
1315");
1316macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
1317A logical or group operation of all `value` operands contributed by active invocations in the group.
1318
1319Result Type must be a scalar or vector of Boolean type.
1320
1321Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1322
1323The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1324
1325The type of `value` must be the same as Result Type.
1326
1327`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1328
1329Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1330
1331# Safety
1332* `ClusterSize` must not be greater than the size of the group
1333");
1334
1335// logical xor
1336macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
1337A logical xor group operation of all `value` operands contributed by active invocations in the group.
1338
1339Result Type must be a scalar or vector of Boolean type.
1340
1341Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1342
1343The identity I for Operation is 0.
1344
1345The type of `value` must be the same as Result Type.
1346
1347Requires Capability `GroupNonUniformArithmetic`.
1348");
1349macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
1350A logical xor group operation of all `value` operands contributed by active invocations in the group.
1351
1352Result Type must be a scalar or vector of Boolean type.
1353
1354Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1355
1356The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1357
1358The type of `value` must be the same as Result Type.
1359
1360`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1361
1362Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1363
1364# Safety
1365* `ClusterSize` must not be greater than the size of the group
1366");
1367
1368/// Result is the `value` of the invocation within the quad with a quad index equal to `index`.
1369///
1370/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1371///
1372/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1373///
1374/// The type of `value` must be the same as Result Type.
1375///
1376/// `index` must be a scalar of integer type, whose Signedness operand is 0.
1377///
1378/// Before version 1.5, `index` must come from a constant instruction. Starting with version 1.5, `index` must be dynamically uniform.
1379///
1380/// If the value of `index` is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
1381///
1382/// Requires Capability `GroupNonUniformQuad`.
1383///
1384/// # Safety
1385/// * This function is safe
1386/// * Result is undefined if the value of `index` is greater than or equal to 4, or refers to an inactive invocation
1387#[spirv_std_macros::gpu_only]
1388#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
1389#[inline]
1390pub fn subgroup_quad_broadcast<T: ScalarOrVector>(value: T, index: u32) -> T {
1391    let mut result = T::default();
1392
1393    unsafe {
1394        asm! {
1395            "%u32 = OpTypeInt 32 0",
1396            "%subgroup = OpConstant %u32 {subgroup}",
1397            "%value = OpLoad _ {value}",
1398            "%index = OpLoad _ {index}",
1399            "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
1400            "OpStore {result} %result",
1401            subgroup = const SUBGROUP,
1402            value = in(reg) &value,
1403            index = in(reg) &index,
1404            result = in(reg) &mut result,
1405        }
1406    }
1407
1408    result
1409}
1410
/// Direction is the kind of swap to perform.
///
/// Each variant's discriminant is the Direction value described on it. [`subgroup_quad_swap`]
/// takes the direction as a `u32` const generic, so a variant is passed with a cast, e.g.
/// `QuadDirection::Horizontal as u32`.
///
/// Direction must be a scalar of integer type, whose Signedness operand is 0.
///
/// Direction must come from a constant instruction.
///
/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
///
/// Requires Capability `GroupNonUniformQuad`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadDirection {
    /// A Direction of 0 indicates a horizontal swap;
    /// - Invocations with quad indices of 0 and 1 swap values
    /// - Invocations with quad indices of 2 and 3 swap values
    Horizontal = 0,
    /// A Direction of 1 indicates a vertical swap;
    /// - Invocations with quad indices of 0 and 2 swap values
    /// - Invocations with quad indices of 1 and 3 swap values
    Vertical = 1,
    /// A Direction of 2 indicates a diagonal swap;
    /// - Invocations with quad indices of 0 and 3 swap values
    /// - Invocations with quad indices of 1 and 2 swap values
    Diagonal = 2,
}
1434
1435/// Swap the `value` of the invocation within the quad with another invocation in the quad using Direction.
1436///
1437/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1438///
1439/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1440///
1441/// The type of `value` must be the same as Result Type.
1442///
1443/// Direction is the kind of swap to perform.
1444///
1445/// Direction must be a scalar of integer type, whose Signedness operand is 0.
1446///
1447/// Direction must come from a constant instruction.
1448///
1449/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
1450///
1451/// A Direction of 0 indicates a horizontal swap;
1452/// - Invocations with quad indices of 0 and 1 swap values
1453/// - Invocations with quad indices of 2 and 3 swap values
1454/// A Direction of 1 indicates a vertical swap;
1455/// - Invocations with quad indices of 0 and 2 swap values
1456/// - Invocations with quad indices of 1 and 3 swap values
1457/// A Direction of 2 indicates a diagonal swap;
1458/// - Invocations with quad indices of 0 and 3 swap values
1459/// - Invocations with quad indices of 1 and 2 swap values
1460///
1461/// Direction must be one of the above values.
1462///
1463/// If an active invocation reads `value` from an inactive invocation, the resulting value is undefined.
1464///
1465/// Requires Capability `GroupNonUniformQuad`.
1466///
1467/// # Safety
1468/// * This function is safe
1469/// * Result is undefined if an active invocation reads `value` from an inactive invocation
1470#[spirv_std_macros::gpu_only]
1471#[doc(alias = "OpGroupNonUniformQuadSwap")]
1472#[inline]
1473pub fn subgroup_quad_swap<const DIRECTION: u32, T: ScalarOrVector>(value: T) -> T {
1474    let mut result = T::default();
1475
1476    unsafe {
1477        asm! {
1478            "%u32 = OpTypeInt 32 0",
1479            "%subgroup = OpConstant %u32 {subgroup}",
1480            "%direction = OpConstant %u32 {direction}",
1481            "%value = OpLoad _ {value}",
1482            "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
1483            "OpStore {result} %result",
1484            subgroup = const SUBGROUP,
1485            direction = const DIRECTION,
1486            value = in(reg) &value,
1487            result = in(reg) &mut result,
1488        }
1489    }
1490
1491    result
1492}