spirv_std/arch/
subgroup.rs

1#[cfg(target_arch = "spirv")]
2use crate::ScalarOrVectorTransform;
3#[cfg(target_arch = "spirv")]
4use crate::arch::{asm, barrier};
5#[cfg(target_arch = "spirv")]
6use crate::memory::{Scope, Semantics};
7use crate::{Float, Integer, ScalarComposite, ScalarOrVector, SignedInteger, UnsignedInteger};
8
// Numeric value of the `Subgroup` scope as a `u32`. It is passed as a const generic to the
// barrier helpers and as the `{subgroup}` constant operand of the inline SPIR-V in this module.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
11
/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup.
///
/// The first invocation is represented by the lowest bit of the first vector component; further
/// invocations follow in increasing bit order across the components.
///
/// Mostly used in group ballot operations, e.g. as the result of [`subgroup_ballot`] or as the
/// input of [`subgroup_ballot_bit_extract`].
pub type SubgroupMask = glam::UVec4;
15
/// Defines the class of group operation.
///
/// The discriminant of each variant is the numeric value that is emitted as the group operation
/// operand of an instruction (obtained with `as u32`).
///
/// The enum is a plain value type: it is `Copy`, comparable and hashable.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum GroupOperation {
    /// A reduction operation for all values of a specific value X specified by invocations within a workgroup.
    Reduce = 0,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)]
    InclusiveScan = 1,
    /// A binary operation with an identity I and n (where n is the size of the workgroup)
    /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)].
    ExclusiveScan = 2,
    /// The [`GroupOperation`] `ClusteredReduce`.
    ///
    /// All instructions with a [`GroupOperation`] require an additional `ClusterSize` parameter when the
    /// [`GroupOperation`] is `ClusteredReduce`. To map this requirement into Rust, all functions have a base version
    /// accepting [`GroupOperation`] as a const generic, and a `_clustered` variant that is fixed to `ClusteredReduce`
    /// and takes the additional `ClusterSize` parameter as a const generic.
    ///
    /// Do not pass this variant to the base variant of a function: the base variant has no way to supply the
    /// required `ClusterSize` parameter.
    ClusteredReduce = 3,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedReduceNV = 6,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedInclusiveScanNV = 7,
    /// Reserved.
    ///
    /// Requires Capability `GroupNonUniformPartitionedNV`.
    PartitionedExclusiveScanNV = 8,
}
49
50/// The function `subgroupBarrier()` enforces that all active invocations within a
51/// subgroup must execute this function before any are allowed to continue their
52/// execution, and the results of any memory stores performed using coherent
53/// variables performed prior to the call will be visible to any future
54/// coherent access to the same memory performed by any other shader invocation
55/// within the same subgroup.
56///
57/// Requires Capability `GroupNonUniform`.
58#[spirv_std_macros::gpu_only]
59#[doc(alias = "subgroupBarrier")]
60#[inline]
61pub fn subgroup_barrier() {
62    barrier::control_barrier::<
63        SUBGROUP,
64        SUBGROUP,
65        {
66            Semantics::ACQUIRE_RELEASE.bits()
67                | Semantics::UNIFORM_MEMORY.bits()
68                | Semantics::WORKGROUP_MEMORY.bits()
69                | Semantics::IMAGE_MEMORY.bits()
70        },
71    >();
72}
73
74/// The function `subgroupMemoryBarrier()` enforces the ordering of all memory
75/// transactions issued within a single shader invocation, as viewed by other
76/// invocations in the same subgroup.
77///
78/// Requires Capability `GroupNonUniform`.
79#[spirv_std_macros::gpu_only]
80#[doc(alias = "subgroupMemoryBarrier")]
81#[inline]
82pub fn subgroup_memory_barrier() {
83    barrier::memory_barrier::<
84        SUBGROUP,
85        {
86            Semantics::ACQUIRE_RELEASE.bits()
87                | Semantics::UNIFORM_MEMORY.bits()
88                | Semantics::WORKGROUP_MEMORY.bits()
89                | Semantics::IMAGE_MEMORY.bits()
90        },
91    >();
92}
93
94/// The function `subgroupMemoryBarrierBuffer()` enforces the ordering of all
95/// memory transactions to buffer variables issued within a single shader
96/// invocation, as viewed by other invocations in the same subgroup.
97///
98/// Requires Capability `GroupNonUniform`.
99#[spirv_std_macros::gpu_only]
100#[doc(alias = "subgroupMemoryBarrierBuffer")]
101#[inline]
102pub fn subgroup_memory_barrier_buffer() {
103    barrier::memory_barrier::<
104        SUBGROUP,
105        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() },
106    >();
107}
108
109/// The function `subgroupMemoryBarrierShared()` enforces the ordering of all
110/// memory transactions to shared variables issued within a single shader
111/// invocation, as viewed by other invocations in the same subgroup.
112///
113/// Only available in compute shaders.
114///
115/// Requires Capability `GroupNonUniform`.
116#[spirv_std_macros::gpu_only]
117#[doc(alias = "subgroupMemoryBarrierShared")]
118#[inline]
119pub fn subgroup_memory_barrier_shared() {
120    barrier::memory_barrier::<
121        SUBGROUP,
122        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() },
123    >();
124}
125
126/// The function `subgroupMemoryBarrierImage()` enforces the ordering of all
127/// memory transactions to images issued within a single shader invocation, as
128/// viewed by other invocations in the same subgroup.
129///
130/// Requires Capability `GroupNonUniform`.
131#[spirv_std_macros::gpu_only]
132#[doc(alias = "subgroupMemoryBarrierImage")]
133#[inline]
134pub fn subgroup_memory_barrier_image() {
135    barrier::memory_barrier::<
136        SUBGROUP,
137        { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() },
138    >();
139}
140
141/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false.
142///
143/// Result Type must be a Boolean type.
144///
145/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
146///
147/// Requires Capability `GroupNonUniform`.
148#[spirv_std_macros::gpu_only]
149#[doc(alias = "OpGroupNonUniformElect")]
150#[inline]
151pub fn subgroup_elect() -> bool {
152    let mut result = false;
153
154    unsafe {
155        asm! {
156            "%bool = OpTypeBool",
157            "%u32 = OpTypeInt 32 0",
158            "%subgroup = OpConstant %u32 {subgroup}",
159            "%result = OpGroupNonUniformElect %bool %subgroup",
160            "OpStore {result} %result",
161            subgroup = const SUBGROUP,
162            result = in(reg) &mut result,
163        }
164    }
165
166    result
167}
168
169/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for all active invocations in the group, otherwise the result is false.
170///
171/// Result Type must be a Boolean type.
172///
173/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
174///
175/// `predicate` must be a Boolean type.
176///
177/// Requires Capability `GroupNonUniformVote`.
178#[spirv_std_macros::gpu_only]
179#[doc(alias = "OpGroupNonUniformAll")]
180#[inline]
181pub fn subgroup_all(predicate: bool) -> bool {
182    let mut result = false;
183
184    unsafe {
185        asm! {
186            "%bool = OpTypeBool",
187            "%u32 = OpTypeInt 32 0",
188            "%subgroup = OpConstant %u32 {subgroup}",
189            "%predicate = OpLoad _ {predicate}",
190            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
191            "OpStore {result} %result",
192            subgroup = const SUBGROUP,
193            predicate = in(reg) &predicate,
194            result = in(reg) &mut result,
195        }
196    }
197
198    result
199}
200
201/// Evaluates a `predicate` for all active invocations in the group, resulting in true if `predicate` evaluates to true for any active invocation in the group, otherwise the result is false.
202///
203/// Result Type must be a Boolean type.
204///
205/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
206///
207/// `predicate` must be a Boolean type.
208///
209/// Requires Capability `GroupNonUniformVote`.
210#[spirv_std_macros::gpu_only]
211#[doc(alias = "OpGroupNonUniformAny")]
212#[inline]
213pub fn subgroup_any(predicate: bool) -> bool {
214    let mut result = false;
215
216    unsafe {
217        asm! {
218            "%bool = OpTypeBool",
219            "%u32 = OpTypeInt 32 0",
220            "%subgroup = OpConstant %u32 {subgroup}",
221            "%predicate = OpLoad _ {predicate}",
222            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
223            "OpStore {result} %result",
224            subgroup = const SUBGROUP,
225            predicate = in(reg) &predicate,
226            result = in(reg) &mut result,
227        }
228    }
229
230    result
231}
232
233/// Evaluates a `value` for all active invocations in the group. The result is true if `value` is equal for all active invocations in the group. Otherwise, the result is false.
234///
235/// Result Type must be a Boolean type.
236///
237/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
238///
239/// `value` must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used.
240///
241/// Requires Capability `GroupNonUniformVote`.
242#[spirv_std_macros::gpu_only]
243#[doc(alias = "OpGroupNonUniformAllEqual")]
244#[inline]
245pub fn subgroup_all_equal<T: ScalarComposite>(value: T) -> bool {
246    struct Transform(bool);
247
248    impl ScalarOrVectorTransform for Transform {
249        #[inline]
250        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
251            let mut result = false;
252            unsafe {
253                asm! {
254                    "%bool = OpTypeBool",
255                    "%u32 = OpTypeInt 32 0",
256                    "%subgroup = OpConstant %u32 {subgroup}",
257                    "%value = OpLoad _ {value}",
258                    "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
259                    "OpStore {result} %result",
260                    subgroup = const SUBGROUP,
261                    value = in(reg) &value,
262                    result = in(reg) &mut result,
263                }
264            }
265            self.0 &= result;
266            value
267        }
268    }
269
270    let mut transform = Transform(true);
271    // ignore returned value
272    value.transform(&mut transform);
273    transform.0
274}
275
276/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
277///
278/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
279///
280/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
281///
282/// The type of `value` must be the same as Result Type.
283///
284/// `id` must be a scalar of integer type, whose Signedness operand is 0.
285///
286/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
287///
288/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
289///
290/// Requires Capability `GroupNonUniformBallot`.
291///
292/// # Safety
293/// * `id` must be dynamically uniform
294/// * Result is undefined if `id` is an inactive invocation or out of bounds
295/// * This variant with a dynamic `id` requires at least `spv1.5` or `vulkan1.2`. Alternatively, you can use
296/// [`subgroup_broadcast_const`] with a constant `id`.
297#[spirv_std_macros::gpu_only]
298#[doc(alias = "OpGroupNonUniformBroadcast")]
299#[inline]
300pub unsafe fn subgroup_broadcast<T: ScalarComposite>(value: T, id: u32) -> T {
301    struct Transform {
302        id: u32,
303    }
304
305    impl ScalarOrVectorTransform for Transform {
306        #[inline]
307        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
308            let mut result = T::default();
309            unsafe {
310                asm! {
311                    "%u32 = OpTypeInt 32 0",
312                    "%subgroup = OpConstant %u32 {subgroup}",
313                    "%value = OpLoad _ {value}",
314                    "%id = OpLoad _ {id}",
315                    "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
316                    "OpStore {result} %result",
317                    subgroup = const SUBGROUP,
318                    value = in(reg) &value,
319                    id = in(reg) &self.id,
320                    result = in(reg) &mut result,
321                }
322            }
323            result
324        }
325    }
326
327    value.transform(&mut Transform { id })
328}
329
330/// Result is the `value` of the invocation identified by the id `id` to all active invocations in the group.
331///
332/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
333///
334/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
335///
336/// The type of `value` must be the same as Result Type.
337///
338/// `id` must be a scalar of integer type, whose Signedness operand is 0.
339///
340/// Before version 1.5, `id` must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when `id` is not dynamically uniform.
341///
342/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
343///
344/// Requires Capability `GroupNonUniformBallot`.
345///
346/// # Safety
347/// * Result is undefined if `id` is an inactive invocation or out of bounds
348#[spirv_std_macros::gpu_only]
349#[doc(alias = "OpGroupNonUniformBroadcast")]
350#[inline]
351pub unsafe fn subgroup_broadcast_const<T: ScalarOrVector, const ID: u32>(value: T) -> T {
352    struct Transform<const ID: u32>;
353
354    impl<const ID: u32> ScalarOrVectorTransform for Transform<ID> {
355        #[inline]
356        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
357            let mut result = T::default();
358            unsafe {
359                asm! {
360                    "%u32 = OpTypeInt 32 0",
361                    "%subgroup = OpConstant %u32 {subgroup}",
362                    "%id = OpConstant %u32 {id}",
363                    "%value = OpLoad _ {value}",
364                    "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
365                    "OpStore {result} %result",
366                    subgroup = const SUBGROUP,
367                    value = in(reg) &value,
368                    id = const ID,
369                    result = in(reg) &mut result,
370                }
371            }
372            result
373        }
374    }
375
376    value.transform(&mut Transform::<ID>)
377}
378
379/// Result is the `value` of the invocation from the active invocation with the lowest id in the group to all active invocations in the group.
380///
381/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
382///
383/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
384///
385/// The type of `value` must be the same as Result Type.
386///
387/// Requires Capability `GroupNonUniformBallot`.
388#[spirv_std_macros::gpu_only]
389#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
390#[inline]
391pub fn subgroup_broadcast_first<T: ScalarComposite>(value: T) -> T {
392    struct Transform;
393
394    impl ScalarOrVectorTransform for Transform {
395        #[inline]
396        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
397            let mut result = T::default();
398            unsafe {
399                asm! {
400                    "%u32 = OpTypeInt 32 0",
401                    "%subgroup = OpConstant %u32 {subgroup}",
402                    "%value = OpLoad _ {value}",
403                    "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
404                    "OpStore {result} %result",
405                    subgroup = const SUBGROUP,
406                    value = in(reg) &value,
407                    result = in(reg) &mut result,
408                }
409            }
410            result
411        }
412    }
413
414    value.transform(&mut Transform)
415}
416
417/// Result is a bitfield value combining the `predicate` value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the `predicate` for that invocation evaluated to true; otherwise, it is set to zero.
418///
419/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
420///
421/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
422///
423/// Execution is a Scope that identifies the group of invocations affected by this command.
424///
425/// `predicate` must be a Boolean type.
426///
427/// Requires Capability `GroupNonUniformBallot`.
428#[spirv_std_macros::gpu_only]
429#[doc(alias = "OpGroupNonUniformBallot")]
430#[inline]
431pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
432    let mut result = SubgroupMask::default();
433
434    unsafe {
435        asm! {
436            "%u32 = OpTypeInt 32 0",
437            "%subgroup = OpConstant %u32 {subgroup}",
438            "%predicate = OpLoad _ {predicate}",
439            "%result = OpGroupNonUniformBallot typeof*{result} %subgroup %predicate",
440            "OpStore {result} %result",
441            subgroup = const SUBGROUP,
442            predicate = in(reg) &predicate,
443            result = in(reg) &mut result,
444        }
445    }
446
447    result
448}
449
450/// Evaluates a `value` for all active invocations in the group, resulting in true if the bit in `value` for the corresponding invocation is set to one, otherwise the result is false.
451///
452/// Result Type must be a Boolean type.
453///
454/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
455///
456/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
457///
458/// Behavior is undefined unless `value` is the same for all invocations that execute the same dynamic instance of this instruction.
459///
460/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
461///
462/// Requires Capability `GroupNonUniformBallot`.
463///
464/// # Safety
465/// * `value` must be the same for all dynamic instances of this instruction
466#[spirv_std_macros::gpu_only]
467#[doc(alias = "OpGroupNonUniformInverseBallot")]
468#[inline]
469pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
470    let mut result = false;
471
472    unsafe {
473        asm! {
474            "%bool = OpTypeBool",
475            "%u32 = OpTypeInt 32 0",
476            "%subgroup = OpConstant %u32 {subgroup}",
477            "%value = OpLoad _ {value}",
478            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
479            "OpStore {result} %result",
480            subgroup = const SUBGROUP,
481            value = in(reg) &value,
482            result = in(reg) &mut result,
483        }
484    }
485
486    result
487}
488
489/// Evaluates a value for all active invocations in the group, resulting in true if the bit in `value` that corresponds to `index` is set to one, otherwise the result is false.
490///
491/// Result Type must be a Boolean type.
492///
493/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
494///
495/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
496///
497/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
498///
499/// `index` must be a scalar of integer type, whose Signedness operand is 0.
500///
501/// The resulting value is undefined if `index` is greater than or equal to the size of the group.
502///
503/// Requires Capability `GroupNonUniformBallot`.
504///
505/// # Safety
506/// * This function is safe
507/// * Result is undefined if `id` is out of bounds
508#[spirv_std_macros::gpu_only]
509#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
510#[inline]
511pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
512    let mut result = false;
513
514    unsafe {
515        asm! {
516            "%bool = OpTypeBool",
517            "%u32 = OpTypeInt 32 0",
518            "%subgroup = OpConstant %u32 {subgroup}",
519            "%value = OpLoad _ {value}",
520            "%index = OpLoad _ {index}",
521            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
522            "OpStore {result} %result",
523            subgroup = const SUBGROUP,
524            value = in(reg) &value,
525            index = in(reg) &index,
526            result = in(reg) &mut result,
527        }
528    }
529
530    result
531}
532
533macro_rules! macro_subgroup_ballot_bit_count {
534    ($name:ident, $group_op:expr) => {
535        /// Result is the number of bits that are set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations.
536        ///
537        /// Result Type must be a scalar of integer type, whose Signedness operand is 0.
538        ///
539        /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
540        ///
541        /// The identity I for Operation is 0.
542        ///
543        /// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
544        ///
545        /// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
546        ///
547        /// Requires Capability `GroupNonUniformBallot`.
548        #[spirv_std_macros::gpu_only]
549        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
550        #[inline]
551        pub fn $name(value: SubgroupMask) -> u32 {
552            let mut result = 0;
553
554            unsafe {
555                asm! {
556                    "%u32 = OpTypeInt 32 0",
557                    "%subgroup = OpConstant %u32 {subgroup}",
558                    "%value = OpLoad _ {value}",
559                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
560                    "OpStore {result} %result",
561                    subgroup = const SUBGROUP,
562                    groupop = const ($group_op as u32),
563                    value = in(reg) &value,
564                    result = in(reg) &mut result,
565                }
566            }
567
568            result
569        }
570    };
571}
572
573macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
574macro_subgroup_ballot_bit_count!(
575    subgroup_ballot_inclusive_bit_count,
576    GroupOperation::InclusiveScan
577);
578macro_subgroup_ballot_bit_count!(
579    subgroup_ballot_exclusive_bit_count,
580    GroupOperation::ExclusiveScan
581);
582
583/// Find the least significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
584///
585/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
586///
587/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
588///
589/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
590///
591/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
592///
593/// Requires Capability `GroupNonUniformBallot`.
594///
595/// # Safety
596/// * This function is safe
597/// * Result is undefined if `id` is an inactive invocation or out of bounds
598#[spirv_std_macros::gpu_only]
599#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
600#[inline]
601pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
602    let mut result = 0;
603
604    unsafe {
605        asm! {
606            "%u32 = OpTypeInt 32 0",
607            "%subgroup = OpConstant %u32 {subgroup}",
608            "%value = OpLoad _ {value}",
609            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
610            "OpStore {result} %result",
611            subgroup = const SUBGROUP,
612            value = in(reg) &value,
613            result = in(reg) &mut result,
614        }
615    }
616
617    result
618}
619
620/// Find the most significant bit set to 1 in `value`, considering only the bits in `value` required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined.
621///
622/// Result Type must be a scalar of integer type, whose Signedness operand is 0.
623///
624/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
625///
626/// `value` must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
627///
628/// `value` is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations.
629///
630/// Requires Capability `GroupNonUniformBallot`.
631#[spirv_std_macros::gpu_only]
632#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
633#[inline]
634pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
635    let mut result = 0;
636
637    unsafe {
638        asm! {
639            "%u32 = OpTypeInt 32 0",
640            "%subgroup = OpConstant %u32 {subgroup}",
641            "%value = OpLoad _ {value}",
642            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
643            "OpStore {result} %result",
644            subgroup = const SUBGROUP,
645            value = in(reg) &value,
646            result = in(reg) &mut result,
647        }
648    }
649
650    result
651}
652
653/// Result is the `value` of the invocation identified by the id `id`.
654///
655/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
656///
657/// Execution is a Scope that identifies the group of invocations affected by this command.
658///
659/// The type of `value` must be the same as Result Type.
660///
661/// `id` must be a scalar of integer type, whose Signedness operand is 0.
662///
663/// The resulting value is undefined if `id` is an inactive invocation, or is greater than or equal to the size of the group.
664///
665/// Requires Capability `GroupNonUniformShuffle`.
666///
667/// # Safety
668/// * This function is safe
669/// * Result is undefined if `id` is an inactive invocation or out of bounds
670#[spirv_std_macros::gpu_only]
671#[doc(alias = "OpGroupNonUniformShuffle")]
672#[inline]
673pub fn subgroup_shuffle<T: ScalarComposite>(value: T, id: u32) -> T {
674    struct Transform {
675        id: u32,
676    }
677
678    impl ScalarOrVectorTransform for Transform {
679        #[inline]
680        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
681            let mut result = T::default();
682            unsafe {
683                asm! {
684                    "%u32 = OpTypeInt 32 0",
685                    "%subgroup = OpConstant %u32 {subgroup}",
686                    "%value = OpLoad _ {value}",
687                    "%id = OpLoad _ {id}",
688                    "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
689                    "OpStore {result} %result",
690                    subgroup = const SUBGROUP,
691                    value = in(reg) &value,
692                    id = in(reg) &self.id,
693                    result = in(reg) &mut result,
694                }
695            }
696            result
697        }
698    }
699
700    value.transform(&mut Transform { id })
701}
702
703/// Result is the `value` of the invocation identified by the current invocation’s id within the group xor’ed with Mask.
704///
705/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
706///
707/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
708///
709/// The type of `value` must be the same as Result Type.
710///
711/// Mask must be a scalar of integer type, whose Signedness operand is 0.
712///
713/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group.
714///
715/// Requires Capability `GroupNonUniformShuffle`.
716///
717/// # Safety
718/// * This function is safe
719/// * Result is undefined if current invocation’s id within the group xor’ed with `mask` is an inactive invocation or out of bounds
720#[spirv_std_macros::gpu_only]
721#[doc(alias = "OpGroupNonUniformShuffleXor")]
722#[inline]
723pub fn subgroup_shuffle_xor<T: ScalarComposite>(value: T, mask: u32) -> T {
724    struct Transform {
725        mask: u32,
726    }
727
728    impl ScalarOrVectorTransform for Transform {
729        #[inline]
730        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
731            let mut result = T::default();
732            unsafe {
733                asm! {
734                    "%u32 = OpTypeInt 32 0",
735                    "%subgroup = OpConstant %u32 {subgroup}",
736                    "%value = OpLoad _ {value}",
737                    "%mask = OpLoad _ {mask}",
738                    "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
739                    "OpStore {result} %result",
740                    subgroup = const SUBGROUP,
741                    value = in(reg) &value,
742                    mask = in(reg) &self.mask,
743                    result = in(reg) &mut result,
744                }
745            }
746            result
747        }
748    }
749
750    value.transform(&mut Transform { mask })
751}
752
753/// Result is the `value` of the invocation identified by the current invocation’s id within the group - Delta.
754///
755/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
756///
757/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
758///
759/// The type of `value` must be the same as Result Type.
760///
761/// Delta must be a scalar of integer type, whose Signedness operand is 0.
762///
763/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive.
764///
765/// Requires Capability `GroupNonUniformShuffleRelative`.
766///
767/// # Safety
768/// * This function is safe
769/// * Result is undefined if `delta` is greater than the current invocation’s id within the group or if the selected lane is inactive
770#[spirv_std_macros::gpu_only]
771#[doc(alias = "OpGroupNonUniformShuffleUp")]
772#[inline]
773pub fn subgroup_shuffle_up<T: ScalarComposite>(value: T, delta: u32) -> T {
774    struct Transform {
775        delta: u32,
776    }
777
778    impl ScalarOrVectorTransform for Transform {
779        #[inline]
780        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
781            let mut result = T::default();
782            unsafe {
783                asm! {
784                    "%u32 = OpTypeInt 32 0",
785                    "%subgroup = OpConstant %u32 {subgroup}",
786                    "%value = OpLoad _ {value}",
787                    "%delta = OpLoad _ {delta}",
788                    "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
789                    "OpStore {result} %result",
790                    subgroup = const SUBGROUP,
791                    value = in(reg) &value,
792                    delta = in(reg) &self.delta,
793                    result = in(reg) &mut result,
794                }
795            }
796            result
797        }
798    }
799
800    value.transform(&mut Transform { delta })
801}
802
803/// Result is the `value` of the invocation identified by the current invocation’s id within the group + Delta.
804///
805/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
806///
807/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
808///
809/// The type of `value` must be the same as Result Type.
810///
811/// Delta must be a scalar of integer type, whose Signedness operand is 0.
812///
813/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group.
814///
815/// Requires Capability `GroupNonUniformShuffleRelative`.
816///
817/// # Safety
818/// * This function is safe
819/// * Result is undefined if `delta` is greater than or equal to the size of the group, or if the current invocation’s id within the group + `delta` is either an inactive invocation or greater than or equal to the size of the group.
820#[spirv_std_macros::gpu_only]
821#[doc(alias = "OpGroupNonUniformShuffleDown")]
822#[inline]
823pub fn subgroup_shuffle_down<T: ScalarComposite>(value: T, delta: u32) -> T {
824    struct Transform {
825        delta: u32,
826    }
827
828    impl ScalarOrVectorTransform for Transform {
829        #[inline]
830        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
831            let mut result = T::default();
832            unsafe {
833                asm! {
834                    "%u32 = OpTypeInt 32 0",
835                    "%subgroup = OpConstant %u32 {subgroup}",
836                    "%value = OpLoad _ {value}",
837                    "%delta = OpLoad _ {delta}",
838                    "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
839                    "OpStore {result} %result",
840                    subgroup = const SUBGROUP,
841                    value = in(reg) &value,
842                    delta = in(reg) &self.delta,
843                    result = in(reg) &mut result,
844                }
845            }
846            result
847        }
848    }
849
850    value.transform(&mut Transform { delta })
851}
852
// Generates one public subgroup function per `$func, $operation` pair.
//
// * `$elem`: type (or `impl Trait`) required for the `Scalar` of the `ScalarOrVector` argument
// * `$opcode`: SPIR-V instruction name; spliced into the asm and used as the rustdoc alias
// * `$func, $operation`: name of the generated function and the group operation it emits (cast to `u32`)
// * `$doc_text`: documentation attached to every generated function
macro_rules! macro_subgroup_op {
    (
        $elem:ty,
        $opcode:literal,
        $($func:ident, $operation:expr),+;
        $doc_text:literal
    ) => {
        $(
            #[doc = $doc_text]
            #[spirv_std_macros::gpu_only]
            #[doc(alias = $opcode)]
            #[inline]
            pub fn $func<I: ScalarOrVector<Scalar = $elem>>(value: I) -> I {
                // Placeholder that the asm block overwrites through its `result` operand.
                let mut reduced = I::default();
                unsafe {
                    asm! {
                        "%u32 = OpTypeInt 32 0",
                        "%subgroup = OpConstant %u32 {subgroup}",
                        "%value = OpLoad _ {value}",
                        concat!("%result = ", $opcode, " _ %subgroup {groupop} %value"),
                        "OpStore {result} %result",
                        subgroup = const SUBGROUP,
                        groupop = const ($operation as u32),
                        value = in(reg) &value,
                        result = in(reg) &mut reduced,
                    }
                }
                reduced
            }
        )+
    };
}
880
// Generates a single public `unsafe` subgroup function that always emits `GroupOperation::ClusteredReduce`
// and takes the cluster size as the `CLUSTER_SIZE` const generic.
//
// * `$elem`: type (or `impl Trait`) required for the `Scalar` of the `ScalarOrVector` argument
// * `$opcode`: SPIR-V instruction name; spliced into the asm and used as the rustdoc alias
// * `$func`: name of the generated function
// * `$doc_text`: documentation attached to the generated function
macro_rules! macro_subgroup_op_clustered {
    (
        $elem:ty,
        $opcode:literal,
        $func:ident;
        $doc_text:literal
    ) => {
        #[doc = $doc_text]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $opcode)]
        #[inline]
        pub unsafe fn $func<const CLUSTER_SIZE: u32, I: ScalarOrVector<Scalar = $elem>>(
            value: I,
        ) -> I {
            // Reject invalid cluster sizes at compile time where that is possible.
            const {
                assert!(CLUSTER_SIZE >= 1, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
                // Cannot be verified with static assertions:
                // `ClusterSize` must not be greater than the size of the group
            }

            // Placeholder that the asm block overwrites through its `result` operand.
            let mut reduced = I::default();

            unsafe {
                asm! {
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $opcode, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut reduced,
                }
            }

            reduced
        }
    };
}
922
923// add
924macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
925An integer add group operation of all `value` operands contributed by active invocations in the group.
926
927Result Type must be a scalar or vector of integer type.
928
929Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
930
931The identity I for Operation is 0.
932
933The type of `value` must be the same as Result Type.
934
935Requires Capability `GroupNonUniformArithmetic`.
936");
937macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
938An integer add group operation of all `value` operands contributed by active invocations in the group.
939
940Result Type must be a scalar or vector of integer type.
941
942Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
943
944The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
945
946The type of `value` must be the same as Result Type.
947
948`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
949
950Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
951
952# Safety
953* `ClusterSize` must not be greater than the size of the group
954");
955macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
956A floating point add group operation of all `value` operands contributed by active invocations in the group.
957
958Result Type must be a scalar or vector of floating-point type.
959
960Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
961
962The identity I for Operation is 0.
963
964The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
965
966Requires Capability `GroupNonUniformArithmetic`.
967");
968macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
969A floating point add group operation of all `value` operands contributed by active invocations in the group.
970
971Result Type must be a scalar or vector of floating-point type.
972
973Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
974
975The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
976
977The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
978
979`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
980
981Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
982
983# Safety
984* `ClusterSize` must not be greater than the size of the group
985");
986
987// mul
988macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
989An integer multiply group operation of all `value` operands contributed by active invocations in the group.
990
991Result Type must be a scalar or vector of integer type.
992
993Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
994
995The identity I for Operation is 1.
996
997The type of `value` must be the same as Result Type.
998
999Requires Capability `GroupNonUniformArithmetic`.
1000");
1001macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
1002An integer multiply group operation of all `value` operands contributed by active invocations in the group.
1003
1004Result Type must be a scalar or vector of integer type.
1005
1006Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1007
1008The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1009
1010The type of `value` must be the same as Result Type.
1011
1012`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1013
1014Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1015
1016# Safety
1017* `ClusterSize` must not be greater than the size of the group
1018");
1019macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
1020A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
1021
1022Result Type must be a scalar or vector of floating-point type.
1023
1024Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1025
1026The identity I for Operation is 1.
1027
1028The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
1029
1030Requires Capability `GroupNonUniformArithmetic`.
1031");
1032macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
1033A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
1034
1035Result Type must be a scalar or vector of floating-point type.
1036
1037Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1038
1039The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1040
1041The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
1042
1043`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1044
1045Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1046
1047# Safety
1048* `ClusterSize` must not be greater than the size of the group
1049");
1050
1051// min
1052macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
1053A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
1054
1055Result Type must be a scalar or vector of integer type.
1056
1057Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1058
1059The identity I for Operation is `INT_MAX`.
1060
1061The type of `value` must be the same as Result Type.
1062
1063Requires Capability `GroupNonUniformArithmetic`.
1064");
1065macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
1066A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
1067
1068Result Type must be a scalar or vector of integer type.
1069
1070Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1071
1072The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1073
1074The type of `value` must be the same as Result Type.
1075
1076`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1077
1078Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1079
1080# Safety
1081* `ClusterSize` must not be greater than the size of the group
1082");
1083macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
1084An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
1085
1086Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1087
1088Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1089
1090The identity I for Operation is `UINT_MAX`.
1091
1092The type of `value` must be the same as Result Type.
1093
1094Requires Capability `GroupNonUniformArithmetic`.
1095");
1096macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
1097An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
1098
1099Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1100
1101Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1102
1103The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1104
1105The type of `value` must be the same as Result Type.
1106
1107`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1108
1109Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1110
1111# Safety
1112* `ClusterSize` must not be greater than the size of the group
1113");
1114macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
1115A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1116
1117Result Type must be a scalar or vector of floating-point type.
1118
1119Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1120
1121The identity I for Operation is +INF.
1122
1123The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1124
1125Requires Capability `GroupNonUniformArithmetic`.
1126");
1127macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
1128A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
1129
1130Result Type must be a scalar or vector of floating-point type.
1131
1132Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1133
1134The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1135
1136The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1137
1138`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1139
1140Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1141
1142# Safety
1143* `ClusterSize` must not be greater than the size of the group
1144");
1145
1146// max
1147macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
1148A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1149
1150Result Type must be a scalar or vector of integer type.
1151
1152Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1153
1154The identity I for Operation is `INT_MIN`.
1155
1156The type of `value` must be the same as Result Type.
1157
1158Requires Capability `GroupNonUniformArithmetic`.
1159");
1160macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
1161A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
1162
1163Result Type must be a scalar or vector of integer type.
1164
1165Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1166
1167The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1168
1169The type of `value` must be the same as Result Type.
1170
1171`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1172
1173Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1174
1175# Safety
1176* `ClusterSize` must not be greater than the size of the group
1177");
1178macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
1179An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1180
1181Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1182
1183Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1184
1185The identity I for Operation is 0.
1186
1187The type of `value` must be the same as Result Type.
1188
1189Requires Capability `GroupNonUniformArithmetic`.
1190");
1191macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
1192An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
1193
1194Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
1195
1196Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1197
1198The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1199
1200The type of `value` must be the same as Result Type.
1201
1202`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1203
1204Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1205
1206# Safety
1207* `ClusterSize` must not be greater than the size of the group
1208");
1209macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
1210A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1211
1212Result Type must be a scalar or vector of floating-point type.
1213
1214Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1215
1216The identity I for Operation is -INF.
1217
1218The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1219
1220Requires Capability `GroupNonUniformArithmetic`.
1221");
1222macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
1223A floating point maximum group operation of all `value` operands contributed by active invocations in by group.
1224
1225Result Type must be a scalar or vector of floating-point type.
1226
1227Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1228
1229The identity I for Operation is -INF.
1230
1231The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
1232
1233Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1234
1235# Safety
1236* `ClusterSize` must not be greater than the size of the group
1237");
1238
1239// and
1240macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
1241A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1242
1243Result Type must be a scalar or vector of integer type.
1244
1245Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1246
1247The identity I for Operation is ~0.
1248
1249The type of `value` must be the same as Result Type.
1250
1251Requires Capability `GroupNonUniformArithmetic`.
1252");
1253macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
1254A bitwise and group operation of all `value` operands contributed by active invocations in the group.
1255
1256Result Type must be a scalar or vector of integer type.
1257
1258Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1259
1260The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1261
1262The type of `value` must be the same as Result Type.
1263
1264`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1265
1266Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1267
1268# Safety
1269* `ClusterSize` must not be greater than the size of the group
1270");
1271
1272// or
1273macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
1274A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1275
1276Result Type must be a scalar or vector of integer type.
1277
1278Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1279
1280The identity I for Operation is 0.
1281
1282The type of `value` must be the same as Result Type.
1283
1284Requires Capability `GroupNonUniformArithmetic`.
1285");
1286macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
1287A bitwise or group operation of all `value` operands contributed by active invocations in the group.
1288
1289Result Type must be a scalar or vector of integer type.
1290
1291Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1292
1293The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1294
1295The type of `value` must be the same as Result Type.
1296
1297`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1298
1299Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1300
1301# Safety
1302* `ClusterSize` must not be greater than the size of the group
1303");
1304
1305// xor
1306macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
1307A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1308
1309Result Type must be a scalar or vector of integer type.
1310
1311Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1312
1313The identity I for Operation is 0.
1314
1315The type of `value` must be the same as Result Type.
1316
1317Requires Capability `GroupNonUniformArithmetic`.
1318");
1319macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
1320A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
1321
1322Result Type must be a scalar or vector of integer type.
1323
1324Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1325
1326The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1327
1328The type of `value` must be the same as Result Type.
1329
1330`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1331
1332Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1333
1334# Safety
1335* `ClusterSize` must not be greater than the size of the group
1336");
1337
1338// logical and
1339macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
1340A logical and group operation of all `value` operands contributed by active invocations in the group.
1341
1342Result Type must be a scalar or vector of Boolean type.
1343
1344Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1345
1346The identity I for Operation is ~0.
1347
1348The type of `value` must be the same as Result Type.
1349
1350Requires Capability `GroupNonUniformArithmetic`.
1351");
1352macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
1353A logical and group operation of all `value` operands contributed by active invocations in the group.
1354
1355Result Type must be a scalar or vector of Boolean type.
1356
1357Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1358
1359The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1360
1361The type of `value` must be the same as Result Type.
1362
1363`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1364
1365Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1366
1367# Safety
1368* `ClusterSize` must not be greater than the size of the group
1369");
1370
1371// logical or
1372macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
1373A logical or group operation of all `value` operands contributed by active invocations in the group.
1374
1375Result Type must be a scalar or vector of Boolean type.
1376
1377Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1378
1379The identity I for Operation is 0.
1380
1381The type of `value` must be the same as Result Type.
1382
1383Requires Capability `GroupNonUniformArithmetic`.
1384");
1385macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
1386A logical or group operation of all `value` operands contributed by active invocations in the group.
1387
1388Result Type must be a scalar or vector of Boolean type.
1389
1390Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1391
1392The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1393
1394The type of `value` must be the same as Result Type.
1395
1396`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1397
1398Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1399
1400# Safety
1401* `ClusterSize` must not be greater than the size of the group
1402");
1403
1404// logical xor
1405macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
1406A logical xor group operation of all `value` operands contributed by active invocations in the group.
1407
1408Result Type must be a scalar or vector of Boolean type.
1409
1410Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1411
1412The identity I for Operation is 0.
1413
1414The type of `value` must be the same as Result Type.
1415
1416Requires Capability `GroupNonUniformArithmetic`.
1417");
1418macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
1419A logical xor group operation of all `value` operands contributed by active invocations in the group.
1420
1421Result Type must be a scalar or vector of Boolean type.
1422
1423Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
1424
1425The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
1426
1427The type of `value` must be the same as Result Type.
1428
1429`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
1430
1431Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
1432
1433# Safety
1434* `ClusterSize` must not be greater than the size of the group
1435");
1436
1437/// Result is the `value` of the invocation within the quad with a quad index equal to `index`.
1438///
1439/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1440///
1441/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1442///
1443/// The type of `value` must be the same as Result Type.
1444///
1445/// `index` must be a scalar of integer type, whose Signedness operand is 0.
1446///
1447/// Before version 1.5, `index` must come from a constant instruction. Starting with version 1.5, `index` must be dynamically uniform.
1448///
1449/// If the value of `index` is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
1450///
1451/// Requires Capability `GroupNonUniformQuad`.
1452///
1453/// # Safety
1454/// * This function is safe
1455/// * Result is undefined if the value of `index` is greater than or equal to 4, or refers to an inactive invocation
1456#[spirv_std_macros::gpu_only]
1457#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
1458#[inline]
1459pub fn subgroup_quad_broadcast<T: ScalarComposite>(value: T, index: u32) -> T {
1460    struct Transform {
1461        index: u32,
1462    }
1463
1464    impl ScalarOrVectorTransform for Transform {
1465        #[inline]
1466        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
1467            let mut result = T::default();
1468            unsafe {
1469                asm! {
1470                    "%u32 = OpTypeInt 32 0",
1471                    "%subgroup = OpConstant %u32 {subgroup}",
1472                    "%value = OpLoad _ {value}",
1473                    "%index = OpLoad _ {index}",
1474                    "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
1475                    "OpStore {result} %result",
1476                    subgroup = const SUBGROUP,
1477                    value = in(reg) &value,
1478                    index = in(reg) &self.index,
1479                    result = in(reg) &mut result,
1480                }
1481            }
1482            result
1483        }
1484    }
1485
1486    value.transform(&mut Transform { index })
1487}
1488
/// Direction is the kind of swap to perform.
///
/// Direction must be a scalar of integer type, whose Signedness operand is 0.
///
/// Direction must come from a constant instruction.
///
/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
///
/// The discriminants are the numeric Direction values, so a variant can be cast with `as u32`
/// wherever a raw Direction constant is expected.
///
/// Requires Capability `GroupNonUniformQuad`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadDirection {
    /// A Direction of 0 indicates a horizontal swap;
    /// - Invocations with quad indices of 0 and 1 swap values
    /// - Invocations with quad indices of 2 and 3 swap values
    Horizontal = 0,
    /// A Direction of 1 indicates a vertical swap;
    /// - Invocations with quad indices of 0 and 2 swap values
    /// - Invocations with quad indices of 1 and 3 swap values
    Vertical = 1,
    /// A Direction of 2 indicates a diagonal swap;
    /// - Invocations with quad indices of 0 and 3 swap values
    /// - Invocations with quad indices of 1 and 2 swap values
    Diagonal = 2,
}
1512
1513/// Swap the `value` of the invocation within the quad with another invocation in the quad using Direction.
1514///
1515/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
1516///
1517/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
1518///
1519/// The type of `value` must be the same as Result Type.
1520///
1521/// Direction is the kind of swap to perform.
1522///
1523/// Direction must be a scalar of integer type, whose Signedness operand is 0.
1524///
1525/// Direction must come from a constant instruction.
1526///
1527/// The value returned in Result is the value provided to `value` by another invocation in the same quad scope instance. The invocation providing this `value` is determined according to Direction.
1528///
1529/// A Direction of 0 indicates a horizontal swap;
1530/// - Invocations with quad indices of 0 and 1 swap values
1531/// - Invocations with quad indices of 2 and 3 swap values
1532/// A Direction of 1 indicates a vertical swap;
1533/// - Invocations with quad indices of 0 and 2 swap values
1534/// - Invocations with quad indices of 1 and 3 swap values
1535/// A Direction of 2 indicates a diagonal swap;
1536/// - Invocations with quad indices of 0 and 3 swap values
1537/// - Invocations with quad indices of 1 and 2 swap values
1538///
1539/// Direction must be one of the above values.
1540///
1541/// If an active invocation reads `value` from an inactive invocation, the resulting value is undefined.
1542///
1543/// Requires Capability `GroupNonUniformQuad`.
1544///
1545/// # Safety
1546/// * This function is safe
1547/// * Result is undefined if an active invocation reads `value` from an inactive invocation
1548#[spirv_std_macros::gpu_only]
1549#[doc(alias = "OpGroupNonUniformQuadSwap")]
1550#[inline]
1551pub fn subgroup_quad_swap<const DIRECTION: u32, T: ScalarComposite>(value: T) -> T {
1552    struct Transform<const DIRECTION: u32>;
1553
1554    impl<const DIRECTION: u32> ScalarOrVectorTransform for Transform<DIRECTION> {
1555        #[inline]
1556        fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
1557            let mut result = T::default();
1558            unsafe {
1559                asm! {
1560                    "%u32 = OpTypeInt 32 0",
1561                    "%subgroup = OpConstant %u32 {subgroup}",
1562                    "%direction = OpConstant %u32 {direction}",
1563                    "%value = OpLoad _ {value}",
1564                    "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
1565                    "OpStore {result} %result",
1566                    subgroup = const SUBGROUP,
1567                    direction = const DIRECTION,
1568                    value = in(reg) &value,
1569                    result = in(reg) &mut result,
1570                }
1571            }
1572            result
1573        }
1574    }
1575
1576    value.transform(&mut Transform::<DIRECTION>)
1577}