HIP: Heterogeneous-computing Interface for Portability
amd_hip_atomic.h
/*
Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#pragma once

#if !defined(__HIPCC_RTC__)
#include "amd_device_functions.h"
#endif

#if __has_builtin(__hip_atomic_compare_exchange_strong)

template<bool B, typename T, typename F> struct Cond_t;

template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };

#if !__HIP_DEVICE_COMPILE__
//TODO: Remove this after compiler pre-defines the following Macros.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
#define __HIP_MEMORY_SCOPE_WORKGROUP 3
#define __HIP_MEMORY_SCOPE_AGENT 4
#define __HIP_MEMORY_SCOPE_SYSTEM 5
#endif

#if !defined(__HIPCC_RTC__)
#include "amd_hip_unsafe_atomics.h"
#endif

// Atomic expanders
template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Op,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
{
  using FP = __attribute__((address_space(0))) const void*;

  __device__
  extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

  if (is_shared_workaround((FP)p))
    return f();

  using U = typename Cond_t<
      sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

  auto q = reinterpret_cast<U*>(p);

  U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
  U tmp1;
  do {
    tmp1 = tmp0;

    op(reinterpret_cast<T&>(tmp1), x);
  } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
                                                 mem_order, mem_scope));

  return reinterpret_cast<const T&>(tmp0);
}

template<
    int mem_order = __ATOMIC_SEQ_CST,
    int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
    typename T,
    typename Cmp,
    typename F>
inline
__attribute__((always_inline, device))
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
{
  using FP = __attribute__((address_space(0))) const void*;

  __device__
  extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");

  if (is_shared_workaround((FP)p))
    return f();

  using U = typename Cond_t<
      sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;

  auto q = reinterpret_cast<U*>(p);

  U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
  while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
         !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
                                               mem_scope));

  return reinterpret_cast<const T&>(tmp);
}
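
// Usage sketch (illustrative, not part of this header): any read-modify-write
// can be routed through hip_cas_expander. The Op lambda mutates the loaded
// value in place; the trailing callback is the fallback the expander invokes
// when `p` points into LDS (shared memory), where the CAS loop is skipped.
// Mirroring how atomicAnd below uses it on gfx941:
//
//   __device__ int atomicAnd_agent(int* p, int v) {   // hypothetical name
//     return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
//         p, v,
//         [](int& x, int y) { x &= y; },              // CAS-loop path
//         [=]() { return __hip_atomic_fetch_and(p, v, __ATOMIC_RELAXED,
//                                               __HIP_MEMORY_SCOPE_AGENT); });
//   }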

__device__
inline
int atomicCAS(int* address, int compare, int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
int atomicCAS_system(int* address, int compare, int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned long atomicCAS(unsigned long* address, unsigned long compare, unsigned long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, unsigned long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
                             unsigned long long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
                                    unsigned long long val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
float atomicCAS(float* address, float compare, float val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
float atomicCAS_system(float* address, float compare, float val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}

__device__
inline
double atomicCAS(double* address, double compare, double val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_AGENT);
  return compare;
}

__device__
inline
double atomicCAS_system(double* address, double compare, double val) {
  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                       __HIP_MEMORY_SCOPE_SYSTEM);
  return compare;
}
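
// Illustrative sketch (not part of this header): the overloads above return
// the value observed at `address` before the exchange, which enables the
// canonical CAS loop for operations with no native atomic. For example, a
// hypothetical agent-scope atomic scale on float:
//
//   __device__ float atomicScale(float* addr, float s) {
//     float observed = *addr;
//     float assumed;
//     do {
//       assumed = observed;
//       observed = atomicCAS(addr, assumed, assumed * s);
//     } while (__float_as_uint(assumed) != __float_as_uint(observed));
//     return observed;  // compare bit patterns so NaN cannot spin forever
//   }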

__device__
inline
int atomicAdd(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicAdd_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicAdd(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicAdd(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
float atomicAdd_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
  __ockl_atomic_add_noret_f32(address, val);
}

__device__
inline
double atomicAdd(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
double atomicAdd_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
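
// Example (illustrative only): device-scope accumulation from a kernel. The
// *_system variants are needed only when host or peer-device accesses must
// observe the update; agent scope suffices within a single GPU.
//
//   __global__ void histogram256(const unsigned char* in, unsigned int* bins,
//                                int n) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < n) atomicAdd(&bins[in[i]], 1u);
//   }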

__device__
inline
int atomicSub(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicSub_system(int* address, int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicSub(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicSub(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicSub_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicSub(float* address, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
float atomicSub_system(float* address, float val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
double atomicSub(double* address, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, -val);
#else
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif
}

__device__
inline
double atomicSub_system(double* address, double val) {
  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
int atomicExch(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
int atomicExch_system(int* address, int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long atomicExch(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
float atomicExch(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
float atomicExch_system(float* address, float val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

__device__
inline
double atomicExch(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}

__device__
inline
double atomicExch_system(double* address, double val) {
  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
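
// Illustrative sketch: atomicExch suffices for a naive device-side test-and-
// set spinlock. Use with care: the relaxed ordering used by these overloads
// provides no implicit fencing (pair with __threadfence or acquire/release
// atomics), and intra-wavefront contention can livelock on hardware without
// independent forward progress.
//
//   __device__ void lock(int* mutex)   { while (atomicExch(mutex, 1) != 0) {} }
//   __device__ void unlock(int* mutex) { atomicExch(mutex, 0); }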

__device__
inline
int atomicMin(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int x, int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicMin_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int x, int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMin(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
long long atomicMin(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
long long atomicMin_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return x < y; },
      [=]() {
        return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
float atomicMin(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}

__device__
inline
float atomicMin_system(float* address, float val) {
  unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

__device__
inline
double atomicMin(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMin(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}

__device__
inline
double atomicMin_system(double* address, double val) {
  unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (val < value) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int x, int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int x, int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long x, unsigned long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long x, unsigned long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
long long atomicMax(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](long long x, long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
long long atomicMax_system(long long* address, long long val) {
#if defined(__gfx941__)
  return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](long long x, long long y) { return y < x; },
      [=]() {
        return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
float atomicMax(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
  #endif
#endif
}

__device__
inline
float atomicMax_system(float* address, float val) {
  unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  float value = __uint_as_float(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

__device__
inline
double atomicMax(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicMax(addr, val);
#else
  #if __has_builtin(__hip_atomic_load) && \
      __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
  #else
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
  #endif
#endif
}

__device__
inline
double atomicMax_system(double* address, double val) {
  unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
  #if __has_builtin(__hip_atomic_load)
  unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
  #else
  unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
  #endif
  double value = __longlong_as_double(tmp);

  while (value < val) {
    value = atomicCAS_system(address, value, val);
  }

  return value;
}

__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
      [=]() {
        return
            __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
      [=]() {
        return
            __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
      });
#else
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
}
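
// Illustrative note: atomicInc and atomicDec are wrapping counters, not plain
// add/subtract. atomicInc stores ((old >= val) ? 0 : old + 1) and atomicDec
// stores ((old == 0 || old > val) ? val : old - 1), returning `old` in both
// cases. With val == N - 1 this yields a ring-buffer index in [0, N):
//
//   __device__ unsigned int nextSlot(unsigned int* head, unsigned int N) {
//     return atomicInc(head, N - 1u);  // cycles 0, 1, ..., N-1, 0, ...
//   }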

__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x &= y; },
      [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x &= y; },
      [=]() {
        return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x |= y; },
      [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x |= y; },
      [=]() {
        return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
                                     __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](int& x, int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
      address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_SYSTEM);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
  return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
      address,
      val,
      [](unsigned long long& x, unsigned long long y) { x ^= y; },
      [=]() {
        return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
                                      __HIP_MEMORY_SCOPE_AGENT);
      });
#else
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}

__device__
inline
unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
  return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}

#else // __hip_atomic_compare_exchange_strong

__device__
inline
int atomicCAS(int* address, int compare, int val)
{
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}
__device__
inline
unsigned int atomicCAS(
    unsigned int* address, unsigned int compare, unsigned int val)
{
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}
__device__
inline
unsigned long long atomicCAS(
    unsigned long long* address,
    unsigned long long compare,
    unsigned long long val)
{
  __atomic_compare_exchange_n(
      address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

  return compare;
}

__device__
inline
int atomicAdd(int* address, int val)
{
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAdd(
    unsigned long long* address, unsigned long long val)
{
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicAdd(float* address, float val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}

#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
{
  __ockl_atomic_add_noret_f32(address, val);
}

__device__
inline
double atomicAdd(double* address, double val)
{
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
  return unsafeAtomicAdd(address, val);
#else
  return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
#endif
}

__device__
inline
int atomicSub(int* address, int val)
{
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicExch(int* address, int val)
{
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val)
{
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
{
  return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
}
__device__
inline
float atomicExch(float* address, float val)
{
  return __uint_as_float(__atomic_exchange_n(
      reinterpret_cast<unsigned int*>(address),
      __float_as_uint(val),
      __ATOMIC_RELAXED));
}

__device__
inline
int atomicMin(int* address, int val)
{
  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMin(
    unsigned long long* address, unsigned long long val)
{
  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (val < tmp) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
__device__ inline long long atomicMin(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (val < tmp) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) {
      tmp = tmp1;
      continue;
    }

    tmp = atomicCAS(address, tmp, val);
  }
  return tmp;
}

__device__
inline
int atomicMax(int* address, int val)
{
  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicMax(
    unsigned long long* address, unsigned long long val)
{
  unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (tmp < val) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) { tmp = tmp1; continue; }

    tmp = atomicCAS(address, tmp, val);
  }

  return tmp;
}
__device__ inline long long atomicMax(long long* address, long long val) {
  long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
  while (tmp < val) {
    const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);

    if (tmp1 != tmp) {
      tmp = tmp1;
      continue;
    }

    tmp = atomicCAS(address, tmp, val);
  }
  return tmp;
}

__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
  return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}

__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
  return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}

__device__
inline
int atomicAnd(int* address, int val)
{
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicAnd(
    unsigned long long* address, unsigned long long val)
{
  return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicOr(int* address, int val)
{
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicOr(
    unsigned long long* address, unsigned long long val)
{
  return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
}

__device__
inline
int atomicXor(int* address, int val)
{
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val)
{
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned long long atomicXor(
    unsigned long long* address, unsigned long long val)
{
  return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}

#endif // __hip_atomic_compare_exchange_strong