HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_bf16.h
Go to the documentation of this file.
1
96#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
97#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
98
99#if !defined(__HIPCC_RTC__)
100#include <hip/amd_detail/amd_hip_common.h>
101#endif // !defined(__HIPCC_RTC__)
102
103#include "amd_hip_vector_types.h" // float2 etc
104#include "device_library_decls.h" // ocml conversion functions
105#include "math_fwd.h" // ocml device functions
106
107#define __BF16_DEVICE__ __device__
108#if defined(__HIPCC_RTC__)
109#define __BF16_HOST_DEVICE__ __BF16_DEVICE__
110#else
111#include <algorithm>
112#include <climits>
113#include <cmath>
114#define __BF16_HOST_DEVICE__ __host__ __BF16_DEVICE__
115#endif
116#define __BF16_DEVICE_STATIC__ __BF16_DEVICE__ static inline
117#define __BF16_HOST_DEVICE_STATIC__ __BF16_HOST_DEVICE__ static inline
118
119#if defined(__AVX512VL__) and defined(__AVX512BF16__) and not defined(__HIP_DEVICE_COMPILE__)
120// Enable with -mavx512vl -mavx512bf16
121#if defined(__MINGW64__)
122#include <intrin.h>
123#else
124#include <immintrin.h>
125#endif
126#define HIP_BF16_AVX512_OP 1
127static_assert(sizeof(__bf16) == sizeof(unsigned short),
128 "sizeof __bf16 should match sizeof unsigned short");
129#else
130#define HIP_BF16_AVX512_OP 0
131#endif
132
133#define HIPRT_ONE_BF16 __float2bfloat16(1.0f)
134#define HIPRT_ZERO_BF16 __float2bfloat16(0.0f)
135#define HIPRT_INF_BF16 __ushort_as_bfloat16((unsigned short)0x7F80U)
136#define HIPRT_MAX_NORMAL_BF16 __ushort_as_bfloat16((unsigned short)0x7F7FU)
137#define HIPRT_MIN_DENORM_BF16 __ushort_as_bfloat16((unsigned short)0x0001U)
138#define HIPRT_NAN_BF16 __ushort_as_bfloat16((unsigned short)0x7FFFU)
139#define HIPRT_NEG_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x8000U)
140
141// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
142// different machines. These naive checks should prevent some undefined behavior on systems which
143// have different sizes for basic types.
144#if !defined(__HIPCC_RTC__)
145static_assert(CHAR_BIT == 8, "byte size should be of 8 bits");
146#endif
147static_assert(sizeof(unsigned short) == 2, "size of unsigned short should be 2 bytes");
148
153typedef struct __attribute__((aligned(2))) {
154 unsigned short x;
155} __hip_bfloat16_raw;
156
161typedef struct __attribute__((aligned(4))) {
162 unsigned short x;
163 unsigned short y;
164} __hip_bfloat162_raw;
165
172struct __attribute__((aligned(2))) __hip_bfloat16 {
173 private:
174 __BF16_HOST_DEVICE_STATIC__ float bfloatraw_2_float(unsigned short val) {
175#if HIP_BF16_AVX512_OP
176 union {
177 unsigned short us;
178 __bf16 bf16;
179 } u = {val};
180 return _mm_cvtsbh_ss(u.bf16);
181#else
182 unsigned int uval = val << 16;
183 union {
184 unsigned int u32;
185 float fp32;
186 } u = {uval};
187 return u.fp32;
188#endif
189 }
190 __BF16_HOST_DEVICE_STATIC__ unsigned short float_2_bfloatraw(float f) {
191#if HIP_BF16_AVX512_OP
192 union {
193 __bf16 bf16;
194 unsigned short us;
195 } u = {_mm_cvtness_sbh(f)};
196 return u.us;
197#else
198 union {
199 float fp32;
200 unsigned int u32;
201 } u = {f};
202 if (~u.u32 & 0x7f800000) {
203 // When the exponent bits are not all 1s, then the value is zero, normal,
204 // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
205 // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
206 // This causes the bfloat16's mantissa to be incremented by 1 if the 16
207 // least significant bits of the float mantissa are greater than 0x8000,
208 // or if they are equal to 0x8000 and the least significant bit of the
209 // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
210 // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
211 // has the value 0x7f, then incrementing it causes it to become 0x00 and
212 // the exponent is incremented by one, which is the next higher FP value
213 // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
214 // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
215 // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
216 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
217 // incrementing it causes it to become an exponent of 0xFF and a mantissa
218 // of 0x00, which is Inf, the next higher value to the unrounded value.
219 u.u32 += 0x7fff + ((u.u32 >> 16) & 1); // Round to nearest, round to even
220 } else if (u.u32 & 0xffff) {
221 // When all of the exponent bits are 1, the value is Inf or NaN.
222 // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
223 // mantissa bit. Quiet NaN is indicated by the most significant mantissa
224 // bit being 1. Signaling NaN is indicated by the most significant
225 // mantissa bit being 0 but some other bit(s) being 1. If any of the
226 // lower 16 bits of the mantissa are 1, we set the least significant bit
227 // of the bfloat16 mantissa, in order to preserve signaling NaN in case
228 // the bloat16's mantissa bits are all 0.
229 u.u32 |= 0x10000; // Preserve signaling NaN
230 }
231 return static_cast<unsigned short>(u.u32 >> 16);
232#endif
233 }
234
235 __BF16_HOST_DEVICE_STATIC__ unsigned short double_2_bfloatraw(double d_in) {
236 union {
237 float fp32;
238 unsigned int u32;
239 } u = {static_cast<float>(d_in)};
240 double d = u.fp32;
241
242 // Round to odd
243 if ((d_in > 0.0 && d > d_in) || (d_in < 0.0 && d < d_in)) {
244 u.u32--;
245 u.u32 |= 1;
246 }
247
248 return float_2_bfloatraw(u.fp32);
249 }
250
251 protected:
253 unsigned short __x;
254
255 public:
256 // TODO: SWDEV-452411
257 // Need to add constructor of __hip_bfloat16 from
258 // unsigned long long
259 // long long
260 // long
261 // unsigned long
262 // Casting directly to double might lead to double rounding.
263
265 __BF16_HOST_DEVICE__ __hip_bfloat16(unsigned int val)
266 : __x(double_2_bfloatraw(static_cast<double>(val))) {}
267
269 __BF16_HOST_DEVICE__ __hip_bfloat16(int val)
270 : __x(double_2_bfloatraw(static_cast<double>(val))) {}
271
273 __BF16_HOST_DEVICE__ __hip_bfloat16(unsigned short val)
274 : __x(float_2_bfloatraw(static_cast<float>(val))) {}
275
277 __BF16_HOST_DEVICE__ __hip_bfloat16(short val)
278 : __x(float_2_bfloatraw(static_cast<float>(val))) {}
279
281 __BF16_HOST_DEVICE__ __hip_bfloat16(const double val) : __x(double_2_bfloatraw(val)) {}
282
284 __BF16_HOST_DEVICE__ __hip_bfloat16(const float val) : __x(float_2_bfloatraw(val)) {}
285
287 __BF16_HOST_DEVICE__ __hip_bfloat16(const __hip_bfloat16_raw& val) : __x(val.x) {}
288
290 __BF16_HOST_DEVICE__ __hip_bfloat16() = default;
291
293 __BF16_HOST_DEVICE__ operator __hip_bfloat16_raw() const { return __hip_bfloat16_raw{__x}; }
294
296 __BF16_HOST_DEVICE__ operator __hip_bfloat16_raw() const volatile {
297 return __hip_bfloat16_raw{__x};
298 }
299
301 __BF16_HOST_DEVICE__ operator bool() const {
302 auto val = bfloatraw_2_float(__x);
303 return val != 0.0f && val != -0.0f;
304 }
305
307 __BF16_HOST_DEVICE__ operator char() const { return static_cast<char>(bfloatraw_2_float(__x)); }
308
310 __BF16_HOST_DEVICE__ operator float() const { return bfloatraw_2_float(__x); }
311
313 __BF16_HOST_DEVICE__ operator int() const { return static_cast<int>(bfloatraw_2_float(__x)); }
314
316 __BF16_HOST_DEVICE__ operator long() const { return static_cast<long>(bfloatraw_2_float(__x)); }
317
319 __BF16_HOST_DEVICE__ operator long long() const {
320 return static_cast<long long>(bfloatraw_2_float(__x));
321 }
322
324 __BF16_HOST_DEVICE__ operator short() const { return static_cast<short>(bfloatraw_2_float(__x)); }
325
327 __BF16_HOST_DEVICE__ operator signed char() const {
328 return static_cast<signed char>(bfloatraw_2_float(__x));
329 }
330
332 __BF16_HOST_DEVICE__ operator unsigned char() const {
333 return static_cast<unsigned char>(bfloatraw_2_float(__x));
334 }
335
337 __BF16_HOST_DEVICE__ operator unsigned int() const {
338 return static_cast<unsigned int>(bfloatraw_2_float(__x));
339 }
340
342 __BF16_HOST_DEVICE__ operator unsigned long() const {
343 return static_cast<unsigned long>(bfloatraw_2_float(__x));
344 }
345
347 __BF16_HOST_DEVICE__ operator unsigned long long() const {
348 return static_cast<unsigned long long>(bfloatraw_2_float(__x));
349 }
350
352 __BF16_HOST_DEVICE__ operator unsigned short() const {
353 return static_cast<unsigned short>(bfloatraw_2_float(__x));
354 }
355
356 // TODO: SWDEV-452411 add operator which converts unsigned long long and long long to bfloat
357
359 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(unsigned int val) {
360 __x = float_2_bfloatraw(static_cast<float>(val));
361 return *this;
362 }
363
365 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(int val) {
366 __x = float_2_bfloatraw(static_cast<float>(val));
367 return *this;
368 }
369
371 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(unsigned short val) {
372 __x = float_2_bfloatraw(static_cast<float>(val));
373 return *this;
374 }
375
377 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(short val) {
378 __x = float_2_bfloatraw(static_cast<float>(val));
379 return *this;
380 }
381
383 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const double f) {
384 __x = float_2_bfloatraw(static_cast<float>(f));
385 return *this;
386 }
387
389 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const float f) {
390 __x = float_2_bfloatraw(f);
391 return *this;
392 }
393
395 __BF16_HOST_DEVICE__ __hip_bfloat16& operator=(const __hip_bfloat16_raw& hr) {
396 __x = hr.x;
397 return *this;
398 }
399
401 __BF16_HOST_DEVICE__ volatile __hip_bfloat16& operator=(const __hip_bfloat16_raw& hr) volatile {
402 __x = hr.x;
403 return *this;
404 }
405
407 __BF16_HOST_DEVICE__ volatile __hip_bfloat16& operator=(
408 const volatile __hip_bfloat16_raw& hr) volatile {
409 __x = hr.x;
410 return *this;
411 }
412};
421struct __attribute__((aligned(4))) __hip_bfloat162 {
422 public:
423 __hip_bfloat16 x;
424 __hip_bfloat16 y;
427 public:
429 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat162_raw& h2r)
430 : x(__hip_bfloat16(__hip_bfloat16_raw{h2r.x})),
431 y(__hip_bfloat16(__hip_bfloat16_raw{h2r.y})) {}
432
434 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat162& val) {
435 __hip_bfloat162_raw hr = val;
436 x = __hip_bfloat16_raw{hr.x};
437 y = __hip_bfloat16_raw{hr.y};
438 }
439
441 __BF16_HOST_DEVICE__ __hip_bfloat162(const __hip_bfloat16& a, const __hip_bfloat16& b)
442 : x(a), y(b) {}
443
445 __BF16_HOST_DEVICE__ __hip_bfloat162() = default;
446
448 __BF16_HOST_DEVICE__ operator __hip_bfloat162_raw() const {
449 __hip_bfloat16_raw l = x;
450 __hip_bfloat16_raw r = y;
451 return __hip_bfloat162_raw{l.x, r.x};
452 }
453
455 __BF16_HOST_DEVICE__ operator float2() const {
456#if HIP_BF16_AVX512_OP
457 union {
458 __hip_bfloat162_raw raw2;
459 __bf16 bf162[2];
460 static_assert(sizeof(__bf16[2]) == sizeof(__hip_bfloat162_raw));
461 } u;
462 u.raw2 = *this;
463 __m128bh pbf16{u.bf162[0], u.bf162[1], 0, 0};
464 __m128 pf32 = _mm_cvtpbh_ps(pbf16);
465 float2 ret(pf32[0], pf32[1]);
466#else
467 float2 ret(x, y);
468#endif
469 return ret;
470 }
471
473 __BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __hip_bfloat162_raw& h2r) {
474 x = __hip_bfloat16(__hip_bfloat16_raw{h2r.x});
475 y = __hip_bfloat16(__hip_bfloat16_raw{h2r.y});
476 return *this;
477 }
478
480 __BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __hip_bfloat162& src) {
481 __hip_bfloat162_raw hr = src;
482 x = __hip_bfloat16(__hip_bfloat16_raw{hr.x});
483 y = __hip_bfloat16(__hip_bfloat16_raw{hr.y});
484 return *this;
485 }
486};
493__BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a) {
494 float ret = a;
495 return ret;
496}
497
502__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f) {
503 __hip_bfloat16 ret{f};
504 return ret;
505}
506
511__BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
512 float2 ret = a;
513 return ret;
514}
515
520__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
521 return __hip_bfloat162(a, a);
522}
523
528__BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h) {
529 short ret = h;
530 return ret;
531}
532
537__BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) {
538 unsigned short ret = h;
539 return ret;
540}
541
546__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a) {
547 __hip_bfloat16 ret{a};
548 return ret;
549}
550
555__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
556 return __hip_bfloat162{__float2bfloat16(a.x), __float2bfloat16(a.y)};
557}
558
563__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a,
564 const __hip_bfloat16 b) {
565 return __hip_bfloat162(a, b);
566}
567
572__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) {
573 __hip_bfloat162_raw hr = a;
574 return __hip_bfloat16(__hip_bfloat16_raw{hr.y});
575}
576
581__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
582 __hip_bfloat162_raw hr = a;
583 return __hip_bfloat162(__hip_bfloat16_raw{hr.y}, __hip_bfloat16_raw{hr.y});
584}
585
590__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a) {
591 __hip_bfloat162_raw hr = a;
592 return __bfloat162float(__hip_bfloat16(__hip_bfloat16_raw{hr.y}));
593}
594
599__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a,
600 const __hip_bfloat162 b) {
601 __hip_bfloat162_raw hr_a = a;
602 __hip_bfloat162_raw hr_b = b;
603 return __hip_bfloat162(__hip_bfloat162_raw{hr_a.y, hr_b.y});
604}
605
610__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) {
611 __hip_bfloat162_raw hr = a;
612 return __hip_bfloat16(hr.x);
613}
614
619__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
620 __hip_bfloat162_raw hr = a;
621 return __hip_bfloat162(hr.x, hr.x);
622}
623
628__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a) {
629 __hip_bfloat162_raw hr = a;
630 return __bfloat162float(__hip_bfloat16(__hip_bfloat16_raw{hr.x}));
631}
632
637__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
638 __hip_bfloat162_raw hr = a;
639 return __hip_bfloat162(__hip_bfloat162_raw{hr.y, hr.x});
640}
641
646__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a,
647 const __hip_bfloat162 b) {
648 __hip_bfloat162_raw hr_a = a;
649 __hip_bfloat162_raw hr_b = b;
650 return __hip_bfloat162(__hip_bfloat162_raw{hr_a.x, hr_b.x});
651}
652
657__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
658 return __hip_bfloat16(a);
659}
660
665__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
666 return __hip_bfloat16(a);
667}
668
673__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
675}
676
681__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
683}
684
689__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
691}
692
697__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b,
698 const __hip_bfloat16 c) {
699 return __float2bfloat16(
700 __ocml_fma_f32(__bfloat162float(a), __bfloat162float(b), __bfloat162float(c)));
701}
702
707__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
709}
710
715__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
716 __hip_bfloat16_raw hr = a;
717 hr.x ^= 0x8000;
718 return __hip_bfloat16(hr);
719}
720
725__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
726 __hip_bfloat16_raw hr = a;
727 hr.x &= 0x7FFF;
728 return __hip_bfloat16(hr);
729}
730
735__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a,
736 const __hip_bfloat162 b) {
737 __hip_bfloat162_raw hr_a = a;
738 __hip_bfloat162_raw hr_b = b;
739 return __hip_bfloat162(__float2bfloat16(__bfloat162float(__hip_bfloat16_raw{hr_a.x}) /
740 __bfloat162float(__hip_bfloat16_raw{hr_b.x})),
741 __float2bfloat16(__bfloat162float(__hip_bfloat16_raw{hr_a.y}) /
742 __bfloat162float(__hip_bfloat16_raw{hr_b.y})));
743}
744
749__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
750 __hip_bfloat162_raw hr_a = a;
751 return __hip_bfloat162(__habs(__hip_bfloat16_raw{hr_a.x}), __habs(__hip_bfloat16_raw{hr_a.y}));
752}
753
758__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a,
759 const __hip_bfloat162 b) {
760 __hip_bfloat162_raw hr_a = a;
761 __hip_bfloat162_raw hr_b = b;
762 return __hip_bfloat162(__hadd(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}),
763 __hadd(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}));
764}
765
770__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b,
771 const __hip_bfloat162 c) {
772 __hip_bfloat162_raw hr_a = a;
773 __hip_bfloat162_raw hr_b = b;
774 __hip_bfloat162_raw hr_c = c;
775 return __hip_bfloat162(
776 __hfma(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}, __hip_bfloat16_raw{hr_c.x}),
777 __hfma(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}, __hip_bfloat16_raw{hr_c.y}));
778}
779
784__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a,
785 const __hip_bfloat162 b) {
786 __hip_bfloat162_raw hr_a = a;
787 __hip_bfloat162_raw hr_b = b;
788 return __hip_bfloat162(__hmul(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}),
789 __hmul(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}));
790}
791
796__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
797 __hip_bfloat162_raw hr_a = a;
798 return __hip_bfloat162(__hneg(__hip_bfloat16_raw{hr_a.x}), __hneg(__hip_bfloat16_raw{hr_a.y}));
799}
800
805__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a,
806 const __hip_bfloat162 b) {
807 __hip_bfloat162_raw hr_a = a;
808 __hip_bfloat162_raw hr_b = b;
809 return __hip_bfloat162(__hsub(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}),
810 __hsub(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}));
811}
812
817__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16& l,
818 const __hip_bfloat16& r) {
819 return __hmul(l, r);
820}
821
826__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator*=(__hip_bfloat16& l, const __hip_bfloat16& r) {
827 l = __hmul(l, r);
828 return l;
829}
830
835__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16& l) { return l; }
836
841__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16& l,
842 const __hip_bfloat16& r) {
843 return __hadd(l, r);
844}
845
850__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16& l) { return __hneg(l); }
851
856__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16& l,
857 const __hip_bfloat16& r) {
858 return __hsub(l, r);
859}
860
865__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16& l, const int) {
866 auto ret = l;
867 l = __hadd(l, HIPRT_ONE_BF16);
868 return ret;
869}
870
875__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator++(__hip_bfloat16& l) {
876 l = __hadd(l, HIPRT_ONE_BF16);
877 return l;
878}
879
884__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16& l, const int) {
885 auto ret = l;
886 l = __hsub(l, HIPRT_ONE_BF16);
887 return ret;
888}
889
894__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator--(__hip_bfloat16& l) {
895 l = __hsub(l, HIPRT_ONE_BF16);
896 return l;
897}
898
903__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator+=(__hip_bfloat16& l, const __hip_bfloat16& r) {
904 l = __hadd(l, r);
905 return l;
906}
907
912__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator-=(__hip_bfloat16& l, const __hip_bfloat16& r) {
913 l = __hsub(l, r);
914 return l;
915}
916
921__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16& l,
922 const __hip_bfloat16& r) {
923 return __hdiv(l, r);
924}
925
930__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16& operator/=(__hip_bfloat16& l, const __hip_bfloat16& r) {
931 l = __hdiv(l, r);
932 return l;
933}
934
939__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator*(const __hip_bfloat162& l,
940 const __hip_bfloat162& r) {
941 return __hmul2(l, r);
942}
943
948__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator*=(__hip_bfloat162& l,
949 const __hip_bfloat162& r) {
950 l = __hmul2(l, r);
951 return l;
952}
953
958__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator+(const __hip_bfloat162& l) { return l; }
959
964__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator+(const __hip_bfloat162& l,
965 const __hip_bfloat162& r) {
966 return __hadd2(l, r);
967}
968
973__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator-(const __hip_bfloat162& l) {
974 return __hneg2(l);
975}
976
981__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator-(const __hip_bfloat162& l,
982 const __hip_bfloat162& r) {
983 return __hsub2(l, r);
984}
985
990__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator++(__hip_bfloat162& l, const int) {
991 auto ret = l;
992 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
993 return ret;
994}
995
1000__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator++(__hip_bfloat162& l) {
1001 l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1002 return l;
1003}
1004
1009__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator--(__hip_bfloat162& l, const int) {
1010 auto ret = l;
1011 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1012 return ret;
1013}
1014
1019__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator--(__hip_bfloat162& l) {
1020 l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
1021 return l;
1022}
1023
1028__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator+=(__hip_bfloat162& l,
1029 const __hip_bfloat162& r) {
1030 l = __hadd2(l, r);
1031 return l;
1032}
1033
1038__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator-=(__hip_bfloat162& l,
1039 const __hip_bfloat162& r) {
1040 l = __hsub2(l, r);
1041 return l;
1042}
1043
1048__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 operator/(const __hip_bfloat162& l,
1049 const __hip_bfloat162& r) {
1050 return __h2div(l, r);
1051}
1052
1057__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162& operator/=(__hip_bfloat162& l,
1058 const __hip_bfloat162& r) {
1059 l = __h2div(l, r);
1060 return l;
1061}
1062
1067__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1068 return __bfloat162float(a) == __bfloat162float(b);
1069}
1070
1075__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1076 return !(__bfloat162float(a) < __bfloat162float(b)) &&
1078}
1079
1084__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1085 return __bfloat162float(a) > __bfloat162float(b);
1086}
1087
1092__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1093 return !(__bfloat162float(a) <= __bfloat162float(b));
1094}
1095
1100__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1101 return __bfloat162float(a) >= __bfloat162float(b);
1102}
1103
1108__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1109 return !(__bfloat162float(a) < __bfloat162float(b));
1110}
1111
1116__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1117 return __bfloat162float(a) != __bfloat162float(b);
1118}
1119
1124__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1125 return !(__bfloat162float(a) == __bfloat162float(b));
1126}
1127
1132__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1133#if __HIP_DEVICE_COMPILE__
1134 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
1135#else
1136 return __float2bfloat16(std::max(__bfloat162float(a), __bfloat162float(b)));
1137#endif
1138}
1139
1144__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1145#if __HIP_DEVICE_COMPILE__
1146 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
1147#else
1148 return __float2bfloat16(std::min(__bfloat162float(a), __bfloat162float(b)));
1149#endif
1150}
1151
1156__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1157 return __bfloat162float(a) < __bfloat162float(b);
1158}
1159
1164__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1165 return !(__bfloat162float(a) >= __bfloat162float(b));
1166}
1167
1172__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1173 return __bfloat162float(a) <= __bfloat162float(b);
1174}
1175
1180__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
1181 return !(__bfloat162float(a) > __bfloat162float(b));
1182}
1183
1188__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a) {
1189 __hip_bfloat16_raw hr = a;
1190 return !(~hr.x & 0x7f80) && !(hr.x & 0x7f);
1191}
1192
1197__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a) {
1198 __hip_bfloat16_raw hr = a;
1199 return !(~hr.x & 0x7f80) && +(hr.x & 0x7f);
1200}
1201
1206__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1207 __hip_bfloat162_raw hr_a = a;
1208 __hip_bfloat162_raw hr_b = b;
1209 return __heq(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1210 __heq(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1211}
1212
1217__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1218 __hip_bfloat162_raw hr_a = a;
1219 __hip_bfloat162_raw hr_b = b;
1220 return __hequ(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1221 __hequ(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1222}
1223
1228__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1229 __hip_bfloat162_raw hr_a = a;
1230 __hip_bfloat162_raw hr_b = b;
1231 return __hge(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1232 __hge(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1233}
1234
1239__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1240 __hip_bfloat162_raw hr_a = a;
1241 __hip_bfloat162_raw hr_b = b;
1242 return __hgeu(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1243 __hgeu(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1244}
1245
1250__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1251 __hip_bfloat162_raw hr_a = a;
1252 __hip_bfloat162_raw hr_b = b;
1253 return __hgt(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1254 __hgt(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1255}
1256
1261__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1262 __hip_bfloat162_raw hr_a = a;
1263 __hip_bfloat162_raw hr_b = b;
1264 return __hgtu(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1265 __hgtu(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1266}
1267
1272__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1273 __hip_bfloat162_raw hr_a = a;
1274 __hip_bfloat162_raw hr_b = b;
1275 return __hle(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1276 __hle(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1277}
1278
1283__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1284 __hip_bfloat162_raw hr_a = a;
1285 __hip_bfloat162_raw hr_b = b;
1286 return __hleu(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1287 __hleu(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1288}
1289
1294__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1295 __hip_bfloat162_raw hr_a = a;
1296 __hip_bfloat162_raw hr_b = b;
1297 return __hlt(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1298 __hlt(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1299}
1300
1305__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1306 __hip_bfloat162_raw hr_a = a;
1307 __hip_bfloat162_raw hr_b = b;
1308 return __hltu(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) &&
1309 __hltu(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1310}
1311
1316__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1317 __hip_bfloat162_raw hr_a = a;
1318 __hip_bfloat162_raw hr_b = b;
1319 return __hne(__hip_bfloat16(__hip_bfloat16_raw{hr_a.x}),
1320 __hip_bfloat16(__hip_bfloat16_raw{hr_b.x})) &&
1321 __hne(__hip_bfloat16(__hip_bfloat16_raw{hr_a.y}), __hip_bfloat16(__hip_bfloat16_raw{hr_b.y}));
1322}
1323
1328__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
1329 __hip_bfloat162_raw hr_a = a;
1330 __hip_bfloat162_raw hr_b = b;
1331 return __hneu(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ||
1332 __hneu(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y});
1333}
1334
1339__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a,
1340 const __hip_bfloat162 b) {
1341 __hip_bfloat162_raw hr_a = a;
1342 __hip_bfloat162_raw hr_b = b;
1343 return __hip_bfloat162{
1344 {__heq(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1345 : HIPRT_ZERO_BF16},
1346 {__heq(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1347 : HIPRT_ZERO_BF16}};
1348}
1349
1354__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a,
1355 const __hip_bfloat162 b) {
1356 __hip_bfloat162_raw hr_a = a;
1357 __hip_bfloat162_raw hr_b = b;
1358 return __hip_bfloat162{
1359 {__hge(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1360 : HIPRT_ZERO_BF16},
1361 {__hge(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1362 : HIPRT_ZERO_BF16}};
1363}
1364
1369__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a,
1370 const __hip_bfloat162 b) {
1371 __hip_bfloat162_raw hr_a = a;
1372 __hip_bfloat162_raw hr_b = b;
1373 return __hip_bfloat162{
1374 {__hgt(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1375 : HIPRT_ZERO_BF16},
1376 {__hgt(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1377 : HIPRT_ONE_BF16}};
1378}
1379
1384__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
1385 __hip_bfloat162_raw hr_a = a;
1386 return __hip_bfloat162{{__hisnan(__hip_bfloat16_raw{hr_a.x}) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
1387 {__hisnan(__hip_bfloat16_raw{hr_a.y}) ? HIPRT_ONE_BF16 : HIPRT_ONE_BF16}};
1388}
1389
1394__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a,
1395 const __hip_bfloat162 b) {
1396 __hip_bfloat162_raw hr_a = a;
1397 __hip_bfloat162_raw hr_b = b;
1398 return __hip_bfloat162{
1399 {__hle(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1400 : HIPRT_ZERO_BF16},
1401 {__hle(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1402 : HIPRT_ZERO_BF16}};
1403}
1404
1409__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a,
1410 const __hip_bfloat162 b) {
1411 __hip_bfloat162_raw hr_a = a;
1412 __hip_bfloat162_raw hr_b = b;
1413 return __hip_bfloat162{
1414 {__hlt(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1415 : HIPRT_ZERO_BF16},
1416 {__hlt(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1417 : HIPRT_ZERO_BF16}};
1418}
1419
1424__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a,
1425 const __hip_bfloat162 b) {
1426 __hip_bfloat162_raw hr_a = a;
1427 __hip_bfloat162_raw hr_b = b;
1428 return __hip_bfloat162(__hmax(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}),
1429 __hmax(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}));
1430}
1431
1436__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a,
1437 const __hip_bfloat162 b) {
1438 __hip_bfloat162_raw hr_a = a;
1439 __hip_bfloat162_raw hr_b = b;
1440 return __hip_bfloat162(__hmin(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}),
1441 __hmin(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}));
1442}
1443
1448__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a,
1449 const __hip_bfloat162 b) {
1450 __hip_bfloat162_raw hr_a = a;
1451 __hip_bfloat162_raw hr_b = b;
1452 return __hip_bfloat162{
1453 {__hne(__hip_bfloat16_raw{hr_a.x}, __hip_bfloat16_raw{hr_b.x}) ? HIPRT_ONE_BF16
1454 : HIPRT_ZERO_BF16},
1455 {__hne(__hip_bfloat16_raw{hr_a.y}, __hip_bfloat16_raw{hr_b.y}) ? HIPRT_ONE_BF16
1456 : HIPRT_ZERO_BF16}};
1457}
1458
1463__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1464 return __heq(l, r);
1465}
1466
1471__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1472 return __hne(l, r);
1473}
1474
1479__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1480 return __hlt(l, r);
1481}
1482
1487__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1488 return __hle(l, r);
1489}
1490
1495__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1496 return __hgt(l, r);
1497}
1498
1503__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
1504 return __hge(l, r);
1505}
1506
1511__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1512 float2 ret = __heq2(l, r);
1513 return ret.x != 0.0f && ret.y != 0.0f;
1514}
1515
1520__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1521 return !(l == r);
1522}
1523
1528__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1529 float2 fl = l, fr = r;
1530 return fl.x < fr.x && fl.x < fr.y;
1531}
1532
1537__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1538 float2 fl = l, fr = r;
1539 return fl.x <= fr.x && fl.x <= fr.y;
1540}
1541
1546__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1547 float2 fl = l, fr = r;
1548 return fl.x > fr.x && fl.x > fr.y;
1549}
1550
1555__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
1556 float2 fl = l, fr = r;
1557 return fl.x >= fr.x && fl.x >= fr.y;
1558}
1559
1564__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h) {
1565 return __float2bfloat16(__ocml_ceil_f32(__bfloat162float(h)));
1566}
1567
1572__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h) {
1573 return __float2bfloat16(__ocml_cos_f32(__bfloat162float(h)));
1574}
1575
1580__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h) {
1581 return __float2bfloat16(__ocml_exp_f32(__bfloat162float(h)));
1582}
1583
1588__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h) {
1589 return __float2bfloat16(__ocml_exp10_f32(__bfloat162float(h)));
1590}
1591
1596__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h) {
1597 return __float2bfloat16(__ocml_exp2_f32(__bfloat162float(h)));
1598}
1599
1604__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h) {
1605 return __float2bfloat16(__ocml_floor_f32(__bfloat162float(h)));
1606}
1607
1612__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h) {
1613 return __float2bfloat16(__ocml_log_f32(__bfloat162float(h)));
1614}
1615
1620__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h) {
1621 return __float2bfloat16(__ocml_log10_f32(__bfloat162float(h)));
1622}
1623
1628__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h) {
1629 return __float2bfloat16(__ocml_log2_f32(__bfloat162float(h)));
1630}
1631
1636__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h) {
1637 return __float2bfloat16(1.0f / (__bfloat162float(h)));
1638}
1639
1644__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h) {
1645 return __float2bfloat16(__ocml_rint_f32(__bfloat162float(h)));
1646}
1647
1652__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h) {
1653 return __float2bfloat16(__ocml_rsqrt_f32(__bfloat162float(h)));
1654}
1655
1660__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h) {
1661 return __float2bfloat16(__ocml_sin_f32(__bfloat162float(h)));
1662}
1663
1668__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h) {
1669 return __float2bfloat16(__ocml_sqrt_f32(__bfloat162float(h)));
1670}
1671
1676__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h) {
1677 return __float2bfloat16(__ocml_trunc_f32(__bfloat162float(h)));
1678}
1679
1684__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h) {
1685 __hip_bfloat162_raw hr = h;
1686 return __hip_bfloat162(hceil(__hip_bfloat16_raw{hr.x}), hceil(__hip_bfloat16_raw{hr.y}));
1687}
1688
1693__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h) {
1694 __hip_bfloat162_raw hr = h;
1695 return __hip_bfloat162(hcos(__hip_bfloat16_raw{hr.x}), hcos(__hip_bfloat16_raw{hr.y}));
1696}
1697
1702__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h) {
1703 __hip_bfloat162_raw hr = h;
1704 return __hip_bfloat162(hexp(__hip_bfloat16_raw{hr.x}), hexp(__hip_bfloat16_raw{hr.y}));
1705}
1706
1711__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h) {
1712 __hip_bfloat162_raw hr = h;
1713 return __hip_bfloat162(hexp10(__hip_bfloat16_raw{hr.x}), hexp10(__hip_bfloat16_raw{hr.y}));
1714}
1715
1720__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h) {
1721 __hip_bfloat162_raw hr = h;
1722 return __hip_bfloat162(hexp2(__hip_bfloat16_raw{hr.x}), hexp2(__hip_bfloat16_raw{hr.y}));
1723}
1724
1729__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h) {
1730 __hip_bfloat162_raw hr = h;
1731 return __hip_bfloat162(hfloor(__hip_bfloat16_raw{hr.x}), hfloor(__hip_bfloat16_raw{hr.y}));
1732}
1733
1738__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h) {
1739 __hip_bfloat162_raw hr = h;
1740 return __hip_bfloat162(hlog(__hip_bfloat16_raw{hr.x}), hlog(__hip_bfloat16_raw{hr.y}));
1741}
1742
1747__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h) {
1748 __hip_bfloat162_raw hr = h;
1749 return __hip_bfloat162(hlog10(__hip_bfloat16_raw{hr.x}), hlog10(__hip_bfloat16_raw{hr.y}));
1750}
1751
1756__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h) {
1757 __hip_bfloat162_raw hr = h;
1758 return __hip_bfloat162(hlog2(__hip_bfloat16_raw{hr.x}), hlog2(__hip_bfloat16_raw{hr.y}));
1759}
1760
1765__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h) {
1766 __hip_bfloat162_raw hr = h;
1767 return __hip_bfloat162(hrcp(__hip_bfloat16_raw{hr.x}), hrcp(__hip_bfloat16_raw{hr.y}));
1768}
1769
1774__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h) {
1775 __hip_bfloat162_raw hr = h;
1776 return __hip_bfloat162(hrint(__hip_bfloat16_raw{hr.x}), hrint(__hip_bfloat16_raw{hr.y}));
1777}
1778
1783__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h) {
1784 __hip_bfloat162_raw hr = h;
1785 return __hip_bfloat162(hrsqrt(__hip_bfloat16_raw{hr.x}), hrsqrt(__hip_bfloat16_raw{hr.y}));
1786}
1787
1792__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h) {
1793 __hip_bfloat162_raw hr = h;
1794 return __hip_bfloat162(hsin(__hip_bfloat16_raw{hr.x}), hsin(__hip_bfloat16_raw{hr.y}));
1795}
1796
1801__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h) {
1802 __hip_bfloat162_raw hr = h;
1803 return __hip_bfloat162(hsqrt(__hip_bfloat16_raw{hr.x}), hsqrt(__hip_bfloat16_raw{hr.y}));
1804}
1805
1810__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h) {
1811 __hip_bfloat162_raw hr = h;
1812 return __hip_bfloat162(htrunc(__hip_bfloat16_raw{hr.x}), htrunc(__hip_bfloat16_raw{hr.y}));
1813}
1814#endif
Contains declarations for types and functions in device library. Uses int64_t and uint64_t instead of...
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:681
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator-=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to subtract-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:912
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator+(const __hip_bfloat16 &l)
Operator to unary+ on a __hip_bfloat16 number.
Definition amd_hip_bf16.h:835
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator/(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:921
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator-(const __hip_bfloat16 &l)
Operator to negate a __hip_bfloat16 number.
Definition amd_hip_bf16.h:850
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:715
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:673
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator/=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to divide-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:930
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator*(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:817
__BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:697
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator*=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to multiply-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:826
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:707
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator++(__hip_bfloat16 &l, const int)
Operator to post increment a __hip_bfloat16 number.
Definition amd_hip_bf16.h:865
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 & operator+=(__hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to add-assign two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:903
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:725
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 operator--(__hip_bfloat16 &l, const int)
Operator to post decrement a __hip_bfloat16 number.
Definition amd_hip_bf16.h:884
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:689
__BF16_HOST_DEVICE_STATIC__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:1067
__BF16_HOST_DEVICE_STATIC__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:1084
__BF16_HOST_DEVICE_STATIC__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:1116
__BF16_HOST_DEVICE_STATIC__ bool operator==(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform an equal compare on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1463
__BF16_HOST_DEVICE_STATIC__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than equal.
Definition amd_hip_bf16.h:1172
__BF16_HOST_DEVICE_STATIC__ bool operator!=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a not equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1471
__BF16_HOST_DEVICE_STATIC__ bool operator>(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1495
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:1144
__BF16_HOST_DEVICE_STATIC__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:1100
__BF16_HOST_DEVICE_STATIC__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:1180
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:1132
__BF16_HOST_DEVICE_STATIC__ bool operator<=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1487
__BF16_HOST_DEVICE_STATIC__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:1188
__BF16_HOST_DEVICE_STATIC__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:1124
__BF16_HOST_DEVICE_STATIC__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:1092
__BF16_HOST_DEVICE_STATIC__ bool operator<(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a less than on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1479
__BF16_HOST_DEVICE_STATIC__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:1164
__BF16_HOST_DEVICE_STATIC__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:1108
__BF16_HOST_DEVICE_STATIC__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:1156
__BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat16 &l, const __hip_bfloat16 &r)
Operator to perform a greater than equal on two __hip_bfloat16 numbers.
Definition amd_hip_bf16.h:1503
__BF16_HOST_DEVICE_STATIC__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:1197
__BF16_HOST_DEVICE_STATIC__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:1075
__BF16_HOST_DEVICE_STATIC__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:1239
__BF16_HOST_DEVICE_STATIC__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:1250
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns max of two elements.
Definition amd_hip_bf16.h:1424
__BF16_HOST_DEVICE_STATIC__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:1272
__BF16_HOST_DEVICE_STATIC__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:1294
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns min of two elements.
Definition amd_hip_bf16.h:1436
__BF16_HOST_DEVICE_STATIC__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:1283
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:1448
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1354
__BF16_HOST_DEVICE_STATIC__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1316
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1409
__BF16_HOST_DEVICE_STATIC__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:1305
__BF16_HOST_DEVICE_STATIC__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:1217
__BF16_HOST_DEVICE_STATIC__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:1206
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1394
__BF16_HOST_DEVICE_STATIC__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:1328
__BF16_HOST_DEVICE_STATIC__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:1261
__BF16_HOST_DEVICE_STATIC__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:1228
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:1339
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:1369
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:1384
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:749
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:805
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:784
__BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:770
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:758
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:796
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:735
__BF16_HOST_DEVICE_STATIC__ float __bfloat162float(__hip_bfloat16 a)
Converts bfloat16 to float.
Definition amd_hip_bf16.h:493
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __float2bfloat16(float f)
Converts float to bfloat16.
Definition amd_hip_bf16.h:502
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a)
Moves bfloat16 value to bfloat162.
Definition amd_hip_bf16.h:520
__BF16_HOST_DEVICE_STATIC__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer.
Definition amd_hip_bf16.h:537
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a)
Reinterprets unsigned short int into a bfloat16.
Definition amd_hip_bf16.h:665
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:610
__BF16_HOST_DEVICE_STATIC__ float2 __bfloat1622float2(const __hip_bfloat162 a)
Converts and moves bfloat162 to float2.
Definition amd_hip_bf16.h:511
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b)
Combine two __hip_bfloat16 to __hip_bfloat162.
Definition amd_hip_bf16.h:563
__BF16_HOST_DEVICE_STATIC__ short int __bfloat16_as_short(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as a signed short integer.
Definition amd_hip_bf16.h:528
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __double2bfloat16(const double a)
Convert double to __hip_bfloat16.
Definition amd_hip_bf16.h:546
__BF16_HOST_DEVICE_STATIC__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:628
__BF16_HOST_DEVICE_STATIC__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:590
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:619
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __float22bfloat162_rn(const float2 a)
Convert float2 to __hip_bfloat162.
Definition amd_hip_bf16.h:555
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts high 16 bits from each and combines them.
Definition amd_hip_bf16.h:599
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts low 16 bits from each and combines them.
Definition amd_hip_bf16.h:646
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:572
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:581
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __short_as_bfloat16(const short int a)
Reinterprets short int into a bfloat16.
Definition amd_hip_bf16.h:657
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:637
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:1588
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:1612
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:1596
__BF16_DEVICE_STATIC__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:1564
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:1636
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:1668
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:1620
__BF16_DEVICE_STATIC__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:1660
__BF16_DEVICE_STATIC__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:1604
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:1644
__BF16_DEVICE_STATIC__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:1676
__BF16_DEVICE_STATIC__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:1652
__BF16_DEVICE_STATIC__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:1572
__BF16_DEVICE_STATIC__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:1628
__BF16_DEVICE_STATIC__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:1580
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:1810
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:1765
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:1738
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:1702
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:1693
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:1792
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:1756
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:1684
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:1729
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:1711
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:1720
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:1747
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:1783
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:1774
__BF16_DEVICE_STATIC__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:1801
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition hip_fp16_math_fwd.h:57
Definition amd_hip_vector_types.h:2035