28#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
33#if defined(__HIPCC_RTC__)
34 #define __HOST_DEVICE__ __device__
36 #define __HOST_DEVICE__ __host__ __device__
39#if defined(__has_attribute)
40 #if __has_attribute(ext_vector_type)
41 #define __HIP_USE_NATIVE_VECTOR__ 1
42 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
44 #define __NATIVE_VECTOR__(n, T) T[n]
47#if defined(__cplusplus)
48#if !defined(__HIPCC_RTC__)
51 #include <type_traits>
56template <
class _Tp, _Tp __v>
struct integral_constant {
57 static constexpr const _Tp value = __v;
58 typedef _Tp value_type;
59 typedef integral_constant type;
60 constexpr operator value_type()
const {
return value; }
61 constexpr value_type operator()()
const {
return value; }
63template <
class _Tp, _Tp __v>
constexpr const _Tp integral_constant<_Tp, __v>::value;
65typedef integral_constant<bool, true> true_type;
66typedef integral_constant<bool, false> false_type;
68template <
bool B>
using bool_constant = integral_constant<bool, B>;
69typedef bool_constant<true> true_type;
70typedef bool_constant<false> false_type;
72template <
bool __B,
class __T =
void>
struct enable_if {};
73template <
class __T>
struct enable_if<true, __T> {
typedef __T type; };
75template<
bool _B>
struct true_or_false_type :
public false_type {};
76template<>
struct true_or_false_type<true> :
public true_type {};
78template <
class _Tp>
struct is_integral :
public false_type {};
79template <>
struct is_integral<bool> :
public true_type {};
80template <>
struct is_integral<char> :
public true_type {};
81template <>
struct is_integral<signed char> :
public true_type {};
82template <>
struct is_integral<unsigned char> :
public true_type {};
83template <>
struct is_integral<wchar_t> :
public true_type {};
84template <>
struct is_integral<short> :
public true_type {};
85template <>
struct is_integral<unsigned short> :
public true_type {};
86template <>
struct is_integral<int> :
public true_type {};
87template <>
struct is_integral<unsigned int> :
public true_type {};
88template <>
struct is_integral<long> :
public true_type {};
89template <>
struct is_integral<unsigned long> :
public true_type {};
90template <>
struct is_integral<long long> :
public true_type {};
91template <>
struct is_integral<unsigned long long> :
public true_type {};
93template <
class _Tp>
struct is_arithmetic :
public false_type {};
94template <>
struct is_arithmetic<bool> :
public true_type {};
95template <>
struct is_arithmetic<char> :
public true_type {};
96template <>
struct is_arithmetic<signed char> :
public true_type {};
97template <>
struct is_arithmetic<unsigned char> :
public true_type {};
98template <>
struct is_arithmetic<wchar_t> :
public true_type {};
99template <>
struct is_arithmetic<short> :
public true_type {};
100template <>
struct is_arithmetic<unsigned short> :
public true_type {};
101template <>
struct is_arithmetic<int> :
public true_type {};
102template <>
struct is_arithmetic<unsigned int> :
public true_type {};
103template <>
struct is_arithmetic<long> :
public true_type {};
104template <>
struct is_arithmetic<unsigned long> :
public true_type {};
105template <>
struct is_arithmetic<long long> :
public true_type {};
106template <>
struct is_arithmetic<unsigned long long> :
public true_type {};
107template <>
struct is_arithmetic<float> :
public true_type {};
108template <>
struct is_arithmetic<double> :
public true_type {};
110template<
typename _Tp>
struct is_floating_point :
public false_type {};
111template<>
struct is_floating_point<float> :
public true_type {};
112template<>
struct is_floating_point<double> :
public true_type {};
113template<>
struct is_floating_point<long double> :
public true_type {};
115template <
typename __T,
typename __U>
struct is_same :
public false_type {};
116template <
typename __T>
struct is_same<__T, __T> :
public true_type {};
118template<typename _Tp, bool = is_arithmetic<_Tp>::value>
119 struct is_signed :
public false_type {};
120template<
typename _Tp>
121 struct is_signed<_Tp, true> :
public true_or_false_type<_Tp(-1) < _Tp(0)> {};
123template <class _T1, class _T2> struct is_convertible
124 : public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
126template<typename _CharT> struct char_traits;
127template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
128template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
129typedef basic_istream<char> istream;
130typedef basic_ostream<char> ostream;
132template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
139 unsigned int next_pot(unsigned int x) {
141 return 1u << (32u - __builtin_clz(x - 1u));
145 template<typename T, unsigned int n> struct HIP_vector_base;
148 struct HIP_vector_base<T, 1> {
149 using Native_vec_ = __NATIVE_VECTOR__(1, T);
158 using value_type = T;
161 HIP_vector_base() = default;
165 HIP_vector_base(T x_) noexcept : data{x_} {}
168 HIP_vector_base(const HIP_vector_base&) = default;
171 HIP_vector_base(HIP_vector_base&&) = default;
173 ~HIP_vector_base() = default;
175 HIP_vector_base& operator=(const HIP_vector_base&) = default;
179 struct HIP_vector_base<T, 2> {
180 using Native_vec_ = __NATIVE_VECTOR__(2, T);
183 #if !__has_attribute(ext_vector_type)
184 alignas(hip_impl::next_pot(2 * sizeof(T)))
194 using value_type = T;
197 HIP_vector_base() = default;
201 HIP_vector_base(T x_) noexcept : data{x_, x_} {}
204 HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
207 HIP_vector_base(const HIP_vector_base&) = default;
210 HIP_vector_base(HIP_vector_base&&) = default;
212 ~HIP_vector_base() = default;
214 HIP_vector_base& operator=(const HIP_vector_base&) = default;
218 struct HIP_vector_base<T, 3> {
223 Native_vec_() = default;
228 Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
231 Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
234 Native_vec_(const Native_vec_&) = default;
237 Native_vec_(Native_vec_&&) = default;
239 ~Native_vec_() = default;
242 Native_vec_& operator=(const Native_vec_&) = default;
244 Native_vec_& operator=(Native_vec_&&) = default;
247 T& operator[](unsigned int idx) noexcept { return d[idx]; }
249 T operator[](unsigned int idx) const noexcept { return d[idx]; }
252 Native_vec_& operator+=(const Native_vec_& x_) noexcept
254 for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
258 Native_vec_& operator-=(const Native_vec_& x_) noexcept
260 for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
265 Native_vec_& operator*=(const Native_vec_& x_) noexcept
267 for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
271 Native_vec_& operator/=(const Native_vec_& x_) noexcept
273 for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
279 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
281 Native_vec_ operator-() const noexcept
284 for (auto&& x : r.d) x = -x;
290 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
292 Native_vec_ operator~() const noexcept
295 for (auto&& x : r.d) x = ~x;
300 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
302 Native_vec_& operator%=(const Native_vec_& x_) noexcept
304 for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
309 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
311 Native_vec_& operator^=(const Native_vec_& x_) noexcept
313 for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
318 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
320 Native_vec_& operator|=(const Native_vec_& x_) noexcept
322 for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
327 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
329 Native_vec_& operator&=(const Native_vec_& x_) noexcept
331 for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
336 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
338 Native_vec_& operator>>=(const Native_vec_& x_) noexcept
340 for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
345 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
347 Native_vec_& operator<<=(const Native_vec_& x_) noexcept
349 for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
352#if defined (__INTEL_COMPILER)
356 using Vec3_cmp = _Vec3_cmp;
358 using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
361 Vec3_cmp operator==(const Native_vec_& x_) const noexcept
363 return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
376 using value_type = T;
379 HIP_vector_base() = default;
383 HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
386 HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
389 HIP_vector_base(const HIP_vector_base&) = default;
392 HIP_vector_base(HIP_vector_base&&) = default;
394 ~HIP_vector_base() = default;
397 HIP_vector_base& operator=(const HIP_vector_base&) = default;
399 HIP_vector_base& operator=(HIP_vector_base&&) = default;
403 struct HIP_vector_base<T, 4> {
404 using Native_vec_ = __NATIVE_VECTOR__(4, T);
407 #if !__has_attribute(ext_vector_type)
408 alignas(hip_impl::next_pot(4 * sizeof(T)))
420 using value_type = T;
423 HIP_vector_base() = default;
427 HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
430 HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
433 HIP_vector_base(const HIP_vector_base&) = default;
436 HIP_vector_base(HIP_vector_base&&) = default;
438 ~HIP_vector_base() = default;
440 HIP_vector_base& operator=(const HIP_vector_base&) = default;
443 template<typename T, unsigned int rank>
444 struct HIP_vector_type : public HIP_vector_base<T, rank> {
445 using HIP_vector_base<T, rank>::data;
446 using typename HIP_vector_base<T, rank>::Native_vec_;
449 HIP_vector_type() = default;
452 typename std::enable_if<
453 std::is_convertible<U, T>::value>::type* = nullptr>
457 HIP_vector_type(U x_) noexcept
458 : HIP_vector_base<T, rank>{static_cast<T>(x_)}
462 typename std::enable_if<
463 (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
466 HIP_vector_type(Us... xs) noexcept
467 : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
471 HIP_vector_type(const HIP_vector_type&) = default;
474 HIP_vector_type(HIP_vector_type&&) = default;
476 ~HIP_vector_type() = default;
479 HIP_vector_type& operator=(const HIP_vector_type&) = default;
481 HIP_vector_type& operator=(HIP_vector_type&&) = default;
485 HIP_vector_type& operator++() noexcept
487 return *this += HIP_vector_type{1};
490 HIP_vector_type operator++(int) noexcept
498 HIP_vector_type& operator--() noexcept
500 return *this -= HIP_vector_type{1};
503 HIP_vector_type operator--(int) noexcept
511 HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
513#if __HIP_USE_NATIVE_VECTOR__
516 for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
522 typename std::enable_if<
523 std::is_convertible<U, T>{}>::type* = nullptr>
525 HIP_vector_type& operator+=(U x) noexcept
527 return *this += HIP_vector_type{x};
531 HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
533#if __HIP_USE_NATIVE_VECTOR__
536 for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
542 typename std::enable_if<
543 std::is_convertible<U, T>{}>::type* = nullptr>
545 HIP_vector_type& operator-=(U x) noexcept
547 return *this -= HIP_vector_type{x};
551 HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
553#if __HIP_USE_NATIVE_VECTOR__
556 for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
561 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
562 HIP_vector_type x, const HIP_vector_type& y) noexcept
564 return HIP_vector_type{ x } *= y;
569 typename std::enable_if<
570 std::is_convertible<U, T>{}>::type* = nullptr>
572 HIP_vector_type& operator*=(U x) noexcept
574 return *this *= HIP_vector_type{x};
577 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
578 HIP_vector_type x, const HIP_vector_type& y) noexcept
580 return HIP_vector_type{ x } /= y;
584 HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
586#if __HIP_USE_NATIVE_VECTOR__
589 for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
595 typename std::enable_if<
596 std::is_convertible<U, T>{}>::type* = nullptr>
598 HIP_vector_type& operator/=(U x) noexcept
600 return *this /= HIP_vector_type{x};
605 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
607 HIP_vector_type operator-() const noexcept
610#if __HIP_USE_NATIVE_VECTOR__
611 tmp.data = -tmp.data;
613 for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
620 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
622 HIP_vector_type operator~() const noexcept
624 HIP_vector_type r{*this};
625#if __HIP_USE_NATIVE_VECTOR__
628 for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
635 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
637 HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
639#if __HIP_USE_NATIVE_VECTOR__
642 for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
649 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
651 HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
653#if __HIP_USE_NATIVE_VECTOR__
656 for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
663 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
665 HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
667#if __HIP_USE_NATIVE_VECTOR__
670 for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
677 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
679 HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
681#if __HIP_USE_NATIVE_VECTOR__
684 for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
691 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
693 HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
695#if __HIP_USE_NATIVE_VECTOR__
698 for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
705 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
707 HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
709#if __HIP_USE_NATIVE_VECTOR__
712 for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
718 template<typename T, unsigned int n>
722 HIP_vector_type<T, n> operator+(
723 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
725 return HIP_vector_type<T, n>{x} += y;
727 template<typename T, unsigned int n, typename U>
731 HIP_vector_type<T, n> operator+(
732 const HIP_vector_type<T, n>& x, U y) noexcept
734 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
736 template<typename T, unsigned int n, typename U>
740 HIP_vector_type<T, n> operator+(
741 U x, const HIP_vector_type<T, n>& y) noexcept
743 return HIP_vector_type<T, n>{x} += y;
746 template<typename T, unsigned int n>
750 HIP_vector_type<T, n> operator-(
751 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
753 return HIP_vector_type<T, n>{x} -= y;
755 template<typename T, unsigned int n, typename U>
759 HIP_vector_type<T, n> operator-(
760 const HIP_vector_type<T, n>& x, U y) noexcept
762 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
764 template<typename T, unsigned int n, typename U>
768 HIP_vector_type<T, n> operator-(
769 U x, const HIP_vector_type<T, n>& y) noexcept
771 return HIP_vector_type<T, n>{x} -= y;
774 template<typename T, unsigned int n, typename U>
778 HIP_vector_type<T, n> operator*(
779 const HIP_vector_type<T, n>& x, U y) noexcept
781 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
783 template<typename T, unsigned int n, typename U>
787 HIP_vector_type<T, n> operator*(
788 U x, const HIP_vector_type<T, n>& y) noexcept
790 return HIP_vector_type<T, n>{x} *= y;
793 template<typename T, unsigned int n, typename U>
797 HIP_vector_type<T, n> operator/(
798 const HIP_vector_type<T, n>& x, U y) noexcept
800 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
802 template<typename T, unsigned int n, typename U>
806 HIP_vector_type<T, n> operator/(
807 U x, const HIP_vector_type<T, n>& y) noexcept
809 return HIP_vector_type<T, n>{x} /= y;
816 bool _hip_compare(const V& x, const V& y, int n) noexcept
819 (n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
822 template<typename T, unsigned int n>
827 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
829 return _hip_compare(x.data, y.data, n - 1);
831 template<typename T, unsigned int n, typename U>
835 bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
837 return x == HIP_vector_type<T, n>{y};
839 template<typename T, unsigned int n, typename U>
843 bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
845 return HIP_vector_type<T, n>{x} == y;
848 template<typename T, unsigned int n>
853 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
857 template<typename T, unsigned int n, typename U>
861 bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
865 template<typename T, unsigned int n, typename U>
869 bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
877 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
881 HIP_vector_type<T, n> operator%(
882 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
884 return HIP_vector_type<T, n>{x} %= y;
890 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
894 HIP_vector_type<T, n> operator%(
895 const HIP_vector_type<T, n>& x, U y) noexcept
897 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
903 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
907 HIP_vector_type<T, n> operator%(
908 U x, const HIP_vector_type<T, n>& y) noexcept
910 return HIP_vector_type<T, n>{x} %= y;
916 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
920 HIP_vector_type<T, n> operator^(
921 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
923 return HIP_vector_type<T, n>{x} ^= y;
929 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
933 HIP_vector_type<T, n> operator^(
934 const HIP_vector_type<T, n>& x, U y) noexcept
936 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
942 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
946 HIP_vector_type<T, n> operator^(
947 U x, const HIP_vector_type<T, n>& y) noexcept
949 return HIP_vector_type<T, n>{x} ^= y;
955 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
959 HIP_vector_type<T, n> operator|(
960 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
962 return HIP_vector_type<T, n>{x} |= y;
968 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
972 HIP_vector_type<T, n> operator|(
973 const HIP_vector_type<T, n>& x, U y) noexcept
975 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
981 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
985 HIP_vector_type<T, n> operator|(
986 U x, const HIP_vector_type<T, n>& y) noexcept
988 return HIP_vector_type<T, n>{x} |= y;
994 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
998 HIP_vector_type<T, n> operator&(
999 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1001 return HIP_vector_type<T, n>{x} &= y;
1007 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1011 HIP_vector_type<T, n> operator&(
1012 const HIP_vector_type<T, n>& x, U y) noexcept
1014 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1020 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1024 HIP_vector_type<T, n> operator&(
1025 U x, const HIP_vector_type<T, n>& y) noexcept
1027 return HIP_vector_type<T, n>{x} &= y;
1033 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1037 HIP_vector_type<T, n> operator>>(
1038 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1040 return HIP_vector_type<T, n>{x} >>= y;
1046 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1050 HIP_vector_type<T, n> operator>>(
1051 const HIP_vector_type<T, n>& x, U y) noexcept
1053 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1059 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1063 HIP_vector_type<T, n> operator>>(
1064 U x,
const HIP_vector_type<T, n>& y)
noexcept
1066 return HIP_vector_type<T, n>{x} >>= y;
1072 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1076 HIP_vector_type<T, n> operator<<(
1077 const HIP_vector_type<T, n>& x,
const HIP_vector_type<T, n>& y)
noexcept
1079 return HIP_vector_type<T, n>{x} <<= y;
1085 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1089 HIP_vector_type<T, n> operator<<(
1090 const HIP_vector_type<T, n>& x, U y)
noexcept
1092 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1098 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1099 typename std::enable_if<std::is_integral<T>{}>* =
nullptr>
1103 HIP_vector_type<T, n> operator<<(
1104 U x,
const HIP_vector_type<T, n>& y)
noexcept
1106 return HIP_vector_type<T, n>{x} <<= y;
1112 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1113 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 1 && rankU >= 1),
1114 const HIP_vector_type<T, rankT>>::type
1115 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1116 return HIP_vector_type<T, rankT>(
static_cast<T
>(u.x));
1119 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1120 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 2 && rankU == 1),
1121 const HIP_vector_type<T, rankT>>::type
1122 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1123 return HIP_vector_type<T, rankT> (
static_cast<T
>(u.x),
static_cast<T
>(0));
1126 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1127 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 2 && rankU >= 2),
1128 const HIP_vector_type<T, rankT>>::type
1129 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1130 return HIP_vector_type<T, rankT> (
static_cast<T
>(u.x),
static_cast<T
>(u.y));
1133 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1134 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 4 && rankU == 1),
1135 const HIP_vector_type<T, rankT>>::type
1136 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1137 return HIP_vector_type<T, rankT> (
static_cast<T
>(u.x),
static_cast<T
>(0),
1138 static_cast<T
>(0),
static_cast<T
>(0));
1141 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1142 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 4 && rankU == 2),
1143 const HIP_vector_type<T, rankT>>::type
1144 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1145 return HIP_vector_type<T, rankT>(
static_cast<T
>(u.x),
static_cast<T
>(u.y),
1146 static_cast<T
>(0),
static_cast<T
>(0));
1149 template <
typename T,
unsigned int rankT,
typename U,
unsigned int rankU>
1150 __forceinline__ __HOST_DEVICE__
typename std::enable_if<(rankT == 4 && rankU == 4),
1151 const HIP_vector_type<T, rankT>>::type
1152 __hipMapVector(
const HIP_vector_type<U, rankU>& u) {
1153 return HIP_vector_type<T, rankT> (
static_cast<T
>(u.x),
static_cast<T
>(u.y),
1154 static_cast<T
>(u.z),
static_cast<T
>(u.w));
1157 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1158 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1159 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1160 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1161 using CUDA_name##4 = HIP_vector_type<T, 4>;
1163 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1184__MAKE_VECTOR_TYPE__(uchar,
unsigned char);
1185__MAKE_VECTOR_TYPE__(
char,
char);
1186__MAKE_VECTOR_TYPE__(ushort,
unsigned short);
1187__MAKE_VECTOR_TYPE__(
short,
short);
1188__MAKE_VECTOR_TYPE__(uint,
unsigned int);
1189__MAKE_VECTOR_TYPE__(
int,
int);
1190__MAKE_VECTOR_TYPE__(ulong,
unsigned long);
1191__MAKE_VECTOR_TYPE__(
long,
long);
1192__MAKE_VECTOR_TYPE__(ulonglong,
unsigned long long);
1193__MAKE_VECTOR_TYPE__(longlong,
long long);
1194__MAKE_VECTOR_TYPE__(
float,
float);
1195__MAKE_VECTOR_TYPE__(
double,
double);
1199#if defined(_MSC_VER)
1200#include <mmintrin.h>
1201#include <xmmintrin.h>
1202#include <emmintrin.h>
1203#include <immintrin.h>
1257 unsigned char data[2];
1266 unsigned char data[4];
1274 unsigned char data[3];
1324 unsigned short data;
1331 unsigned short data[2];
1348 unsigned short data[3];
1422 unsigned int data[3];
1496 unsigned int data[3];
1707 unsigned char data[2];
1716 unsigned char data[4];
1719 unsigned char data[8];
1722 unsigned char data[16];
1730 unsigned char data[3];
1774 unsigned short data;
1781 unsigned short data[2];
1790 unsigned short data[4];
1793 unsigned short data[8];
1796 unsigned short data[16];
1804 unsigned short data[3];
1855 unsigned int data[2];
1864 unsigned int data[4];
1867 unsigned int data[8];
1870 unsigned int data[16];
1878 unsigned int data[3];
1929 unsigned long data[2];
1938 unsigned long data[4];
1941 unsigned long data[8];
1944 unsigned long data[16];
1952 unsigned long data[3];
1994 unsigned long long x;
1996 unsigned long long data;
2000 unsigned long long x;
2001 unsigned long long y;
2003 unsigned long long data[2];
2007 unsigned long long x;
2008 unsigned long long y;
2009 unsigned long long z;
2010 unsigned long long w;
2012 unsigned long long data[4];
2015 unsigned long long data[8];
2018 unsigned long long data[16];
2022 unsigned long long x;
2023 unsigned long long y;
2024 unsigned long long z;
2026 unsigned long long data[3];
2107#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2108 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2113#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2114 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2119#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2120 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2125#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2126 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2127 type r{x, y, z, w}; \
2131#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2132 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2138#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2139 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2146#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2147 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2155#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2156 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2166DECLOP_MAKE_ONE_COMPONENT(
unsigned char,
uchar1);
2167DECLOP_MAKE_TWO_COMPONENT(
unsigned char,
uchar2);
2168DECLOP_MAKE_THREE_COMPONENT(
unsigned char,
uchar3);
2169DECLOP_MAKE_FOUR_COMPONENT(
unsigned char,
uchar4);
2171DECLOP_MAKE_ONE_COMPONENT(
signed char,
char1);
2172DECLOP_MAKE_TWO_COMPONENT(
signed char,
char2);
2173DECLOP_MAKE_THREE_COMPONENT(
signed char,
char3);
2174DECLOP_MAKE_FOUR_COMPONENT(
signed char,
char4);
2176DECLOP_MAKE_ONE_COMPONENT(
unsigned short,
ushort1);
2177DECLOP_MAKE_TWO_COMPONENT(
unsigned short,
ushort2);
2178DECLOP_MAKE_THREE_COMPONENT(
unsigned short,
ushort3);
2179DECLOP_MAKE_FOUR_COMPONENT(
unsigned short,
ushort4);
2181DECLOP_MAKE_ONE_COMPONENT(
signed short,
short1);
2182DECLOP_MAKE_TWO_COMPONENT(
signed short,
short2);
2183DECLOP_MAKE_THREE_COMPONENT(
signed short,
short3);
2184DECLOP_MAKE_FOUR_COMPONENT(
signed short,
short4);
2186DECLOP_MAKE_ONE_COMPONENT(
unsigned int,
uint1);
2187DECLOP_MAKE_TWO_COMPONENT(
unsigned int,
uint2);
2188DECLOP_MAKE_THREE_COMPONENT(
unsigned int,
uint3);
2189DECLOP_MAKE_FOUR_COMPONENT(
unsigned int,
uint4);
2191DECLOP_MAKE_ONE_COMPONENT(
signed int,
int1);
2192DECLOP_MAKE_TWO_COMPONENT(
signed int,
int2);
2193DECLOP_MAKE_THREE_COMPONENT(
signed int,
int3);
2194DECLOP_MAKE_FOUR_COMPONENT(
signed int,
int4);
2196DECLOP_MAKE_ONE_COMPONENT(
float,
float1);
2197DECLOP_MAKE_TWO_COMPONENT(
float,
float2);
2198DECLOP_MAKE_THREE_COMPONENT(
float,
float3);
2199DECLOP_MAKE_FOUR_COMPONENT(
float,
float4);
2201DECLOP_MAKE_ONE_COMPONENT(
double,
double1);
2202DECLOP_MAKE_TWO_COMPONENT(
double,
double2);
2203DECLOP_MAKE_THREE_COMPONENT(
double,
double3);
2204DECLOP_MAKE_FOUR_COMPONENT(
double,
double4);
2206DECLOP_MAKE_ONE_COMPONENT(
unsigned long,
ulong1);
2207DECLOP_MAKE_TWO_COMPONENT(
unsigned long,
ulong2);
2208DECLOP_MAKE_THREE_COMPONENT(
unsigned long,
ulong3);
2209DECLOP_MAKE_FOUR_COMPONENT(
unsigned long,
ulong4);
2211DECLOP_MAKE_ONE_COMPONENT(
signed long,
long1);
2212DECLOP_MAKE_TWO_COMPONENT(
signed long,
long2);
2213DECLOP_MAKE_THREE_COMPONENT(
signed long,
long3);
2214DECLOP_MAKE_FOUR_COMPONENT(
signed long,
long4);
2216DECLOP_MAKE_ONE_COMPONENT(
unsigned long long,
ulonglong1);
2217DECLOP_MAKE_TWO_COMPONENT(
unsigned long long,
ulonglong2);
2218DECLOP_MAKE_THREE_COMPONENT(
unsigned long long,
ulonglong3);
2219DECLOP_MAKE_FOUR_COMPONENT(
unsigned long long,
ulonglong4);
2221DECLOP_MAKE_ONE_COMPONENT(
signed long long,
longlong1);
2222DECLOP_MAKE_TWO_COMPONENT(
signed long long,
longlong2);
2223DECLOP_MAKE_THREE_COMPONENT(
signed long long,
longlong3);
2224DECLOP_MAKE_FOUR_COMPONENT(
signed long long,
longlong4);
Definition amd_hip_vector_types.h:1659
Definition amd_hip_vector_types.h:1665
Definition amd_hip_vector_types.h:1672
Definition amd_hip_vector_types.h:1681
Definition amd_hip_vector_types.h:1684
Definition amd_hip_vector_types.h:1687
Definition amd_hip_vector_types.h:1696
Definition amd_hip_vector_types.h:1702
Definition amd_hip_vector_types.h:1709
Definition amd_hip_vector_types.h:1718
Definition amd_hip_vector_types.h:1721
Definition amd_hip_vector_types.h:1724
Definition amd_hip_vector_types.h:1733
Definition amd_hip_vector_types.h:1739
Definition amd_hip_vector_types.h:1746
Definition amd_hip_vector_types.h:1755
Definition amd_hip_vector_types.h:1758
Definition amd_hip_vector_types.h:1761
Definition amd_hip_vector_types.h:1770
Definition amd_hip_vector_types.h:1776
Definition amd_hip_vector_types.h:1783
Definition amd_hip_vector_types.h:1792
Definition amd_hip_vector_types.h:1795
Definition amd_hip_vector_types.h:1798
Definition amd_hip_vector_types.h:1807
Definition amd_hip_vector_types.h:1813
Definition amd_hip_vector_types.h:1820
Definition amd_hip_vector_types.h:1829
Definition amd_hip_vector_types.h:1832
Definition amd_hip_vector_types.h:1835
Definition amd_hip_vector_types.h:1844
Definition amd_hip_vector_types.h:1850
Definition amd_hip_vector_types.h:1857
Definition amd_hip_vector_types.h:1866
Definition amd_hip_vector_types.h:1869
Definition amd_hip_vector_types.h:1872
Definition amd_hip_vector_types.h:1881
Definition amd_hip_vector_types.h:1887
Definition amd_hip_vector_types.h:1894
Definition amd_hip_vector_types.h:1903
Definition amd_hip_vector_types.h:1906
Definition amd_hip_vector_types.h:1909
Definition amd_hip_vector_types.h:1918
Definition amd_hip_vector_types.h:1924
Definition amd_hip_vector_types.h:1931
Definition amd_hip_vector_types.h:1940
Definition amd_hip_vector_types.h:1943
Definition amd_hip_vector_types.h:1946
Definition amd_hip_vector_types.h:1955
Definition amd_hip_vector_types.h:1961
Definition amd_hip_vector_types.h:1968
Definition amd_hip_vector_types.h:1977
Definition amd_hip_vector_types.h:1980
Definition amd_hip_vector_types.h:1983
Definition amd_hip_vector_types.h:1992
Definition amd_hip_vector_types.h:1998
Definition amd_hip_vector_types.h:2005
Definition amd_hip_vector_types.h:2014
Definition amd_hip_vector_types.h:2017
Definition amd_hip_vector_types.h:2020
Definition amd_hip_vector_types.h:2029
Definition amd_hip_vector_types.h:2035
Definition amd_hip_vector_types.h:2042
Definition amd_hip_vector_types.h:2051
Definition amd_hip_vector_types.h:2054
Definition amd_hip_vector_types.h:2057
Definition amd_hip_vector_types.h:2066
Definition amd_hip_vector_types.h:2072
Definition amd_hip_vector_types.h:2079
Definition amd_hip_vector_types.h:2088
Definition amd_hip_vector_types.h:2091
Definition amd_hip_vector_types.h:2094