HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_vector_types.h
1/*
2Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
28#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
30
32
33#if defined(__HIPCC_RTC__)
34 #define __HOST_DEVICE__ __device__
35#else
36 #define __HOST_DEVICE__ __host__ __device__
37#endif
38
39#if defined(__has_attribute)
40 #if __has_attribute(ext_vector_type)
41 #define __HIP_USE_NATIVE_VECTOR__ 1
42 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
43 #else
44 #define __NATIVE_VECTOR__(n, T) T[n]
45 #endif
46
47#if defined(__cplusplus)
48#if !defined(__HIPCC_RTC__)
49 #include <array>
50 #include <iosfwd>
51 #include <type_traits>
52#else
53namespace std {
54using ::size_t;
55
56template <class _Tp, _Tp __v> struct integral_constant {
57 static constexpr const _Tp value = __v;
58 typedef _Tp value_type;
59 typedef integral_constant type;
60 constexpr operator value_type() const { return value; }
61 constexpr value_type operator()() const { return value; }
62};
63template <class _Tp, _Tp __v> constexpr const _Tp integral_constant<_Tp, __v>::value;
64
65typedef integral_constant<bool, true> true_type;
66typedef integral_constant<bool, false> false_type;
67
68template <bool B> using bool_constant = integral_constant<bool, B>;
69typedef bool_constant<true> true_type;
70typedef bool_constant<false> false_type;
71
72template <bool __B, class __T = void> struct enable_if {};
73template <class __T> struct enable_if<true, __T> { typedef __T type; };
74
75template<bool _B> struct true_or_false_type : public false_type {};
76template<> struct true_or_false_type<true> : public true_type {};
77
78template <class _Tp> struct is_integral : public false_type {};
79template <> struct is_integral<bool> : public true_type {};
80template <> struct is_integral<char> : public true_type {};
81template <> struct is_integral<signed char> : public true_type {};
82template <> struct is_integral<unsigned char> : public true_type {};
83template <> struct is_integral<wchar_t> : public true_type {};
84template <> struct is_integral<short> : public true_type {};
85template <> struct is_integral<unsigned short> : public true_type {};
86template <> struct is_integral<int> : public true_type {};
87template <> struct is_integral<unsigned int> : public true_type {};
88template <> struct is_integral<long> : public true_type {};
89template <> struct is_integral<unsigned long> : public true_type {};
90template <> struct is_integral<long long> : public true_type {};
91template <> struct is_integral<unsigned long long> : public true_type {};
92
93template <class _Tp> struct is_arithmetic : public false_type {};
94template <> struct is_arithmetic<bool> : public true_type {};
95template <> struct is_arithmetic<char> : public true_type {};
96template <> struct is_arithmetic<signed char> : public true_type {};
97template <> struct is_arithmetic<unsigned char> : public true_type {};
98template <> struct is_arithmetic<wchar_t> : public true_type {};
99template <> struct is_arithmetic<short> : public true_type {};
100template <> struct is_arithmetic<unsigned short> : public true_type {};
101template <> struct is_arithmetic<int> : public true_type {};
102template <> struct is_arithmetic<unsigned int> : public true_type {};
103template <> struct is_arithmetic<long> : public true_type {};
104template <> struct is_arithmetic<unsigned long> : public true_type {};
105template <> struct is_arithmetic<long long> : public true_type {};
106template <> struct is_arithmetic<unsigned long long> : public true_type {};
107template <> struct is_arithmetic<float> : public true_type {};
108template <> struct is_arithmetic<double> : public true_type {};
109
110template<typename _Tp> struct is_floating_point : public false_type {};
111template<> struct is_floating_point<float> : public true_type {};
112template<> struct is_floating_point<double> : public true_type {};
113template<> struct is_floating_point<long double> : public true_type {};
114
115template <typename __T, typename __U> struct is_same : public false_type {};
116template <typename __T> struct is_same<__T, __T> : public true_type {};
117
118template<typename _Tp, bool = is_arithmetic<_Tp>::value>
119 struct is_signed : public false_type {};
120template<typename _Tp>
121 struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {};
122
123template <class _T1, class _T2> struct is_convertible
124 : public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
125
126template<typename _CharT> struct char_traits;
127template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
128template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
129typedef basic_istream<char> istream;
130typedef basic_ostream<char> ostream;
131
132template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
133} // Namespace std.
134#endif // defined(__HIPCC_RTC__)
135
136 namespace hip_impl {
137 inline
138 constexpr
139 unsigned int next_pot(unsigned int x) {
140 // Precondition: x > 1.
141 return 1u << (32u - __builtin_clz(x - 1u));
142 }
143 } // Namespace hip_impl.
144
145 template<typename T, unsigned int n> struct HIP_vector_base;
146
147 template<typename T>
148 struct HIP_vector_base<T, 1> {
149 using Native_vec_ = __NATIVE_VECTOR__(1, T);
150
151 union {
152 Native_vec_ data;
153 struct {
154 T x;
155 };
156 };
157
158 using value_type = T;
159
160 __HOST_DEVICE__
161 HIP_vector_base() = default;
162 __HOST_DEVICE__
163 explicit
164 constexpr
165 HIP_vector_base(T x_) noexcept : data{x_} {}
166 __HOST_DEVICE__
167 constexpr
168 HIP_vector_base(const HIP_vector_base&) = default;
169 __HOST_DEVICE__
170 constexpr
171 HIP_vector_base(HIP_vector_base&&) = default;
172 __HOST_DEVICE__
173 ~HIP_vector_base() = default;
174 __HOST_DEVICE__
175 HIP_vector_base& operator=(const HIP_vector_base&) = default;
176 };
177
178 template<typename T>
179 struct HIP_vector_base<T, 2> {
180 using Native_vec_ = __NATIVE_VECTOR__(2, T);
181
182 union
183 #if !__has_attribute(ext_vector_type)
184 alignas(hip_impl::next_pot(2 * sizeof(T)))
185 #endif
186 {
187 Native_vec_ data;
188 struct {
189 T x;
190 T y;
191 };
192 };
193
194 using value_type = T;
195
196 __HOST_DEVICE__
197 HIP_vector_base() = default;
198 __HOST_DEVICE__
199 explicit
200 constexpr
201 HIP_vector_base(T x_) noexcept : data{x_, x_} {}
202 __HOST_DEVICE__
203 constexpr
204 HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
205 __HOST_DEVICE__
206 constexpr
207 HIP_vector_base(const HIP_vector_base&) = default;
208 __HOST_DEVICE__
209 constexpr
210 HIP_vector_base(HIP_vector_base&&) = default;
211 __HOST_DEVICE__
212 ~HIP_vector_base() = default;
213 __HOST_DEVICE__
214 HIP_vector_base& operator=(const HIP_vector_base&) = default;
215 };
216
217 template<typename T>
218 struct HIP_vector_base<T, 3> {
219 struct Native_vec_ {
220 T d[3];
221
222 __HOST_DEVICE__
223 Native_vec_() = default;
224
225 __HOST_DEVICE__
226 explicit
227 constexpr
228 Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
229 __HOST_DEVICE__
230 constexpr
231 Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
232 __HOST_DEVICE__
233 constexpr
234 Native_vec_(const Native_vec_&) = default;
235 __HOST_DEVICE__
236 constexpr
237 Native_vec_(Native_vec_&&) = default;
238 __HOST_DEVICE__
239 ~Native_vec_() = default;
240
241 __HOST_DEVICE__
242 Native_vec_& operator=(const Native_vec_&) = default;
243 __HOST_DEVICE__
244 Native_vec_& operator=(Native_vec_&&) = default;
245
246 __HOST_DEVICE__
247 T& operator[](unsigned int idx) noexcept { return d[idx]; }
248 __HOST_DEVICE__
249 T operator[](unsigned int idx) const noexcept { return d[idx]; }
250
251 __HOST_DEVICE__
252 Native_vec_& operator+=(const Native_vec_& x_) noexcept
253 {
254 for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
255 return *this;
256 }
257 __HOST_DEVICE__
258 Native_vec_& operator-=(const Native_vec_& x_) noexcept
259 {
260 for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
261 return *this;
262 }
263
264 __HOST_DEVICE__
265 Native_vec_& operator*=(const Native_vec_& x_) noexcept
266 {
267 for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
268 return *this;
269 }
270 __HOST_DEVICE__
271 Native_vec_& operator/=(const Native_vec_& x_) noexcept
272 {
273 for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
274 return *this;
275 }
276
277 template<
278 typename U = T,
279 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
280 __HOST_DEVICE__
281 Native_vec_ operator-() const noexcept
282 {
283 auto r{*this};
284 for (auto&& x : r.d) x = -x;
285 return r;
286 }
287
288 template<
289 typename U = T,
290 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
291 __HOST_DEVICE__
292 Native_vec_ operator~() const noexcept
293 {
294 auto r{*this};
295 for (auto&& x : r.d) x = ~x;
296 return r;
297 }
298 template<
299 typename U = T,
300 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
301 __HOST_DEVICE__
302 Native_vec_& operator%=(const Native_vec_& x_) noexcept
303 {
304 for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
305 return *this;
306 }
307 template<
308 typename U = T,
309 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
310 __HOST_DEVICE__
311 Native_vec_& operator^=(const Native_vec_& x_) noexcept
312 {
313 for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
314 return *this;
315 }
316 template<
317 typename U = T,
318 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
319 __HOST_DEVICE__
320 Native_vec_& operator|=(const Native_vec_& x_) noexcept
321 {
322 for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
323 return *this;
324 }
325 template<
326 typename U = T,
327 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
328 __HOST_DEVICE__
329 Native_vec_& operator&=(const Native_vec_& x_) noexcept
330 {
331 for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
332 return *this;
333 }
334 template<
335 typename U = T,
336 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
337 __HOST_DEVICE__
338 Native_vec_& operator>>=(const Native_vec_& x_) noexcept
339 {
340 for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
341 return *this;
342 }
343 template<
344 typename U = T,
345 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
346 __HOST_DEVICE__
347 Native_vec_& operator<<=(const Native_vec_& x_) noexcept
348 {
349 for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
350 return *this;
351 }
352#if defined (__INTEL_COMPILER)
353 typedef struct {
354 int values[4];
355 } _Vec3_cmp;
356 using Vec3_cmp = _Vec3_cmp;
357#else
358 using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
359#endif //INTEL
360 __HOST_DEVICE__
361 Vec3_cmp operator==(const Native_vec_& x_) const noexcept
362 {
363 return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
364 }
365 };
366
367 union {
368 Native_vec_ data;
369 struct {
370 T x;
371 T y;
372 T z;
373 };
374 };
375
376 using value_type = T;
377
378 __HOST_DEVICE__
379 HIP_vector_base() = default;
380 __HOST_DEVICE__
381 explicit
382 constexpr
383 HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
384 __HOST_DEVICE__
385 constexpr
386 HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
387 __HOST_DEVICE__
388 constexpr
389 HIP_vector_base(const HIP_vector_base&) = default;
390 __HOST_DEVICE__
391 constexpr
392 HIP_vector_base(HIP_vector_base&&) = default;
393 __HOST_DEVICE__
394 ~HIP_vector_base() = default;
395
396 __HOST_DEVICE__
397 HIP_vector_base& operator=(const HIP_vector_base&) = default;
398 __HOST_DEVICE__
399 HIP_vector_base& operator=(HIP_vector_base&&) = default;
400 };
401
402 template<typename T>
403 struct HIP_vector_base<T, 4> {
404 using Native_vec_ = __NATIVE_VECTOR__(4, T);
405
406 union
407 #if !__has_attribute(ext_vector_type)
408 alignas(hip_impl::next_pot(4 * sizeof(T)))
409 #endif
410 {
411 Native_vec_ data;
412 struct {
413 T x;
414 T y;
415 T z;
416 T w;
417 };
418 };
419
420 using value_type = T;
421
422 __HOST_DEVICE__
423 HIP_vector_base() = default;
424 __HOST_DEVICE__
425 explicit
426 constexpr
427 HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
428 __HOST_DEVICE__
429 constexpr
430 HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
431 __HOST_DEVICE__
432 constexpr
433 HIP_vector_base(const HIP_vector_base&) = default;
434 __HOST_DEVICE__
435 constexpr
436 HIP_vector_base(HIP_vector_base&&) = default;
437 __HOST_DEVICE__
438 ~HIP_vector_base() = default;
439 __HOST_DEVICE__
440 HIP_vector_base& operator=(const HIP_vector_base&) = default;
441 };
442
443 template<typename T, unsigned int rank>
444 struct HIP_vector_type : public HIP_vector_base<T, rank> {
445 using HIP_vector_base<T, rank>::data;
446 using typename HIP_vector_base<T, rank>::Native_vec_;
447
448 __HOST_DEVICE__
449 HIP_vector_type() = default;
450 template<
451 typename U,
452 typename std::enable_if<
453 std::is_convertible<U, T>::value>::type* = nullptr>
454 __HOST_DEVICE__
455 explicit
456 constexpr
457 HIP_vector_type(U x_) noexcept
458 : HIP_vector_base<T, rank>{static_cast<T>(x_)}
459 {}
460 template< // TODO: constrain based on type as well.
461 typename... Us,
462 typename std::enable_if<
463 (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
464 __HOST_DEVICE__
465 constexpr
466 HIP_vector_type(Us... xs) noexcept
467 : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
468 {}
469 __HOST_DEVICE__
470 constexpr
471 HIP_vector_type(const HIP_vector_type&) = default;
472 __HOST_DEVICE__
473 constexpr
474 HIP_vector_type(HIP_vector_type&&) = default;
475 __HOST_DEVICE__
476 ~HIP_vector_type() = default;
477
478 __HOST_DEVICE__
479 HIP_vector_type& operator=(const HIP_vector_type&) = default;
480 __HOST_DEVICE__
481 HIP_vector_type& operator=(HIP_vector_type&&) = default;
482
483 // Operators
484 __HOST_DEVICE__
485 HIP_vector_type& operator++() noexcept
486 {
487 return *this += HIP_vector_type{1};
488 }
489 __HOST_DEVICE__
490 HIP_vector_type operator++(int) noexcept
491 {
492 auto tmp(*this);
493 ++*this;
494 return tmp;
495 }
496
497 __HOST_DEVICE__
498 HIP_vector_type& operator--() noexcept
499 {
500 return *this -= HIP_vector_type{1};
501 }
502 __HOST_DEVICE__
503 HIP_vector_type operator--(int) noexcept
504 {
505 auto tmp(*this);
506 --*this;
507 return tmp;
508 }
509
510 __HOST_DEVICE__
511 HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
512 {
513#if __HIP_USE_NATIVE_VECTOR__
514 data += x.data;
515#else
516 for (auto i = 0u; i != rank; ++i) data[i] += x.data[i];
517#endif
518 return *this;
519 }
520 template<
521 typename U,
522 typename std::enable_if<
523 std::is_convertible<U, T>{}>::type* = nullptr>
524 __HOST_DEVICE__
525 HIP_vector_type& operator+=(U x) noexcept
526 {
527 return *this += HIP_vector_type{x};
528 }
529
530 __HOST_DEVICE__
531 HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
532 {
533#if __HIP_USE_NATIVE_VECTOR__
534 data -= x.data;
535#else
536 for (auto i = 0u; i != rank; ++i) data[i] -= x.data[i];
537#endif
538 return *this;
539 }
540 template<
541 typename U,
542 typename std::enable_if<
543 std::is_convertible<U, T>{}>::type* = nullptr>
544 __HOST_DEVICE__
545 HIP_vector_type& operator-=(U x) noexcept
546 {
547 return *this -= HIP_vector_type{x};
548 }
549
550 __HOST_DEVICE__
551 HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
552 {
553#if __HIP_USE_NATIVE_VECTOR__
554 data *= x.data;
555#else
556 for (auto i = 0u; i != rank; ++i) data[i] *= x.data[i];
557#endif
558 return *this;
559 }
560
561 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
562 HIP_vector_type x, const HIP_vector_type& y) noexcept
563 {
564 return HIP_vector_type{ x } *= y;
565 }
566
567 template<
568 typename U,
569 typename std::enable_if<
570 std::is_convertible<U, T>{}>::type* = nullptr>
571 __HOST_DEVICE__
572 HIP_vector_type& operator*=(U x) noexcept
573 {
574 return *this *= HIP_vector_type{x};
575 }
576
577 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
578 HIP_vector_type x, const HIP_vector_type& y) noexcept
579 {
580 return HIP_vector_type{ x } /= y;
581 }
582
583 __HOST_DEVICE__
584 HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
585 {
586#if __HIP_USE_NATIVE_VECTOR__
587 data /= x.data;
588#else
589 for (auto i = 0u; i != rank; ++i) data[i] /= x.data[i];
590#endif
591 return *this;
592 }
593 template<
594 typename U,
595 typename std::enable_if<
596 std::is_convertible<U, T>{}>::type* = nullptr>
597 __HOST_DEVICE__
598 HIP_vector_type& operator/=(U x) noexcept
599 {
600 return *this /= HIP_vector_type{x};
601 }
602
603 template<
604 typename U = T,
605 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
606 __HOST_DEVICE__
607 HIP_vector_type operator-() const noexcept
608 {
609 auto tmp(*this);
610#if __HIP_USE_NATIVE_VECTOR__
611 tmp.data = -tmp.data;
612#else
613 for (auto i = 0u; i != rank; ++i) tmp.data[i] = -tmp.data[i];
614#endif
615 return tmp;
616 }
617
618 template<
619 typename U = T,
620 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
621 __HOST_DEVICE__
622 HIP_vector_type operator~() const noexcept
623 {
624 HIP_vector_type r{*this};
625#if __HIP_USE_NATIVE_VECTOR__
626 r.data = ~r.data;
627#else
628 for (auto i = 0u; i != rank; ++i) r.data[i] = ~r.data[i];
629#endif
630 return r;
631 }
632
633 template<
634 typename U = T,
635 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
636 __HOST_DEVICE__
637 HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
638 {
639#if __HIP_USE_NATIVE_VECTOR__
640 data %= x.data;
641#else
642 for (auto i = 0u; i != rank; ++i) data[i] %= x.data[i];
643#endif
644 return *this;
645 }
646
647 template<
648 typename U = T,
649 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
650 __HOST_DEVICE__
651 HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
652 {
653#if __HIP_USE_NATIVE_VECTOR__
654 data ^= x.data;
655#else
656 for (auto i = 0u; i != rank; ++i) data[i] ^= x.data[i];
657#endif
658 return *this;
659 }
660
661 template<
662 typename U = T,
663 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
664 __HOST_DEVICE__
665 HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
666 {
667#if __HIP_USE_NATIVE_VECTOR__
668 data |= x.data;
669#else
670 for (auto i = 0u; i != rank; ++i) data[i] |= x.data[i];
671#endif
672 return *this;
673 }
674
675 template<
676 typename U = T,
677 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
678 __HOST_DEVICE__
679 HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
680 {
681#if __HIP_USE_NATIVE_VECTOR__
682 data &= x.data;
683#else
684 for (auto i = 0u; i != rank; ++i) data[i] &= x.data[i];
685#endif
686 return *this;
687 }
688
689 template<
690 typename U = T,
691 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
692 __HOST_DEVICE__
693 HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
694 {
695#if __HIP_USE_NATIVE_VECTOR__
696 data >>= x.data;
697#else
698 for (auto i = 0u; i != rank; ++i) data[i] >>= x.data[i];
699#endif
700 return *this;
701 }
702
703 template<
704 typename U = T,
705 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
706 __HOST_DEVICE__
707 HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
708 {
709#if __HIP_USE_NATIVE_VECTOR__
710 data <<= x.data;
711#else
712 for (auto i = 0u; i != rank; ++i) data[i] <<= x.data[i];
713#endif
714 return *this;
715 }
716 };
717
718 template<typename T, unsigned int n>
719 __HOST_DEVICE__
720 inline
721 constexpr
722 HIP_vector_type<T, n> operator+(
723 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
724 {
725 return HIP_vector_type<T, n>{x} += y;
726 }
727 template<typename T, unsigned int n, typename U>
728 __HOST_DEVICE__
729 inline
730 constexpr
731 HIP_vector_type<T, n> operator+(
732 const HIP_vector_type<T, n>& x, U y) noexcept
733 {
734 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
735 }
736 template<typename T, unsigned int n, typename U>
737 __HOST_DEVICE__
738 inline
739 constexpr
740 HIP_vector_type<T, n> operator+(
741 U x, const HIP_vector_type<T, n>& y) noexcept
742 {
743 return HIP_vector_type<T, n>{x} += y;
744 }
745
746 template<typename T, unsigned int n>
747 __HOST_DEVICE__
748 inline
749 constexpr
750 HIP_vector_type<T, n> operator-(
751 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
752 {
753 return HIP_vector_type<T, n>{x} -= y;
754 }
755 template<typename T, unsigned int n, typename U>
756 __HOST_DEVICE__
757 inline
758 constexpr
759 HIP_vector_type<T, n> operator-(
760 const HIP_vector_type<T, n>& x, U y) noexcept
761 {
762 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
763 }
764 template<typename T, unsigned int n, typename U>
765 __HOST_DEVICE__
766 inline
767 constexpr
768 HIP_vector_type<T, n> operator-(
769 U x, const HIP_vector_type<T, n>& y) noexcept
770 {
771 return HIP_vector_type<T, n>{x} -= y;
772 }
773
774 template<typename T, unsigned int n, typename U>
775 __HOST_DEVICE__
776 inline
777 constexpr
778 HIP_vector_type<T, n> operator*(
779 const HIP_vector_type<T, n>& x, U y) noexcept
780 {
781 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
782 }
783 template<typename T, unsigned int n, typename U>
784 __HOST_DEVICE__
785 inline
786 constexpr
787 HIP_vector_type<T, n> operator*(
788 U x, const HIP_vector_type<T, n>& y) noexcept
789 {
790 return HIP_vector_type<T, n>{x} *= y;
791 }
792
793 template<typename T, unsigned int n, typename U>
794 __HOST_DEVICE__
795 inline
796 constexpr
797 HIP_vector_type<T, n> operator/(
798 const HIP_vector_type<T, n>& x, U y) noexcept
799 {
800 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
801 }
802 template<typename T, unsigned int n, typename U>
803 __HOST_DEVICE__
804 inline
805 constexpr
806 HIP_vector_type<T, n> operator/(
807 U x, const HIP_vector_type<T, n>& y) noexcept
808 {
809 return HIP_vector_type<T, n>{x} /= y;
810 }
811
812 template<typename V>
813 __HOST_DEVICE__
814 inline
815 constexpr
816 bool _hip_compare(const V& x, const V& y, int n) noexcept
817 {
818 return
819 (n == -1) ? true : ((x[n] != y[n]) ? false : _hip_compare(x, y, n - 1));
820 }
821
822 template<typename T, unsigned int n>
823 __HOST_DEVICE__
824 inline
825 constexpr
826 bool operator==(
827 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
828 {
829 return _hip_compare(x.data, y.data, n - 1);
830 }
831 template<typename T, unsigned int n, typename U>
832 __HOST_DEVICE__
833 inline
834 constexpr
835 bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
836 {
837 return x == HIP_vector_type<T, n>{y};
838 }
839 template<typename T, unsigned int n, typename U>
840 __HOST_DEVICE__
841 inline
842 constexpr
843 bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
844 {
845 return HIP_vector_type<T, n>{x} == y;
846 }
847
848 template<typename T, unsigned int n>
849 __HOST_DEVICE__
850 inline
851 constexpr
852 bool operator!=(
853 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
854 {
855 return !(x == y);
856 }
857 template<typename T, unsigned int n, typename U>
858 __HOST_DEVICE__
859 inline
860 constexpr
861 bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
862 {
863 return !(x == y);
864 }
865 template<typename T, unsigned int n, typename U>
866 __HOST_DEVICE__
867 inline
868 constexpr
869 bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
870 {
871 return !(x == y);
872 }
873
874 template<
875 typename T,
876 unsigned int n,
877 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
878 __HOST_DEVICE__
879 inline
880 constexpr
881 HIP_vector_type<T, n> operator%(
882 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
883 {
884 return HIP_vector_type<T, n>{x} %= y;
885 }
886 template<
887 typename T,
888 unsigned int n,
889 typename U,
890 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
891 __HOST_DEVICE__
892 inline
893 constexpr
894 HIP_vector_type<T, n> operator%(
895 const HIP_vector_type<T, n>& x, U y) noexcept
896 {
897 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
898 }
899 template<
900 typename T,
901 unsigned int n,
902 typename U,
903 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
904 __HOST_DEVICE__
905 inline
906 constexpr
907 HIP_vector_type<T, n> operator%(
908 U x, const HIP_vector_type<T, n>& y) noexcept
909 {
910 return HIP_vector_type<T, n>{x} %= y;
911 }
912
913 template<
914 typename T,
915 unsigned int n,
916 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
917 __HOST_DEVICE__
918 inline
919 constexpr
920 HIP_vector_type<T, n> operator^(
921 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
922 {
923 return HIP_vector_type<T, n>{x} ^= y;
924 }
925 template<
926 typename T,
927 unsigned int n,
928 typename U,
929 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
930 __HOST_DEVICE__
931 inline
932 constexpr
933 HIP_vector_type<T, n> operator^(
934 const HIP_vector_type<T, n>& x, U y) noexcept
935 {
936 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
937 }
938 template<
939 typename T,
940 unsigned int n,
941 typename U,
942 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
943 __HOST_DEVICE__
944 inline
945 constexpr
946 HIP_vector_type<T, n> operator^(
947 U x, const HIP_vector_type<T, n>& y) noexcept
948 {
949 return HIP_vector_type<T, n>{x} ^= y;
950 }
951
952 template<
953 typename T,
954 unsigned int n,
955 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
956 __HOST_DEVICE__
957 inline
958 constexpr
959 HIP_vector_type<T, n> operator|(
960 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
961 {
962 return HIP_vector_type<T, n>{x} |= y;
963 }
964 template<
965 typename T,
966 unsigned int n,
967 typename U,
968 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
969 __HOST_DEVICE__
970 inline
971 constexpr
972 HIP_vector_type<T, n> operator|(
973 const HIP_vector_type<T, n>& x, U y) noexcept
974 {
975 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
976 }
977 template<
978 typename T,
979 unsigned int n,
980 typename U,
981 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
982 __HOST_DEVICE__
983 inline
984 constexpr
985 HIP_vector_type<T, n> operator|(
986 U x, const HIP_vector_type<T, n>& y) noexcept
987 {
988 return HIP_vector_type<T, n>{x} |= y;
989 }
990
991 template<
992 typename T,
993 unsigned int n,
994 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
995 __HOST_DEVICE__
996 inline
997 constexpr
998 HIP_vector_type<T, n> operator&(
999 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1000 {
1001 return HIP_vector_type<T, n>{x} &= y;
1002 }
1003 template<
1004 typename T,
1005 unsigned int n,
1006 typename U,
1007 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1008 __HOST_DEVICE__
1009 inline
1010 constexpr
1011 HIP_vector_type<T, n> operator&(
1012 const HIP_vector_type<T, n>& x, U y) noexcept
1013 {
1014 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
1015 }
1016 template<
1017 typename T,
1018 unsigned int n,
1019 typename U,
1020 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1021 __HOST_DEVICE__
1022 inline
1023 constexpr
1024 HIP_vector_type<T, n> operator&(
1025 U x, const HIP_vector_type<T, n>& y) noexcept
1026 {
1027 return HIP_vector_type<T, n>{x} &= y;
1028 }
1029
1030 template<
1031 typename T,
1032 unsigned int n,
1033 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1034 __HOST_DEVICE__
1035 inline
1036 constexpr
1037 HIP_vector_type<T, n> operator>>(
1038 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1039 {
1040 return HIP_vector_type<T, n>{x} >>= y;
1041 }
1042 template<
1043 typename T,
1044 unsigned int n,
1045 typename U,
1046 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1047 __HOST_DEVICE__
1048 inline
1049 constexpr
1050 HIP_vector_type<T, n> operator>>(
1051 const HIP_vector_type<T, n>& x, U y) noexcept
1052 {
1053 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1054 }
1055 template<
1056 typename T,
1057 unsigned int n,
1058 typename U,
1059 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1060 __HOST_DEVICE__
1061 inline
1062 constexpr
1063 HIP_vector_type<T, n> operator>>(
1064 U x, const HIP_vector_type<T, n>& y) noexcept
1065 {
1066 return HIP_vector_type<T, n>{x} >>= y;
1067 }
1068
1069 template<
1070 typename T,
1071 unsigned int n,
1072 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1073 __HOST_DEVICE__
1074 inline
1075 constexpr
1076 HIP_vector_type<T, n> operator<<(
1077 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1078 {
1079 return HIP_vector_type<T, n>{x} <<= y;
1080 }
1081 template<
1082 typename T,
1083 unsigned int n,
1084 typename U,
1085 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1086 __HOST_DEVICE__
1087 inline
1088 constexpr
1089 HIP_vector_type<T, n> operator<<(
1090 const HIP_vector_type<T, n>& x, U y) noexcept
1091 {
1092 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1093 }
1094 template<
1095 typename T,
1096 unsigned int n,
1097 typename U,
1098 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1099 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1100 __HOST_DEVICE__
1101 inline
1102 constexpr
1103 HIP_vector_type<T, n> operator<<(
1104 U x, const HIP_vector_type<T, n>& y) noexcept
1105 {
1106 return HIP_vector_type<T, n>{x} <<= y;
1107 }
1108
1109 /*
1110 * Map HIP_vector_type<U, rankU> to HIP_vector_type<T, rankT>
1111 */
1112 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1113 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 1 && rankU >= 1),
1114 const HIP_vector_type<T, rankT>>::type
1115 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1116 return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
1117 };
1118
1119 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1120 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU == 1),
1121 const HIP_vector_type<T, rankT>>::type
1122 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1123 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0));
1124 };
1125
1126 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1127 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU >= 2),
1128 const HIP_vector_type<T, rankT>>::type
1129 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1130 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y));
1131 };
1132
1133 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1134 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 1),
1135 const HIP_vector_type<T, rankT>>::type
1136 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1137 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0),
1138 static_cast<T>(0), static_cast<T>(0));
1139 };
1140
1141 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1142 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 2),
1143 const HIP_vector_type<T, rankT>>::type
1144 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1145 return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y),
1146 static_cast<T>(0), static_cast<T>(0));
1147 };
1148
1149 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1150 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 4),
1151 const HIP_vector_type<T, rankT>>::type
1152 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1153 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y),
1154 static_cast<T>(u.z), static_cast<T>(u.w));
1155 };
1156
1157 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1158 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1159 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1160 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1161 using CUDA_name##4 = HIP_vector_type<T, 4>;
1162#else
1163 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1164 typedef struct {\
1165 T x;\
1166 } CUDA_name##1;\
1167 typedef struct {\
1168 T x;\
1169 T y;\
1170 } CUDA_name##2;\
1171 typedef struct {\
1172 T x;\
1173 T y;\
1174 T z;\
1175 } CUDA_name##3;\
1176 typedef struct {\
1177 T x;\
1178 T y;\
1179 T z;\
1180 T w;\
1181 } CUDA_name##4;
1182#endif
1183
1184__MAKE_VECTOR_TYPE__(uchar, unsigned char);
1185__MAKE_VECTOR_TYPE__(char, char);
1186__MAKE_VECTOR_TYPE__(ushort, unsigned short);
1187__MAKE_VECTOR_TYPE__(short, short);
1188__MAKE_VECTOR_TYPE__(uint, unsigned int);
1189__MAKE_VECTOR_TYPE__(int, int);
1190__MAKE_VECTOR_TYPE__(ulong, unsigned long);
1191__MAKE_VECTOR_TYPE__(long, long);
1192__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1193__MAKE_VECTOR_TYPE__(longlong, long long);
1194__MAKE_VECTOR_TYPE__(float, float);
1195__MAKE_VECTOR_TYPE__(double, double);
1196
1197#else // !defined(__has_attribute)
1198
1199#if defined(_MSC_VER)
1200#include <mmintrin.h>
1201#include <xmmintrin.h>
1202#include <emmintrin.h>
1203#include <immintrin.h>
1204
1205/*
1206this is for compatibility with CUDA as CUDA allows accessing vector components
1207in C++ program with MSVC
1208*/
1209typedef union {
1210 struct {
1211 char x;
1212 };
1213 char data;
1214} char1;
1215typedef union {
1216 struct {
1217 char x;
1218 char y;
1219 };
1220 char data[2];
1221} char2;
1222typedef union {
1223 struct {
1224 char x;
1225 char y;
1226 char z;
1227 char w;
1228 };
1229 char data[4];
1230} char4;
1231typedef union {
1232 struct {
1233 char x;
1234 char y;
1235 char z;
1236 };
1237 char data[3];
1238} char3;
1239typedef union {
1240 __m64 data;
1241} char8;
1242typedef union {
1243 __m128i data;
1244} char16;
1245
1246typedef union {
1247 struct {
1248 unsigned char x;
1249 };
1250 unsigned char data;
1251} uchar1;
1252typedef union {
1253 struct {
1254 unsigned char x;
1255 unsigned char y;
1256 };
1257 unsigned char data[2];
1258} uchar2;
1259typedef union {
1260 struct {
1261 unsigned char x;
1262 unsigned char y;
1263 unsigned char z;
1264 unsigned char w;
1265 };
1266 unsigned char data[4];
1267} uchar4;
1268typedef union {
1269 struct {
1270 unsigned char x;
1271 unsigned char y;
1272 unsigned char z;
1273 };
1274 unsigned char data[3];
1275} uchar3;
1276typedef union {
1277 __m64 data;
1278} uchar8;
1279typedef union {
1280 __m128i data;
1281} uchar16;
1282
1283typedef union {
1284 struct {
1285 short x;
1286 };
1287 short data;
1288} short1;
1289typedef union {
1290 struct {
1291 short x;
1292 short y;
1293 };
1294 short data[2];
1295} short2;
1296typedef union {
1297 struct {
1298 short x;
1299 short y;
1300 short z;
1301 short w;
1302 };
1303 __m64 data;
1304} short4;
1305typedef union {
1306 struct {
1307 short x;
1308 short y;
1309 short z;
1310 };
1311 short data[3];
1312} short3;
1313typedef union {
1314 __m128i data;
1315} short8;
1316typedef union {
1317 __m128i data[2];
1318} short16;
1319
1320typedef union {
1321 struct {
1322 unsigned short x;
1323 };
1324 unsigned short data;
1325} ushort1;
1326typedef union {
1327 struct {
1328 unsigned short x;
1329 unsigned short y;
1330 };
1331 unsigned short data[2];
1332} ushort2;
1333typedef union {
1334 struct {
1335 unsigned short x;
1336 unsigned short y;
1337 unsigned short z;
1338 unsigned short w;
1339 };
1340 __m64 data;
1341} ushort4;
1342typedef union {
1343 struct {
1344 unsigned short x;
1345 unsigned short y;
1346 unsigned short z;
1347 };
1348 unsigned short data[3];
1349} ushort3;
1350typedef union {
1351 __m128i data;
1352} ushort8;
1353typedef union {
1354 __m128i data[2];
1355} ushort16;
1356
1357typedef union {
1358 struct {
1359 int x;
1360 };
1361 int data;
1362} int1;
1363typedef union {
1364 struct {
1365 int x;
1366 int y;
1367 };
1368 __m64 data;
1369} int2;
1370typedef union {
1371 struct {
1372 int x;
1373 int y;
1374 int z;
1375 int w;
1376 };
1377 __m128i data;
1378} int4;
1379typedef union {
1380 struct {
1381 int x;
1382 int y;
1383 int z;
1384 };
1385 int data[3];
1386} int3;
1387typedef union {
1388 __m128i data[2];
1389} int8;
1390typedef union {
1391 __m128i data[4];
1392} int16;
1393
1394typedef union {
1395 struct {
1396 unsigned int x;
1397 };
1398 unsigned int data;
1399} uint1;
1400typedef union {
1401 struct {
1402 unsigned int x;
1403 unsigned int y;
1404 };
1405 __m64 data;
1406} uint2;
1407typedef union {
1408 struct {
1409 unsigned int x;
1410 unsigned int y;
1411 unsigned int z;
1412 unsigned int w;
1413 };
1414 __m128i data;
1415} uint4;
1416typedef union {
1417 struct {
1418 unsigned int x;
1419 unsigned int y;
1420 unsigned int z;
1421 };
1422 unsigned int data[3];
1423} uint3;
1424typedef union {
1425 __m128i data[2];
1426} uint8;
1427typedef union {
1428 __m128i data[4];
1429} uint16;
1430
1431typedef union {
1432 struct {
1433 int x;
1434 };
1435 int data;
1436} long1;
1437typedef union {
1438 struct {
1439 int x;
1440 int y;
1441 };
1442 __m64 data;
1443} long2;
1444typedef union {
1445 struct {
1446 int x;
1447 int y;
1448 int z;
1449 int w;
1450 };
1451 __m128i data;
1452} long4;
1453typedef union {
1454 struct {
1455 int x;
1456 int y;
1457 int z;
1458 };
1459 int data[3];
1460} long3;
1461typedef union {
1462 __m128i data[2];
1463} long8;
1464typedef union {
1465 __m128i data[4];
1466} long16;
1467
1468typedef union {
1469 struct {
1470 unsigned int x;
1471 };
1472 unsigned int data;
1473} ulong1;
1474typedef union {
1475 struct {
1476 unsigned int x;
1477 unsigned int y;
1478 };
1479 __m64 data;
1480} ulong2;
1481typedef union {
1482 struct {
1483 unsigned int x;
1484 unsigned int y;
1485 unsigned int z;
1486 unsigned int w;
1487 };
1488 __m128i data;
1489} ulong4;
1490typedef union {
1491 struct {
1492 unsigned int x;
1493 unsigned int y;
1494 unsigned int z;
1495 };
1496 unsigned int data[3];
1497} ulong3;
1498typedef union {
1499 __m128i data[2];
1500} ulong8;
1501typedef union {
1502 __m128i data[4];
1503} ulong16;
1504
1505typedef union {
1506 struct {
1507 long long x;
1508 };
1509 __m64 data;
1510} longlong1;
1511typedef union {
1512 struct {
1513 long long x;
1514 long long y;
1515 };
1516 __m128i data;
1517} longlong2;
1518typedef union {
1519 struct {
1520 long long x;
1521 long long y;
1522 long long z;
1523 long long w;
1524 };
1525 __m128i data[2];
1526} longlong4;
1527typedef union {
1528 struct {
1529 long long x;
1530 long long y;
1531 long long z;
1532 };
1533 __m64 data[3];
1534} longlong3;
1535typedef union {
1536 __m128i data[4];
1537} longlong8;
1538typedef union {
1539 __m128i data[8];
1540} longlong16;
1541
1542typedef union {
1543 struct {
1544 __m64 x;
1545 };
1546 __m64 data;
1547} ulonglong1;
1548typedef union {
1549 struct {
1550 __m64 x;
1551 __m64 y;
1552 };
1553 __m128i data;
1554} ulonglong2;
1555typedef union {
1556 struct {
1557 __m64 x;
1558 __m64 y;
1559 __m64 z;
1560 __m64 w;
1561 };
1562 __m128i data[2];
1563} ulonglong4;
1564typedef union {
1565 struct {
1566 __m64 x;
1567 __m64 y;
1568 __m64 z;
1569 };
1570 __m64 data[3];
1571} ulonglong3;
1572typedef union {
1573 __m128i data[4];
1574} ulonglong8;
1575typedef union {
1576 __m128i data[8];
1577} ulonglong16;
1578
1579typedef union {
1580 struct {
1581 float x;
1582 };
1583 float data;
1584} float1;
1585typedef union {
1586 struct {
1587 float x;
1588 float y;
1589 };
1590 __m64 data;
1591} float2;
1592typedef union {
1593 struct {
1594 float x;
1595 float y;
1596 float z;
1597 float w;
1598 };
1599 __m128 data;
1600} float4;
1601typedef union {
1602 struct {
1603 float x;
1604 float y;
1605 float z;
1606 };
1607 float data[3];
1608} float3;
1609typedef union {
1610 __m256 data;
1611} float8;
1612typedef union {
1613 __m256 data[2];
1614} float16;
1615
1616typedef union {
1617 struct {
1618 double x;
1619 };
1620 double data;
1621} double1;
1622typedef union {
1623 struct {
1624 double x;
1625 double y;
1626 };
1627 __m128d data;
1628} double2;
1629typedef union {
1630 struct {
1631 double x;
1632 double y;
1633 double z;
1634 double w;
1635 };
1636 __m256d data;
1637} double4;
1638typedef union {
1639 struct {
1640 double x;
1641 double y;
1642 double z;
1643 };
1644 double data[3];
1645} double3;
1646typedef union {
1647 __m256d data[2];
1648} double8;
1649typedef union {
1650 __m256d data[4];
1651} double16;
1652
1653#else // !defined(_MSC_VER)
1654
1655/*
1656this is for compatibility with CUDA as CUDA allows accessing vector components
1657in C++ program with MSVC
1658*/
1659typedef union {
1660 struct {
1661 char x;
1662 };
1663 char data;
1664} char1;
1665typedef union {
1666 struct {
1667 char x;
1668 char y;
1669 };
1670 char data[2];
1671} char2;
1672typedef union {
1673 struct {
1674 char x;
1675 char y;
1676 char z;
1677 char w;
1678 };
1679 char data[4];
1680} char4;
1681typedef union {
1682 char data[8];
1683} char8;
1684typedef union {
1685 char data[16];
1686} char16;
1687typedef union {
1688 struct {
1689 char x;
1690 char y;
1691 char z;
1692 };
1693 char data[3];
1694} char3;
1695
1696typedef union {
1697 struct {
1698 unsigned char x;
1699 };
1700 unsigned char data;
1701} uchar1;
1702typedef union {
1703 struct {
1704 unsigned char x;
1705 unsigned char y;
1706 };
1707 unsigned char data[2];
1708} uchar2;
1709typedef union {
1710 struct {
1711 unsigned char x;
1712 unsigned char y;
1713 unsigned char z;
1714 unsigned char w;
1715 };
1716 unsigned char data[4];
1717} uchar4;
1718typedef union {
1719 unsigned char data[8];
1720} uchar8;
1721typedef union {
1722 unsigned char data[16];
1723} uchar16;
1724typedef union {
1725 struct {
1726 unsigned char x;
1727 unsigned char y;
1728 unsigned char z;
1729 };
1730 unsigned char data[3];
1731} uchar3;
1732
1733typedef union {
1734 struct {
1735 short x;
1736 };
1737 short data;
1738} short1;
1739typedef union {
1740 struct {
1741 short x;
1742 short y;
1743 };
1744 short data[2];
1745} short2;
1746typedef union {
1747 struct {
1748 short x;
1749 short y;
1750 short z;
1751 short w;
1752 };
1753 short data[4];
1754} short4;
1755typedef union {
1756 short data[8];
1757} short8;
1758typedef union {
1759 short data[16];
1760} short16;
1761typedef union {
1762 struct {
1763 short x;
1764 short y;
1765 short z;
1766 };
1767 short data[3];
1768} short3;
1769
1770typedef union {
1771 struct {
1772 unsigned short x;
1773 };
1774 unsigned short data;
1775} ushort1;
1776typedef union {
1777 struct {
1778 unsigned short x;
1779 unsigned short y;
1780 };
1781 unsigned short data[2];
1782} ushort2;
1783typedef union {
1784 struct {
1785 unsigned short x;
1786 unsigned short y;
1787 unsigned short z;
1788 unsigned short w;
1789 };
1790 unsigned short data[4];
1791} ushort4;
1792typedef union {
1793 unsigned short data[8];
1794} ushort8;
1795typedef union {
1796 unsigned short data[16];
1797} ushort16;
1798typedef union {
1799 struct {
1800 unsigned short x;
1801 unsigned short y;
1802 unsigned short z;
1803 };
1804 unsigned short data[3];
1805} ushort3;
1806
1807typedef union {
1808 struct {
1809 int x;
1810 };
1811 int data;
1812} int1;
1813typedef union {
1814 struct {
1815 int x;
1816 int y;
1817 };
1818 int data[2];
1819} int2;
1820typedef union {
1821 struct {
1822 int x;
1823 int y;
1824 int z;
1825 int w;
1826 };
1827 int data[4];
1828} int4;
1829typedef union {
1830 int data[8];
1831} int8;
1832typedef union {
1833 int data[16];
1834} int16;
1835typedef union {
1836 struct {
1837 int x;
1838 int y;
1839 int z;
1840 };
1841 int data[3];
1842} int3;
1843
1844typedef union {
1845 struct {
1846 unsigned int x;
1847 };
1848 unsigned int data;
1849} uint1;
1850typedef union {
1851 struct {
1852 unsigned int x;
1853 unsigned int y;
1854 };
1855 unsigned int data[2];
1856} uint2;
1857typedef union {
1858 struct {
1859 unsigned int x;
1860 unsigned int y;
1861 unsigned int z;
1862 unsigned int w;
1863 };
1864 unsigned int data[4];
1865} uint4;
1866typedef union {
1867 unsigned int data[8];
1868} uint8;
1869typedef union {
1870 unsigned int data[16];
1871} uint16;
1872typedef union {
1873 struct {
1874 unsigned int x;
1875 unsigned int y;
1876 unsigned int z;
1877 };
1878 unsigned int data[3];
1879} uint3;
1880
1881typedef union {
1882 struct {
1883 long x;
1884 };
1885 long data;
1886} long1;
1887typedef union {
1888 struct {
1889 long x;
1890 long y;
1891 };
1892 long data[2];
1893} long2;
1894typedef union {
1895 struct {
1896 long x;
1897 long y;
1898 long z;
1899 long w;
1900 };
1901 long data[4];
1902} long4;
1903typedef union {
1904 long data[8];
1905} long8;
1906typedef union {
1907 long data[16];
1908} long16;
1909typedef union {
1910 struct {
1911 long x;
1912 long y;
1913 long z;
1914 };
1915 long data[3];
1916} long3;
1917
1918typedef union {
1919 struct {
1920 unsigned long x;
1921 };
1922 unsigned long data;
1923} ulong1;
1924typedef union {
1925 struct {
1926 unsigned long x;
1927 unsigned long y;
1928 };
1929 unsigned long data[2];
1930} ulong2;
1931typedef union {
1932 struct {
1933 unsigned long x;
1934 unsigned long y;
1935 unsigned long z;
1936 unsigned long w;
1937 };
1938 unsigned long data[4];
1939} ulong4;
1940typedef union {
1941 unsigned long data[8];
1942} ulong8;
1943typedef union {
1944 unsigned long data[16];
1945} ulong16;
1946typedef union {
1947 struct {
1948 unsigned long x;
1949 unsigned long y;
1950 unsigned long z;
1951 };
1952 unsigned long data[3];
1953} ulong3;
1954
1955typedef union {
1956 struct {
1957 long long x;
1958 };
1959 long long data;
1960} longlong1;
1961typedef union {
1962 struct {
1963 long long x;
1964 long long y;
1965 };
1966 long long data[2];
1967} longlong2;
1968typedef union {
1969 struct {
1970 long long x;
1971 long long y;
1972 long long z;
1973 long long w;
1974 };
1975 long long data[4];
1976} longlong4;
1977typedef union {
1978 long long data[8];
1979} longlong8;
1980typedef union {
1981 long long data[16];
1982} longlong16;
1983typedef union {
1984 struct {
1985 long long x;
1986 long long y;
1987 long long z;
1988 };
1989 long long data[3];
1990} longlong3;
1991
1992typedef union {
1993 struct {
1994 unsigned long long x;
1995 };
1996 unsigned long long data;
1997} ulonglong1;
1998typedef union {
1999 struct {
2000 unsigned long long x;
2001 unsigned long long y;
2002 };
2003 unsigned long long data[2];
2004} ulonglong2;
2005typedef union {
2006 struct {
2007 unsigned long long x;
2008 unsigned long long y;
2009 unsigned long long z;
2010 unsigned long long w;
2011 };
2012 unsigned long long data[4];
2013} ulonglong4;
2014typedef union {
2015 unsigned long long data[8];
2016} ulonglong8;
2017typedef union {
2018 unsigned long long data[16];
2019} ulonglong16;
2020typedef union {
2021 struct {
2022 unsigned long long x;
2023 unsigned long long y;
2024 unsigned long long z;
2025 };
2026 unsigned long long data[3];
2027} ulonglong3;
2028
2029typedef union {
2030 struct {
2031 float x;
2032 };
2033 float data;
2034} float1;
2035typedef union {
2036 struct {
2037 float x;
2038 float y;
2039 };
2040 float data[2];
2041} float2;
2042typedef union {
2043 struct {
2044 float x;
2045 float y;
2046 float z;
2047 float w;
2048 };
2049 float data[4];
2050} float4;
2051typedef union {
2052 float data[8];
2053} float8;
2054typedef union {
2055 float data[16];
2056} float16;
2057typedef union {
2058 struct {
2059 float x;
2060 float y;
2061 float z;
2062 };
2063 float data[3];
2064} float3;
2065
2066typedef union {
2067 struct {
2068 double x;
2069 };
2070 double data;
2071} double1;
2072typedef union {
2073 struct {
2074 double x;
2075 double y;
2076 };
2077 double data[2];
2078} double2;
2079typedef union {
2080 struct {
2081 double x;
2082 double y;
2083 double z;
2084 double w;
2085 };
2086 double data[4];
2087} double4;
2088typedef union {
2089 double data[8];
2090} double8;
2091typedef union {
2092 double data[16];
2093} double16;
2094typedef union {
2095 struct {
2096 double x;
2097 double y;
2098 double z;
2099 };
2100 double data[3];
2101} double3;
2102
2103#endif // defined(_MSC_VER)
2104#endif // defined(__has_attribute)
2105
2106#ifdef __cplusplus
2107#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2108 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2109 type r{x}; \
2110 return r; \
2111 }
2112
2113#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2114 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2115 type r{x, y}; \
2116 return r; \
2117 }
2118
2119#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2120 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2121 type r{x, y, z}; \
2122 return r; \
2123 }
2124
2125#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2126 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2127 type r{x, y, z, w}; \
2128 return r; \
2129 }
2130#else
2131#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2132 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2133 type r; \
2134 r.x = x; \
2135 return r; \
2136 }
2137
2138#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2139 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2140 type r; \
2141 r.x = x; \
2142 r.y = y; \
2143 return r; \
2144 }
2145
2146#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2147 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2148 type r; \
2149 r.x = x; \
2150 r.y = y; \
2151 r.z = z; \
2152 return r; \
2153 }
2154
2155#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2156 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2157 type r; \
2158 r.x = x; \
2159 r.y = y; \
2160 r.z = z; \
2161 r.w = w; \
2162 return r; \
2163 }
2164#endif
2165
2166DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
2167DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
2168DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
2169DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
2170
2171DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
2172DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
2173DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
2174DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
2175
2176DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
2177DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
2178DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
2179DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
2180
2181DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
2182DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
2183DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
2184DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
2185
2186DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
2187DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
2188DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
2189DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
2190
2191DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
2192DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
2193DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
2194DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
2195
2196DECLOP_MAKE_ONE_COMPONENT(float, float1);
2197DECLOP_MAKE_TWO_COMPONENT(float, float2);
2198DECLOP_MAKE_THREE_COMPONENT(float, float3);
2199DECLOP_MAKE_FOUR_COMPONENT(float, float4);
2200
2201DECLOP_MAKE_ONE_COMPONENT(double, double1);
2202DECLOP_MAKE_TWO_COMPONENT(double, double2);
2203DECLOP_MAKE_THREE_COMPONENT(double, double3);
2204DECLOP_MAKE_FOUR_COMPONENT(double, double4);
2205
2206DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
2207DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
2208DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
2209DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
2210
2211DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
2212DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
2213DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
2214DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
2215
2216DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
2217DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
2218DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
2219DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
2220
2221DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
2222DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
2223DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
2224DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
2225
2226#endif
Definition amd_hip_vector_types.h:1659
Definition amd_hip_vector_types.h:1665
Definition amd_hip_vector_types.h:1672
Definition amd_hip_vector_types.h:1681
Definition amd_hip_vector_types.h:1684
Definition amd_hip_vector_types.h:1687
Definition amd_hip_vector_types.h:1696
Definition amd_hip_vector_types.h:1702
Definition amd_hip_vector_types.h:1709
Definition amd_hip_vector_types.h:1718
Definition amd_hip_vector_types.h:1721
Definition amd_hip_vector_types.h:1724
Definition amd_hip_vector_types.h:1733
Definition amd_hip_vector_types.h:1739
Definition amd_hip_vector_types.h:1746
Definition amd_hip_vector_types.h:1755
Definition amd_hip_vector_types.h:1758
Definition amd_hip_vector_types.h:1761
Definition amd_hip_vector_types.h:1770
Definition amd_hip_vector_types.h:1776
Definition amd_hip_vector_types.h:1783
Definition amd_hip_vector_types.h:1792
Definition amd_hip_vector_types.h:1795
Definition amd_hip_vector_types.h:1798
Definition amd_hip_vector_types.h:1807
Definition amd_hip_vector_types.h:1813
Definition amd_hip_vector_types.h:1820
Definition amd_hip_vector_types.h:1829
Definition amd_hip_vector_types.h:1832
Definition amd_hip_vector_types.h:1835
Definition amd_hip_vector_types.h:1844
Definition amd_hip_vector_types.h:1850
Definition amd_hip_vector_types.h:1857
Definition amd_hip_vector_types.h:1866
Definition amd_hip_vector_types.h:1869
Definition amd_hip_vector_types.h:1872
Definition amd_hip_vector_types.h:1881
Definition amd_hip_vector_types.h:1887
Definition amd_hip_vector_types.h:1894
Definition amd_hip_vector_types.h:1903
Definition amd_hip_vector_types.h:1906
Definition amd_hip_vector_types.h:1909
Definition amd_hip_vector_types.h:1918
Definition amd_hip_vector_types.h:1924
Definition amd_hip_vector_types.h:1931
Definition amd_hip_vector_types.h:1940
Definition amd_hip_vector_types.h:1943
Definition amd_hip_vector_types.h:1946
Definition amd_hip_vector_types.h:1955
Definition amd_hip_vector_types.h:1961
Definition amd_hip_vector_types.h:1968
Definition amd_hip_vector_types.h:1977
Definition amd_hip_vector_types.h:1980
Definition amd_hip_vector_types.h:1983
Definition amd_hip_vector_types.h:1992
Definition amd_hip_vector_types.h:1998
Definition amd_hip_vector_types.h:2005
Definition amd_hip_vector_types.h:2014
Definition amd_hip_vector_types.h:2017
Definition amd_hip_vector_types.h:2020
Definition amd_hip_vector_types.h:2029
Definition amd_hip_vector_types.h:2035
Definition amd_hip_vector_types.h:2042
Definition amd_hip_vector_types.h:2051
Definition amd_hip_vector_types.h:2054
Definition amd_hip_vector_types.h:2057
Definition amd_hip_vector_types.h:2066
Definition amd_hip_vector_types.h:2072
Definition amd_hip_vector_types.h:2079
Definition amd_hip_vector_types.h:2088
Definition amd_hip_vector_types.h:2091
Definition amd_hip_vector_types.h:2094