// Wrap intrinsics so we can pass them as function pointers
// - OP: intrinsics name prefix, e.g., vorrq
// - RT: type traits to deduce intrinsics return types
- #define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
+ #define WRAP_BINARY_INT_EXCLUDING_64(OP) \
namespace wrap { \
-     inline RT<uint8x16_t>  _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8(a, b); } \
-     inline RT<int8x16_t>   _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8(a, b); } \
-     inline RT<uint16x8_t>  _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16(a, b); } \
-     inline RT<int16x8_t>   _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16(a, b); } \
-     inline RT<uint32x4_t>  _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32(a, b); } \
-     inline RT<int32x4_t>   _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32(a, b); } \
+     inline uint8x16_t  _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8(a, b); } \
+     inline int8x16_t   _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8(a, b); } \
+     inline uint16x8_t  _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16(a, b); } \
+     inline int16x8_t   _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16(a, b); } \
+     inline uint32x4_t  _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32(a, b); } \
+     inline int32x4_t   _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32(a, b); } \
}

- #define WRAP_BINARY_INT(OP, RT) \
- WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
+ #define WRAP_BINARY_INT(OP) \
+ WRAP_BINARY_INT_EXCLUDING_64(OP) \
namespace wrap { \
-     inline RT<uint64x2_t> _##OP##_u64(uint64x2_t a, uint64x2_t b) { return ::OP##_u64(a, b); } \
-     inline RT<int64x2_t>  _##OP##_s64(int64x2_t a, int64x2_t b) { return ::OP##_s64(a, b); } \
+     inline uint64x2_t _##OP##_u64(uint64x2_t a, uint64x2_t b) { return ::OP##_u64(a, b); } \
+     inline int64x2_t  _##OP##_s64(int64x2_t a, int64x2_t b) { return ::OP##_s64(a, b); } \
}

- #define WRAP_BINARY_FLOAT(OP, RT) \
+ #define WRAP_BINARY_FLOAT(OP) \
namespace wrap { \
-     inline RT<float32x4_t> _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32(a, b); } \
+     inline float32x4_t _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32(a, b); } \
}

#define WRAP_UNARY_INT_EXCLUDING_64(OP) \
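Illustration (not part of the diff): after this change the wrappers return the operand type directly, so an expansion such as WRAP_BINARY_INT_EXCLUDING_64(vaddq) produces plain inline functions that can be taken by address. A minimal sketch of the expanded shape for one lane type, assuming only <arm_neon.h>:

    #include <arm_neon.h>

    namespace wrap
    {
        // Roughly what WRAP_BINARY_INT_EXCLUDING_64(vaddq) generates for the u8 case:
        // forward to the intrinsic; the operand type doubles as the return type.
        inline uint8x16_t _vaddq_u8(uint8x16_t a, uint8x16_t b) { return ::vaddq_u8(a, b); }
    }

    // The wrapper has an ordinary function type, so it can be stored as a function pointer.
    using u8_binary_fn = uint8x16_t (*)(uint8x16_t, uint8x16_t);
    constexpr u8_binary_fn add_u8 = &wrap::_vaddq_u8;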
@@ -87,139 +87,6 @@ namespace xsimd
    namespace detail
    {
-         template <template <class> class return_type, class... T>
-         struct neon_dispatcher_base
-         {
-             struct unary
-             {
-                 using container_type = std::tuple<return_type<T> (*)(T)...>;
-                 const container_type m_func;
-
-                 template <class U>
-                 return_type<U> apply(U rhs) const
-                 {
-                     using func_type = return_type<U> (*)(U);
-                     auto func = xsimd::detail::get<func_type>(m_func);
-                     return func(rhs);
-                 }
-             };
-
-             struct binary
-             {
-                 using container_type = std::tuple<return_type<T> (*)(T, T)...>;
-                 const container_type m_func;
-
-                 template <class U>
-                 return_type<U> apply(U lhs, U rhs) const
-                 {
-                     using func_type = return_type<U> (*)(U, U);
-                     auto func = xsimd::detail::get<func_type>(m_func);
-                     return func(lhs, rhs);
-                 }
-             };
-         };
-
-         /***************************
-          * arithmetic dispatchers *
-          ***************************/
-
-         template <class T>
-         using identity_return_type = T;
-
-         template <class... T>
-         struct neon_dispatcher_impl : neon_dispatcher_base<identity_return_type, T...>
-         {
-         };
-
-
-         using neon_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
-                                                      uint16x8_t, int16x8_t,
-                                                      uint32x4_t, int32x4_t,
-                                                      uint64x2_t, int64x2_t,
-                                                      float32x4_t>;
-
-         using excluding_int64_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
-                                                                 uint16x8_t, int16x8_t,
-                                                                 uint32x4_t, int32x4_t,
-                                                                 float32x4_t>;
-
-         /**************************
-          * comparison dispatchers *
-          **************************/
-
-         template <class T>
-         struct comp_return_type_impl;
-
-         template <>
-         struct comp_return_type_impl<uint8x16_t>
-         {
-             using type = uint8x16_t;
-         };
-
-         // MSVC uses same underlying type for all vector variants which would cause C++ function overload ambiguity
-         #if !defined(_WIN32) || (defined(__clang__))
-         template <>
-         struct comp_return_type_impl<int8x16_t>
-         {
-             using type = uint8x16_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<uint16x8_t>
-         {
-             using type = uint16x8_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<int16x8_t>
-         {
-             using type = uint16x8_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<uint32x4_t>
-         {
-             using type = uint32x4_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<int32x4_t>
-         {
-             using type = uint32x4_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<uint64x2_t>
-         {
-             using type = uint64x2_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<int64x2_t>
-         {
-             using type = uint64x2_t;
-         };
-
-         template <>
-         struct comp_return_type_impl<float32x4_t>
-         {
-             using type = uint32x4_t;
-         };
-         #endif
-
-         template <class T>
-         using comp_return_type = typename comp_return_type_impl<T>::type;
-
-         template <class... T>
-         struct neon_comp_dispatcher_impl : neon_dispatcher_base<comp_return_type, T...>
-         {
-         };
-
-         using excluding_int64_comp_dispatcher = neon_comp_dispatcher_impl<uint8x16_t, int8x16_t,
-                                                                           uint16x8_t, int16x8_t,
-                                                                           uint32x4_t, int32x4_t,
-                                                                           float32x4_t>;
-
        /**************************************
         * enabling / disabling metafunctions *
         **************************************/
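For context only (not part of the diff): the removed dispatchers kept one wrapper per NEON vector type in a std::tuple of function pointers and selected the entry whose signature matches the argument type. A self-contained sketch of that mechanism, using std::get by type (C++14) as a stand-in for the library's xsimd::detail::get helper:

    #include <arm_neon.h>
    #include <tuple>

    // Sketch of the tuple-of-function-pointers dispatch used by the removed code.
    template <class... T>
    struct binary_dispatcher
    {
        std::tuple<T (*)(T, T)...> m_func; // one wrapper per vector type

        template <class U>
        U apply(U lhs, U rhs) const
        {
            // Pick the pointer whose signature matches the operand type U.
            auto func = std::get<U (*)(U, U)>(m_func);
            return func(lhs, rhs);
        }
    };

    inline uint8x16_t  add_u8 (uint8x16_t a, uint8x16_t b)   { return vaddq_u8(a, b); }
    inline float32x4_t add_f32(float32x4_t a, float32x4_t b) { return vaddq_f32(a, b); }

    // dispatcher.apply(x, y) calls add_u8 or add_f32 depending on the operand type.
    const binary_dispatcher<uint8x16_t, float32x4_t> dispatcher{ { add_u8, add_f32 } };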
@@ -627,8 +494,8 @@ namespace xsimd
     * add *
     *******/

-     WRAP_BINARY_INT(vaddq, detail::identity_return_type)
-     WRAP_BINARY_FLOAT(vaddq, detail::identity_return_type)
+     WRAP_BINARY_INT(vaddq)
+     WRAP_BINARY_FLOAT(vaddq)

     template <class A, class T, detail::enable_neon_type_t<T> = 0>
     batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -645,7 +512,7 @@ namespace xsimd
     * sadd *
     ********/

-     WRAP_BINARY_INT(vqaddq, detail::identity_return_type)
+     WRAP_BINARY_INT(vqaddq)

     template <class A, class T, detail::enable_neon_type_t<T> = 0>
     batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -662,8 +529,8 @@ namespace xsimd
     * sub *
     *******/

-     WRAP_BINARY_INT(vsubq, detail::identity_return_type)
-     WRAP_BINARY_FLOAT(vsubq, detail::identity_return_type)
+     WRAP_BINARY_INT(vsubq)
+     WRAP_BINARY_FLOAT(vsubq)

     template <class A, class T, detail::enable_neon_type_t<T> = 0>
     batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -680,7 +547,7 @@ namespace xsimd
     * ssub *
     ********/

-     WRAP_BINARY_INT(vqsubq, detail::identity_return_type)
+     WRAP_BINARY_INT(vqsubq)

     template <class A, class T, detail::enable_neon_type_t<T> = 0>
     batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -698,8 +565,8 @@ namespace xsimd
     * mul *
     *******/

-     WRAP_BINARY_INT_EXCLUDING_64(vmulq, detail::identity_return_type)
-     WRAP_BINARY_FLOAT(vmulq, detail::identity_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vmulq)
+     WRAP_BINARY_FLOAT(vmulq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -751,8 +618,8 @@ namespace xsimd
     * eq *
     ******/

-     WRAP_BINARY_INT_EXCLUDING_64(vceqq, detail::comp_return_type)
-     WRAP_BINARY_FLOAT(vceqq, detail::comp_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vceqq)
+     WRAP_BINARY_FLOAT(vceqq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
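Side note (illustration, not part of the diff): the comp_return_type trait removed above encoded the fact that NEON comparisons already yield unsigned lane masks of the same width as their operands, which is why the comparison wrappers no longer need a return-type parameter. For example:

    #include <arm_neon.h>

    // Comparing two float32x4_t values yields a uint32x4_t mask,
    // with each lane set to all ones where equal and all zeros otherwise.
    inline uint32x4_t eq_f32(float32x4_t a, float32x4_t b)
    {
        return vceqq_f32(a, b);
    }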
@@ -797,8 +664,8 @@ namespace xsimd
     * lt *
     ******/

-     WRAP_BINARY_INT_EXCLUDING_64(vcltq, detail::comp_return_type)
-     WRAP_BINARY_FLOAT(vcltq, detail::comp_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vcltq)
+     WRAP_BINARY_FLOAT(vcltq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -821,8 +688,8 @@ namespace xsimd
     * le *
     ******/

-     WRAP_BINARY_INT_EXCLUDING_64(vcleq, detail::comp_return_type)
-     WRAP_BINARY_FLOAT(vcleq, detail::comp_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vcleq)
+     WRAP_BINARY_FLOAT(vcleq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -845,8 +712,8 @@ namespace xsimd
     * gt *
     ******/

-     WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
-     WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vcgtq)
+     WRAP_BINARY_FLOAT(vcgtq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -869,8 +736,8 @@ namespace xsimd
     * ge *
     ******/

-     WRAP_BINARY_INT_EXCLUDING_64(vcgeq, detail::comp_return_type)
-     WRAP_BINARY_FLOAT(vcgeq, detail::comp_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vcgeq)
+     WRAP_BINARY_FLOAT(vcgeq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -893,7 +760,7 @@ namespace xsimd
     * bitwise_and *
     ***************/

-     WRAP_BINARY_INT(vandq, detail::identity_return_type)
+     WRAP_BINARY_INT(vandq)

     namespace detail
     {
@@ -930,7 +797,7 @@ namespace xsimd
     * bitwise_or *
     **************/

-     WRAP_BINARY_INT(vorrq, detail::identity_return_type)
+     WRAP_BINARY_INT(vorrq)

     namespace detail
     {
@@ -967,7 +834,7 @@ namespace xsimd
     * bitwise_xor *
     ***************/

-     WRAP_BINARY_INT(veorq, detail::identity_return_type)
+     WRAP_BINARY_INT(veorq)

     namespace detail
     {
@@ -1085,7 +952,7 @@ namespace xsimd
     * bitwise_andnot *
     ******************/

-     WRAP_BINARY_INT(vbicq, detail::identity_return_type)
+     WRAP_BINARY_INT(vbicq)

     namespace detail
     {
@@ -1121,8 +988,8 @@ namespace xsimd
     * min *
     *******/

-     WRAP_BINARY_INT_EXCLUDING_64(vminq, detail::identity_return_type)
-     WRAP_BINARY_FLOAT(vminq, detail::identity_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vminq)
+     WRAP_BINARY_FLOAT(vminq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -1145,8 +1012,8 @@ namespace xsimd
     * max *
     *******/

-     WRAP_BINARY_INT_EXCLUDING_64(vmaxq, detail::identity_return_type)
-     WRAP_BINARY_FLOAT(vmaxq, detail::identity_return_type)
+     WRAP_BINARY_INT_EXCLUDING_64(vmaxq)
+     WRAP_BINARY_FLOAT(vmaxq)

     template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
     batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -1355,30 +1222,6 @@ namespace xsimd
     inline float32x4_t _vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { return ::vbslq_f32(a, b, c); }
 }

-     namespace detail
-     {
-         template <class... T>
-         struct neon_select_dispatcher_impl
-         {
-             using container_type = std::tuple<T (*)(comp_return_type<T>, T, T)...>;
-             const container_type m_func;
-
-             template <class U>
-             U apply(comp_return_type<U> cond, U lhs, U rhs) const
-             {
-                 using func_type = U (*)(comp_return_type<U>, U, U);
-                 auto func = xsimd::detail::get<func_type>(m_func);
-                 return func(cond, lhs, rhs);
-             }
-         };
-
-         using neon_select_dispatcher = neon_select_dispatcher_impl<uint8x16_t, int8x16_t,
-                                                                    uint16x8_t, int16x8_t,
-                                                                    uint32x4_t, int32x4_t,
-                                                                    uint64x2_t, int64x2_t,
-                                                                    float32x4_t>;
-     }
-
     template <class A, class T, detail::enable_neon_type_t<T> = 0>
     batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<neon>)
     {
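Illustration (not part of the diff): the _vbslq_* wrappers above forward to NEON's bitwise select, which takes each result bit from the second operand where the corresponding mask bit is set and from the third operand otherwise. A scalar sketch of that per-bit rule on one 32-bit lane:

    #include <cstdint>

    // Bitwise-select rule implemented by the vbslq intrinsics, shown on one 32-bit lane:
    // keep bits of b where mask is 1, bits of c where mask is 0.
    inline std::uint32_t bitwise_select_lane(std::uint32_t mask, std::uint32_t b, std::uint32_t c)
    {
        return (mask & b) | (~mask & c);
    }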