Skip to content

Commit a82ed95

Browse files
committed
Remove unused neon dispatcher
1 parent 663b06b commit a82ed95

File tree

1 file changed

+39
-196
lines changed

1 file changed

+39
-196
lines changed

include/xsimd/arch/xsimd_neon.hpp

+39-196
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,26 @@
2424
// Wrap intrinsics so we can pass them as function pointers
2525
// - OP: intrinsics name prefix, e.g., vorrq
2626
// - RT: type traits to deduce intrinsics return types (no longer a parameter of the WRAP_BINARY_* macros below after this change)
27-
#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
27+
#define WRAP_BINARY_INT_EXCLUDING_64(OP) \
2828
namespace wrap { \
29-
inline RT<uint8x16_t> _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8 (a, b); } \
30-
inline RT<int8x16_t> _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8 (a, b); } \
31-
inline RT<uint16x8_t> _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16(a, b); } \
32-
inline RT<int16x8_t> _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16(a, b); } \
33-
inline RT<uint32x4_t> _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32(a, b); } \
34-
inline RT<int32x4_t> _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32(a, b); } \
29+
inline uint8x16_t _##OP##_u8 (uint8x16_t a, uint8x16_t b) { return ::OP##_u8 (a, b); } \
30+
inline int8x16_t _##OP##_s8 (int8x16_t a, int8x16_t b) { return ::OP##_s8 (a, b); } \
31+
inline uint16x8_t _##OP##_u16(uint16x8_t a, uint16x8_t b) { return ::OP##_u16(a, b); } \
32+
inline int16x8_t _##OP##_s16(int16x8_t a, int16x8_t b) { return ::OP##_s16(a, b); } \
33+
inline uint32x4_t _##OP##_u32(uint32x4_t a, uint32x4_t b) { return ::OP##_u32(a, b); } \
34+
inline int32x4_t _##OP##_s32(int32x4_t a, int32x4_t b) { return ::OP##_s32(a, b); } \
3535
}
3636

37-
#define WRAP_BINARY_INT(OP, RT) \
38-
WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
37+
#define WRAP_BINARY_INT(OP) \
38+
WRAP_BINARY_INT_EXCLUDING_64(OP) \
3939
namespace wrap { \
40-
inline RT<uint64x2_t> _##OP##_u64(uint64x2_t a, uint64x2_t b) { return ::OP##_u64(a, b); } \
41-
inline RT<int64x2_t> _##OP##_s64(int64x2_t a, int64x2_t b) { return ::OP##_s64(a, b); } \
40+
inline uint64x2_t _##OP##_u64(uint64x2_t a, uint64x2_t b) { return ::OP##_u64(a, b); } \
41+
inline int64x2_t _##OP##_s64(int64x2_t a, int64x2_t b) { return ::OP##_s64(a, b); } \
4242
}
4343

44-
#define WRAP_BINARY_FLOAT(OP, RT) \
44+
#define WRAP_BINARY_FLOAT(OP) \
4545
namespace wrap { \
46-
inline RT<float32x4_t> _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32(a, b); } \
46+
inline float32x4_t _##OP##_f32(float32x4_t a, float32x4_t b) { return ::OP##_f32(a, b); } \
4747
}
4848

4949
#define WRAP_UNARY_INT_EXCLUDING_64(OP) \
@@ -87,139 +87,6 @@ namespace xsimd
8787

8888
namespace detail
8989
{
90-
template <template <class> class return_type, class... T>
91-
struct neon_dispatcher_base
92-
{
93-
struct unary
94-
{
95-
using container_type = std::tuple<return_type<T> (*)(T)...>;
96-
const container_type m_func;
97-
98-
template <class U>
99-
return_type<U> apply(U rhs) const
100-
{
101-
using func_type = return_type<U> (*)(U);
102-
auto func = xsimd::detail::get<func_type>(m_func);
103-
return func(rhs);
104-
}
105-
};
106-
107-
struct binary
108-
{
109-
using container_type = std::tuple<return_type<T> (*)(T, T) ...>;
110-
const container_type m_func;
111-
112-
template <class U>
113-
return_type<U> apply(U lhs, U rhs) const
114-
{
115-
using func_type = return_type<U> (*)(U, U);
116-
auto func = xsimd::detail::get<func_type>(m_func);
117-
return func(lhs, rhs);
118-
}
119-
};
120-
};
121-
122-
/***************************
123-
* arithmetic dispatchers *
124-
***************************/
125-
126-
template <class T>
127-
using identity_return_type = T;
128-
129-
template <class... T>
130-
struct neon_dispatcher_impl : neon_dispatcher_base<identity_return_type, T...>
131-
{
132-
};
133-
134-
135-
using neon_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
136-
uint16x8_t, int16x8_t,
137-
uint32x4_t, int32x4_t,
138-
uint64x2_t, int64x2_t,
139-
float32x4_t>;
140-
141-
using excluding_int64_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
142-
uint16x8_t, int16x8_t,
143-
uint32x4_t, int32x4_t,
144-
float32x4_t>;
145-
146-
/**************************
147-
* comparison dispatchers *
148-
**************************/
149-
150-
template <class T>
151-
struct comp_return_type_impl;
152-
153-
template <>
154-
struct comp_return_type_impl<uint8x16_t>
155-
{
156-
using type = uint8x16_t;
157-
};
158-
159-
// MSVC uses same underlying type for all vector variants which would cause C++ function overload ambiguity
160-
#if !defined(_WIN32) || (defined(__clang__))
161-
template <>
162-
struct comp_return_type_impl<int8x16_t>
163-
{
164-
using type = uint8x16_t;
165-
};
166-
167-
template <>
168-
struct comp_return_type_impl<uint16x8_t>
169-
{
170-
using type = uint16x8_t;
171-
};
172-
173-
template <>
174-
struct comp_return_type_impl<int16x8_t>
175-
{
176-
using type = uint16x8_t;
177-
};
178-
179-
template <>
180-
struct comp_return_type_impl<uint32x4_t>
181-
{
182-
using type = uint32x4_t;
183-
};
184-
185-
template <>
186-
struct comp_return_type_impl<int32x4_t>
187-
{
188-
using type = uint32x4_t;
189-
};
190-
191-
template <>
192-
struct comp_return_type_impl<uint64x2_t>
193-
{
194-
using type = uint64x2_t;
195-
};
196-
197-
template <>
198-
struct comp_return_type_impl<int64x2_t>
199-
{
200-
using type = uint64x2_t;
201-
};
202-
203-
template <>
204-
struct comp_return_type_impl<float32x4_t>
205-
{
206-
using type = uint32x4_t;
207-
};
208-
#endif
209-
210-
template <class T>
211-
using comp_return_type = typename comp_return_type_impl<T>::type;
212-
213-
template <class... T>
214-
struct neon_comp_dispatcher_impl : neon_dispatcher_base<comp_return_type, T...>
215-
{
216-
};
217-
218-
using excluding_int64_comp_dispatcher = neon_comp_dispatcher_impl<uint8x16_t, int8x16_t,
219-
uint16x8_t, int16x8_t,
220-
uint32x4_t, int32x4_t,
221-
float32x4_t>;
222-
22390
/**************************************
22491
* enabling / disabling metafunctions *
22592
**************************************/
@@ -627,8 +494,8 @@ namespace xsimd
627494
* add *
628495
*******/
629496

630-
WRAP_BINARY_INT(vaddq, detail::identity_return_type)
631-
WRAP_BINARY_FLOAT(vaddq, detail::identity_return_type)
497+
WRAP_BINARY_INT(vaddq)
498+
WRAP_BINARY_FLOAT(vaddq)
632499

633500
template <class A, class T, detail::enable_neon_type_t<T> = 0>
634501
batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -645,7 +512,7 @@ namespace xsimd
645512
* sadd *
646513
********/
647514

648-
WRAP_BINARY_INT(vqaddq, detail::identity_return_type)
515+
WRAP_BINARY_INT(vqaddq)
649516

650517
template <class A, class T, detail::enable_neon_type_t<T> = 0>
651518
batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -662,8 +529,8 @@ namespace xsimd
662529
* sub *
663530
*******/
664531

665-
WRAP_BINARY_INT(vsubq, detail::identity_return_type)
666-
WRAP_BINARY_FLOAT(vsubq, detail::identity_return_type)
532+
WRAP_BINARY_INT(vsubq)
533+
WRAP_BINARY_FLOAT(vsubq)
667534

668535
template <class A, class T, detail::enable_neon_type_t<T> = 0>
669536
batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -680,7 +547,7 @@ namespace xsimd
680547
* ssub *
681548
********/
682549

683-
WRAP_BINARY_INT(vqsubq, detail::identity_return_type)
550+
WRAP_BINARY_INT(vqsubq)
684551

685552
template <class A, class T, detail::enable_neon_type_t<T> = 0>
686553
batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -698,8 +565,8 @@ namespace xsimd
698565
* mul *
699566
*******/
700567

701-
WRAP_BINARY_INT_EXCLUDING_64(vmulq, detail::identity_return_type)
702-
WRAP_BINARY_FLOAT(vmulq, detail::identity_return_type)
568+
WRAP_BINARY_INT_EXCLUDING_64(vmulq)
569+
WRAP_BINARY_FLOAT(vmulq)
703570

704571
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
705572
batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -751,8 +618,8 @@ namespace xsimd
751618
* eq *
752619
******/
753620

754-
WRAP_BINARY_INT_EXCLUDING_64(vceqq, detail::comp_return_type)
755-
WRAP_BINARY_FLOAT(vceqq, detail::comp_return_type)
621+
WRAP_BINARY_INT_EXCLUDING_64(vceqq)
622+
WRAP_BINARY_FLOAT(vceqq)
756623

757624
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
758625
batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -797,8 +664,8 @@ namespace xsimd
797664
* lt *
798665
******/
799666

800-
WRAP_BINARY_INT_EXCLUDING_64(vcltq, detail::comp_return_type)
801-
WRAP_BINARY_FLOAT(vcltq, detail::comp_return_type)
667+
WRAP_BINARY_INT_EXCLUDING_64(vcltq)
668+
WRAP_BINARY_FLOAT(vcltq)
802669

803670
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
804671
batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -821,8 +688,8 @@ namespace xsimd
821688
* le *
822689
******/
823690

824-
WRAP_BINARY_INT_EXCLUDING_64(vcleq, detail::comp_return_type)
825-
WRAP_BINARY_FLOAT(vcleq, detail::comp_return_type)
691+
WRAP_BINARY_INT_EXCLUDING_64(vcleq)
692+
WRAP_BINARY_FLOAT(vcleq)
826693

827694
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
828695
batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -845,8 +712,8 @@ namespace xsimd
845712
* gt *
846713
******/
847714

848-
WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
849-
WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type)
715+
WRAP_BINARY_INT_EXCLUDING_64(vcgtq)
716+
WRAP_BINARY_FLOAT(vcgtq)
850717

851718
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
852719
batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -869,8 +736,8 @@ namespace xsimd
869736
* ge *
870737
******/
871738

872-
WRAP_BINARY_INT_EXCLUDING_64(vcgeq, detail::comp_return_type)
873-
WRAP_BINARY_FLOAT(vcgeq, detail::comp_return_type)
739+
WRAP_BINARY_INT_EXCLUDING_64(vcgeq)
740+
WRAP_BINARY_FLOAT(vcgeq)
874741

875742
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
876743
batch_bool<T, A> get(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -893,7 +760,7 @@ namespace xsimd
893760
* bitwise_and *
894761
***************/
895762

896-
WRAP_BINARY_INT(vandq, detail::identity_return_type)
763+
WRAP_BINARY_INT(vandq)
897764

898765
namespace detail
899766
{
@@ -930,7 +797,7 @@ namespace xsimd
930797
* bitwise_or *
931798
**************/
932799

933-
WRAP_BINARY_INT(vorrq, detail::identity_return_type)
800+
WRAP_BINARY_INT(vorrq)
934801

935802
namespace detail
936803
{
@@ -967,7 +834,7 @@ namespace xsimd
967834
* bitwise_xor *
968835
***************/
969836

970-
WRAP_BINARY_INT(veorq, detail::identity_return_type)
837+
WRAP_BINARY_INT(veorq)
971838

972839
namespace detail
973840
{
@@ -1085,7 +952,7 @@ namespace xsimd
1085952
* bitwise_andnot *
1086953
******************/
1087954

1088-
WRAP_BINARY_INT(vbicq, detail::identity_return_type)
955+
WRAP_BINARY_INT(vbicq)
1089956

1090957
namespace detail
1091958
{
@@ -1121,8 +988,8 @@ namespace xsimd
1121988
* min *
1122989
*******/
1123990

1124-
WRAP_BINARY_INT_EXCLUDING_64(vminq, detail::identity_return_type)
1125-
WRAP_BINARY_FLOAT(vminq, detail::identity_return_type)
991+
WRAP_BINARY_INT_EXCLUDING_64(vminq)
992+
WRAP_BINARY_FLOAT(vminq)
1126993

1127994
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
1128995
batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -1145,8 +1012,8 @@ namespace xsimd
11451012
* max *
11461013
*******/
11471014

1148-
WRAP_BINARY_INT_EXCLUDING_64(vmaxq, detail::identity_return_type)
1149-
WRAP_BINARY_FLOAT(vmaxq, detail::identity_return_type)
1015+
WRAP_BINARY_INT_EXCLUDING_64(vmaxq)
1016+
WRAP_BINARY_FLOAT(vmaxq)
11501017

11511018
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
11521019
batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>)
@@ -1355,30 +1222,6 @@ namespace xsimd
13551222
inline float32x4_t _vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { return ::vbslq_f32(a, b, c); }
13561223
}
13571224

1358-
namespace detail
1359-
{
1360-
template <class... T>
1361-
struct neon_select_dispatcher_impl
1362-
{
1363-
using container_type = std::tuple<T (*)(comp_return_type<T>, T, T)...>;
1364-
const container_type m_func;
1365-
1366-
template <class U>
1367-
U apply(comp_return_type<U> cond, U lhs, U rhs) const
1368-
{
1369-
using func_type = U (*)(comp_return_type<U>, U, U);
1370-
auto func = xsimd::detail::get<func_type>(m_func);
1371-
return func(cond, lhs, rhs);
1372-
}
1373-
};
1374-
1375-
using neon_select_dispatcher = neon_select_dispatcher_impl<uint8x16_t, int8x16_t,
1376-
uint16x8_t, int16x8_t,
1377-
uint32x4_t, int32x4_t,
1378-
uint64x2_t, int64x2_t,
1379-
float32x4_t>;
1380-
}
1381-
13821225
template <class A, class T, detail::enable_neon_type_t<T> = 0>
13831226
batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<neon>)
13841227
{

0 commit comments

Comments
 (0)