Skip to content

Commit 2be89a0

Browse files
committed
Add new Algorithms using explicit batch type
1 parent e845404 commit 2be89a0

File tree

3 files changed

+300
-12
lines changed

3 files changed

+300
-12
lines changed

README.md

+40
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,46 @@ void mean(const vector_type& a, const vector_type& b, vector_type& res)
151151
}
152152
```
153153
154+
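Algorithms such as `xsimd::reduce` and `xsimd::transform` are also available in an explicit-batch form (`xsimd::reduce_batch`, `xsimd::transform_batch`), which takes one functor for scalar elements and a separate functor for SIMD batches: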
155+
156+
```cpp
157+
template <class C, class T = typename std::decay<decltype(*C().begin())>::type>
158+
T nansum(const C& v)
159+
{
160+
return xsimd::reduce_batch(v.begin(), v.end(), 0.0,
161+
[](auto x, auto y) {
162+
return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
163+
},
164+
[](auto x, auto y) {
165+
static decltype(x) zero(0.0);
166+
auto xnan = xsimd::isnan(x);
167+
auto ynan = xsimd::isnan(y);
168+
auto xs = xsimd::select(xnan, zero, x);
169+
auto ys = xsimd::select(ynan, zero, y);
170+
return xs + ys;
171+
});
172+
}
173+
```
174+
175+
To switch from `std::count_if` to `xsimd::count_if`:
176+
177+
```cpp
178+
// v is an aligned vector of int type
179+
auto count_expected = std::count_if(v.begin(), v.end(),
180+
[](auto x) {
181+
return x >= 50 && x <= 70 ? 1 : 0;
182+
});
183+
auto count = xsimd::count_if(v.begin(), v.end(),
184+
[](auto x) {
185+
return x >= 50 && x <= 70 ? 1 : 0;
186+
},
187+
[](auto b) {
188+
static decltype(b) zero(0);
189+
static decltype(b) one(1);
190+
return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
191+
});
192+
assert(count_expected == count);
193+
```
154194
155195
## Building and Running the Tests
156196

include/xsimd/stl/algorithms.hpp

+108-11
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
namespace xsimd
1717
{
18-
template <class I1, class I2, class O1, class UF>
19-
void transform(I1 first, I2 last, O1 out_first, UF&& f)
18+
template <class I1, class I2, class O1, class UF, class UFB>
19+
void transform_batch(I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb)
2020
{
2121
using value_type = typename std::decay<decltype(*first)>::type;
2222
using traits = simd_traits<value_type>;
@@ -43,7 +43,7 @@ namespace xsimd
4343
for (std::size_t i = align_begin; i < align_end; i += simd_size)
4444
{
4545
xsimd::load_aligned(&first[i], batch);
46-
xsimd::store_aligned(&out_first[i], f(batch));
46+
xsimd::store_aligned(&out_first[i], fb(batch));
4747
}
4848

4949
for (std::size_t i = align_end; i < size; ++i)
@@ -62,7 +62,7 @@ namespace xsimd
6262
for (std::size_t i = align_begin; i < align_end; i += simd_size)
6363
{
6464
xsimd::load_aligned(&first[i], batch);
65-
xsimd::store_unaligned(&out_first[i], f(batch));
65+
xsimd::store_unaligned(&out_first[i], fb(batch));
6666
}
6767

6868
for (std::size_t i = align_end; i < size; ++i)
@@ -72,8 +72,14 @@ namespace xsimd
7272
}
7373
}
7474

75-
template <class I1, class I2, class I3, class O1, class UF>
76-
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
75+
/**
 * Single-functor overload of the unary transform.
 *
 * Delegates to transform_batch, supplying @p f both as the scalar functor
 * and as the batch functor, so @p f must be callable with either kind of
 * argument.
 */
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    // f is handed over twice as an lvalue; it is intentionally not forwarded,
    // since the same object serves both roles.
    transform_batch(first, last, out_first,
                    /* scalar functor */ f,
                    /* batch functor  */ f);
}
80+
81+
template <class I1, class I2, class I3, class O1, class UF, class UFB>
82+
void transform_batch(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
7783
{
7884
using value_type = typename std::decay<decltype(*first_1)>::type;
7985
using traits = simd_traits<value_type>;
@@ -102,7 +108,7 @@ namespace xsimd
102108
{ \
103109
xsimd::A1(&first_1[i], batch_1); \
104110
xsimd::A2(&first_2[i], batch_2); \
105-
xsimd::A3(&out_first[i], f(batch_1, batch_2)); \
111+
xsimd::A3(&out_first[i], fb(batch_1, batch_2)); \
106112
} \
107113
\
108114
for (std::size_t i = align_end; i < size; ++i) \
@@ -130,6 +136,11 @@ namespace xsimd
130136
#undef XSIMD_LOOP_MACRO
131137
}
132138

139+
/**
 * Single-functor overload of the binary transform.
 *
 * Delegates to the two-input transform_batch, supplying @p f both as the
 * scalar functor and as the batch functor, so @p f must be callable with
 * either kind of argument pair.
 */
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    // f is handed over twice as an lvalue; it is intentionally not forwarded,
    // since the same object serves both roles.
    transform_batch(first_1, last_1, first_2, out_first,
                    /* scalar functor */ f,
                    /* batch functor  */ f);
}
133144

134145
// TODO: Remove this once we drop C++11 support
135146
namespace detail
@@ -141,9 +152,8 @@ namespace xsimd
141152
};
142153
}
143154

144-
145-
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
146-
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
155+
template <class Iterator1, class Iterator2, class Init, class BinaryFunction, class BinaryFunctionBatch>
156+
Init reduce_batch(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
147157
{
148158
using value_type = typename std::decay<decltype(*first)>::type;
149159
using traits = simd_traits<value_type>;
@@ -180,7 +190,7 @@ namespace xsimd
180190
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
181191
{
182192
xsimd::load_aligned(ptr, batch);
183-
batch_init = binfun(batch_init, batch);
193+
batch_init = binfun_batch(batch_init, batch);
184194
}
185195

186196
// reduce across batch
@@ -197,6 +207,93 @@ namespace xsimd
197207
return init;
198208
}
199209

210+
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
211+
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
212+
{
213+
return reduce_batch(first, last, init, binfun, binfun);
214+
}
215+
216+
namespace detail
217+
{
218+
template <class T>
219+
struct count_batch
220+
{
221+
count_batch(T value)
222+
: value(value)
223+
{}
224+
225+
count_batch(const count_batch<T>&) = default;
226+
count_batch(count_batch<T>&&) = default;
227+
228+
template <class B>
229+
std::size_t operator()(const B& b)
230+
{
231+
static auto zero = B(T(0));
232+
static auto one = B(T(1));
233+
return static_cast<std::size_t>(xsimd::hadd(xsimd::select(b == value, one, zero)));
234+
}
235+
236+
private:
237+
T value;
238+
};
239+
}
240+
241+
template <class Iterator1, class Iterator2, class UnaryPredicate, class UnaryPredicateBatch>
242+
std::size_t count_if(Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
243+
{
244+
using value_type = typename std::decay<decltype(*first)>::type;
245+
using traits = simd_traits<value_type>;
246+
using batch_type = typename traits::type;
247+
248+
std::size_t size = static_cast<std::size_t>(std::distance(first, last));
249+
constexpr std::size_t simd_size = traits::size;
250+
251+
std::size_t counter(0);
252+
if(size < simd_size)
253+
{
254+
while(first != last)
255+
{
256+
counter += predicate(*first++);
257+
}
258+
return counter;
259+
}
260+
261+
const auto* const ptr_begin = &(*first);
262+
263+
std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
264+
std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
265+
266+
// reduce initial unaligned part
267+
for (std::size_t i = 0; i < align_begin; ++i)
268+
{
269+
counter += predicate(first[i]);
270+
}
271+
272+
// reduce aligned part
273+
batch_type batch;
274+
auto ptr = ptr_begin + align_begin;
275+
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
276+
{
277+
xsimd::load_aligned(ptr, batch);
278+
counter += predicate_batch(batch);
279+
}
280+
281+
// reduce final unaligned part
282+
for (std::size_t i = align_end; i < size; ++i)
283+
{
284+
counter += predicate(first[i]);
285+
}
286+
287+
return counter;
288+
}
289+
290+
template <class Iterator1, class Iterator2, class T>
291+
std::size_t count(Iterator1 first, Iterator2 last, const T& value)
292+
{
293+
return count_if(first, last,
294+
[&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
295+
}
296+
200297
}
201298

202299
#endif

0 commit comments

Comments
 (0)