// create_arrays.cpp
// Forked from tum-db/user-defined-operators
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <random>
#include <string>
#include <string_view>
//---------------------------------------------------------------------------
#include <udo/UDOperator.hpp>
//---------------------------------------------------------------------------
using namespace std;
using namespace std::literals::string_view_literals;
//---------------------------------------------------------------------------
/// Pool of database system names; the name attribute of every generated tuple
/// is drawn at random from this table
static constexpr array<string_view, 8> names{{
   "DuckDB"sv, "Hyper"sv, "MSSQL"sv, "MonetDB"sv,
   "Peloton"sv, "Postgres"sv, "Umbra"sv, "Vectorwise"sv,
}};
//---------------------------------------------------------------------------
/// Placeholder strings that stand in for an "invalid" entry of the values
/// list (the empty string plus several spellings of false / n/a / nan)
static constexpr array<string_view, 9> invalidValues{{
   ""sv,
   "F"sv, "FALSE"sv,
   "N/A"sv, "NaN"sv,
   "f"sv, "false"sv,
   "n/a"sv, "nan"sv,
}};
//---------------------------------------------------------------------------
/// The output of this operator: one tuple consisting of two string attributes.
/// NOTE(review): member order presumably defines the column order seen by the
/// UDO framework -- confirm before reordering or adding members.
struct Output {
/// The name attribute; CreateArrays assigns it an entry of `names`
udo::String name;
/// The values attribute; CreateArrays assigns it a comma-separated list whose
/// entries are decimal integers or entries of `invalidValues`
udo::String values;
};
//---------------------------------------------------------------------------
/// An operator without input (udo::EmptyTuple) that generates `numTuples`
/// synthetic Output tuples. Every tuple carries a name picked from `names`
/// and a comma-separated list of values in which most entries are random
/// integers and the remaining ones are picked from `invalidValues`.
///
/// Tuples are generated in batches: every postProduce() call claims one batch
/// through an atomic counter and seeds its random number generator from the
/// index of the first tuple of that batch, so the content of a batch depends
/// only on its position and not on which call generates it.
class CreateArrays : public udo::UDOperator<udo::EmptyTuple, Output> {
   private:
   /// The number of tuples that a single postProduce() call claims and
   /// generates. Claiming and generating must use the same value, otherwise
   /// tuples would be skipped or generated twice.
   static constexpr uint64_t batchSize = 10000;
   /// The offset that is added to the index of a batch's first tuple to
   /// obtain the seed of the random number generator for that batch
   static constexpr uint64_t seedOffset = 42;

   /// The total number of tuples that should be generated
   uint64_t numTuples;
   /// The counter to track the number of tuples that were claimed so far.
   /// It may run past numTuples, because every call adds a full batch.
   atomic<uint64_t> tupleCount = 0;

   public:
   /// Constructor
   /// @param numTuples the total number of tuples that should be generated
   explicit CreateArrays(uint64_t numTuples) : numTuples(numTuples) {}

   /// Produce the output: claim the next batch and generate its tuples.
   /// @return true if all tuples were already claimed and nothing was
   ///         generated by this call, false if a batch was generated
   /// NOTE(review): presumably the framework keeps calling postProduce(),
   /// possibly from several threads, until it returns true -- confirm against
   /// udo::UDOperator.
   bool postProduce(LocalState& /*localState*/) {
      // fetch_add returns the previous counter value, i.e. the index of the
      // first tuple of the batch that now belongs to this call
      const uint64_t batchBegin = tupleCount.fetch_add(batchSize);
      if (batchBegin >= numTuples)
         return true;

      // The last batch is cut off at numTuples
      const uint64_t remaining = numTuples - batchBegin;
      const uint64_t batchTuples = remaining < batchSize ? remaining : batchSize;

      // Seed per batch so that the generated data is reproducible
      mt19937_64 gen(seedOffset + batchBegin);
      uniform_int_distribution<size_t> nameIndexDistr(0, names.size() - 1);
      uniform_int_distribution<size_t> invalidValueIndexDistr(0, invalidValues.size() - 1);
      // An entry of the values list is a valid number with probability 0.9
      bernoulli_distribution hasValueDistr(0.9);
      // The length of the values list: 50 trials with probability 0.2 each
      binomial_distribution<unsigned> numValuesDistr(50, 0.2);
      uniform_int_distribution<int> randomNumberDistr(0, 1000000);

      // The buffer for the values list is reused for all tuples of the batch
      // to avoid allocating a new string for every tuple
      string values;
      for (uint64_t i = 0; i < batchTuples; ++i) {
         // The order of the random draws (name, list length, entries) must
         // stay as it is, it determines the generated data
         const string_view name = names[nameIndexDistr(gen)];
         const unsigned numValues = numValuesDistr(gen);

         values.clear();
         for (unsigned j = 0; j < numValues; ++j) {
            if (j > 0)
               values += ',';
            if (hasValueDistr(gen))
               values += to_string(randomNumberDistr(gen));
            else
               values += invalidValues[invalidValueIndexDistr(gen)];
         }

         Output output;
         output.name = name;
         // NOTE(review): `values` is overwritten for the next tuple, this
         // assumes that the data is consumed or copied before
         // produceOutputTuple() returns -- confirm against udo::String
         output.values = string_view(values);
         produceOutputTuple(output);
      }
      return false;
   }
};
//---------------------------------------------------------------------------