1 #ifndef BV_BRANCH_SELECTION_HPP
2 #define BV_BRANCH_SELECTION_HPP
26 template <
class dtype, dtype branches>
34 static_assert((branches == 8) || (branches == 16) || (branches == 32) ||
35 (branches == 64) || (branches == 128),
36 "branching factor needs to be a reasonable power of 2");
43 for (dtype i = 0; i < branches; i++) {
44 elems_[i] = (~dtype(0)) >> 1;
54 dtype
get(uint8_t index)
const {
return elems_[index]; }
67 void set(uint8_t index, dtype value) {
elems_[index] = value; }
85 void increment(uint8_t from, uint8_t array_size, T change) {
86 for (uint8_t i = from; i < array_size; i++) {
106 void insert(uint8_t index, uint8_t array_size, dtype a_value,
109 for (uint8_t i = array_size; i > index; i--) {
112 elems_[index - 1] = index != 1 ?
elems_[index - 2] + a_value : a_value;
127 void remove(uint8_t index, uint8_t array_size) {
128 for (uint8_t i = index; i < array_size - 1; i++) {
131 elems_[array_size - 1] = (~dtype(0)) >> 1;
145 dtype subtrahend =
elems_[n - 1];
146 for (uint8_t i = n; i < array_size; i++) {
148 elems_[i] = (~dtype(0)) >> 1;
163 for (uint8_t i = array_size - n; i < array_size; i++) {
164 elems_[i] = (~dtype(0)) >> 1;
180 dtype addend = array_size != 0 ?
elems_[array_size - 1] : 0;
181 for (uint8_t i = 0; i < n_elems; i++) {
182 elems_[i + array_size] = addend + other->
get(i);
194 void append(uint8_t index, dtype value) {
214 void prepend(uint8_t n_elems, uint8_t array_size, uint8_t o_size,
216 memmove(
elems_ + n_elems,
elems_, array_size *
sizeof(dtype));
218 n_elems < o_size ? other->
get(o_size - n_elems - 1) : 0;
219 for (uint8_t i = 0; i < n_elems; i++) {
220 elems_[i] = other->
get(i + o_size - n_elems) - subtrahend;
222 for (uint8_t i = n_elems; i < array_size + n_elems; i++) {
257 constexpr dtype SIGN_BIT = ~((~dtype(0)) >> 1);
258 constexpr dtype num_bits =
sizeof(dtype) * 8;
259 constexpr dtype lines = CACHE_LINE /
sizeof(dtype);
260 for (dtype i = 0; i < branches; i += lines) {
261 __builtin_prefetch(
elems_ + i);
264 if constexpr (branches == 128) {
265 idx = (uint8_t(1) << 6) - 1;
266 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 7)) |
268 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 6)) |
270 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
272 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
274 }
else if constexpr (branches == 64) {
275 idx = (uint8_t(1) << 5) - 1;
276 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 6)) |
278 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
280 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
282 }
else if constexpr (branches == 32) {
283 idx = (uint8_t(1) << 4) - 1;
284 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
286 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
288 }
else if constexpr (branches == 16) {
289 idx = (uint8_t(1) << 3) - 1;
290 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
293 idx = (uint8_t(1) << 2) - 1;
295 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 3)) |
297 idx ^= (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 2)) |
299 return idx ^ (dtype((
elems_[idx] - q) & SIGN_BIT) >> (num_bits - 1));