Bit vector  0.9
Fast and space efficient dynamic bit vector library.
branch_selection.hpp
1 #ifndef BV_BRANCH_SELECTION_HPP
2 #define BV_BRANCH_SELECTION_HPP
3 
4 #include <cassert>
5 #include <cstdint>
6 #include <cstring>
7 
8 #ifndef CACHE_LINE
9 // Apparently the most common cache line size is 64.
10 #define CACHE_LINE 64
11 #endif
12 
13 namespace bv {
14 
26 template <class dtype, dtype branches>
28  protected:
32  dtype elems_[branches];
33 
34  static_assert((branches == 8) || (branches == 16) || (branches == 32) ||
35  (branches == 64) || (branches == 128),
36  "branching factor needs to be a reasonable power of 2");
37 
38  public:
43  for (dtype i = 0; i < branches; i++) {
44  elems_[i] = (~dtype(0)) >> 1;
45  }
46  }
47 
54  dtype get(uint8_t index) const { return elems_[index]; }
55 
67  void set(uint8_t index, dtype value) { elems_[index] = value; }
68 
84  template <class T>
85  void increment(uint8_t from, uint8_t array_size, T change) {
86  for (uint8_t i = from; i < array_size; i++) {
87  elems_[i] += change;
88  }
89  }
90 
106  void insert(uint8_t index, uint8_t array_size, dtype a_value,
107  dtype b_value) {
108  assert(index > 0);
109  for (uint8_t i = array_size; i > index; i--) {
110  elems_[i] = elems_[i - 1];
111  }
112  elems_[index - 1] = index != 1 ? elems_[index - 2] + a_value : a_value;
113  elems_[index] = elems_[index - 1] + b_value;
114  }
115 
127  void remove(uint8_t index, uint8_t array_size) {
128  for (uint8_t i = index; i < array_size - 1; i++) {
129  elems_[i] = elems_[i + 1];
130  }
131  elems_[array_size - 1] = (~dtype(0)) >> 1;
132  }
133 
144  void clear_first(uint8_t n, uint8_t array_size) {
145  dtype subtrahend = elems_[n - 1];
146  for (uint8_t i = n; i < array_size; i++) {
147  elems_[i - n] = elems_[i] - subtrahend;
148  elems_[i] = (~dtype(0)) >> 1;
149  }
150  }
151 
162  void clear_last(uint8_t n, uint8_t array_size) {
163  for (uint8_t i = array_size - n; i < array_size; i++) {
164  elems_[i] = (~dtype(0)) >> 1;
165  }
166  }
167 
179  void append(uint8_t n_elems, uint8_t array_size, branchless_scan* other) {
180  dtype addend = array_size != 0 ? elems_[array_size - 1] : 0;
181  for (uint8_t i = 0; i < n_elems; i++) {
182  elems_[i + array_size] = addend + other->get(i);
183  }
184  }
185 
194  void append(uint8_t index, dtype value) {
195  if (index == 0) {
196  elems_[index] = value;
197  } else {
198  elems_[index] = elems_[index - 1] + value;
199  }
200  }
201 
214  void prepend(uint8_t n_elems, uint8_t array_size, uint8_t o_size,
215  branchless_scan* other) {
216  memmove(elems_ + n_elems, elems_, array_size * sizeof(dtype));
217  dtype subtrahend =
218  n_elems < o_size ? other->get(o_size - n_elems - 1) : 0;
219  for (uint8_t i = 0; i < n_elems; i++) {
220  elems_[i] = other->get(i + o_size - n_elems) - subtrahend;
221  }
222  for (uint8_t i = n_elems; i < array_size + n_elems; i++) {
223  elems_[i] += elems_[n_elems - 1];
224  }
225  }
226 
256  uint8_t find(dtype q) const {
257  constexpr dtype SIGN_BIT = ~((~dtype(0)) >> 1);
258  constexpr dtype num_bits = sizeof(dtype) * 8;
259  constexpr dtype lines = CACHE_LINE / sizeof(dtype);
260  for (dtype i = 0; i < branches; i += lines) {
261  __builtin_prefetch(elems_ + i);
262  }
263  uint8_t idx;
264  if constexpr (branches == 128) {
265  idx = (uint8_t(1) << 6) - 1;
266  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 7)) |
267  (uint8_t(1) << 5);
268  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 6)) |
269  (uint8_t(1) << 4);
270  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
271  (uint8_t(1) << 3);
272  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
273  (uint8_t(1) << 2);
274  } else if constexpr (branches == 64) {
275  idx = (uint8_t(1) << 5) - 1;
276  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 6)) |
277  (uint8_t(1) << 4);
278  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
279  (uint8_t(1) << 3);
280  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
281  (uint8_t(1) << 2);
282  } else if constexpr (branches == 32) {
283  idx = (uint8_t(1) << 4) - 1;
284  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 5)) |
285  (uint8_t(1) << 3);
286  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
287  (uint8_t(1) << 2);
288  } else if constexpr (branches == 16) {
289  idx = (uint8_t(1) << 3) - 1;
290  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 4)) |
291  (uint8_t(1) << 2);
292  } else {
293  idx = (uint8_t(1) << 2) - 1;
294  }
295  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 3)) |
296  (uint8_t(1) << 1);
297  idx ^= (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 2)) |
298  uint8_t(1);
299  return idx ^ (dtype((elems_[idx] - q) & SIGN_BIT) >> (num_bits - 1));
300  }
301 };
302 
303 } // namespace bv
304 
305 #endif
bv::branchless_scan::set
void set(uint8_t index, dtype value)
Set the value at index to value.
Definition: branch_selection.hpp:67
bv::branchless_scan
Class providing support for cumulative sums.
Definition: branch_selection.hpp:27
bv::branchless_scan::elems_
dtype elems_[branches]
Definition: branch_selection.hpp:32
bv::branchless_scan::get
dtype get(uint8_t index) const
Get the cumulative sum at index.
Definition: branch_selection.hpp:54
bv::bv
simple_bv< 8, 16384, 64 > bv
Default dynamic bit vector type.
Definition: bv.hpp:94
bv::branchless_scan::insert
void insert(uint8_t index, uint8_t array_size, dtype a_value, dtype b_value)
Inserts a new value into the cumulative sums.
Definition: branch_selection.hpp:106
bv::branchless_scan::append
void append(uint8_t index, dtype value)
Adds single element to the end of this
Definition: branch_selection.hpp:194
bv::branchless_scan::append
void append(uint8_t n_elems, uint8_t array_size, branchless_scan *other)
Adds elements from other to the end of this
Definition: branch_selection.hpp:179
bv::branchless_scan::remove
void remove(uint8_t index, uint8_t array_size)
Removes a value from the cumulative sums.
Definition: branch_selection.hpp:127
bv::branchless_scan::branchless_scan
branchless_scan()
Definition: branch_selection.hpp:42
bv::branchless_scan::clear_first
void clear_first(uint8_t n, uint8_t array_size)
Removes the first n elements from the structure.
Definition: branch_selection.hpp:144
bv::branchless_scan::clear_last
void clear_last(uint8_t n, uint8_t array_size)
Removes the last n elements from the structure.
Definition: branch_selection.hpp:162
bv::branchless_scan::prepend
void prepend(uint8_t n_elems, uint8_t array_size, uint8_t o_size, branchless_scan *other)
Adds elements form other to the start of this
Definition: branch_selection.hpp:214
bv::branchless_scan::find
uint8_t find(dtype q) const
Find the lowest child index s.t. the cumulative sum at the index is at least q.
Definition: branch_selection.hpp:256
bv::branchless_scan::increment
void increment(uint8_t from, uint8_t array_size, T change)
Increments all values in range.
Definition: branch_selection.hpp:85