Bit vector  0.9
Fast and space efficient dynamic bit vector library.
leaf.hpp
1 #ifndef BV_LEAF_HPP
2 #define BV_LEAF_HPP
3 
4 #include <immintrin.h>
5 
6 #include <bitset>
7 #include <cassert>
8 #include <cstdint>
9 #include <cstring>
10 #include <iostream>
11 
12 #include "libpopcnt.h"
13 
14 namespace bv {
15 
16 #define WORD_BITS 64
17 
46 template <uint8_t buffer_size, bool avx = true>
47 class leaf {
48  protected:
49  uint8_t buffer_count_;
50  uint16_t capacity_;
51  uint32_t size_;
52  uint32_t p_sum_;
53 #pragma GCC diagnostic ignored "-Wpedantic"
54  uint32_t buffer_[buffer_size];
55 #pragma GCC diagnostic pop
56  uint64_t* data_;
57 
58  static const constexpr uint64_t MASK = 1;
60  static const constexpr uint32_t VALUE_MASK = 1;
62  static const constexpr uint32_t TYPE_MASK = 8;
64  static const constexpr uint32_t INDEX_MASK = ((uint32_t(1) << 8) - 1);
65 
66  // The buffer fill rate is stored in part of an 8-bit word
67  // where the rest is reserved for future meta-data use.
68  // This could be resolved with using 16 bits instead (without increasing the
69  // sizeof(leaf) value but there does not really appear to be any reason to
70  // do so.
71  static_assert(buffer_size < 64);
72 
73  public:
86  leaf(uint64_t capacity, uint64_t* data) {
87  buffer_count_ = 0;
89  size_ = 0;
90  p_sum_ = 0;
91  data_ = data;
92  }
93 
    /// @brief Get the value of the ith element in the leaf.
    ///
    /// Walks the (index-sorted) buffer to translate the logical position i
    /// into a physical position in data_ before reading the bit.
    ///
    /// @param i Logical index of the element to read.
    /// @return Value of the ith bit.
    bool at(const uint32_t i) const {
        if constexpr (buffer_size != 0) {
            uint64_t index = i;
            for (uint8_t idx = 0; idx < buffer_count_; idx++) {
                uint64_t b = buffer_index(buffer_[idx]);
                if (b == i) {
                    // A buffered insertion at exactly i holds the answer.
                    if (buffer_is_insertion(buffer_[idx])) {
                        [[unlikely]] return buffer_value(buffer_[idx]);
                    }
                    // Buffered removal at i: the stored bit is one further.
                    index++;
                } else if (b < i) {
                    // Earlier insertion shifts the physical index down by 1,
                    // earlier removal shifts it up by 1.
                    [[likely]] index -=
                        buffer_is_insertion(buffer_[idx]) * 2 - 1;
                }
            }
            return MASK & (data_[index / WORD_BITS] >> (index % WORD_BITS));
        }
        return MASK & (data_[i / WORD_BITS] >> (i % WORD_BITS));
    }
120 
    /// @brief Getter for p_sum_ (logical number of 1-bits stored).
    uint32_t p_sum() const { return p_sum_; }
    /// @brief Getter for size_ (logical number of bits stored).
    uint32_t size() const { return size_; }
125 
    /// @brief Insert x into position i.
    ///
    /// With a buffer, the operation is recorded in the sorted buffer (the
    /// buffer is committed when full). Without one, the trailing bits are
    /// shifted left by one in linear time.
    ///
    /// @param i Logical insertion position (0 <= i <= size()).
    /// @param x Value of the inserted bit.
    void insert(const uint64_t i, const bool x) {
#ifdef DEBUG
        if (size_ >= capacity_ * WORD_BITS) {
            std::cerr << "Overflow. Reallocate before adding elements"
                      << "\n";
            std::cerr << "Attempted to insert to " << i << std::endl;
            assert(size_ < capacity_ * WORD_BITS);
        }
#endif
        if (i == size_) {
            // Convert to append if applicable.
            push_back(x);
            [[unlikely]] return;
        }
        p_sum_ += x ? 1 : 0;
        if constexpr (buffer_size != 0) {
            // Scan the buffer from the back; buffered entries at or after i
            // refer to positions that move right by one due to this insert.
            uint8_t idx = buffer_count_;
            while (idx > 0) {
                uint64_t b = buffer_index(buffer_[idx - 1]);
                if (b > i ||
                    (b == i && buffer_is_insertion(buffer_[idx - 1]))) {
                    [[likely]] set_buffer_index(b + 1, idx - 1);
                } else {
                    break;
                }
                idx--;
            }
            size_++;
            if (idx == buffer_count_) {
                // New entry sorts last: append without shifting.
                buffer_[buffer_count_] = create_buffer(i, 1, x);
                [[likely]] buffer_count_++;
            } else {
                insert_buffer(idx, create_buffer(i, 1, x));
            }
            if (buffer_count_ >= buffer_size) [[unlikely]]
                commit();
        } else {
            // If there is no buffer, a simple linear time insertion is done
            // instead.
            size_++;
            auto target_word = i / WORD_BITS;
            auto target_offset = i % WORD_BITS;
            // Shift all words after the target left by one bit, carrying the
            // top bit of the previous word.
            for (size_t j = capacity_ - 1; j > target_word; j--) {
                data_[j] <<= 1;
                data_[j] |= (data_[j - 1] >> 63);
            }
            // Within the target word: keep bits below the offset, shift bits
            // at/above the offset up by one, then write the new bit.
            data_[target_word] =
                (data_[target_word] & ((MASK << target_offset) - 1)) |
                ((data_[target_word] & ~((MASK << target_offset) - 1)) << 1);
            data_[target_word] |= x ? (MASK << target_offset) : uint64_t(0);
        }
    }
192 
    /// @brief Remove the ith bit from the leaf.
    ///
    /// With a buffer, the removal either cancels a buffered insertion at i
    /// or is recorded as a removal entry. Without one, trailing bits are
    /// shifted right by one in linear time.
    ///
    /// @param i Logical index of the bit to remove.
    /// @return Value of the removed bit.
    bool remove(const uint64_t i) {
        if constexpr (buffer_size != 0) {
            bool x = this->at(i);
            p_sum_ -= x;
            --size_;
            // Scan the buffer from the back; entries after i shift left by
            // one due to this removal.
            uint8_t idx = buffer_count_;
            while (idx > 0) {
                uint64_t b = buffer_index(buffer_[idx - 1]);
                if (b == i) {
                    if (buffer_is_insertion(buffer_[idx - 1])) {
                        // Removing a bit that was only ever buffered: the
                        // insertion and removal cancel out.
                        delete_buffer_element(idx - 1);
                        return x;
                    } else {
                        [[likely]] break;
                    }
                } else if (b < i) {
                    break;
                } else {
                    [[likely]] set_buffer_index(b - 1, idx - 1);
                }
                idx--;
            }
            if (idx == buffer_count_) {
                buffer_[idx] = create_buffer(i, 0, x);
                buffer_count_++;
            } else {
                [[likely]] insert_buffer(idx, create_buffer(i, 0, x));
            }
            if (buffer_count_ >= buffer_size) [[unlikely]]
                commit();
            return x;
        } else {
            // If buffer does not exist, a simple linear time removal is done
            // instead.
            uint64_t target_word = i / WORD_BITS;
            uint64_t target_offset = i % WORD_BITS;
            bool x = MASK & (data_[target_word] >> target_offset);
            p_sum_ -= x;
            // Within the target word: keep bits below the offset, shift bits
            // above the removed bit down by one.
            data_[target_word] =
                (data_[target_word] & ((MASK << target_offset) - 1)) |
                ((data_[target_word] >> 1) & (~((MASK << target_offset) - 1)));
            // Pull the low bit of the next word (if any) into the top bit.
            data_[target_word] |= (uint32_t(capacity_ - 1) > target_word)
                                      ? (data_[target_word + 1] << 63)
                                      : 0;
            // Shift all later words right by one bit, carrying between words.
            for (size_t j = target_word + 1; j < uint32_t(capacity_ - 1); j++) {
                data_[j] >>= 1;
                data_[j] |= data_[j + 1] << 63;
            }
            data_[capacity_ - 1] >>=
                (uint64_t(capacity_ - 1) > target_word) ? 1 : 0;
            size_--;
            return x;
        }
    }
261 
    /// @brief Set the ith bit to x.
    ///
    /// @param i Logical index of the bit to set.
    /// @param x New value.
    /// @return Change in the number of 1-bits (+1, 0 or -1), so callers can
    ///         update cumulative sums.
    int set(const uint64_t i, const bool x) {
        uint64_t idx = i;
        if constexpr (buffer_size != 0) {
            // If buffer exists, the index needs for the underlying structure
            // needs to be modified. And if there exists an insertion to this
            // location in the buffer, the insertion can simply be amended.
            for (uint8_t j = 0; j < buffer_count_; j++) {
                uint64_t b = buffer_index(buffer_[j]);
                if (b < i) {
                    [[likely]] idx += buffer_is_insertion(buffer_[j]) ? -1 : 1;
                } else if (b == i) {
                    if (buffer_is_insertion(buffer_[j])) {
                        // Amend the buffered insertion in place.
                        if (buffer_value(buffer_[j]) != x) {
                            int change = x ? 1 : -1;
                            p_sum_ += change;
                            buffer_[j] ^= VALUE_MASK;
                            [[likely]] return change;
                        }
                        [[likely]] return 0;
                    }
                    // Buffered removal at i: the stored bit is one further.
                    idx++;
                } else {
                    break;
                }
            }
        }
        uint64_t word_nr = idx / WORD_BITS;
        uint64_t pos = idx % WORD_BITS;

        // Flip the stored bit only if it differs from the requested value.
        if ((data_[word_nr] & (MASK << pos)) != (uint64_t(x) << pos)) {
            int change = x ? 1 : -1;
            p_sum_ += change;
            data_[word_nr] ^= MASK << pos;
            [[likely]] return change;
        }
        return 0;
    }
311 
    /// @brief Number of 1-bits up to position n.
    ///
    /// Buffered operations are accounted for before popcounting the raw
    /// words; multi-word popcount uses libpopcnt when avx is enabled.
    ///
    /// @param n Number of leading elements to count 1-bits in.
    /// @return Number of 1-bits among the first n elements.
    uint32_t rank(const uint64_t n) const {
        uint32_t count = 0;

        uint64_t idx = n;
        if constexpr (buffer_size != 0) {
            for (uint8_t i = 0; i < buffer_count_; i++) {
                if (buffer_index(buffer_[i]) >= n) {
                    [[unlikely]] break;
                }
                // Location of the n<sup>th</sup> element needs to be amended
                // base on buffer contents.
                if (buffer_is_insertion(buffer_[i])) {
                    idx--;
                    count += buffer_value(buffer_[i]);
                } else {
                    idx++;
                    count -= buffer_value(buffer_[i]);
                }
            }
        }
        uint64_t target_word = idx / WORD_BITS;
        uint64_t target_offset = idx % WORD_BITS;
        if constexpr (avx) {
            if (target_word) {
                // libpopcnt takes a byte count, hence the * 8.
                count += pop::popcnt(data_, target_word * 8);
            }
        } else {
            for (size_t i = 0; i < target_word; i++) {
                count += __builtin_popcountll(data_[i]);
            }
        }
        if (target_offset != 0) {
            // Partial last word: mask off bits at and above the offset.
            count += __builtin_popcountll(data_[target_word] &
                                          ((MASK << target_offset) - 1));
        }
        return count;
    }
360 
    /// @brief Number of 1-bits up to position n from position offset.
    ///
    /// Both endpoints are translated through the buffer before popcounting
    /// the raw words in [offset, n).
    ///
    /// @param n      End of the counted range (exclusive).
    /// @param offset Start of the counted range (inclusive).
    /// @return Number of 1-bits in the logical range [offset, n).
    uint32_t rank(const uint64_t n, const uint64_t offset) const {
        uint32_t count = 0;
        uint64_t idx = n;
        uint64_t o_idx = offset;
        if constexpr (buffer_size != 0) {
            for (uint8_t i = 0; i < buffer_count_; i++) {
                uint64_t b = buffer_index(buffer_[i]);
                if (b >= n) {
                    [[unlikely]] break;
                }
                // Location of the n<sup>th</sup> element needs to be amended
                // base on buffer contents.
                if (buffer_is_insertion(buffer_[i])) {
                    if (b >= offset) {
                        // Inside the range: count the buffered value directly.
                        count += buffer_value(buffer_[i]);
                    } else {
                        // Before the range: only shifts the start position.
                        o_idx--;
                    }
                    idx--;
                } else {
                    if (b >= offset) {
                        count -= buffer_value(buffer_[i]);
                    } else {
                        o_idx++;
                    }
                    idx++;
                }
            }
        }
        uint64_t target_word = idx / WORD_BITS;
        uint64_t offset_word = o_idx / WORD_BITS;
        uint64_t target_offset = idx % WORD_BITS;
        uint64_t offset_offset = o_idx % WORD_BITS;
        if (target_word == offset_word) {
            // Both endpoints fall in the same word: mask out both sides.
            count += __builtin_popcountll(data_[offset_word] &
                                          ~((MASK << offset_offset) - 1) &
                                          ((MASK << target_offset) - 1));
            return count;
        }
        if (offset_offset != 0) {
            // Partial first word: drop bits below the start offset.
            count += __builtin_popcountll(data_[offset_word] &
                                          ~((MASK << offset_offset) - 1));
            offset_word++;
        }
        if constexpr (avx) {
            if (target_word - offset_word > 0) {
                // libpopcnt takes a byte count, hence the * 8.
                count += pop::popcnt(data_ + offset_word,
                                     (target_word - offset_word) * 8);
            }
        } else {
            for (size_t i = offset_word; i < target_word; i++) {
                count += __builtin_popcountll(data_[i]);
            }
        }
        if (target_offset != 0) {
            // Partial last word: keep only bits below the end offset.
            count += __builtin_popcountll(data_[target_word] &
                                          ((MASK << target_offset) - 1));
        }
        return count;
    }
433 
    /// @brief Index of the xth 1-bit in the data structure.
    ///
    /// Scans whole words (adjusting population and position for buffered
    /// edits as they are passed), then backs off bit by bit to the exact
    /// position.
    ///
    /// @param x Rank of the 1-bit to locate (1-based).
    /// @return Logical index of the xth 1-bit.
    uint32_t select(const uint32_t x) const {
        uint32_t pop = 0;
        uint32_t pos = 0;
        uint8_t current_buffer = 0;
        // Net physical displacement caused by buffer entries passed so far
        // (insertions shift raw data right, removals left).
        int8_t a_pos_offset = 0;

        // Step one 64-bit word at a time considering the buffer until pop >= x
        for (uint64_t j = 0; j < capacity_; j++) {
            pop += __builtin_popcountll(data_[j]);
            pos += 64;
            if constexpr (buffer_size != 0) {
                for (uint8_t b = current_buffer; b < buffer_count_; b++) {
                    uint64_t b_index = buffer_index(buffer_[b]);
                    if (b_index < pos) {
                        if (buffer_is_insertion(buffer_[b])) {
                            pop += buffer_value(buffer_[b]);
                            pos++;
                            a_pos_offset--;
                        } else {
                            // Removal: subtract the raw bit that the buffer
                            // logically deletes.
                            pop -=
                                (data_[(b_index + a_pos_offset) / WORD_BITS] >>
                                 ((b_index + a_pos_offset) % WORD_BITS)) &
                                MASK;
                            pos--;
                            a_pos_offset++;
                        }
                        [[unlikely]] current_buffer++;
                    } else {
                        [[likely]] break;
                    }
                    [[unlikely]] (void(0));
                }
            }
            if (pop >= x) [[unlikely]]
                break;
        }

        // Make sure we have not overshot the logical end of the structure.
        pos = size_ < pos ? size_ : pos;

        // Decrement one bit at a time until we can't anymore without going
        // under x. (pos is unsigned: wrapping past 0 also ends the loop via
        // the capacity_ * 64 bound.)
        pos--;
        while (pop >= x && pos < capacity_ * 64) {
            pop -= at(pos);
            pos--;
        }
        //std::cout << "simple select: " << pos + 1 << std::endl;
        return ++pos;
    }
494 
    /// @brief Index of the xth 1-bit in the data structure starting at pos
    /// with pop 1-bits already known to precede pos.
    ///
    /// Same scan as select(x) but resumes from a caller-supplied hint
    /// instead of the start of the leaf.
    ///
    /// @param x   Rank of the 1-bit to locate (1-based).
    /// @param pos Logical starting position of the scan.
    /// @param pop Number of 1-bits before pos.
    /// @return Logical index of the xth 1-bit.
    uint32_t select(const uint32_t x, uint32_t pos, uint32_t pop) const {
        //std::cout << "select(" << x << ", " << pos << ", " << pop << ")" << std::endl;
        //pos++;
        uint8_t current_buffer = 0;
        // Net physical displacement caused by buffer entries before pos.
        int8_t a_pos_offset = 0;
        // Scroll the buffer to the start position and calculate offset.
        if constexpr (buffer_size != 0) {
            while (current_buffer < buffer_count_) {
                uint32_t b_index = buffer_index(buffer_[current_buffer]);
                if (b_index < pos) {
                    if (buffer_is_insertion(buffer_[current_buffer])) {
                        a_pos_offset--;
                    } else {
                        a_pos_offset++;
                    }
                    [[unlikely]] current_buffer++;
                } else {
                    [[likely]] break;
                }
            }
        }

        // Step to the next 64-bit word boundary
        uint64_t pop_idx = (pos + a_pos_offset) / WORD_BITS;
        uint64_t offset = (pos + a_pos_offset) % WORD_BITS;
#ifdef DEBUG
        if (pop_idx >= capacity_) {
            std::cerr << "Invalid select query apparently\n"
                      << "x = " << x << ", pos = " << pos
                      << ", pop = " << pop
                      << "\npop_idx = " << pop_idx
                      << ", capacity_ = " << capacity_
                      << std::endl;
            assert(pop_idx < capacity_);
        }
#endif
        if (offset != 0) {
            // Count the remainder of the partially-consumed word, then apply
            // any buffer entries that fall inside it.
            pop += __builtin_popcountll(data_[pop_idx++] >> offset);
            pos += 64 - offset;
            if constexpr (buffer_size != 0) {
                for (uint8_t b = current_buffer; b < buffer_count_; b++) {
                    uint64_t b_index = buffer_index(buffer_[b]);
                    if (b_index < pos) {
                        if (buffer_is_insertion(buffer_[b])) {
                            pop += buffer_value(buffer_[b]);
                            pos++;
                            a_pos_offset--;
                        } else {
                            pop -=
                                (data_[(b_index + a_pos_offset) / WORD_BITS] >>
                                 ((b_index + a_pos_offset) % WORD_BITS)) &
                                MASK;
                            pos--;
                            a_pos_offset++;
                        }
                        [[unlikely]] current_buffer++;
                    } else {
                        [[likely]] break;
                    }
                    [[unlikely]] (void(0));
                }
            }
        }

        // Step one 64-bit word at a time considering the buffer until pop >= x
        for (uint64_t j = pop_idx; j < capacity_; j++) {
            pop += __builtin_popcountll(data_[j]);
            pos += 64;
            if constexpr (buffer_size != 0) {
                for (uint8_t b = current_buffer; b < buffer_count_; b++) {
                    uint64_t b_index = buffer_index(buffer_[b]);
                    if (b_index < pos) {
                        if (buffer_is_insertion(buffer_[b])) {
                            pop += buffer_value(buffer_[b]);
                            pos++;
                            a_pos_offset--;
                        } else {
                            pop -=
                                (data_[(b_index + a_pos_offset) / WORD_BITS] >>
                                 ((b_index + a_pos_offset) % WORD_BITS)) &
                                MASK;
                            pos--;
                            a_pos_offset++;
                        }
                        [[unlikely]] current_buffer++;
                    } else {
                        [[likely]] break;
                    }
                    [[unlikely]] (void(0));
                }
            }
            if (pop >= x) [[unlikely]]
                break;
        }

        // Make sure we have not overshot the logical end of the structure.
        pos = size_ < pos ? size_ : pos;

        // Decrement one bit at a time until we can't anymore without going
        // under x.
        pos--;
        while (pop >= x && pos < capacity_ * 64) {
            pop -= at(pos);
            pos--;
        }
        //std::cout << "block select: " << pos + 1 << std::endl;
        return ++pos;
    }
613 
    /// @brief Size of the leaf and associated data in bits.
    uint64_t bits_size() const {
        return 8 * (sizeof(*this) + capacity_ * sizeof(uint64_t));
    }

    /// @brief Determine if reallocation / splitting is required prior to
    /// additional insertion of new elements.
    bool need_realloc() const { return size_ >= capacity_ * WORD_BITS; }

    /// @brief Size of the leaf-associated data storage in 64-bit words.
    uint32_t capacity() const { return capacity_; }

    /// @brief Set the size of the leaf-associated data storage.
    /// @param cap New capacity in 64-bit words; the caller is responsible
    ///        for the actual (re)allocation of data.
    void capacity(uint32_t cap) { capacity_ = cap; }

    /// @brief Sets the pointer to the leaf-associated data storage.
    void set_data_ptr(uint64_t* ptr) { data_ = ptr; }

    /// @brief Provides access to raw leaf-associated data.
    uint64_t* data() { return data_; }
672 
    /// @brief Remove the first "elems" elements from the leaf.
    ///
    /// Assumes the buffer has been committed (operates on raw words only).
    ///
    /// @param elems Number of leading elements to drop.
    void clear_first(uint64_t elems) {
        // Important to ensure that all trailing data gets zeroed out to not
        // cause undefined behaviour for select or copy operations.
        uint64_t ones = rank(elems);
        uint64_t words = elems / WORD_BITS;

        if (elems % WORD_BITS == 0) {
            // If the data to remove happens to align with 64-bit words the
            // removal can be done by simply shuffling elements. This could be
            // memmove instead but testing indicates that it's not actually
            // faster in this case.
            for (uint64_t i = 0; i < capacity_ - words; i++) {
                data_[i] = data_[i + words];
            }
            for (uint64_t i = capacity_ - words; i < capacity_; i++) {
                data_[i] = 0;
            }
        } else {
            // clear elem bits at start of data
            for (uint64_t i = 0; i < words; i++) {
                data_[i] = 0;
            }
            uint64_t tail = elems % WORD_BITS;
            uint64_t tail_mask = (MASK << tail) - 1;
            data_[words] &= ~tail_mask;

            // Copy data backwards by elem bits
            uint64_t words_to_shuffle = capacity_ - words - 1;
            for (uint64_t i = 0; i < words_to_shuffle; i++) {
                data_[i] = data_[words + i] >> tail;
                data_[i] |= (data_[words + i + 1]) << (WORD_BITS - tail);
            }
            // Last source word has no successor to pull bits from.
            data_[capacity_ - words - 1] = data_[capacity_ - 1] >> tail;
            for (uint64_t i = capacity_ - words; i < capacity_; i++) {
                data_[i] = 0;
            }
            [[unlikely]] (void(0));
        }
        size_ -= elems;
        p_sum_ -= ones;
    }
724 
    /// @brief Move "elems" elements from the start of "other" to the end of
    /// "this".
    ///
    /// Both leaves are committed first so the copy works on raw words.
    /// The caller must ensure this leaf has room for the new elements.
    ///
    /// @param other Donor leaf (loses its first elems elements).
    /// @param elems Number of elements to transfer.
    void transfer_append(leaf* other, uint64_t elems) {
        commit();
        other->commit();

        uint64_t* o_data = other->data();
        uint64_t split_point = size_ % WORD_BITS;
        uint64_t target_word = size_ / WORD_BITS;
        uint64_t copy_words = elems / WORD_BITS;
        uint64_t overhang = elems % WORD_BITS;
        // Branching depending on word alignment of source and target.
        if (split_point == 0) {
            // Copy target is word aligned
            for (size_t i = 0; i < copy_words; i++) {
                data_[target_word++] = o_data[i];
                p_sum_ += __builtin_popcountll(o_data[i]);
            }
            if (overhang != 0) {
                // Copy source is not word aligned
                data_[target_word] =
                    o_data[copy_words] & ((MASK << overhang) - 1);
                [[unlikely]] p_sum_ += __builtin_popcountll(data_[target_word]);
            }
        } else {
            // Copy target is not word aligned: each source word straddles
            // two target words.
            for (size_t i = 0; i < copy_words; i++) {
                data_[target_word++] |= o_data[i] << split_point;
                data_[target_word] |= o_data[i] >> (WORD_BITS - split_point);
                p_sum_ += __builtin_popcountll(o_data[i]);
            }
            if (elems % WORD_BITS != 0) {
                // Copy source is not word aligned.
                uint64_t to_write =
                    o_data[copy_words] & ((MASK << overhang) - 1);
                p_sum_ += __builtin_popcountll(to_write);
                data_[target_word++] |= to_write << split_point;
                if (target_word < capacity_) {
                    data_[target_word] |= to_write >> (WORD_BITS - split_point);
                }
                [[likely]] ((void)0);
            }
        }
        size_ += elems;
        other->clear_first(elems);
    }
782 
    /// @brief Remove the last "elems" elements from the leaf.
    ///
    /// @param elems Number of trailing elements to drop.
    void clear_last(uint64_t elems) {
        size_ -= elems;
        // Recompute the 1-bit count for the shortened prefix.
        p_sum_ = rank(size_);
        uint64_t offset = size_ % 64;
        uint64_t words = size_ / 64;
        // Important to ensure that all trailing data gets zeroed out to not
        // cause undefined behaviour for select or copy operations.
        if (offset != 0) {
            [[likely]] data_[words++] &= (MASK << offset) - 1;
        }
        for (uint64_t i = words; i < capacity_; i++) {
            data_[i] = 0;
        }
    }
807 
    /// @brief Move "elems" elements from the end of "other" to the start of
    /// "this".
    ///
    /// Both leaves are committed first so the copy works on raw words.
    /// The caller must ensure this leaf has room for the new elements.
    ///
    /// @param other Donor leaf (loses its last elems elements).
    /// @param elems Number of elements to transfer.
    void transfer_prepend(leaf* other, uint64_t elems) {
        commit();
        other->commit();
        uint64_t* o_data = other->data();
        uint64_t words = elems / 64;
        // Make space for new data
        for (uint64_t i = capacity_ - 1; i >= words; i--) {
            data_[i] = data_[i - words];
            // i is unsigned; break before the decrement would wrap.
            if (i == 0) break;
        }
        for (uint64_t i = 0; i < words; i++) {
            data_[i] = 0;
        }
        uint64_t overflow = elems % 64;
        if (overflow > 0) {
            // Shift existing data left by the sub-word remainder as well.
            for (uint64_t i = capacity_ - 1; i > words; i--) {
                data_[i] <<= overflow;
                data_[i] |= data_[i - 1] >> (64 - overflow);
            }
            data_[words] <<= overflow;
            [[likely]] (void(0));
        }
        // Copy over data from sibling
        uint64_t source_word = other->size();
        uint64_t source_offset = source_word % 64;
        source_word /= 64;
        // Somewhat complicated case handling based on target and source word
        // alignment. Additional complications compared to transfer append is
        // due to the both the start point and end point for the copy source not
        // being word aligned.
        if (source_offset == 0) {
            // Source is word aligned.
            if (overflow == 0) {
                // Target is word aligned.
                // Note: loop condition relies on unsigned wrap of i past 0.
                for (uint64_t i = words - 1; i < words; i--) {
                    data_[i] = o_data[--source_word];
                    p_sum_ += __builtin_popcountll(data_[i]);
                }
            } else {
                // Target is not word aligned.
                source_word--;
                for (uint64_t i = words; i > 0; i--) {
                    p_sum_ += __builtin_popcountll(o_data[source_word]);
                    data_[i] |= o_data[source_word] >> (64 - overflow);
                    data_[i - 1] |= o_data[source_word--] << overflow;
                }
                uint64_t w = o_data[source_word] >> (64 - overflow);
                p_sum_ += __builtin_popcountll(w);
                [[likely]] data_[0] |= w;
            }
            [[unlikely]] (void(0));
        } else {
            // Source is not word aligned
            if (overflow == 0) {
                // Target is word aligned
                // Note: loop condition relies on unsigned wrap of i past 0.
                for (uint64_t i = words - 1; i < words; i--) {
                    // Reassemble a full word from two misaligned source words.
                    data_[i] = o_data[source_word] << (64 - source_offset);
                    data_[i] |= o_data[--source_word] >> source_offset;
                    p_sum_ += __builtin_popcountll(data_[i]);
                }
            } else {
                // Target is not word aligned
                uint64_t w;
                for (uint64_t i = words; i > 0; i--) {
                    w = o_data[source_word] << (64 - source_offset);
                    w |= o_data[--source_word] >> source_offset;
                    p_sum_ += __builtin_popcountll(w);
                    data_[i] |= w >> (64 - overflow);
                    data_[i - 1] |= w << overflow;
                }
                w = o_data[source_word] << (64 - source_offset);
                w |= o_data[--source_word] >> source_offset;
                w >>= 64 - overflow;
                p_sum_ += __builtin_popcountll(w);
                [[likely]] data_[0] |= w;
            }
        }
        size_ += elems;
        other->clear_last(elems);
    }
901 
    /// @brief Copy all elements from (the start of) "other" to the end of
    /// "this".
    ///
    /// Both leaves are committed first. The caller must ensure this leaf
    /// has the capacity for the combined contents.
    ///
    /// @param other Leaf whose entire contents are appended.
    void append_all(leaf* other) {
        commit();
        other->commit();
        uint64_t* o_data = other->data();
        uint64_t offset = size_ % 64;
        uint64_t word = size_ / 64;
        uint64_t o_size = other->size();
        uint64_t o_p_sum = other->p_sum();
        // Number of (possibly partial) source words to copy.
        uint64_t o_words = o_size / 64;
        o_words += o_size % 64 == 0 ? 0 : 1;
        // Elements are guaranteed to be fully word aligned in practice so
        // branching is only required for word alignment of the target leaf.
        if (offset == 0) {
            // Target is word aligned.
            for (uint64_t i = 0; i < o_words; i++) {
                data_[word++] = o_data[i];
            }
            [[unlikely]] (void(0));
        } else {
            // Target is not word aligned.
            for (uint64_t i = 0; i < o_words; i++) {
                data_[word++] |= o_data[i] << offset;
                data_[word] |= o_data[i] >> (64 - offset);
            }
        }
        size_ += o_size;
        p_sum_ += o_p_sum;
    }
945 
    /// @brief Commit and clear the Insert/Remove buffer for the leaf.
    ///
    /// Applies all buffered insertions/removals to the raw words in a single
    /// left-to-right pass. "overflow" carries bits pushed out of a word by
    /// pending insertions; "underflow" carries bits pulled in from following
    /// words by pending removals. The buffer is sorted by index, so each
    /// entry is consumed exactly once when its word is reached.
    void commit() {
        // Complicated bit manipulation but whacha gonna do. Hopefully won't
        // need to debug this anymore.
        if constexpr (buffer_size == 0) return;
        if (buffer_count_ == 0) [[unlikely]]
            return;

        uint64_t overflow = 0;
        uint8_t overflow_length = 0;
        uint8_t underflow_length = 0;
        uint8_t current_index = 0;
        uint32_t buf = buffer_[current_index];
        uint64_t target_word = buffer_index(buf) / WORD_BITS;
        uint64_t target_offset = buffer_index(buf) % WORD_BITS;

        // Number of words holding live data (rounded up).
        uint64_t words = size_ / WORD_BITS;
        words += size_ % WORD_BITS > 0 ? 1 : 0;
        for (uint64_t current_word = 0; current_word < words; current_word++) {
            // Pre-load up to 64 bits that could be pulled back into this
            // word by pending removals.
            uint64_t underflow =
                current_word + 1 < capacity_ ? data_[current_word + 1] : 0;
            if (overflow_length) {
                [[likely]] underflow =
                    (underflow << overflow_length) |
                    (data_[current_word] >> (64 - overflow_length));
            }

            uint64_t new_overflow = 0;
            // If buffers need to be commit to this word:
            if (current_word == target_word && current_index < buffer_count_) {
                // Current word contents, shifted to account for the net
                // insert/remove displacement accumulated so far.
                uint64_t word =
                    underflow_length
                        ? (data_[current_word] >> underflow_length) |
                              (underflow << (64 - underflow_length))
                        : (data_[current_word] << overflow_length) | overflow;
                underflow >>= underflow_length;
                uint64_t new_word = 0;
                uint8_t start_offset = 0;
                // While there are buffers for this word
                while (current_word == target_word) {
                    // Keep bits below the edit position as-is.
                    new_word |=
                        (word << start_offset) & ((MASK << target_offset) - 1);
                    // Advance the working word to the edit position, topping
                    // up from the underflow stream.
                    word = (word >> (target_offset - start_offset)) |
                           (target_offset == 0 ? 0
                            : target_offset - start_offset == 0
                                ? 0
                                : (underflow
                                   << (64 - (target_offset - start_offset))));
                    underflow >>= target_offset - start_offset;
                    if (buffer_is_insertion(buf)) {
                        if (buffer_value(buf)) {
                            new_word |= MASK << target_offset;
                        }
                        start_offset = target_offset + 1;
                        // An insertion cancels one pending removal, or adds
                        // one bit of overflow.
                        if (underflow_length) [[unlikely]]
                            underflow_length--;
                        else
                            overflow_length++;
                    } else {
                        // Removal: drop the bit at the edit position.
                        word >>= 1;
                        word |= underflow << 63;
                        underflow >>= 1;
                        if (overflow_length)
                            overflow_length--;
                        else [[likely]]
                            underflow_length++;
                        start_offset = target_offset;
                    }
                    current_index++;
                    if (current_index >= buffer_count_) [[unlikely]]
                        break;
                    buf = buffer_[current_index];
                    target_word = buffer_index(buf) / WORD_BITS;
                    [[unlikely]] target_offset = buffer_index(buf) % WORD_BITS;
                }
                // Flush the remainder of the working word above the last edit.
                new_word |=
                    start_offset < 64 ? (word << start_offset) : uint64_t(0);
                new_overflow = overflow_length ? data_[current_word] >>
                                                     (64 - overflow_length)
                                               : 0;
                [[unlikely]] data_[current_word] = new_word;
            } else {
                // No edits in this word: just propagate the running shift.
                if (underflow_length) {
                    data_[current_word] =
                        (data_[current_word] >> underflow_length) |
                        (underflow << (64 - underflow_length));
                } else if (overflow_length) {
                    new_overflow =
                        data_[current_word] >> (64 - overflow_length);
                    [[likely]] data_[current_word] =
                        (data_[current_word] << overflow_length) | overflow;
                } else {
                    overflow = 0;
                }
            }
            overflow = new_overflow;
        }
        // Zero the word past the logical end so trailing garbage cannot leak
        // into rank/select/copy operations.
        if (capacity_ > words) [[likely]]
            data_[words] = 0;
        buffer_count_ = 0;
    }
1055 
    /// @brief Ensure that the leaf is in a valid state (debug aid).
    ///
    /// Checks size/capacity/p_sum invariants and that every bit past the
    /// logical end of the data is zero.
    ///
    /// @return 1 (asserts fire on any violation).
    uint64_t validate() const {
        assert(size_ <= capacity_ * WORD_BITS);
        assert(p_sum_ <= size_);
        assert(p_sum_ == rank(size_));
        uint64_t last_word = size_ / WORD_BITS;
        uint64_t overflow = size_ % WORD_BITS;
        if (overflow != 0) {
            // Bits at or above the logical end in the partial word must be 0.
            for (uint64_t i = overflow; i < WORD_BITS; i++) {
                uint64_t val = (MASK << i) & data_[last_word];
                assert(val == 0);
            }
            last_word++;
        }
        // All fully-unused words must be 0.
        for (uint64_t i = last_word; i < capacity_; i++) {
            assert(data_[i] == 0);
        }
        return 1;
    }
1087 
1096  void print(bool internal_only) const {
1097  std::cout << "{\n\"type\": \"leaf\",\n"
1098  << "\"size\": " << size_ << ",\n"
1099  << "\"capacity\": " << capacity_ << ",\n"
1100  << "\"p_sum\": " << p_sum_ << ",\n"
1101  << "\"buffer_size\": " << int(buffer_size) << ",\n"
1102  << "\"buffer\": [\n";
1103  for (uint8_t i = 0; i < buffer_count_; i++) {
1104 #pragma GCC diagnostic ignored "-Warray-bounds"
1105  std::cout << "{\"is_insertion\": "
1106  << buffer_is_insertion(buffer_[i]) << ", "
1107  << "\"buffer_value\": " << buffer_value(buffer_[i])
1108  << ", "
1109  << "\"buffer_index\": " << buffer_index(buffer_[i])
1110  << "}";
1111 #pragma GCC diagnostic pop
1112  if (i != buffer_count_ - 1) {
1113  std::cout << ",\n";
1114  }
1115  }
1116  if (!internal_only) {
1117  std::cout << "]\n\"data\": [\n";
1118  for (uint64_t i = 0; i < capacity_; i++) {
1119  std::bitset<64> b(data_[i]);
1120  std::cout << "\"" << b << "\"";
1121  if (i != uint64_t(capacity_ - 1)) {
1122  std::cout << ",\n";
1123  }
1124  }
1125  std::cout << "]}";
1126  } else {
1127  std::cout << "]}";
1128  }
1129  }
1130 
   protected:
    /// @brief Extract the value of a buffer element (lowest bit).
    inline bool buffer_value(uint32_t e) const { return (e & VALUE_MASK) != 0; }

    /// @brief Extract type information of a buffer element: true for an
    /// insertion, false for a removal.
    inline bool buffer_is_insertion(uint32_t e) const {
        return (e & TYPE_MASK) != 0;
    }

    /// @brief Extract index information from a buffer element (bits 8..31).
    inline uint32_t buffer_index(uint32_t e) const { return (e) >> 8; }

    /// @brief Updates index information for a specified buffer element,
    /// preserving the low metadata bits (value and type).
    ///
    /// @param v New element index.
    /// @param i Position of the element in buffer_.
    void set_buffer_index(uint32_t v, uint8_t i) {
        buffer_[i] = (v << 8) | (buffer_[i] & INDEX_MASK);
    }

    /// @brief Creates a new 32-bit buffer element with the given parameters.
    ///
    /// @param idx Element index (stored in the upper 24 bits).
    /// @param t   Type: non-zero for insertion, zero for removal.
    /// @param v   Value of the affected bit.
    uint32_t create_buffer(uint32_t idx, bool t, bool v) {
        return ((idx << 8) | (t ? TYPE_MASK : uint32_t(0))) |
               (v ? VALUE_MASK : uint32_t(0));
    }

    /// @brief Insert a new element into the buffer at position idx, shifting
    /// later elements one slot back.
    void insert_buffer(uint8_t idx, uint32_t buf) {
        memmove(buffer_ + idx + 1, buffer_ + idx,
                (buffer_count_ - idx) * sizeof(uint32_t));
        buffer_[idx] = buf;
        buffer_count_++;
    }

    /// @brief Remove an element from the buffer, shifting later elements one
    /// slot forward and zeroing the vacated slot.
    void delete_buffer_element(uint8_t idx) {
        buffer_count_--;
        memmove(buffer_ + idx, buffer_ + idx + 1,
                (buffer_count_ - idx) * sizeof(uint32_t));
        buffer_[buffer_count_] = 0;
    }
1232 
    /// @brief Add an element to the end of the leaf data.
    ///
    /// @param x Value of the appended bit.
    void push_back(const bool x) {
        assert(size_ < capacity_ * WORD_BITS);
        // Physical position of the append target: logical size adjusted by
        // the net displacement of buffered insertions/removals.
        auto pb_size = size_;
        if constexpr (buffer_size != 0) {
            for (uint8_t i = 0; i < buffer_count_; i++) {
                pb_size += buffer_is_insertion(buffer_[i]) ? -1 : 1;
            }
        }

        // If the leaf has at some point been full and has subsequently shrunk
        // due to removals, the next available position to write to without
        // committing the buffer may be beyond the end of the data_ array. In
        // this case the buffer will need to be committed before appending the
        // new element.
        if (pb_size >= capacity_ * WORD_BITS) {
            commit();
            [[unlikely]] data_[size_ / WORD_BITS] |= uint64_t(x)
                                                     << (size_ % WORD_BITS);
        } else {
            data_[pb_size / WORD_BITS] |= uint64_t(x) << (pb_size % WORD_BITS);
        }

        size_++;
        p_sum_ += uint64_t(x);
    }
1268 };
1269 } // namespace bv
1270 #endif
bv::leaf::MASK
static const constexpr uint64_t MASK
0x1 to be used in bit operations.
Definition: leaf.hpp:58
bv::leaf::set
int set(const uint64_t i, const bool x)
Set the ith bit to x.
Definition: leaf.hpp:274
bv::leaf::clear_last
void clear_last(uint64_t elems)
Remove the last "elems" elements from the leaf.
Definition: leaf.hpp:793
bv::leaf::validate
uint64_t validate() const
Ensure that the leaf is in a valid state.
Definition: leaf.hpp:1069
bv::leaf::buffer_
uint32_t buffer_[buffer_size]
Insert/remove buffer.
Definition: leaf.hpp:54
bv::leaf::buffer_value
bool buffer_value(uint32_t e) const
Extract the value of a buffer element.
Definition: leaf.hpp:1146
bv::leaf::p_sum_
uint32_t p_sum_
Logical number of 1-bits stored.
Definition: leaf.hpp:52
bv::leaf::select
uint32_t select(const uint32_t x, uint32_t pos, uint32_t pop) const
Index of the xth 1-bit in the data structure starting at pos with pop
Definition: leaf.hpp:505
bv::leaf::transfer_append
void transfer_append(leaf *other, uint64_t elems)
Move "elems" elements from the start of "other" to the end of "this".
Definition: leaf.hpp:738
bv::leaf::at
bool at(const uint32_t i) const
Get the value of the ith element in the leaf.
Definition: leaf.hpp:101
bv::leaf::delete_buffer_element
void delete_buffer_element(uint8_t idx)
Remove an element from the buffer.
Definition: leaf.hpp:1226
bv::leaf::p_sum
uint32_t p_sum() const
Getter for p_sum_.
Definition: leaf.hpp:122
bv::leaf::rank
uint32_t rank(const uint64_t n, const uint64_t offset) const
Number of 1-bits up to position n from position offset.
Definition: leaf.hpp:373
bv::leaf::push_back
void push_back(const bool x)
Add an element to the end of the leaf data.
Definition: leaf.hpp:1243
bv::leaf::set_buffer_index
void set_buffer_index(uint32_t v, uint8_t i)
Updates index information for a specified buffer element.
Definition: leaf.hpp:1184
bv::leaf::leaf
leaf(uint64_t capacity, uint64_t *data)
Leaf constructor.
Definition: leaf.hpp:86
bv::leaf::data
uint64_t * data()
Provides access to raw leaf-associated data.
Definition: leaf.hpp:671
bv::leaf::print
void print(bool internal_only) const
Output data stored in the leaf as json.
Definition: leaf.hpp:1096
bv::leaf::VALUE_MASK
static const constexpr uint32_t VALUE_MASK
Mask for accessing buffer value.
Definition: leaf.hpp:60
bv::bv
simple_bv< 8, 16384, 64 > bv
Default dynamic bit vector type.
Definition: bv.hpp:94
bv::leaf::buffer_count_
uint8_t buffer_count_
Number of elements in insert/remove buffer.
Definition: leaf.hpp:49
bv::leaf::buffer_is_insertion
bool buffer_is_insertion(uint32_t e) const
Extract type information of a buffer element.
Definition: leaf.hpp:1159
bv::leaf::INDEX_MASK
static const constexpr uint32_t INDEX_MASK
Mask for accessing buffer index value.
Definition: leaf.hpp:64
bv::leaf::capacity_
uint16_t capacity_
Number of 64-bit integers available in data.
Definition: leaf.hpp:50
bv::leaf::capacity
uint32_t capacity() const
Size of the leaf-associated data storage in 64-bit words.
Definition: leaf.hpp:637
bv::leaf::clear_first
void clear_first(uint64_t elems)
Remove the first "elems" elements from the leaf.
Definition: leaf.hpp:683
bv::leaf
Simple flat dynamic bit vector for use as a leaf for a dynamic b-tree bit vector.
Definition: leaf.hpp:47
bv::leaf::data_
uint64_t * data_
Pointer to data storage.
Definition: leaf.hpp:56
bv::leaf::capacity
void capacity(uint32_t cap)
Set the size of the leaf-associated data storage.
Definition: leaf.hpp:648
bv::leaf::set_data_ptr
void set_data_ptr(uint64_t *ptr)
Sets the pointer to the leaf-associated data storage.
Definition: leaf.hpp:659
bv::leaf::buffer_index
uint32_t buffer_index(uint32_t e) const
Extract index information from a buffer element.
Definition: leaf.hpp:1173
bv::leaf::create_buffer
uint32_t create_buffer(uint32_t idx, bool t, bool v)
Creates a new 32-bit buffer element with the given parameters.
Definition: leaf.hpp:1197
bv::leaf::insert
void insert(const uint64_t i, const bool x)
Insert x into position i.
Definition: leaf.hpp:140
bv::leaf::remove
bool remove(const uint64_t i)
Remove the ith bit from the leaf.
Definition: leaf.hpp:207
bv::leaf::insert_buffer
void insert_buffer(uint8_t idx, uint32_t buf)
Insert a new element into the buffer.
Definition: leaf.hpp:1211
bv::leaf::size
uint32_t size() const
Getter for size_.
Definition: leaf.hpp:124
bv::leaf::transfer_prepend
void transfer_prepend(leaf *other, uint64_t elems)
Move "elems" elements from the end of "other" to the start of "this".
Definition: leaf.hpp:821
bv::leaf::need_realloc
bool need_realloc() const
Determine if reallocation / splitting is required prior to additional insertion of new elements.
Definition: leaf.hpp:628
bv::leaf::bits_size
uint64_t bits_size() const
Size of the leaf and associated data in bits.
Definition: leaf.hpp:617
bv::leaf::commit
void commit()
Commit and clear the Insert/Remove buffer for the leaf.
Definition: leaf.hpp:955
bv::leaf::rank
uint32_t rank(const uint64_t n) const
Number of 1-bits up to position n.
Definition: leaf.hpp:323
bv::leaf::size_
uint32_t size_
Logical number of bits stored.
Definition: leaf.hpp:51
bv::leaf::select
uint32_t select(const uint32_t x) const
Index of the xth 1-bit in the data structure.
Definition: leaf.hpp:444
bv::leaf::TYPE_MASK
static const constexpr uint32_t TYPE_MASK
Mask for accessing buffer type.
Definition: leaf.hpp:62
bv::leaf::append_all
void append_all(leaf *other)
Copy all elements from (the start of) "other" to the end of "this".
Definition: leaf.hpp:917