Bit vector
0.9
Fast and space efficient dynamic bit vector library.
|
13 #include "branch_selection.hpp"
64 template <
class leaf_type,
class dtype, u
int64_t leaf_size, u
int8_t branches>
87 static_assert(leaf_size >= 256,
"leaf size needs to be a at least 256");
88 static_assert((leaf_size % 128) == 0,
89 "leaf size needs to be divisible by 128");
90 static_assert(leaf_size < 0xffffff,
"leaf size must fit in 24 bits");
91 static_assert((branches == 8) || (branches == 16) || (branches == 32) ||
92 (branches == 64) || (branches == 128),
93 "branching factor needs to be a reasonable power of 2");
107 void generate_query_structure(qds* qs) {
117 children[i]->generate_query_structure(qs);
150 bool at(dtype index)
const {
154 [[unlikely]]
return reinterpret_cast<leaf_type*
>(
174 int set(dtype index,
bool v) {
180 reinterpret_cast<leaf_type*
>(
children_[child_index]);
181 [[unlikely]] change =
child->set(index, v);
184 change =
child->set(index, v);
202 dtype
rank(dtype index)
const {
205 if (child_index != 0) {
211 reinterpret_cast<leaf_type*
>(
children_[child_index]);
212 [[unlikely]]
return res +
child->rank(index);
215 return res +
child->rank(index);
233 if (child_index != 0) {
239 reinterpret_cast<leaf_type*
>(
children_[child_index]);
240 [[unlikely]]
return res +
child->select(count);
243 return res +
child->select(count);
257 template <
class allocator>
263 alloc->deallocate_leaf(l);
270 alloc->deallocate_node(n);
336 template <
class child>
368 template <
class allocator>
369 void insert(dtype index,
bool value, allocator* alloc) {
388 template <
class allocator>
389 bool remove(dtype index, allocator* alloc) {
426 void** o_children = other->
children();
430 for (uint8_t i = 0; i < elems; i++) {
467 void** o_children = other->
children();
472 for (uint8_t i = 0; i < elems; i++) {
473 children_[i] = o_children[o_size - elems + i];
491 void** o_children = other->
children();
495 for (uint8_t i = 0; i < o_size; i++) {
512 uint64_t ret =
sizeof(
node) * 8;
515 reinterpret_cast<leaf_type* const*
>(
children_);
565 uint64_t child_s_sum = 0;
566 uint64_t child_p_sum = 0;
569 reinterpret_cast<leaf_type* const*
>(
children_);
571 uint64_t child_size =
children[i]->size();
572 assert(child_size >= leaf_size / 3);
573 child_s_sum += child_size;
575 child_p_sum +=
children[i]->p_sum();
577 assert(
children[i]->capacity() * WORD_BITS <= leaf_size);
584 uint64_t child_size =
children[i]->size();
585 assert(child_size >= branches / 3);
586 child_s_sum += child_size;
588 child_p_sum +=
children[i]->p_sum();
606 void print(
bool internal_only)
const {
607 std::cout <<
"{\n\"type\": \"node\",\n"
608 <<
"\"has_leaves\": " <<
has_leaves() <<
",\n"
610 <<
"\"size\": " <<
size() <<
",\n"
611 <<
"\"child_sizes\": [";
612 for (uint8_t i = 0; i < branches; i++) {
614 if (i != branches - 1) {
619 <<
"\"p_sum\": " <<
p_sum() <<
",\n"
620 <<
"\"child_sums\": [";
621 for (uint8_t i = 0; i < branches; i++) {
623 if (i != branches - 1) {
628 <<
"\"children\": [\n";
631 reinterpret_cast<leaf_type* const*
>(
children_);
671 template <
class allocator>
679 [[likely]] l_cap = leaf_size - l_cap;
687 [[likely]] r_cap = leaf_size - r_cap;
689 if (l_cap < 2 * leaf_size / 9 && r_cap < 2 * leaf_size / 9) {
694 leaf_type* new_child;
699 dtype n_cap = (n_elem + 2 * WORD_BITS) / WORD_BITS;
701 n_cap = n_cap * WORD_BITS > leaf_size ? leaf_size / WORD_BITS
703 a_child =
reinterpret_cast<leaf_type*
>(
children_[0]);
704 new_child = alloc->template allocate_leaf<leaf_type>(n_cap);
705 b_child =
reinterpret_cast<leaf_type*
>(
children_[1]);
706 new_child->transfer_append(b_child, b_child->size() - n_elem);
707 new_child->transfer_prepend(a_child, a_child->size() - n_elem);
708 [[unlikely]] index++;
712 a_child =
reinterpret_cast<leaf_type*
>(
children_[index - 1]);
713 b_child =
reinterpret_cast<leaf_type*
>(
children_[index]);
714 dtype n_elem = (a_child->size() + b_child->size()) / 3;
715 dtype n_cap = (n_elem + 2 * WORD_BITS) / WORD_BITS;
717 n_cap = n_cap * WORD_BITS > leaf_size ? leaf_size / WORD_BITS
719 new_child = alloc->template allocate_leaf<leaf_type>(n_cap);
720 new_child->transfer_append(b_child, b_child->size() - n_elem);
721 new_child->transfer_prepend(a_child, a_child->size() - n_elem);
733 }
else if (r_cap > l_cap) {
737 reinterpret_cast<leaf_type*
>(
children_[index + 1]);
738 uint32_t n_size = sibling->size() + r_cap / 2;
739 if (sibling->capacity() * WORD_BITS < n_size) {
740 n_size = n_size / WORD_BITS + 1;
741 n_size += n_size % 2;
742 n_size = n_size * WORD_BITS <= leaf_size
744 : leaf_size / WORD_BITS;
745 children_[index + 1] = alloc->reallocate_leaf(
746 sibling, sibling->capacity(), n_size);
747 sibling =
reinterpret_cast<leaf_type*
>(
children_[index + 1]);
749 sibling->transfer_prepend(
leaf, r_cap / 2);
760 reinterpret_cast<leaf_type*
>(
children_[index - 1]);
761 uint32_t n_size = sibling->size() + l_cap / 2;
762 if (sibling->capacity() * WORD_BITS < n_size) {
763 n_size = n_size / WORD_BITS + 1;
764 n_size += n_size % 2;
765 n_size = n_size * WORD_BITS <= leaf_size
767 : leaf_size / WORD_BITS;
768 children_[index - 1] = alloc->reallocate_leaf(
769 sibling, sibling->capacity(), n_size);
770 sibling =
reinterpret_cast<leaf_type*
>(
children_[index - 1]);
772 sibling->transfer_append(
leaf, l_cap / 2);
780 [[likely]] (void(0));
795 template <
class allocator>
798 leaf_type*
child =
reinterpret_cast<leaf_type*
>(
children_[child_index]);
800 if (
child->size() > leaf_size) {
801 std::cerr <<
child->size() <<
" > " << leaf_size <<
"\n";
802 std::cerr <<
"invalid leaf size" << std::endl;
803 assert(
child->size() <= leaf_size);
806 if (
child->need_realloc()) {
807 if (
child->size() >= leaf_size) {
810 dtype cap =
child->capacity();
812 alloc->reallocate_leaf(
child, cap, cap + 2);
816 if (child_index != 0) {
821 child->insert(index, value);
841 template <
class allocator>
859 if (l_cap <= 1 && r_cap <= 1) {
864 [[unlikely]] index++;
869 node* new_child = alloc->template allocate_node<node>();
883 }
else if (l_cap > r_cap) {
918 template <
class allocator>
924 std::cout << int(child_index) <<
" >= " << int(
child_count_)
929 if (
child->child_count() == branches) {
935 if (child_index != 0) {
940 child->insert(index, value, alloc);
958 template <
class allocator>
960 dtype a_cap = a->capacity();
961 dtype addition = (b->size() - leaf_size / 3) / 2;
962 if (a_cap * WORD_BITS < a->
size() + addition) {
963 dtype n_cap = 1 + (a->size() + addition) / WORD_BITS;
966 n_cap * WORD_BITS <= leaf_size ? n_cap : leaf_size / WORD_BITS;
967 a = alloc->reallocate_leaf(a, a_cap, n_cap);
970 a->transfer_append(b, addition);
988 template <
class allocator>
991 dtype b_cap = b->capacity();
992 dtype addition = (a->size() - leaf_size / 3) / 2;
993 if (b_cap * WORD_BITS < b->
size() + addition) {
994 dtype n_cap = 1 + (b->size() + addition) / WORD_BITS;
997 n_cap * WORD_BITS <= leaf_size ? n_cap : leaf_size / WORD_BITS;
998 b = alloc->reallocate_leaf(b, b_cap, n_cap);
1001 b->transfer_prepend(a, addition);
1025 template <
class allocator>
1028 dtype a_cap = a->capacity();
1029 if (a_cap * WORD_BITS < a->
size() + b->size()) {
1030 dtype n_cap = 1 + (a->size() + b->size()) / WORD_BITS;
1033 n_cap * WORD_BITS <= leaf_size ? n_cap : leaf_size / WORD_BITS;
1034 a = alloc->reallocate_leaf(a, a_cap, n_cap);
1037 alloc->deallocate_leaf(b);
1060 template <
class allocator>
1063 leaf_type*
child =
reinterpret_cast<leaf_type*
>(
children_[child_index]);
1064 if (
child->size() <= leaf_size / 3) {
1065 if (child_index == 0) {
1066 leaf_type* sibling =
reinterpret_cast<leaf_type*
>(
children_[1]);
1067 if (sibling->size() > leaf_size * 5 / 9) {
1072 [[unlikely]] ((void)0);
1074 leaf_type* sibling =
1075 reinterpret_cast<leaf_type*
>(
children_[child_index - 1]);
1076 if (sibling->size() > leaf_size * 5 / 9) {
1084 [[unlikely]]
child =
1085 reinterpret_cast<leaf_type*
>(
children_[child_index]);
1087 if (child_index != 0) {
1090 bool value =
child->remove(index);
1158 template <
class allocator>
1161 alloc->deallocate_node(b);
1184 template <
class allocator>
1188 if (
child->child_count_ <= branches / 3) {
1189 if (child_index == 0) {
1196 [[unlikely]] ((void)0);
1207 [[unlikely]]
child =
1210 if (child_index != 0) {
1213 bool value =
child->remove(index, alloc);
void set(uint8_t index, dtype value)
Set the value at index to value.
Definition: branch_selection.hpp:67
void rebalance_nodes_right(node *a, node *b, uint8_t idx)
Transfer elements from the "right" node to the "left" node.
Definition: node.hpp:1112
bool node_remove(dtype index, allocator *alloc)
Removal operation when children are internal nodes.
Definition: node.hpp:1185
int set(dtype index, bool v)
Set the value of the logical indexth element to v.
Definition: node.hpp:174
void deallocate(allocator *alloc)
Recursively deallocates all children.
Definition: node.hpp:258
Class providing support for cumulative sums.
Definition: branch_selection.hpp:27
branching child_sizes_
Cumulative child sizes and (~0) >> 1 for non-existing children.
Definition: node.hpp:79
void merge_nodes(node *a, node *b, uint8_t idx, allocator *alloc)
Transfer all from the "right" node to the "left" node.
Definition: node.hpp:1159
void transfer_append(node *other, uint8_t elems)
Moves the fist "elems" elements from "other" to the end of "this".
Definition: node.hpp:425
uint8_t child_count_
Number of active children.
Definition: node.hpp:75
void clear_first(uint8_t elems)
Remove the first "elems" elements form this node.
Definition: node.hpp:406
uint32_t p_sum() const
Getter for p_sum_.
Definition: leaf.hpp:122
branching child_sums_
Cumulative child sums and (~0) >> 1 for non-existint children.
Definition: node.hpp:84
dtype rank(dtype index) const
Counts number of one bits up to the indexth logical element.
Definition: node.hpp:202
void rebalance_leaves_left(leaf_type *a, leaf_type *b, uint8_t idx, allocator *alloc)
Transfer elements from the "left" leaf to the "right" leaf.
Definition: node.hpp:989
dtype get(uint8_t index) const
Get the cumulative sum at index.
Definition: branch_selection.hpp:54
bool has_leaves() const
Get the type of children for the node.
Definition: node.hpp:140
dtype p_sum() const
Logical number of 1-bits stored in the subtree.
Definition: node.hpp:323
branching * child_sizes()
Get pointer to the cumulative child sizes.
Definition: node.hpp:298
void * children_[branches]
pointers to leaf_type or node children.
Definition: node.hpp:85
simple_bv< 8, 16384, 64 > bv
Default dynamic bit vector type.
Definition: bv.hpp:94
void insert(dtype index, bool value, allocator *alloc)
Insert "value" at "index".
Definition: node.hpp:369
void ** children()
Get pointer to the children of this node.
Definition: node.hpp:289
void insert(uint8_t index, uint8_t array_size, dtype a_value, dtype b_value)
Inserts a new value into the cumulative sums.
Definition: branch_selection.hpp:106
void clear_last(uint8_t elems)
Remove the last "elems" elemets from the node.
Definition: node.hpp:449
void append_child(child *new_child)
Add child to the subtree.
Definition: node.hpp:337
Simple flat dynamic bit vector for use as a leaf for a dynamic b-tree bit vector.
Definition: leaf.hpp:47
bool leaf_remove(dtype index, allocator *alloc)
Removal used when the children are leaves.
Definition: node.hpp:1061
void node_insert(dtype index, bool value, allocator *alloc)
Insertion operation if the children are internal nodes.
Definition: node.hpp:919
void * child(uint8_t i)
Get ith child of the node.
Definition: node.hpp:353
void rebalance_leaf(uint8_t index, leaf_type *leaf, allocator *alloc)
Ensure that there is space for insertion in the child leaves.
Definition: node.hpp:672
void print(bool internal_only) const
Outputs this subtree as json.
Definition: node.hpp:606
void append(uint8_t n_elems, uint8_t array_size, branchless_scan *other)
Adds elements from other to the end of this
Definition: branch_selection.hpp:179
void has_leaves(bool leaves)
Set whether the children of the node are leaves or internal nodes.
Definition: node.hpp:131
void remove(uint8_t index, uint8_t array_size)
Removes a value from the cumulative sums.
Definition: branch_selection.hpp:127
void clear_first(uint8_t n, uint8_t array_size)
Removes the first n elements from the structure.
Definition: branch_selection.hpp:144
void append_all(node *other)
Copies all children from "other" to the end of this node.
Definition: node.hpp:490
void clear_last(uint8_t n, uint8_t array_size)
Removes the last n elements from the structure.
Definition: branch_selection.hpp:162
uint32_t size() const
Getter for size_.
Definition: leaf.hpp:124
dtype size() const
Logical number of elements stored in the subtree.
Definition: node.hpp:314
bool remove(dtype index, allocator *alloc)
Remove the indexth element.
Definition: node.hpp:389
uint8_t meta_data_
Bit indicating whether the nodes children are laves or nodes.
Definition: node.hpp:71
void rebalance_node(uint8_t index, allocator *alloc)
Ensure that there is space for insertion in the child nodes.
Definition: node.hpp:842
void rebalance_nodes_left(node *a, node *b, uint8_t idx)
Transfer elements from the "lef" node to the "right" node.
Definition: node.hpp:1132
dtype select(dtype count) const
Calculates the index of the counttu 1-bit.
Definition: node.hpp:230
uint64_t validate() const
Check that the subtree structure is internally consistent.
Definition: node.hpp:563
void prepend(uint8_t n_elems, uint8_t array_size, uint8_t o_size, branchless_scan *other)
Adds elements form other to the start of this
Definition: branch_selection.hpp:214
Internal node for use with bit vector b-tree structures.
Definition: node.hpp:65
node()
Constructor.
Definition: node.hpp:99
bool at(dtype index) const
Access the value of the indexth element of the logical structure.
Definition: node.hpp:150
void leaf_insert(dtype index, bool value, allocator *alloc)
Insertion operation if the children are leaves.
Definition: node.hpp:796
void rebalance_leaves_right(leaf_type *a, leaf_type *b, allocator *alloc)
Transfer elements from the "right" leaf to the "left" leaf.
Definition: node.hpp:959
uint64_t bits_size() const
Size of the subtree in "allocated" bits.
Definition: node.hpp:511
void transfer_prepend(node *other, uint8_t elems)
Moves the last "elems" elements from "other" to the start of "this".
Definition: node.hpp:466
uint8_t find(dtype q) const
Find the lowest child index s.t. the cumulative sum at the index is at least q.
Definition: branch_selection.hpp:256
uint8_t child_count() const
Get the number of children of this node.
Definition: node.hpp:280
branching * child_sums()
Get pointer to the cumulative child sums.
Definition: node.hpp:307
void merge_leaves(leaf_type *a, leaf_type *b, uint8_t idx, allocator *alloc)
Merge the right leaf into the left leaf.
Definition: node.hpp:1026
void increment(uint8_t from, uint8_t array_size, T change)
Increments all values in range.
Definition: branch_selection.hpp:85