Bit vector  0.9
Fast and space efficient dynamic bit vector library.
query_support.hpp
1 
2 #ifndef BV_QUERY_SUPPORT_HPP
3 #define BV_QUERY_SUPPORT_HPP
4 
5 #include <cassert>
6 #include <cstdint>
7 #include <vector>
8 
9 namespace bv {
10 
14 template <class dtype, class leaf_type>
15 struct r_elem {
16  dtype p_size;
17  dtype p_sum;
18  dtype select_index;
19  dtype internal_offset;
20  leaf_type* leaf;
21 
22  r_elem(dtype size, dtype sum, dtype offset, leaf_type* l) {
23  p_size = size;
24  p_sum = sum;
25  leaf = l;
26  select_index = 0;
27  internal_offset = offset;
28  }
29 };
30 
48 template <class dtype, class leaf_type, dtype block_size>
50  protected:
51  dtype size_ = 0;
52  dtype sum_ = 0;
53  std::vector<r_elem<dtype, leaf_type>> elems_ =
54  std::vector<r_elem<dtype, leaf_type>>();
55 
56  public:
66  void append(leaf_type* leaf) {
67  dtype i = elems_.size();
68  dtype a_size = leaf->size();
69  while (size_ + a_size > i * block_size) {
70  dtype i_rank = leaf->rank(i * block_size - size_);
71  elems_.push_back(
72  r_elem<dtype, leaf_type>(size_, sum_, i_rank, leaf));
73  i++;
74  }
75  size_ += a_size;
76  sum_ += leaf->p_sum();
77  }
78 
88  void finalize() {
89  if (sum_ <= elems_.size()) {
90  for (size_t i = 0; i < sum_; i++) {
91  elems_[i].select_index = dumb_select(i + 1);
92  }
93  [[unlikely]] (void(0));
94  } else {
95  for (size_t i = 0; i < elems_.size(); i++) {
96  elems_[i].select_index =
97  s_select(1 + (i * sum_) / elems_.size());
98  }
99  }
100  }
101 
107  dtype size() const { return size_; }
113  dtype p_sum() const { return sum_; }
114 
124  bool at(dtype i) const {
125  dtype idx = i / block_size;
126  r_elem<dtype, leaf_type> e = elems_[idx];
127  if (e.p_size + e.leaf->size() <= i) {
128  [[unlikely]] e = elems_[idx + 1];
129  }
130  return e.leaf->at(i - e.p_size);
131  }
132 
146  dtype rank(dtype i) const {
147  dtype idx = block_size;
148  idx = i / idx;
149  r_elem<dtype, leaf_type> e = elems_[idx];
150  if (e.p_size + e.leaf->size() < i) {
151  e = elems_[++idx];
152  [[unlikely]] return e.p_sum + e.leaf->rank(i - e.p_size);
153  }
154  dtype offs = idx * block_size - e.p_size;
155  return e.p_sum + e.internal_offset + e.leaf->rank(i - e.p_size, offs);
156  }
157 
178  dtype select(dtype i) const {
179  if (i == 0) [[unlikely]]
180  return -1;
181  if (sum_ <= elems_.size()) {
182  [[unlikely]] return elems_[i - 1].select_index;
183  }
184  dtype idx = elems_.size() * i / (sum_ + 1);
185  if (idx == elems_.size()) {
186  [[unlikely]] idx--;
187  }
188  dtype a_idx = elems_[idx].select_index;
189  dtype b_idx =
190  idx < elems_.size() - 1 ? elems_[idx + 1].select_index : a_idx;
191  if (b_idx - a_idx > 1) {
192  [[unlikely]] return dumb_select(i);
193  }
194  r_elem<dtype, leaf_type> e = elems_[a_idx];
195  if (e.p_sum + e.leaf->p_sum() < i) {
196  e = elems_[a_idx + 1];
197  [[unlikely]] return e.p_size + e.leaf->select(i - e.p_sum);
198  }
199  dtype s_pos = a_idx * block_size - e.p_size;
200  if (s_pos == 0) {
201  [[unlikely]] return e.p_size + e.leaf->select(i - e.p_sum);
202  }
203  return e.p_size + e.leaf->select(i - e.p_sum,
204  a_idx * block_size - e.p_size,
205  e.internal_offset);
206  }
207 
216  uint64_t bit_size() const {
217  return (sizeof(query_support) +
218  elems_.capacity() * sizeof(r_elem<dtype, leaf_type>)) *
219  8;
220  }
221 
227  void print(bool internal_only) const {
228  std::cout << "{\n\"type\": \"rank_support\",\n"
229  << "\"size\": " << size_ << ",\n"
230  << "\"sum\": " << sum_ << ",\n"
231  << "\"number of elems\": " << elems_.size() << ",\n"
232  << "\"p_sizes\": [";
233  for (size_t i = 0; i < elems_.size(); i++) {
234  std::cout << elems_[i].p_size;
235  if (i != elems_.size() - 1) {
236  std::cout << ", ";
237  }
238  }
239  std::cout << "],\n"
240  << "\"p_sums\": [";
241  for (size_t i = 0; i < elems_.size(); i++) {
242  std::cout << elems_[i].p_sum;
243  if (i != elems_.size() - 1) {
244  std::cout << ", ";
245  }
246  }
247  std::cout << "],\n"
248  << "\"internal_offsets\": [";
249  for (size_t i = 0; i < elems_.size(); i++) {
250  std::cout << elems_[i].internal_offset;
251  if (i != elems_.size() - 1) {
252  std::cout << ", ";
253  }
254  }
255  std::cout << "],\n"
256  << "\"select_index\": [";
257  for (size_t i = 0; i < elems_.size(); i++) {
258  std::cout << elems_[i].select_index;
259  if (i != elems_.size() - 1) {
260  std::cout << ", ";
261  }
262  }
263  std::cout << "],\n"
264  << "\"nodes\": [";
265  for (uint8_t i = 0; i < elems_.size(); i++) {
266  elems_[i].leaf->print(internal_only);
267  if (i != elems_.size() - 1) {
268  std::cout << ",";
269  }
270  std::cout << "\n";
271  }
272  std::cout << "]}" << std::endl;
273  }
274 
275  protected:
285  dtype s_select(dtype i) const {
286  dtype idx = 0;
287  dtype b = elems_.size() - 1;
288  while (idx < b) {
289  dtype m = (idx + b + 1) / 2;
290  if (elems_[m].p_sum + elems_[m].internal_offset >= i) {
291  b = m - 1;
292  } else {
293  idx = m;
294  }
295  }
296  r_elem<dtype, leaf_type> e = elems_[idx];
297  while (e.p_sum + e.leaf->p_sum() < i) {
298  idx++;
299  [[unlikely]] e = elems_[idx];
300  }
301  return idx;
302  }
303 
315  dtype dumb_select(dtype i) const {
316  dtype idx = 0;
317  dtype b = elems_.size() - 1;
318  while (idx < b) {
319  dtype m = (idx + b + 1) / 2;
320  if (elems_[m].p_sum >= i) {
321  b = m - 1;
322  } else {
323  idx = m;
324  }
325  }
326  r_elem<dtype, leaf_type> e = elems_[idx];
327  while (e.p_sum + e.leaf->p_sum() < i) {
328  idx++;
329  [[unlikely]] e = elems_[idx];
330  }
331  return e.p_size + e.leaf->select(i - e.p_sum);
332  }
333 };
334 
335 } // namespace bv
336 
337 #endif
bv::query_support::rank
dtype rank(dtype i) const
Number of 1-bits up to position in the underlying bit vector.
Definition: query_support.hpp:146
bv::query_support::size
dtype size() const
Number of bits stored in the bit vector.
Definition: query_support.hpp:107
bv::query_support::at
bool at(dtype i) const
Return value of bit at index in the underlying bit vector.
Definition: query_support.hpp:124
bv::query_support::append
void append(leaf_type *leaf)
Add leaf reference to the support structure.
Definition: query_support.hpp:66
bv::query_support
Support structure for bv::bit_vector to enable fast queries.
Definition: query_support.hpp:49
bv::leaf::p_sum
uint32_t p_sum() const
Getter for p_sum_.
Definition: leaf.hpp:122
bv::query_support::bit_size
uint64_t bit_size() const
Number of bits allocated for the support structure.
Definition: query_support.hpp:216
bv::query_support::select
dtype select(dtype i) const
Index of the th 1-bit in the data structure.
Definition: query_support.hpp:178
bv::bv
simple_bv< 8, 16384, 64 > bv
Default dynamic bit vector type.
Definition: bv.hpp:94
bv::query_support::finalize
void finalize()
Prepare the structure for querying.
Definition: query_support.hpp:88
bv::leaf
Simple flat dynamic bit vector for use as a leaf for a dynamic b-tree bit vector.
Definition: leaf.hpp:47
bv::query_support::print
void print(bool internal_only) const
Outputs json representation of the support structure.
Definition: query_support.hpp:227
bv::query_support::s_select
dtype s_select(dtype i) const
Locate the block containing the th 1-bit.
Definition: query_support.hpp:285
bv::query_support::dumb_select
dtype dumb_select(dtype i) const
Index of the th 1-bit in the data structure.
Definition: query_support.hpp:315
bv::leaf::size
uint32_t size() const
Getter for size_.
Definition: leaf.hpp:124
bv::r_elem
Block storage used by the query support structure.
Definition: query_support.hpp:15
bv::leaf::rank
uint32_t rank(const uint64_t n) const
Number of 1-bits up to position n.
Definition: leaf.hpp:323
bv::query_support::p_sum
dtype p_sum() const
Number of 1-bits stored in the bit vector.
Definition: query_support.hpp:113