EWAHBoolArray 0.8.0
A compressed bitmap class in C++
Loading...
Searching...
No Matches
ewah.h
1// See LICENSE file for license information.
2#ifndef EWAH_H
3#define EWAH_H
4
5#include <algorithm>
6#include <queue>
7#include <vector>
8
9#include "boolarray.h"
10#include "ewahutil.h"
11
12#include "runninglengthword.h"
13
14namespace ewah {
15
16template <class uword> class EWAHBoolArrayIterator;
17
18template <class uword> class EWAHBoolArraySetBitForwardIterator;
19
21
22template <class uword> class EWAHBoolArrayRawIterator;
23
30template <class uword = uint32_t> class EWAHBoolArray {
31public:
32 EWAHBoolArray() : buffer(1, 0), sizeinbits(0), lastRLW(0) {}
33
34 static EWAHBoolArray bitmapOf(size_t n, ...) {
35 EWAHBoolArray ans;
36 va_list vl;
37 va_start(vl, n);
38 for (size_t i = 0; i < n; i++) {
39 ans.set(static_cast<size_t>(va_arg(vl, int)));
40 }
41 va_end(vl);
42 return ans;
43 }
44
48 void trim() { buffer.shrink_to_fit(); }
49
58 bool get(const size_t pos) const {
59 if (pos >= static_cast<size_t>(sizeinbits))
60 return false;
61 const size_t wordpos = pos / wordinbits;
62 size_t WordChecked = 0;
64 while (j.hasNext()) {
66 WordChecked += static_cast<size_t>(rle.getRunningLength());
67 if (wordpos < WordChecked)
68 return rle.getRunningBit();
69 if (wordpos < WordChecked + rle.getNumberOfLiteralWords()) {
70 const uword w = j.dirtyWords()[wordpos - WordChecked];
71 return (w & (static_cast<uword>(1) << (pos % wordinbits))) != 0;
72 }
73 WordChecked += static_cast<size_t>(rle.getNumberOfLiteralWords());
74 }
75 return false;
76 }
77
81 bool empty() const {
82 size_t pointer(0);
83 while (pointer < buffer.size()) {
84 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
85 if (rlw.getRunningBit()) {
86 if (rlw.getRunningLength() > 0)
87 return false;
88 }
89 ++pointer;
90 for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
91 if (buffer[pointer] != 0)
92 return false;
93 ++pointer;
94 }
95 }
96 return true;
97 }
98
114 bool set(size_t i);
115
120 operator std::string() const {
121 std::stringstream ss;
122 ss << *this;
123 return ss.str();
124 }
125 friend std::ostream &operator<<(std::ostream &out, const EWAHBoolArray &a) {
126
127 out << "{";
128 for (EWAHBoolArray::const_iterator i = a.begin(); i != a.end();) {
129 out << *i;
130 ++i;
131 if (i != a.end())
132 out << ",";
133 }
134 out << "}";
135
136 return out;
137 }
148 if (a.sizeinbits < sizeinbits)
149 a.padWithZeroes(sizeinbits);
150 else if (sizeinbits < a.sizeinbits)
151 padWithZeroes(a.sizeinbits);
152 }
153
154 enum { RESERVEMEMORY = true }; // for speed
155
156 typedef EWAHBoolArraySetBitForwardIterator<uword> const_iterator;
157
171
180
185 std::vector<size_t> toArray() const;
186
196 void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const;
197
208 EWAHBoolArray answer;
209 logicaland(a, answer);
210 return answer;
211 }
212
217 return logicaland(a);
218 }
219
230 void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const;
231
236 return logicalandnot(a);
237 }
238
250 EWAHBoolArray answer;
251 logicalandnot(a, answer);
252 return answer;
253 }
254
260 bool intersects(const EWAHBoolArray &a) const;
261
273 void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const;
274
281 size_t logicalorcount(const EWAHBoolArray &a) const;
282
289 size_t logicalandcount(const EWAHBoolArray &a) const;
290
297 size_t logicalandnotcount(const EWAHBoolArray &a) const;
298
305 size_t logicalxorcount(const EWAHBoolArray &a) const;
306
319 EWAHBoolArray answer;
320 logicalor(a, answer);
321 return answer;
322 }
323
327 EWAHBoolArray operator|(const EWAHBoolArray &a) const { return logicalor(a); }
328
338 void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const;
339
350 EWAHBoolArray answer;
351 logicalxor(a, answer);
352 return answer;
353 }
354
359 return logicalxor(a);
360 }
365 void reset() {
366 buffer.clear();
367 buffer.push_back(0);
368 sizeinbits = 0;
369 lastRLW = 0;
370 }
371
377 inline size_t addWord(const uword newdata,
378 const uint32_t bitsthatmatter = 8 * sizeof(uword));
379
380 inline void printout(std::ostream &o = std::cout) {
381 toBoolArray().printout(o);
382 }
383
387 void debugprintout() const;
388
395 inline size_t sizeInBits() const { return sizeinbits; }
396
402 inline size_t sizeInBytes() const { return buffer.size() * sizeof(uword); }
403
408 size_t addStreamOfEmptyWords(const bool v, size_t number);
409
414 size_t addStreamOfDirtyWords(const uword *v, const size_t number);
415
421 size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number);
422
433 size_t padWithZeroes(const size_t totalbits);
434
439 size_t sizeOnDisk(const bool savesizeinbits = true) const;
440
449 size_t write(std::ostream &out, const bool savesizeinbits = true) const;
450
458 size_t write(char *out, size_t capacity,
459 const bool savesizeinbits = true) const;
460
465 void writeBuffer(std::ostream &out) const;
466
470 size_t bufferSize() const { return buffer.size(); }
471
480 size_t read(std::istream &in, const bool savesizeinbits = true);
481
489 size_t read(const char *in, size_t capacity,
490 const bool savesizeinbits = true);
491
496 void readBuffer(std::istream &in, const size_t buffersize);
497
502 bool operator==(const EWAHBoolArray &x) const;
503
509 bool operator!=(const EWAHBoolArray &x) const;
510
511 bool operator==(const BoolArray<uword> &x) const;
512
513 bool operator!=(const BoolArray<uword> &x) const;
514
522
530
535 void append(const EWAHBoolArray &x);
536
542
548
555 template <class container>
556 void appendSetBits(container &out, const size_t offset = 0) const;
557
562 std::vector<size_t> toVector() const { return toArray(); }
563
571 size_t numberOfOnes() const;
572
577 void swap(EWAHBoolArray &x);
578
579 const std::vector<uword> &getBuffer() const { return buffer; }
580
581 enum { wordinbits = sizeof(uword) * 8 };
582
588 : buffer(other.buffer), sizeinbits(other.sizeinbits),
589 lastRLW(other.lastRLW) {}
590
597 buffer = x.buffer;
598 sizeinbits = x.sizeinbits;
599 lastRLW = x.lastRLW;
600 return *this;
601 }
602
607 : buffer(std::move(other.buffer)), sizeinbits(other.sizeinbits),
608 lastRLW(other.lastRLW) {}
609
614 buffer = std::move(x.buffer);
615 sizeinbits = x.sizeinbits;
616 lastRLW = x.lastRLW;
617 return *this;
618 }
619
627 buffer = x.buffer;
628 sizeinbits = x.sizeinbits;
629 lastRLW = x.lastRLW;
630 }
631
638 void logicalnot(EWAHBoolArray &x) const;
639
647 EWAHBoolArray answer;
648 logicalnot(answer);
649 return answer;
650 }
651
661 void inplace_logicalnot();
662
668 inline void setSizeInBits(const size_t size) { sizeinbits = size; }
669
675 inline void fastaddStreamOfEmptyWords(const bool v, size_t number);
680 inline void fastaddStreamOfDirtyWords(const uword *v, const size_t number);
681
682private:
683 void assertWordCount(std::string message) const;
684 void correctWordCount();
685 size_t numberOfWords() const;
686 // private because does not increment the size in bits
687 // returns the number of words added (storage cost increase)
688 inline size_t addLiteralWord(const uword newdata);
689
690 // private because does not increment the size in bits
691 // returns the number of words added (storage cost increase)
692 size_t addEmptyWord(const bool v);
693 // this second version "might" be faster if you hate OOP.
694 // in my tests, it turned out to be slower!
695 // private because does not increment the size in bits
696 // inline void addEmptyWordStaticCalls(bool v);
697
698 std::vector<uword> buffer;
699 size_t sizeinbits;
700 size_t lastRLW;
701};
702} // namespace ewah
703#include "ewah-inl.h"
704
705#endif
This object is returned by the compressed bitmap as a statistical descriptor.
Definition ewah-inl.h:240
A dynamic bitset implementation.
Definition boolarray.h:19
Same as RunningLengthWord, except that the values are buffered for quick access.
uword getRunningLength() const
how many words should be filled by the running bit (see previous method)
uword getNumberOfLiteralWords() const
followed by how many literal words?
bool getRunningBit() const
Which bit is being repeated?
Same as RunningLengthWord, except that the values cannot be modified.
bool getRunningBit() const
Which bit is being repeated?
uword getRunningLength() const
how many words should be filled by the running bit
uword getNumberOfLiteralWords() const
followed by how many literal words?
This class is a compressed bitmap.
bool operator==(const EWAHBoolArray &x) const
We define two EWAHBoolArray as being equal if they have the same set bits.
Definition ewah-inl.h:720
void appendSetBits(container &out, const size_t offset=0) const
Convert to a list of positions of "set" bits.
Definition ewah-inl.h:845
EWAHBoolArray logicalor(const EWAHBoolArray &a) const
computes the logical or with another compressed bitmap Return the answer Running time complexity is p...
Definition ewah.h:318
void setSizeInBits(const size_t size)
set size in bits.
Definition ewah.h:668
void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical xor with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1288
EWAHBoolArrayRawIterator< uword > raw_iterator() const
To iterate over the compressed data.
Definition ewah-inl.h:715
void makeSameSize(EWAHBoolArray &a)
Make sure the two bitmaps have the same size (padding with zeroes if necessary).
Definition ewah.h:147
size_t padWithZeroes(const size_t totalbits)
make sure the size of the array is totalbits bits by padding with zeroes.
Definition ewah-inl.h:635
void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical or with another compressed bitmap answer goes into container Running time comple...
Definition ewah-inl.h:1193
EWAHBoolArray operator-(const EWAHBoolArray &a) const
calls logicalandnot
Definition ewah.h:235
void fastaddStreamOfDirtyWords(const uword *v, const size_t number)
LikeaddStreamOfDirtyWords but does not return the cost increse, does not update sizeinbits.
Definition ewah-inl.h:1036
EWAHBoolArray operator^(const EWAHBoolArray &a) const
calls logicalxor
Definition ewah.h:358
EWAHBoolArray logicalandnot(const EWAHBoolArray &a) const
computes the logical and not with another compressed bitmap Return the answer Running time complexity...
Definition ewah.h:249
EWAHBoolArray(const EWAHBoolArray &other)
Please don't copy your bitmaps! The running time complexity of a copy is the size of the compressed b...
Definition ewah.h:587
void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical and with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1436
size_t sizeInBytes() const
Return the size of the buffer in bytes.
Definition ewah.h:402
size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number)
add a stream of dirty words, each one negated, returns the number of words added (storage cost increa...
Definition ewah-inl.h:1069
size_t logicalandcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical and with another compressed bitmap Running t...
Definition ewah-inl.h:1555
std::vector< size_t > toVector() const
Returns a vector containing the position of the set bits in increasing order.
Definition ewah.h:562
void inplace_logicalnot()
Apply the logical not operation on this bitmap.
Definition ewah-inl.h:294
bool get(const size_t pos) const
Query the value of bit i.
Definition ewah.h:58
size_t bufferSize() const
size (in words) of the underlying STL vector.
Definition ewah.h:470
EWAHBoolArray operator|(const EWAHBoolArray &a) const
calls logicalor
Definition ewah.h:327
BitmapStatistics computeStatistics() const
For research purposes.
Definition ewah-inl.h:1635
EWAHBoolArray & operator=(EWAHBoolArray &&x)
Move assignment operator.
Definition ewah.h:613
void fastaddStreamOfEmptyWords(const bool v, size_t number)
Like addStreamOfEmptyWords but addStreamOfEmptyWords but does not return the cost increase,...
Definition ewah-inl.h:953
void debugprintout() const
Prints a verbose description of the content of the compressed bitmap.
Definition ewah-inl.h:1651
EWAHBoolArray< uword > logicalnot() const
Write the logical not of this bitmap in the provided container.
Definition ewah.h:646
EWAHBoolArray & operator=(const EWAHBoolArray &x)
Copies the content of one bitmap onto another.
Definition ewah.h:596
EWAHBoolArray logicaland(const EWAHBoolArray &a) const
computes the logical and with another compressed bitmap Return the answer Running time complexity is ...
Definition ewah.h:207
bool set(size_t i)
Set the ith bit to true (starting at zero).
Definition ewah-inl.h:257
const_iterator begin() const
Returns an iterator that can be used to access the position of the set bits.
Definition ewah.h:168
EWAHBoolArray(EWAHBoolArray &&other)
Move constructor.
Definition ewah.h:606
EWAHBoolArray logicalxor(const EWAHBoolArray &a) const
computes the logical xor with another compressed bitmap Return the answer Running time complexity is ...
Definition ewah.h:349
size_t addWord(const uword newdata, const uint32_t bitsthatmatter=8 *sizeof(uword))
convenience method.
Definition ewah-inl.h:475
size_t addStreamOfEmptyWords(const bool v, size_t number)
same as addEmptyWord, but you can do several in one shot! returns the number of words added (storage ...
Definition ewah-inl.h:898
void readBuffer(std::istream &in, const size_t buffersize)
read the buffer from a stream, see method writeBuffer.
Definition ewah-inl.h:495
size_t logicalxorcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical xor with another compressed bitmap Running t...
Definition ewah-inl.h:1332
void writeBuffer(std::ostream &out) const
This only writes the content of the buffer (see write()) method.
Definition ewah-inl.h:488
size_t logicalandnotcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical and not with another compressed bitmap Runni...
Definition ewah-inl.h:1505
bool empty() const
Returns true if no bit is set.
Definition ewah.h:81
size_t logicalorcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical or with another compressed bitmap Running ti...
Definition ewah-inl.h:1243
size_t write(std::ostream &out, const bool savesizeinbits=true) const
Save this bitmap to a stream.
Definition ewah-inl.h:503
size_t read(std::istream &in, const bool savesizeinbits=true)
this is the counterpart to the write method.
Definition ewah-inl.h:556
void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical and with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1381
size_t sizeOnDisk(const bool savesizeinbits=true) const
Compute the size on disk assuming that it was saved using the method "write".
Definition ewah-inl.h:1674
size_t numberOfOnes() const
Returns the number of bits set to the value 1.
Definition ewah-inl.h:365
size_t sizeInBits() const
Return the size in bits of this bitmap (this refers to the uncompressed size in bits).
Definition ewah.h:395
EWAHBoolArray operator&(const EWAHBoolArray &a) const
calls logicaland
Definition ewah.h:216
const_iterator & end() const
Basically a bogus iterator that can be used together with begin() for constructions such as for(EWAHB...
Definition ewah.h:177
void expensive_copy(const EWAHBoolArray &x)
This is equivalent to the operator =.
Definition ewah.h:626
void reset()
clear the content of the bitmap.
Definition ewah.h:365
bool operator!=(const EWAHBoolArray &x) const
We define two EWAHBoolArray as being different if they do not have the same set bits.
Definition ewah-inl.h:881
size_t addStreamOfDirtyWords(const uword *v, const size_t number)
add a stream of dirty words, returns the number of words added (storage cost increase)
Definition ewah-inl.h:1002
bool intersects(const EWAHBoolArray &a) const
tests whether the bitmaps "intersect" (have at least one 1-bit at the same position).
Definition ewah-inl.h:1594
void trim()
Recover wasted memory usage.
Definition ewah.h:48
std::vector< size_t > toArray() const
Retrieve the set bits.
Definition ewah-inl.h:383
void append(const EWAHBoolArray &x)
Appends the content of some other compressed bitmap at the end of the current bitmap.
Definition ewah-inl.h:777
void swap(EWAHBoolArray &x)
Swap the content of this bitmap with another bitmap.
Definition ewah-inl.h:766
BoolArray< uword > toBoolArray() const
For convenience, this fully uncompresses the bitmap.
Definition ewah-inl.h:833
EWAHBoolArrayIterator< uword > uncompress() const
Iterate over the uncompressed words.
Definition ewah-inl.h:710
Iterate over words of bits from a compressed bitmap.
Definition ewah.h:16
This is a low-level iterator.
Used to go through the set bits.
Definition ewah.h:18