1#include <vkdb/bloom_filter.h>
6 std::stringstream ss{str};
9 bits_.resize(no_of_bits);
11 size_type no_of_hashes;
13 seeds_.resize(no_of_hashes);
14 for (size_type i{0}; i < no_of_hashes; ++i) {
20 for (size_type i{0}; i < bits.size(); ++i) {
21 bits_[i] = bits[i] -
'0';
26 size_type expected_no_of_elems,
27 double false_positive_rate) {
28 if (expected_no_of_elems == 0) {
29 throw std::invalid_argument{
30 "BloomFilter(): Expected elements must be greater than 0."
36 throw std::invalid_argument{
37 "BloomFilter(): False positive rate must be in the range (0, 1)."
41 auto log_false_positive_rate{std::log(false_positive_rate)};
42 auto log_2_squared{std::log(2) * std::log(2)};
44 auto no_of_bits{
static_cast<size_type
>(
45 -(expected_no_of_elems * log_false_positive_rate) / log_2_squared
47 bits_.resize(no_of_bits);
49 auto no_of_hashes{
static_cast<size_type
>(
50 (no_of_bits / expected_no_of_elems) * std::log(2)
52 initialise_seeds(no_of_hashes);
56 for (size_type i{0}; i < seeds_.size(); ++i) {
57 bits_[hash(key, i)] =
true;
62 for (size_type i{0}; i < seeds_.size(); ++i) {
63 if (!bits_[hash(key, i)]) {
72 ss << bits_.size() <<
" ";
73 ss << seeds_.size() <<
" ";
75 for (
const auto& seed : seeds_) {
83 for (
const auto& bit : bits_) {
89void BloomFilter::initialise_seeds(size_type no_of_hashes)
noexcept {
90 seeds_.resize(no_of_hashes);
91 for (size_type i{0}; i < no_of_hashes; ++i) {
92 seeds_[i] = random<size_type>();
96BloomFilter::HashValue BloomFilter::hash(
100 HashValue hash_value{0};
101 auto std_hash_value{std::hash<std::string>{}(key.str())};
102 MurmurHash3_x86_32(&std_hash_value,
sizeof(std_hash_value), i, &hash_value);
103 return hash_value % bits_.size();
std::string str() const noexcept
Get the string representation of the Bloom filter.
BloomFilter()=delete
Deleted default constructor.
bool mayContain(const key_type &key) const noexcept
Check if the Bloom filter may contain a key.
void insert(const key_type &key) noexcept
Insert a key into the Bloom filter.
static constexpr double MIN_FALSE_POSITIVE_RATE
Minimum (exclusive) false positive rate.
static constexpr double MAX_FALSE_POSITIVE_RATE
Maximum (exclusive) false positive rate.
Represents a key in vkdb.