目录
前言
位图的概念
经典面试题目
位图的模拟实现
// Bitmap skeleton. The non-type template parameter N is the number of bits
// the caller asks for; bits are packed 32 per word.
template<size_t N>
class BitSet
{
public:
    // The storage has to be sized at construction time: a default-constructed
    // vector is empty. N / 32 + 1 words are allocated, all bits cleared.
    BitSet()
        : _bits(N / 32 + 1, 0u)
    {
    }

private:
    // Unsigned words keep every bit operation (including bit 31) away from
    // the sign bit of a signed int.
    std::vector<unsigned int> _bits;
};
set()
void set(size_t x)
{
//计算x在第i个整型
size_t i = x / 32;
//计算x在第j个比特位
size_t j = x % 32;
_bits[i] = _bits[i] | (1 << j);
}
reset()
//将x映射的比特位设置为0
void reset(size_t x)
{
//计算x在第i个整型
size_t i = x / 32;
//计算x在第j个比特位
size_t j = x % 32;
_bits[i] = _bits[i] & ~(1 << j);
}
test()
位图整体代码
// Bitmap that records the presence of non-negative integers, one bit each,
// packed 32 per unsigned word. N / 32 + 1 words are allocated, so every
// value in [0, N] is addressable.
//
// All accessors are bounds-checked through vector::at and throw
// std::out_of_range when x maps to a word outside the allocated storage.
template<size_t N>
class BitSet
{
public:
    // Allocates the words up front with every bit cleared.
    BitSet()
        : _bits(N / kBitsPerWord + 1, 0u)
    {
    }

    // Sets the bit that x maps to.
    void set(size_t x)
    {
        _bits.at(x / kBitsPerWord) |= mask(x);
    }

    // Clears the bit that x maps to.
    void reset(size_t x)
    {
        _bits.at(x / kBitsPerWord) &= ~mask(x);
    }

    // Returns true when the bit that x maps to is set.
    bool test(size_t x) const
    {
        return (_bits.at(x / kBitsPerWord) & mask(x)) != 0;
    }

private:
    static const size_t kBitsPerWord = 32;

    // Single-bit mask for x inside its word. The literal is unsigned so that
    // selecting bit 31 never shifts into a sign bit.
    static unsigned int mask(size_t x)
    {
        return 1u << (x % kBitsPerWord);
    }

    std::vector<unsigned int> _bits;
};
位图的应用
// Saturating occurrence counter built from two bitmaps. For each value x the
// pair (_bs1, _bs2) encodes how often x was recorded:
//   00 -> never, 01 -> exactly once, 10 -> twice or more.
template<size_t N>
class two_bit_set
{
public:
    // Records one more occurrence of x; the count stops growing at "two or more".
    void set(size_t x)
    {
        if (_bs1.test(x))
        {
            // High bit already set: x was seen at least twice, nothing to do.
            return;
        }

        if (_bs2.test(x))
        {
            // 01 -> 10: second occurrence.
            _bs1.set(x);
            _bs2.reset(x);
        }
        else
        {
            // 00 -> 01: first occurrence.
            _bs2.set(x);
        }
    }

    // Returns 0 if x was never recorded, 1 if it was recorded exactly once,
    // and 2 if it was recorded twice or more.
    int test(size_t x)
    {
        if (_bs1.test(x))
        {
            return 2;
        }
        return _bs2.test(x) ? 1 : 0;
    }

private:
    BitSet<N> _bs1; // high bit of the two-bit state
    BitSet<N> _bs2; // low bit of the two-bit state
};
位图的优缺点
布隆过滤器
布隆过滤器的概念
哈希函数的个数与布隆过滤器长度的关系
布隆过滤器的模拟实现
// Bloom filter skeleton sized for N inserted elements.
//   K      - key type (strings by default)
//   Hash1, Hash2, Hash3 - functors that turn a key into an integer
template<size_t N,
    class K = std::string,
    class Hash1 = BKDRHash,
    class Hash2 = APHash,
    class Hash3 = DJBHash>
class bloomfilter
{
    // M == 0 would make every "hash % M" a division by zero.
    static_assert(N > 0, "bloomfilter needs room for at least one element");

public:
    bloomfilter()
        : _bs(new std::bitset<M>)
    {
    }

    // Deep copy: each filter owns its own bitset, so destroying one copy
    // never frees storage that another copy still uses.
    bloomfilter(const bloomfilter& other)
        : _bs(new std::bitset<M>(*other._bs))
    {
    }

    bloomfilter& operator=(const bloomfilter& other)
    {
        *_bs = *other._bs; // same size on both sides; self-assignment is harmless
        return *this;
    }

    // Releases the heap-allocated bitset (it was previously never freed).
    ~bloomfilter()
    {
        delete _bs;
    }

private:
    // Filter length in bits: five bits per expected element.
    static const size_t M = 5 * N;
    // std::bitset keeps its bits inside the object itself, so a large M is
    // placed on the heap to keep the filter object small.
    std::bitset<M>* _bs;
};
// BKDR string hash: folds the characters left to right with multiplier 131.
// Returns 0 for an empty string; size_t arithmetic wraps on overflow.
struct BKDRHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        size_t value = 0;
        for (const char ch : s)
        {
            value = value * 131 + ch;
        }
        return value;
    }
};
// AP string hash: alternates between two shift/xor mixing steps depending on
// whether the character index is even or odd. Returns 0 for an empty string.
struct APHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        size_t value = 0;
        for (size_t i = 0; i < s.size(); ++i)
        {
            if ((i & 1) == 0)
            {
                // even index
                value ^= ((value << 7) ^ s[i] ^ (value >> 3));
            }
            else
            {
                // odd index: same shape, different shifts, result inverted
                value ^= (~((value << 11) ^ s[i] ^ (value >> 5)));
            }
        }
        return value;
    }
};
// DJB string hash: starts from 5381 and applies value = value * 33 + ch for
// every character. An empty string hashes to 0 rather than to the seed.
struct DJBHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        if (s.empty())
        {
            return 0;
        }

        size_t value = 5381;
        for (const char ch : s)
        {
            value += (value << 5) + ch; // value * 33 + ch
        }
        return value;
    }
};
插入
void set(const K& key)
{
size_t hash1 = Hash1()(key) % M;
size_t hash2 = Hash2()(key) % M;
size_t hash3 = Hash3()(key) % M;
_bs->set(hash1);
_bs->set(hash2);
_bs->set(hash3);
}
查找
bool test(const K& key)
{
//依次判断key对应的三个位是否被设置
size_t hash1 = Hash1()(key) % M;
if (_bs->test(hash1) == false)
return false;//key一定不存在
size_t hash2 = Hash2()(key) % M;
if (_bs->test(hash2) == false)
return false;//key一定不存在
size_t hash3 = Hash3()(key) % M;
if (_bs->test(hash3) == false)
return false;//key一定不存在
return true; // 存在误判(有可能3个位都是跟别人冲突的,所以误判)
}
删除
布隆过滤器整体代码
// BKDR string hash: folds the characters left to right with multiplier 131.
// Returns 0 for an empty string; size_t arithmetic wraps on overflow.
struct BKDRHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        size_t value = 0;
        for (const char ch : s)
        {
            value = value * 131 + ch;
        }
        return value;
    }
};
// AP string hash: alternates between two shift/xor mixing steps depending on
// whether the character index is even or odd. Returns 0 for an empty string.
struct APHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        size_t value = 0;
        for (size_t i = 0; i < s.size(); ++i)
        {
            if ((i & 1) == 0)
            {
                // even index
                value ^= ((value << 7) ^ s[i] ^ (value >> 3));
            }
            else
            {
                // odd index: same shape, different shifts, result inverted
                value ^= (~((value << 11) ^ s[i] ^ (value >> 5)));
            }
        }
        return value;
    }
};
// DJB string hash: starts from 5381 and applies value = value * 33 + ch for
// every character. An empty string hashes to 0 rather than to the seed.
struct DJBHash
{
    // const so the functor can also be invoked through a const object or
    // reference.
    size_t operator()(const std::string& s) const
    {
        if (s.empty())
        {
            return 0;
        }

        size_t value = 5381;
        for (const char ch : s)
        {
            value += (value << 5) + ch; // value * 33 + ch
        }
        return value;
    }
};
// Bloom filter sized for N inserted elements, using three hash functions over
// a bitset of M = 5 * N bits.
//   K      - key type (strings by default)
//   Hash1, Hash2, Hash3 - functors that turn a key into an integer
// Deletion is not supported: clearing a bit could also remove other keys
// that share it.
template<size_t N,
    class K = std::string,
    class Hash1 = BKDRHash,
    class Hash2 = APHash,
    class Hash3 = DJBHash>
class bloomfilter
{
    // M == 0 would make every "hash % M" a division by zero.
    static_assert(N > 0, "bloomfilter needs room for at least one element");

public:
    bloomfilter()
        : _bs(new std::bitset<M>)
    {
    }

    // Deep copy: each filter owns its own bitset, so destroying one copy
    // never frees storage that another copy still uses.
    bloomfilter(const bloomfilter& other)
        : _bs(new std::bitset<M>(*other._bs))
    {
    }

    bloomfilter& operator=(const bloomfilter& other)
    {
        *_bs = *other._bs; // same size on both sides; self-assignment is harmless
        return *this;
    }

    // Releases the heap-allocated bitset (it was previously never freed).
    ~bloomfilter()
    {
        delete _bs;
    }

    // Inserts key by setting the three bits its hashes select.
    void set(const K& key)
    {
        const size_t hash1 = Hash1()(key) % M;
        const size_t hash2 = Hash2()(key) % M;
        const size_t hash3 = Hash3()(key) % M;
        _bs->set(hash1);
        _bs->set(hash2);
        _bs->set(hash3);
    }

    // Membership query. false means the key was definitely never inserted;
    // true means it is probably present (all three bits may have been set by
    // other keys, so false positives are possible).
    bool test(const K& key) const
    {
        // Short-circuit evaluation: a later hash is only computed when every
        // earlier bit was found set.
        return _bs->test(Hash1()(key) % M)
            && _bs->test(Hash2()(key) % M)
            && _bs->test(Hash3()(key) % M);
    }

private:
    // Filter length in bits: five bits per expected element.
    static const size_t M = 5 * N;
    // std::bitset keeps its bits inside the object itself, so a large M is
    // placed on the heap to keep the filter object small.
    std::bitset<M>* _bs;
};