目录
一、前言
二、正文
1.位图
1.1 位图概念
1.2 位图的实现
1.2.1 Set
// Sets bit `val` to 1.
// Returns true on success, or false (leaving the bitmap untouched) when the
// storage word that would hold the bit does not exist in _v.
bool Set(size_t val)
{
    // Index of the 32-bit storage word, and the bit position inside it.
    const size_t word = val / 32;
    const size_t bit = val % 32;
    if (word >= _v.size())
    {
        return false;
    }
    // The mask starts from an unsigned 1: shifting a signed 1 into the sign
    // bit (bit == 31) is undefined or implementation-defined before C++20.
    _v[word] |= (1u << bit);
    return true;
}
1.2.2 Reset
// Clears bit `val` to 0.
// Returns true on success, or false (leaving the bitmap untouched) when the
// storage word that would hold the bit does not exist in _v.
bool Reset(size_t val)
{
    // Index of the 32-bit storage word, and the bit position inside it.
    const size_t word = val / 32;
    const size_t bit = val % 32;
    if (word >= _v.size())
    {
        return false;
    }
    // The mask starts from an unsigned 1: shifting a signed 1 into the sign
    // bit (bit == 31) is undefined or implementation-defined before C++20.
    _v[word] &= ~(1u << bit);
    return true;
}
1.2.3 Text
// Reports whether bit `val` is currently 1.
// Returns false as well when the storage word that would hold the bit does
// not exist in _v.
bool text(size_t val)
{
    // Index of the 32-bit storage word, and the bit position inside it.
    const size_t word = val / 32;
    const size_t bit = val % 32;
    if (word >= _v.size())
    {
        return false;
    }
    // Unsigned mask (a signed 1 shifted into bit 31 is not portable before
    // C++20); the comparison makes the conversion to bool explicit.
    return (_v[word] & (1u << bit)) != 0;
}
1.3 位图的应用
2.布隆过滤器
2.1布隆过滤器的提出
2.2 布隆过滤器概念
2.3 布隆过滤器的实现
2.3.1布隆过滤器的插入
向布隆过滤器中插入:“baidu”
向布隆过滤器中插入:“tencent”
void Set(const K& key)
{
size_t Hashi1 = HashFun1()(key)%N;
_bs.Set(Hashi1);
size_t Hashi2 = HashFun2()(key)%N;
_bs.Set(Hashi2);
size_t Hashi3 = HashFun3()(key)%N;
_bs.Set(Hashi3);
//cout << s << ":" << Hashi1 << "-" << Hashi2 << "-" << Hashi3 << endl;
}
2.3.2 布隆过滤器的查找
// Membership query: true only when all three bits selected by the hash
// functors are set. The && chain stops at the first cleared bit, so later
// hashes are not computed once the answer is known to be false.
bool Text(const K& key)
{
    return _bs.text(HashFun1()(key) % N)
        && _bs.text(HashFun2()(key) % N)
        && _bs.text(HashFun3()(key) % N);
}
2.3.3 布隆过滤器的删除
2.3.4 布隆过滤器优点
2.3.5 布隆过滤器缺陷
3.海量数据面试题
3.1哈希切割
3.2位图应用
3.3布隆过滤器
三、全部代码
//BloomFilter.h
#include <bitset>
#include <string>
#include <vector>
// BKDR string hash functor: folds the characters into a running value using
// the multiplier 131.
struct BKDRHash
{
    // Returns the hash of `str`; the empty string hashes to 0.
    // Arithmetic is done in size_t, so overflow simply wraps.
    // Characters are added as plain `char`, so bytes outside the ASCII range
    // contribute according to the platform's char signedness.
    // The call operator is const so the functor also works through const
    // objects, and std::string is spelled out so the header does not depend
    // on a using-directive in the including file.
    size_t operator()(const std::string& str) const
    {
        size_t hash = 0;
        for (const char ch : str)
        {
            hash = hash * 131 + ch;
        }
        return hash;
    }
};
// AP string hash functor: mixes each character into the running value with
// one of two shift/xor formulas, chosen by the parity of the character index.
struct APHash
{
    // Returns the hash of `str`; the empty string hashes to 0.
    // Each character is first converted to size_t, so bytes outside the ASCII
    // range contribute according to the platform's char signedness.
    // The call operator is const so the functor also works through const
    // objects, and std::string is spelled out so the header does not depend
    // on a using-directive in the including file.
    size_t operator()(const std::string& str) const
    {
        size_t hash = 0;
        for (size_t i = 0; i < str.size(); ++i)
        {
            const size_t ch = str[i];
            if ((i & 1) == 0)
            {
                // Even index.
                hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
            }
            else
            {
                // Odd index: same shape with different shifts, complemented.
                hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
            }
        }
        return hash;
    }
};
// DJB string hash functor: starts from 5381 and, for every character, adds
// (hash << 5) + character to the running value (i.e. hash = hash * 33 + ch).
struct DJBHash
{
    // Returns the hash of `str`; the empty string hashes to 5381.
    // Arithmetic is done in size_t, so overflow simply wraps.
    // Characters are added as plain `char`, so bytes outside the ASCII range
    // contribute according to the platform's char signedness.
    // The call operator is const so the functor also works through const
    // objects, and std::string is spelled out so the header does not depend
    // on a using-directive in the including file.
    size_t operator()(const std::string& str) const
    {
        size_t hash = 5381;
        for (const char ch : str)
        {
            hash += (hash << 5) + ch;
        }
        return hash;
    }
};
namespace mine
{
// Bloom filter backed by an N-bit bitmap and three hash functors.
//   N         number of bits in the bitmap (must be non-zero)
//   K         key type
//   HashFun1..HashFun3
//             default-constructible functors; each is called as
//             HashFunX()(key) and its result is reduced modulo N
template<size_t N
    , class K = std::string
    , class HashFun1 = BKDRHash
    , class HashFun2 = APHash
    , class HashFun3 = DJBHash>
class BloomFilter
{
    // Every bit position is computed as hash % N; N == 0 would divide by zero.
    static_assert(N > 0, "BloomFilter requires at least one bit");
public:
    // Inserts `key` by switching on the three bits its hashes select.
    void Set(const K& key)
    {
        _bs.Set(HashFun1()(key) % N);
        _bs.Set(HashFun2()(key) % N);
        _bs.Set(HashFun3()(key) % N);
    }
    // Membership query.
    // false: at least one selected bit is clear, so `key` was never inserted.
    // true:  all three bits are set. The bits may have been set by other
    //        keys, so this only means "possibly inserted".
    // Evaluation stops at the first cleared bit.
    bool Text(const K& key)
    {
        return _bs.text(HashFun1()(key) % N)
            && _bs.text(HashFun2()(key) % N)
            && _bs.text(HashFun3()(key) % N);
    }
private:
    bitset<N> _bs; // the underlying bitmap
};
// Demo for BloomFilter: inserts four names, then prints "<name>:<0|1>" for
// four probes, two of which were inserted and two of which were not.
// Declared inline because the definition lives in a header; without it,
// including the header from more than one source file defines the function
// twice.
inline void text3()
{
    // Author's note: with 11 bits instead of 20, "牛魔王" is reported as
    // present even though it was never inserted (a false positive).
    BloomFilter<20> BF;
    const std::vector<std::string> v1 = { "孙悟空","白龙马","猪八戒","唐三藏" };
    for (const auto& s : v1)
    {
        BF.Set(s);
    }
    std::cout << std::endl;
    const std::vector<std::string> v2 = { "孙悟空","牛魔王","猪八戒","白骨精" };
    for (const auto& s : v2)
    {
        // One query per probe is enough; Text() only reads the filter.
        std::cout << s << ":" << BF.Text(s) << std::endl;
    }
}
}
//BitSet.h
#pragma once
#include <vector>
namespace mine
{
// Fixed-size bitmap holding N bits, packed 32 to a storage word.
// Valid bit indexes are 0 .. N-1; every operation rejects anything else.
// Assumes unsigned int is at least 32 bits wide, as the original int-based
// storage did.
template<size_t N>
class bitset
{
public:
    // Allocates exactly enough zero-initialised words for N bits.
    // Written as quotient + remainder test rather than (N + 31) / 32 so that
    // a very large N cannot overflow.
    bitset()
        : _v(N / kBitsPerWord + (N % kBitsPerWord != 0 ? 1 : 0), 0u)
    {
    }
    // Sets bit `val` to 1. Returns false, changing nothing, when val >= N.
    bool Set(size_t val)
    {
        if (val >= N)
        {
            return false;
        }
        _v[val / kBitsPerWord] |= maskFor(val);
        return true;
    }
    // Clears bit `val` to 0. Returns false, changing nothing, when val >= N.
    bool Reset(size_t val)
    {
        if (val >= N)
        {
            return false;
        }
        _v[val / kBitsPerWord] &= ~maskFor(val);
        return true;
    }
    // Returns true when bit `val` is 1; false when it is 0 or val >= N.
    bool text(size_t val) const
    {
        if (val >= N)
        {
            return false;
        }
        return (_v[val / kBitsPerWord] & maskFor(val)) != 0;
    }
private:
    static constexpr size_t kBitsPerWord = 32;
    // Single-bit mask for `val` inside its storage word. Built from an
    // unsigned 1 so that selecting bit 31 never shifts into a sign bit.
    static unsigned int maskFor(size_t val)
    {
        return 1u << (val % kBitsPerWord);
    }
    std::vector<unsigned int> _v; // storage words, lowest bit indexes first
};
// Two-bit saturating occurrence counter for every value in an N-bit range,
// stored as two parallel bitmaps: _bs1 holds the high bit, _bs2 the low bit.
//   00 -> never seen      01 -> seen once
//   10 -> seen twice      11 -> seen more than twice
template<size_t N>
class twobitset
{
public:
    // Records one more occurrence of `val`, advancing its state by one step
    // and saturating at 11.
    void Set(size_t val)
    {
        // Read each bit once; the transitions below depend only on this pair.
        const bool high = _bs1.text(val);
        const bool low = _bs2.text(val);
        if (!high && !low)
        {
            // 00 -> 01
            _bs2.Set(val);
        }
        else if (!high && low)
        {
            // 01 -> 10
            _bs1.Set(val);
            _bs2.Reset(val);
        }
        else if (high && !low)
        {
            // 10 -> 11
            _bs2.Set(val);
        }
        // 11 stays 11.
    }
    // Returns true when `val` has been recorded exactly once (state 01).
    bool is_once(size_t val)
    {
        return !_bs1.text(val) && _bs2.text(val);
    }
private:
    bitset<N> _bs1; // high bit of the counter
    bitset<N> _bs2; // low bit of the counter
};
// Demo for bitset: stores a handful of ints (one of them negative), prints
// every stored value, clears 5 and 9, then prints the stored values below 10.
// Declared inline because the definition lives in a header; without it,
// including the header from more than one source file defines the function
// twice.
inline void text1()
{
    // The demo is pinned to the 32-bit index range. The original used
    // SIZE_MAX, which equals this value where size_t is 32 bits, but on a
    // 64-bit target it asks for a bitmap that cannot be allocated and a scan
    // that never finishes. Note the bitmap still needs N/32 + 1 storage
    // words, so running this takes a large allocation and a long scan.
    // Assumes unsigned int is 32 bits wide - TODO confirm for the target.
    const size_t kBits = 0xFFFFFFFFu;
    bitset<kBits> bs;
    const int arr[] = { -5,1,5,8,7,9,6,2,3 };
    for (const int e : arr)
    {
        // A negative value wraps into the unsigned 32-bit range.
        bs.Set(static_cast<unsigned int>(e));
    }
    for (size_t i = 0; i < kBits; ++i)
    {
        // Converting back to int shows wrapped entries as negatives again.
        if (bs.text(i)) std::cout << static_cast<int>(static_cast<unsigned int>(i)) << " ";
    }
    std::cout << std::endl;
    bs.Reset(5);
    bs.Reset(9);
    for (size_t i = 0; i < 10; ++i)
    {
        if (bs.text(i)) std::cout << static_cast<int>(i) << " ";
    }
    std::cout << std::endl;
}
// Demo for twobitset: records a short list of values and prints the ones
// that occur exactly once.
// Declared inline because the definition lives in a header; without it,
// including the header from more than one source file defines the function
// twice.
inline void text2()
{
    twobitset<15> tbs;
    const int arr[] = { 1,1,2,5,8,6,6,10 };
    for (const int e : arr)
    {
        tbs.Set(e);
    }
    std::cout << "只出现一次的数字" << ":";
    for (size_t i = 0; i < 15; ++i)
    {
        if (tbs.is_once(i)) std::cout << i << " ";
    }
    std::cout << std::endl;
}
}
//text.c
#include <iostream>
using namespace std;
#include "BitSet.h"
#include"Bloomfilter.h"
int main()
{
    // Only the Bloom filter demo is enabled. The bitmap demos can be run by
    // calling mine::text1() or mine::text2() here instead.
    mine::text3();
    return 0;
}