【Java】＜泛型＞，在编译阶段约束操作的数据结构，并进行检查。-CFANZ编程社区

四、哈希表模拟实现unordered_set和unordered_map

看这篇文章之前你需要对哈希表有一定了解，本文主讲代码实现

一、哈希的概念与方法

1、哈希概念

2、常用的两个哈希函数

注：由于除留余数法是目前很好的哈希函数，本文讲解用的全是除留余数法，故不用直接定址法

1. 直接定址法 --( 常用 )

2. 除留余数法 --( 常用 )

二、闭散列的实现

先用哈希表的查找来引出基本结构的设计

可见，删除会影响查找，可不可以这样考虑：直接遍历整个表查找一下有没有21？不行，哈希表就是为了效率而生的，你这么搞效率就是O（N）了，哈希表就没意义了。

注：哈希表的代码在HashTable.h中实现

1、基本结构：

// Occupancy state of one slot in the closed-hashing table.
enum State
{
	EMPTY = 0,  // slot holds no value
	EXIST = 1,  // slot holds a live value
	DELETE = 2  // slot held a value that has since been erased
};

// One slot of the closed-hashing table: the stored value plus its state.
template<class T>
struct HashData
{
	T _data;       // stored value
	State _state;  // occupancy state of this slot

	// Value-initializes the payload and marks the slot EMPTY.
	// `_data()` works for any default-constructible T (integers become 0),
	// whereas `_data(0)` only makes sense for arithmetic-like types.
	// Initializers are listed in declaration order, which is the order in
	// which members are actually initialized.
	HashData()
		:_data()
		,_state(EMPTY)
	{}
};

// Closed-hashing table skeleton.
// K is the key type used for lookups, T is the stored element type, and
// KeyOfT is a functor that extracts the key from a stored T.
template<class K, class T, class KeyOfT>
class HashTable
{
	// NOTE(review): this alias reuses the name of the enclosing-scope HashData
	// template inside the class; some compilers may reject a declaration that
	// changes the meaning of a name already used in the class — confirm on the
	// target toolchain.
	typedef HashData<T> HashData;
private:
	vector<HashData> _tables; // slot array of the hash table
	size_t _num = 0;	// number of stored elements; not the same as the table's capacity
};

2、两种增容思路和插入

闭散列的增容：

增容分两种思路：

1、传统思路

2、简便思路

哈希表的插入：

当发生哈希冲突时，如果哈希表未被装满，说明哈希表中必然还有空位置，那么可以把 key 存放到冲突位置之后的“下一个”空位置中去。那如何寻找下一个空位置呢？寻找的方式有线性探测和二次探测两种；插入时要找到状态为“空”或“已删除”的位置，而每个位置的状态在闭散列中是用 enum 枚举来标识的。

规则：

a、线性探测（挨着往后找，直到找到空位置）
b、二次探测（按 i^2，跳跃着往后找，直到找到空位置）

线性探测：

二次探测：

代码如下：

bool Insert(const T& data)
{
	KeyOfT koft;
	//1、增容：第一次插入或者负载因子>=0.7就要增容
	if (_tables.capacity() == 0 || _num * 10 / _tables.capacity() == 7)
	{
		//A、增容——传统思路
		//vector<HashData> newtables;
		//size_t newcapacity = _tables.capacity() == 0 ? 10 : _tables.capacity() * 2;
		//newtables.resize(newcapacity);//开空间+自动初始化为0
		把旧空间数据拷贝到新空间中
		//for (size_t i = 0; i < _tables.capacity(); ++i)
		//{
		//	if (_tables[i]._state == EXIST)
		//	{
		//		size_t index = koft(_tables[i]._data) % newtables.capacity();
		//		while (newtables[index]._state == EXIST)
		//		{
		//			index++;

		//			if (index == newtables.capacity())
		//			{
		//				index = 0;//走到尾了就要返回头找位置
		//			}
		//		}
		//		newtables[index] = _tables[i];
		//	}
		//}

		//_tables.swap(newtables);

		//B、增容——简便思路
		HashTable<K, T, KeyOfT> newht;
		size_t newcapacity = _tables.capacity() == 0 ? 10 : _tables.capacity() * 2;
		newht._tables.resize(newcapacity);

		for (size_t i = 0; i < _tables.capacity(); ++i)
		{
			if (_tables[i]._state == EXIST)
			{
				newht.Insert(_tables[i]._data);//把原哈希表中每个数据利用Insert都插入到新哈希表中
			}
		}

		_tables.swap(newht._tables);//交换两者的vector
	}

	//1、线性探测
	//size_t index = koft(data) % _tables.capacity();//计算出要映射的位置
	//while (_tables[index]._state == EXIST)
	//{
	//	if (koft(_tables[index]._data) == koft(data))
	//	{
	//		return false;//如果存在相同的数据
	//	}

	//	++index;
	//	if (index == _tables.capacity())
	//	{
	//		index = 0;
	//	}
	//}

	//2、二次探测
	size_t start = koft(data) % _tables.capacity();
	size_t index = start;
	int i = 1;
	while (_tables[index]._state == EXIST)
	{
		if (koft(_tables[index]._data) == koft(data))
		{
			return false;
		}

		index = start + i * i;
		++i;
		index %= _tables.capacity();
	}

	//插入数据
	_tables[index]._data = data;
	_tables[index]._state = EXIST;//用状态表示该位置已有数据
	++_num;		//有效数据个数++
	
	return true;
}

问题解释和知识回顾（理解代码）：

3、查找

// Looks up `key` in the closed-hashing table using linear probing.
//
// Returns a pointer to the slot holding the key, or nullptr when the key is
// not stored (including when the table has no slots at all).
HashData* Find(const K& key)
{
	const size_t size = _tables.size();
	if (size == 0)
	{
		return nullptr; // nothing to probe, and `key % 0` would be undefined
	}

	KeyOfT koft;
	size_t index = key % size;
	// At most `size` probes, so the search ends even if no slot is EMPTY.
	for (size_t probed = 0; probed < size; ++probed)
	{
		if (_tables[index]._state == EMPTY)
		{
			break; // an EMPTY slot ends the probe chain
		}

		// Erased slots are stepped over: their leftover data must not match,
		// and the key may still be stored further along the chain.
		if (_tables[index]._state == EXIST && koft(_tables[index]._data) == key)
		{
			return &_tables[index];
		}

		++index;
		if (index == size)
		{
			index = 0; // wrap around to the front of the table
		}
	}

	return nullptr;
}

4、删除

	bool Erase(const K& key)
	{
		HashData* ret = Find(key);
		if (ret)
		{
			ret->_state = DELETE;//用状态代表删除状态
			--_num; //--有效元素个数
			return true;
		}
		else
		{
			return false;
		}
	}

测试闭散列代码：

// Key extractor for set-like tables: the stored element is itself the key,
// so the argument is returned unchanged (by reference, no copy).
template<class K>
struct SetKeyOfT
{
	// const-qualified so the functor can also be invoked through a const
	// object or a const reference.
	const K& operator()(const K& key) const
	{
		return key;
	}
};

// Exercises the closed-hashing table: inserts several keys, erases two of
// them, then looks one up.
void TestCloseHash()
{
	CLOSEHASH::HashTable<int, int, SetKeyOfT<int>> ht;

	// Inserted in this exact order.
	const int keys[] = { 2, 4, 14, 24, 26, 16 };
	for (int key : keys)
	{
		ht.Insert(key);
	}

	ht.Erase(14);
	ht.Erase(2);

	// Result is not checked here; presumably meant for inspection in a debugger.
	CLOSEHASH::HashData<int>* data = ht.Find(4);
}

用线性探测测试代码：

用二次探测测试代码：

因为闭散列没有开散列好，所以这里闭散列简单实现下即可，对于更进一步的迭代器、和实现unordered_set和unordered_map等等操作我们都是用开散列实现，开散列才是重中之重

三、开散列的实现

1、基本结构

2、仿函数Hash

3、迭代器实现

// Forward declaration so the iterator below can name the hash table type.
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

// Forward iterator over the elements of the open-hashing (bucket) table.
// It holds the current node and a pointer back to the owning table, which is
// needed to move on to the next bucket once the current chain is exhausted.
// NOTE(review): operator++ reads _pht->_tables and calls _pht->HashFunc; if
// those are not public, the table must grant this struct access — confirm.
template<class K, class T, class KeyOfT, class Hash>
struct __HashTableIterator
{
	typedef __HashTableIterator<K, T, KeyOfT, Hash> Self;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HashNode<T> Node;
	Node* _node; // current node; nullptr represents end()
	HT* _pht;    // table being iterated

	__HashTableIterator(Node* node, HT* pht)
		:_node(node)
		,_pht(pht)
	{}

	// Returns a reference to the element stored in the current node.
	T& operator*() const
	{
		return _node->_data;
	}

	// Returns a pointer to the element stored in the current node.
	T* operator->() const
	{
		return &_node->_data;
	}

	// Prefix increment: advances to the next element and returns *this.
	// Every path returns; flowing off the end of a value-returning function
	// is undefined behaviour.
	Self& operator++()
	{
		if (_node->_next)
		{
			// Still inside the same bucket: follow the singly linked chain.
			_node = _node->_next;
			return *this;
		}

		// Chain exhausted: work out which bucket the current node is in,
		// then scan the following buckets for the first non-empty one.
		KeyOfT koft;
		const size_t bucketCount = _pht->_tables.size();
		size_t i = _pht->HashFunc(koft(_node->_data)) % bucketCount;
		for (++i; i < bucketCount; ++i)
		{
			Node* cur = _pht->_tables[i];
			if (cur)
			{
				_node = cur;
				return *this;
			}
		}

		// No further element: become equal to end().
		_node = nullptr;
		return *this;
	}

	// Two iterators are unequal when they point at different nodes.
	bool operator!=(const Self& s) const
	{
		return _node != s._node;
	}

	// Two iterators are equal when they point at the same node.
	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}
};

哈希表中的begin()和end()实现：

	typedef __HashTableIterator<K, T, KeyOfT, Hash> iterator;

	// Returns an iterator to the first node of the first non-empty bucket,
	// or end() when every bucket is empty.
	iterator begin()
	{
		// Bounded by size(): only elements below size() exist, and capacity()
		// is allowed to be larger than that.
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i])
			{
				return iterator(_tables[i], this);
			}
		}

		return end();
	}

	// Returns the past-the-end iterator: a null node bound to this table.
	iterator end()
	{
		iterator pastTheEnd(nullptr, this);
		return pastTheEnd;
	}

4、增容和插入

// Inserts `data` into the open-hashing (bucket) table.
//
// Returns {iterator to the element, true} when a new node was created, or
// {iterator to the existing element, false} when the key was already stored.
pair<iterator, bool> Insert(const T& data)
{
	KeyOfT koft;

	// 1. Grow when the load factor reaches 1 (element count == bucket count).
	//    This also covers the very first insertion, where both are 0.
	if (_tables.size() == _num)
	{
		size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
		vector<Node*> newtables;
		newtables.resize(newsize, nullptr);

		// Relink every existing node into the new bucket array; nodes are
		// moved, not reallocated. On the first growth the old table has no
		// buckets and the loop body never runs.
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				size_t index = HashFunc(koft(cur->_data)) % newtables.size();
				// Push at the head of the destination bucket.
				cur->_next = newtables[index];
				newtables[index] = cur;

				cur = next;
			}
			_tables[i] = nullptr; // the old bucket no longer owns any node
		}

		_tables.swap(newtables);
	}

	size_t index = HashFunc(koft(data)) % _tables.size();

	// 2. Reject duplicates: scan the target bucket for the same key.
	Node* cur = _tables[index];
	while (cur)
	{
		if (koft(cur->_data) == koft(data))
		{
			return make_pair(iterator(cur, this), false);
		}
		cur = cur->_next;
	}

	// 3. Create the node and push it at the head of the bucket.
	Node* newnode = new Node(data);
	newnode->_next = _tables[index];
	_tables[index] = newnode;

	++_num;
	// A node was inserted, so the flag must be true.
	return make_pair(iterator(newnode, this), true);
}

5、查找

// Looks up `key` in the open-hashing (bucket) table.
// Returns the node holding the key, or nullptr when the key is not stored
// (including when the table has no buckets yet).
Node* Find(const K& key)
{
	if (_tables.size() == 0)
	{
		return nullptr; // no buckets: `% 0` below would be undefined
	}

	KeyOfT koft;
	size_t index = HashFunc(key) % _tables.size(); // bucket the key maps to
	for (Node* cur = _tables[index]; cur; cur = cur->_next)
	{
		if (koft(cur->_data) == key)
		{
			return cur;
		}
	}

	return nullptr; // walked the whole bucket without a match
}

6、删除

// Removes the element with the given key from the open-hashing table.
// Returns true when a node was unlinked and freed, false when the key is not
// stored (including when the table has no buckets yet).
bool Erase(const K& key)
{
	if (_tables.size() == 0)
	{
		// Nothing to erase. Handled explicitly instead of via assert, which
		// disappears under NDEBUG and would leave a `% 0` below.
		return false;
	}

	KeyOfT koft;
	size_t index = HashFunc(key) % _tables.size();
	Node* cur = _tables[index];
	Node* prev = nullptr; // node preceding cur in the bucket chain

	while (cur)
	{
		if (koft(cur->_data) == key)
		{
			if (prev == nullptr)
			{
				// Removing the head of the bucket.
				_tables[index] = cur->_next;
			}
			else
			{
				prev->_next = cur->_next;
			}
			delete cur;
			--_num;

			return true;
		}

		prev = cur;
		cur = cur->_next;
	}

	return false; // key not present
}

7、Clear和析构函数

	// Destructor: releases every node by delegating to Clear().
	~HashTable()
	{
		Clear();
		// The vector member needs no manual release: it is a class type, so it is
		// cleaned up automatically when the hash table is destroyed.
	}

	void Clear()
	{
		for (size_t i = 0; i < _tables.capacity(); ++i)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}

			_tables[i] = nullptr;//清空完数据后置为nullptr
		}
	}

遗留的问题：

四、哈希表模拟实现unordered_set和unordered_map

MyUnordered_set.h中实现unordered_set的：

#pragma once
#include"HashTable.h"
using namespace OPENHASH;

namespace mz
{
	using OPENHASH::_Hash;
	template<class K, class Hash = _Hash<K>>
	class unordered_set
	{
		struct SetKOfT
		{
			const K& operator()(const K& k)
			{
				return k;
			}
		};

	public:
		//加typename是告诉编译器这是个类型，你先让我过，等实例化了再去找
		//因为模板没实例化它是不接受你用模板里面的一个类型，故要用typename 

		typedef typename HashTable<K, K, SetKOfT, Hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}
		pair<iterator, bool> insert(const K& k)
		{
			return _ht.Insert(k);
		}
	private:
		HashTable<K, K, SetKOfT, Hash> _ht;
	};

	// Smoke test: inserts a few integers and prints them in iteration order.
	// Declared inline because this function is defined in a header; without
	// it, including the header from more than one translation unit produces
	// multiple definitions at link time.
	inline void test_unordered_set()
	{
		unordered_set<int> s;
		s.insert(1);
		s.insert(5);
		s.insert(4);
		s.insert(2);

		// Explicit iterator loop to exercise begin()/end()/operator++.
		unordered_set<int>::iterator it = s.begin();
		while (it != s.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}
}

MyUnordered_map.h中实现unordered_map的：

#pragma once
#include"HashTable.h"
using namespace OPENHASH;

namespace mz
{
	using OPENHASH::_Hash;
	template<class K, class V, class Hash = _Hash<K>>//一般模板参数都是由上一层来控制的
	class unordered_map
	{
		struct MapKOfT 
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:

		typedef typename HashTable<K, pair<K,V>, MapKOfT, Hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		
		iterator end()
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		V& operator[](const K& key)
		{//unordered_map的operator[]是给key值返回v的引用
         //底层实现用的是哈希表的Insert来实现，在介绍使用时讲过这点
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			return ret.first->second;
		}

	private:
		HashTable<K, pair<K, V>, MapKOfT, Hash> _ht;//底层是个哈希表
	};

	// Smoke test: fills a string-to-string dictionary through insert() and
	// operator[], then prints every entry in iteration order.
	// Declared inline because this function is defined in a header; without
	// it, including the header from more than one translation unit produces
	// multiple definitions at link time.
	inline void test_unordered_map()
	{
		unordered_map<string, string> dict;
		dict.insert(make_pair("factual", "真实的"));
		dict.insert(make_pair("fringe", "侵犯"));
		dict.insert(make_pair("intermittent", "间歇的"));
		dict["prerequisite"] = "先决条件";
		dict["reduce to"] = "处于";

		// Equivalent spelled-out type: unordered_map<string, string>::iterator
		auto it = dict.begin();
		while (it != dict.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;

	}
}