framework:
内部排序:
insert
直接
折半
希尔: 不 易 记, 不 care
交换
bubble
quick
选择
simple select
heap sort
归并
本文 默认按 从小到大 排序, insert 到 insert_pos 前
1 insert sort
思想:
每次 将 待排 record
按 其 key 大小 insert
到 previous sorted subsequence
-> loop
引出 3 insert methods / algorithms:
direct / half / shell insert
1.1 direct insert
1. 思想
(1) `某时刻, 前 i = 1,2,... 个 已 insert / sorted`
sorted arr[0 .. i - 1] / arr[i] / unsorted arr[i + 1 .. ]
(2) `insert arr[i] to sorted arr[0 .. i - 1]:`
1) record arr[i]
2) `find insert_pos
+ arr[i - 1, i - 2, ..., insert_pos] 逐个后移 同时进行:`
从 pos = i - 1 开始倒着往前找,
if current pos elem not satisfied insert
current elem move backward
3) recorded arr[i] copy 到 arr[insert_pos]
/*
 * Direct (straight) insertion sort: sorts arr[0 .. n-1] ascending, in place.
 *
 * arr: array to sort
 * n:   number of leading elements to sort; n <= 1 leaves the array untouched
 *
 * An element is only shifted past strictly greater elements, so equal keys
 * keep their relative order.
 */
void
direct_insert(int arr[], int n)
{
    // arr[0 .. next-1] is already sorted; extend that prefix one element at a time.
    for (int next = 1; next < n; ++next) {
        // Nothing to do when the element is not smaller than its left neighbour.
        if (arr[next] >= arr[next - 1])
            continue;

        const int key = arr[next];   // value being inserted
        int slot = next;             // hole that key will finally fill

        // Slide larger elements one place right until the hole reaches
        // the front or sits just after an element <= key.
        while (slot > 0 && arr[slot - 1] > key) {
            arr[slot] = arr[slot - 1];
            --slot;
        }
        arr[slot] = key;
    }
}
1.2 half insert
是 对 direct insert 的 改进
direct insert: `边 比较 边 移动 elem`
1. 思想
`分离 比较 和 移动:`
(1) 先 折半 find insert_pos,
(2) 再 统一 移动 insert_pos 及其 之后 elem
/*
 * Binary (half) insertion sort: sorts arr[0 .. n-1] ascending, in place.
 *
 * Unlike direct insertion, comparing and moving are separated: the insert
 * position inside the sorted prefix is located by binary search first, and
 * only then is the tail of the prefix shifted right in a single pass.
 *
 * arr: array to sort
 * n:   number of leading elements to sort
 */
void
half_sort(int arr[], int n)
{
    for (int next = 1; next < n; ++next) {
        const int key = arr[next];

        // Binary search in arr[lo .. hi]:
        //   arr[mid] >  key  -> continue in the left half  (hi = mid - 1)
        //   arr[mid] <= key  -> continue in the right half (lo = mid + 1)
        // The loop ends with lo == hi + 1; lo is the insert position, i.e.
        // the first index holding an element greater than key.
        int lo = 0;
        int hi = next - 1;
        while (lo <= hi) {
            const int mid = lo + (hi - lo) / 2;
            if (arr[mid] > key)
                hi = mid - 1;
            else
                lo = mid + 1;
        }

        // Shift arr[lo .. next-1] one place right, then fill the gap.
        for (int from = next - 1; from >= lo; --from)
            arr[from + 1] = arr[from];
        arr[lo] = key;
    }
}
2 swap sort
交换: 据 两 element key 的 compare result
来 交换 这 两个 record 的 position
2.1 bubble sort
1. 思想
假设 待 sort sequence len = n
1) `一趟排序:`
从后往前 / 从前往后 `两两比较 相邻元素`,
若 `逆序, 则 交换`; until 比较完 最后 2 个,
result 是 min_elem 冒到 first position (从后往前) / max_elem 冒到 last position (从前往后),
2) `下一趟, 前面已冒出 elem 不参与`,
待 sort 序列 少 1 个
3) `最多 n - 1 趟` 就能 sort 好 所有 elem
/* Exchanges the two ints that v1 and v2 point to. */
void
swap(int *v1, int *v2)
{
    const int first = *v1;
    const int second = *v2;

    *v1 = second;
    *v2 = first;
}
/*
 * Bubble sort (basic form): sorts arr[0 .. n-1] ascending, in place.
 *
 * Each pass sweeps from the tail toward the front, swapping adjacent
 * out-of-order pairs, so the smallest remaining element ends up at the
 * front of the unsorted part. Always performs n - 1 passes.
 */
void
bubble_sort(int arr[], int n)
{
    // Before pass `settled`, arr[0 .. settled-1] already holds its final values.
    for (int settled = 0; settled < n - 1; ++settled) {
        for (int right = n - 1; right > settled; --right) {
            const int left = right - 1;
            if (arr[left] > arr[right])
                swap(&arr[left], &arr[right]);
        }
    }
}
2. 改进: 若 某趟 中无交换 => sort 提前完成
use a swap_flag
/*
 * Bubble sort with early exit: sorts arr[0 .. n-1] ascending, in place.
 *
 * Same tail-to-front sweep as the basic form, but a pass that performs no
 * swap proves the array is already sorted, so the function returns at once.
 */
void
bubble_sort(int arr[], int n)
{
    // Before pass `settled`, arr[0 .. settled-1] already holds its final values.
    for (int settled = 0; settled < n - 1; ++settled) {
        int swapped = false;   // did this pass exchange anything?

        for (int right = n - 1; right > settled; --right) {
            const int left = right - 1;
            if (arr[left] > arr[right]) {
                swap(&arr[left], &arr[right]);
                swapped = true;
            }
        }

        // A swap-free pass means every adjacent pair is in order.
        if (!swapped)
            return;
    }
}
2.2 quick sort
是对 bubble sort 的 一种改进
1. 思想: `分治`
(1) 划分
1) 待 sort sequence a[1..n] 中
任取 一 elem 作 pivot (枢轴 / 中心)
2) 用 `1 趟 快排` 将 a[1..n] 划分为 two parts:
a[1...pivot_pos - 1] 和 a[pivot_pos + 1.. n],
分别 < 和 >= pivot
即 找 pivot_pos
3) `pivot_pos 即 当前序列 pivot 的 最终排序 pos`
(2) 分别对 left / right 2 个 sub_sequence
递归 上述过程
/*
 * One quick-sort pass over a[left .. right].
 *
 * Takes a[left] as the pivot and rearranges the range into
 *   a[left .. pivot_pos-1]   (all <  pivot)
 *   a[pivot_pos]             (the pivot, at its final sorted position)
 *   a[pivot_pos+1 .. right]  (all >= pivot)
 *
 * Returns pivot_pos.
 */
int
partition(int a[], int left, int right)
{
    int lo = left;
    int hi = right;
    const int pivot = a[lo];   // a[lo] now acts as a free "hole"

    // Alternate scans until the two cursors meet.
    while (lo < hi) {
        // Scan from the right for an element smaller than the pivot
        // and move it into the hole on the left.
        while (lo < hi && a[hi] >= pivot)
            --hi;
        a[lo] = a[hi];

        // Scan from the left for an element not smaller than the pivot
        // and move it into the hole on the right.
        while (lo < hi && a[lo] < pivot)
            ++lo;
        a[hi] = a[lo];
    }

    // lo == hi: the remaining hole is the pivot's final position.
    a[lo] = pivot;
    return lo;
}
/*
 * Quick sort (divide and conquer): sorts a[left .. right] ascending, in place.
 *
 * A range with fewer than two elements (left >= right) is already sorted.
 * Otherwise the range is partitioned around a pivot, which lands at its
 * final position, and the two sides are sorted the same way.
 */
void
quick_sort(int a[], int left, int right)
{
    // The left side is handled by a recursive call; the right side is
    // handled by the next loop iteration, which visits the sub-ranges in
    // the same order as recursing on both sides.
    while (left < right) {
        const int pivot_pos = partition(a, left, right);

        quick_sort(a, left, pivot_pos - 1);
        left = pivot_pos + 1;
    }
}
2. 空间效率
`递归 => 用 递归工作栈: 栈 容量 与 递归最大深度 一致:`
最好情况 ceil(log_2(n + 1) )
最坏情况: n - 1 次 递归 => 栈 深度 O(n)
平均: 栈深度 O(log_2 n)
`最坏/平均 O(n) / O(log_2 n)`
3. 时间效率
与 划分 是否对称(划分算法) 有关
最坏: two parts 分别 n - 1 / 0 个 elem
-> 这种 最大程度的不对称 发生在 每层递归上
-> O(n^2)
改进:
(1) sub_sequence 较小时, no longer 递归调 快排,
而是用 direct insert sort
(2) 尽量选 1 个 可将 sequence 中分 的 pivot
eg: 取 head/tail/mid elem -> 再取 3 者 的 middle
4. 稳定性: 不稳定
划分 中, 若 right range 有 2 key 相等, 且 < pivot
=> 交换到 left range 后, 2 key 相对位置 会变
seq = {3, 2, `2`} -> 1 趟 快排后 -> {`2`, 2, 3}
3 select sort
1. 思想
第 i = 1, 2, ..., n-1 趟,
在后面 n - i + 1 个 待 sort elems 中
select key 最小者, 作 已 sorted sub_sequence ith elem
until 第 n - 1 趟 end, 待 sort elem 只剩 1 个, 不必再 sort
3.1 simple select sort
1. 思想
第 i = 1, 2, ..., n - 1 趟
`select key 最小者 与 ith elem a[i - 1] swap`
(times+1)th 趟: times = 0..n-2
walk & cmp *walk with *min_elem_pos
`min_elem_pos pointer` 初始化为 times,
`walk index pointer` 从 times + 1 开始,
if <
则 update min_elem_pos
/*
 * Simple selection sort: sorts a[0 .. n-1] ascending, in place.
 *
 * Pass number times + 1 (times = 0 .. n-2) finds the smallest element of
 * a[times .. n-1] and exchanges it with a[times]. After n - 1 passes only
 * one element is left and it is necessarily the largest, so no further
 * pass is needed.
 *
 * Not stable: the exchange can carry an element past an equal key.
 *
 * a: array to sort
 * n: number of leading elements to sort; n <= 1 leaves the array untouched
 */
void
select_sort(int a[], int n)
{
    // n - 1 passes
    for (int times = 0; times < n - 1; times++) {
        // Locate the smallest element of the unsorted tail a[times .. n-1].
        int min_elem_pos = times;
        for (int walk = times + 1; walk < n; walk++) {
            if (a[walk] < a[min_elem_pos])
                min_elem_pos = walk;
        }

        // Exchange only when the minimum is not already in place.
        if (min_elem_pos != times) {
            const int tmp = a[times];
            a[times] = a[min_elem_pos];
            a[min_elem_pos] = tmp;
        }
    }
}
2. 空间效率 O(1)
3. 时间效率 O( n*(n - 1) / 2 ) = O(n^2)
4. 稳定性: 不稳定
seq = {2, `2`, 1} -> 1 趟 sort 后 -> {1, `2`, 2}
3.2 heap sort
见
https://www.jianshu.com/p/ab2d9c54eb49
4 归并 sort
1. 思想
归并:
把 >= 2 个 有序表 合并为 1个 新有序表
2 路归并: n 个 records 待 sort 表
(1) 视为 n 个 len = 1 的 有序子表,
两两归并, 得 ceil(n / 2) 个 len = 2 or 1 有序表
(2) 再 两两归并... 重复之
(3) until 归并后 新表 len = n
/* Scratch buffer shared by merge(); owned by malloc_aux_array / free_aux_array. */
int *aux = NULL;

/*
 * Allocates the scratch buffer needed to merge-sort indices left .. right.
 *
 * merge() addresses aux with the same absolute indices as the array being
 * sorted (aux[left] .. aux[right]), so the buffer must hold right + 1 ints,
 * not just right - left + 1; otherwise any range with left > 0 would write
 * past the end of the allocation.
 *
 * An invalid range (left < 0 or right < left) leaves aux unchanged.
 * A buffer from a previous call is released first, so repeated calls do
 * not leak. On allocation failure aux is NULL.
 */
void
malloc_aux_array(int left, int right)
{
    if (left < 0 || right < left)
        return;

    free(aux);   // free(NULL) is a no-op
    aux = (int *)malloc(((size_t)right + 1) * sizeof *aux);
}

/*
 * Releases the scratch buffer and resets aux to NULL, so calling this
 * function again (or before any allocation) is harmless.
 */
void
free_aux_array(void)
{
    free(aux);
    aux = NULL;
}
/*
 * Merges the two adjacent sorted runs a[left .. mid] and a[mid+1 .. right]
 * into one sorted run a[left .. right].
 *
 * Uses the global scratch buffer aux at the same indices left .. right;
 * the caller must have allocated it beforehand.
 * On equal keys the element from the left run is taken first.
 */
void
merge(int a[], int left, int mid, int right)
{
    // Snapshot the whole range; a[] is then rebuilt from the snapshot.
    for (int k = left; k <= right; ++k)
        aux[k] = a[k];

    int lhs = left;      // next unread element of the left run  aux[left .. mid]
    int rhs = mid + 1;   // next unread element of the right run aux[mid+1 .. right]
    int out = left;      // next slot to fill in a[]

    // While both runs still have elements, emit the smaller head.
    while (lhs <= mid && rhs <= right) {
        if (aux[rhs] < aux[lhs])
            a[out++] = aux[rhs++];
        else
            a[out++] = aux[lhs++];
    }

    // At most one of the runs still has elements; copy them over as-is.
    while (lhs <= mid)
        a[out++] = aux[lhs++];
    while (rhs <= right)
        a[out++] = aux[rhs++];
}
/*
 * Two-way merge sort (divide and conquer): sorts a[left .. right] ascending.
 *
 * The range is split in half, each half is sorted recursively, and the two
 * sorted halves are combined with merge(). A range of at most one element
 * (left >= right) is already sorted and ends the recursion.
 *
 * merge() works through the global scratch buffer, so that buffer must be
 * allocated before the first call.
 */
void
merge_sort(int a[], int left, int right)
{
    if (left >= right)
        return;

    // Written as left + (right - left) / 2 rather than (left + right) / 2
    // so the sum cannot overflow int for large indices.
    int mid = left + (right - left) / 2;

    // Divide
    merge_sort(a, left, mid);
    merge_sort(a, mid + 1, right);

    // Combine
    merge(a, left, mid, right);
}
5 test
// sort.cpp
#include <cstdio>
#include <cstdlib> // malloc / free
#include "sort.h"
//----------1. direct_insert
/*
 * Direct (straight) insertion sort: sorts arr[0 .. n-1] ascending, in place.
 *
 * arr: array to sort
 * n:   number of leading elements to sort; n <= 1 leaves the array untouched
 *
 * Elements are only shifted past strictly greater ones, so equal keys keep
 * their relative order.
 */
void
direct_insert(int arr[], int n)
{
    int sorted_len = 1;   // arr[0 .. sorted_len-1] is sorted

    while (sorted_len < n) {
        const int key = arr[sorted_len];

        // Insertion is needed only when key is smaller than its left neighbour.
        if (key < arr[sorted_len - 1]) {
            // Walk backwards, moving every element greater than key one
            // place right; stop at the front or at an element <= key.
            int probe = sorted_len - 1;
            for (; probe >= 0 && key < arr[probe]; --probe)
                arr[probe + 1] = arr[probe];

            // probe + 1 is the vacated insert position.
            arr[probe + 1] = key;
        }
        ++sorted_len;
    }
}
//----------2. half_sort
/*
 * Binary (half) insertion sort: sorts arr[0 .. n-1] ascending, in place.
 *
 * For every element the insert position inside the sorted prefix is found
 * by binary search; afterwards the elements from that position onward are
 * shifted right in one pass and the element is stored.
 *
 * arr: array to sort
 * n:   number of leading elements to sort
 */
void
half_sort(int arr[], int n)
{
    for (int next = 1; next < n; ++next) {
        const int key = arr[next];
        int low = 0;
        int high = next - 1;

        // Halve the search range each round:
        //   arr[mid] >  key  -> high = mid - 1
        //   arr[mid] <= key  -> low  = mid + 1
        // On exit low == high + 1, and low is the insert position.
        while (low <= high) {
            const int mid = low + (high - low) / 2;
            if (arr[mid] > key)
                high = mid - 1;
            else
                low = mid + 1;
        }

        // Move arr[low .. next-1] one place right, back to front.
        int from = next - 1;
        while (from >= low) {
            arr[from + 1] = arr[from];
            --from;
        }

        arr[low] = key;
    }
}
//----------3. bubble_sort
/* Exchanges the values of the two ints addressed by v1 and v2. */
void
swap(int *v1, int *v2)
{
    const int old_v1 = *v1;
    const int old_v2 = *v2;

    *v1 = old_v2;
    *v2 = old_v1;
}
/*
 * Bubble sort with early exit: sorts arr[0 .. n-1] ascending, in place.
 *
 * Each pass sweeps from the tail toward the front and swaps adjacent
 * out-of-order pairs. At most n - 1 passes are made; a pass without any
 * swap means the array is already sorted and ends the function early.
 */
void
bubble_sort(int arr[], int n)
{
    // Before pass `settled`, arr[0 .. settled-1] already holds its final values.
    for (int settled = 0; settled < n - 1; ++settled) {
        int swapped = false;   // set once this pass exchanges a pair

        for (int right = n - 1; right > settled; --right) {
            const int left = right - 1;
            if (arr[left] > arr[right]) {
                swap(&arr[left], &arr[right]);
                swapped = true;
            }
        }

        // No exchange in this pass: every adjacent pair is in order.
        if (!swapped)
            return;
    }
}
//----------4. quick_sort
/*
 * One quick-sort pass over a[left .. right].
 *
 * Uses a[left] as the pivot and splits the range into
 *   a[left .. pivot_pos-1]   (all <  pivot)
 *   a[pivot_pos+1 .. right]  (all >= pivot)
 * with the pivot stored at pivot_pos, its final sorted position.
 *
 * Returns pivot_pos.
 */
int
partition(int a[], int left, int right)
{
    int lo = left;
    int hi = right;
    const int pivot = a[lo];   // its slot becomes the first "hole"

    while (lo < hi) {
        // From the right: skip elements >= pivot, then move the stopper
        // into the hole on the left side.
        for (; lo < hi && a[hi] >= pivot; --hi)
            ;
        a[lo] = a[hi];

        // From the left: skip elements < pivot, then move the stopper
        // into the hole on the right side.
        for (; lo < hi && a[lo] < pivot; ++lo)
            ;
        a[hi] = a[lo];
    }

    // Cursors have met; that index is where the pivot belongs.
    a[lo] = pivot;
    return lo;
}
/*
 * Quick sort (divide and conquer): sorts a[left .. right] ascending, in place.
 *
 * Ranges with fewer than two elements (left >= right) need no work.
 * Otherwise partition() puts a pivot at its final position and the
 * sub-ranges on either side of it are sorted the same way.
 */
void
quick_sort(int a[], int left, int right)
{
    // Left sub-range: recursive call. Right sub-range: next loop iteration,
    // which processes the sub-ranges in the same order as a second
    // recursive call would.
    while (left < right) {
        const int pivot_pos = partition(a, left, right);

        quick_sort(a, left, pivot_pos - 1);
        left = pivot_pos + 1;
    }
}
//----------5. select_sort
/*
 * Simple selection sort: sorts a[0 .. n-1] ascending, in place.
 *
 * Pass number times + 1 (times = 0 .. n-2) picks the smallest element of
 * a[times .. n-1] and exchanges it with a[times]. After n - 1 passes the
 * single remaining element is necessarily the largest, so an n-th pass
 * would do nothing and is not performed.
 *
 * Not stable: the exchange can carry an element past an equal key.
 *
 * a: array to sort
 * n: number of leading elements to sort; n <= 1 leaves the array untouched
 */
void
select_sort(int a[], int n)
{
    // n - 1 passes
    for (int times = 0; times < n - 1; times++) {
        // Find the position of the minimum in the unsorted tail.
        int min_elem_pos = times;
        for (int walk = times + 1; walk < n; walk++) {
            if (a[walk] < a[min_elem_pos])
                min_elem_pos = walk;
        }

        // Exchange only when the minimum is not already at a[times].
        if (min_elem_pos != times) {
            const int tmp = a[times];
            a[times] = a[min_elem_pos];
            a[min_elem_pos] = tmp;
        }
    }
}
//----------6. merge_sort
// global aux array
/* Scratch buffer shared by merge(); owned by malloc_aux_array / free_aux_array. */
int *aux = NULL;

/*
 * Allocates the scratch buffer needed to merge-sort indices left .. right.
 *
 * merge() addresses aux with the same absolute indices as the array being
 * sorted (aux[left] .. aux[right]), so the buffer must hold right + 1 ints,
 * not just right - left + 1; otherwise any range with left > 0 would write
 * past the end of the allocation.
 *
 * An invalid range (left < 0 or right < left) leaves aux unchanged.
 * A buffer from a previous call is released first, so repeated calls do
 * not leak. On allocation failure aux is NULL.
 */
void
malloc_aux_array(int left, int right)
{
    if (left < 0 || right < left)
        return;

    free(aux);   // free(NULL) is a no-op
    aux = (int *)malloc(((size_t)right + 1) * sizeof *aux);
}

/*
 * Releases the scratch buffer and resets aux to NULL, so calling this
 * function again (or before any allocation) is harmless.
 */
void
free_aux_array(void)
{
    free(aux);
    aux = NULL;
}
/*
 * Merges the two adjacent sorted runs a[left .. mid] and a[mid+1 .. right]
 * into one sorted run a[left .. right].
 *
 * Works through the global scratch buffer aux, addressed with the same
 * indices left .. right; the caller must have allocated it beforehand.
 * On equal keys the element from the left run goes first.
 */
void
merge(int a[], int left, int mid, int right)
{
    // Copy the range into the scratch buffer; a[] is rebuilt from the copy.
    for (int k = left; k <= right; ++k)
        aux[k] = a[k];

    int lhs = left;      // cursor in the left run  aux[left .. mid]
    int rhs = mid + 1;   // cursor in the right run aux[mid+1 .. right]
    int out = left;      // cursor in the output a[left .. right]

    // Both runs non-empty: take the smaller head element.
    while (lhs <= mid && rhs <= right) {
        if (aux[rhs] < aux[lhs])
            a[out++] = aux[rhs++];
        else
            a[out++] = aux[lhs++];
    }

    // Only one run can still hold elements; append whatever remains.
    while (lhs <= mid)
        a[out++] = aux[lhs++];
    while (rhs <= right)
        a[out++] = aux[rhs++];
}
/*
 * Two-way merge sort (divide and conquer): sorts a[left .. right] ascending.
 *
 * The range is split in half, each half is sorted recursively, and the two
 * sorted halves are combined with merge(). A range of at most one element
 * (left >= right) is already sorted and ends the recursion.
 *
 * merge() works through the global scratch buffer, so that buffer must be
 * allocated before the first call.
 */
void
merge_sort(int a[], int left, int right)
{
    if (left >= right)
        return;

    // Written as left + (right - left) / 2 rather than (left + right) / 2
    // so the sum cannot overflow int for large indices.
    int mid = left + (right - left) / 2;

    // Divide
    merge_sort(a, left, mid);
    merge_sort(a, mid + 1, right);

    // Combine
    merge(a, left, mid, right);
}
// sort.h
/* Guard name avoids the reserved form "underscore + uppercase letter". */
#ifndef SORT_H
#define SORT_H

/* Direct insertion sort of arr[0 .. n-1], ascending, in place. */
void
direct_insert(int arr[], int n);

/* Binary (half) insertion sort of arr[0 .. n-1], ascending, in place. */
void
half_sort(int arr[], int n);

/* Bubble sort of arr[0 .. n-1], ascending, in place. */
void
bubble_sort(int arr[], int n);

/* Quick sort of a[left .. right] (both indices inclusive), ascending, in place. */
void
quick_sort(int a[], int left, int right);

/* Simple selection sort of a[0 .. n-1], ascending, in place. */
void
select_sort(int a[], int n);

//--- merge_sort
/*
 * Allocates the global scratch buffer that merge_sort works through, for
 * the index range left .. right. Call before merge_sort.
 */
void
malloc_aux_array(int left, int right);

/* Releases the scratch buffer obtained with malloc_aux_array. */
void
free_aux_array(void);

/* Merge sort of a[left .. right] (both indices inclusive), ascending. */
void
merge_sort(int a[], int left, int right);

#endif
// sort_test.cpp
#include <cstdio>
#include "sort.h"
/*
 * Runs every sort on its own copy of the same 5-element input, prints each
 * result, and checks that it is in ascending order.
 *
 * Returns 0 when all results are sorted, 1 otherwise.
 */
int main()
{
    enum { LEN = 5, CASES = 6 };

    int a1[LEN] = {3, 1, 5, 2, 6};
    int a2[LEN] = {3, 1, 5, 2, 6};
    int a3[LEN] = {3, 1, 5, 2, 6};
    int a4[LEN] = {3, 1, 5, 2, 6};
    int a5[LEN] = {3, 1, 5, 2, 6};
    int a6[LEN] = {3, 1, 5, 2, 6};

    // Every sort gets the full array length (quick/merge take inclusive indices).
    direct_insert(a1, LEN);
    half_sort(a2, LEN);
    bubble_sort(a3, LEN);
    quick_sort(a4, 0, LEN - 1);
    select_sort(a5, LEN);

    //--- merge sort needs its scratch buffer around the call
    malloc_aux_array(0, LEN - 1);
    merge_sort(a6, 0, LEN - 1);
    free_aux_array();

    const int *results[CASES] = {a1, a2, a3, a4, a5, a6};
    const char *labels[CASES] = {
        "direct_insert", "half_sort", "bubble_sort",
        "quick_sort", "select_sort", "merge_sort"
    };

    int failures = 0;
    for (int c = 0; c < CASES; c++) {
        int ok = 1;

        printf("%-13s:", labels[c]);
        for (int i = 0; i < LEN; i++) {
            printf(" %d", results[c][i]);
            if (i > 0 && results[c][i - 1] > results[c][i])
                ok = 0;
        }
        printf("  %s\n", ok ? "ok" : "NOT SORTED");

        if (!ok)
            failures++;
    }

    return failures != 0;
}