0
点赞
收藏
分享

微信扫一扫

sort: data structure and algorithms

小沙坨 2021-09-30 阅读 33
framework:

内部排序:
    insert
        直接
        折半
        希尔: 不 易 记, 不 care

    交换
        bubble
        quick

    选择
        simple select
        heap sort

    归并

本文 默认按 从小到大 排序, insert 到 insert_pos 前

1 insert sort

思想:

每次 将 待排 record 按 其 key 大小 insert 到 previous sorted subsequence 中 -> loop

引出 3 insert methods / algorithms:
direct / half / shell insert

1.1 direct insert

1. 思想

(1) `某时刻, 前 i = 1,2,... 个 已 insert / sorted`

sorted arr[0 .. i - 1]  / arr[i]  / unsorted arr[i + 1 .. ] 

(2) `insert arr[i] to sorted arr[0 .. i - 1]:`

1) record arr[i]

2) `find insert_pos 
    + arr[i - 1, i - 2, ..., insert_pos] 逐个后移 同时进行:`

从 pos = i - 1 开始倒着往前找,
if current pos elem not satisfied insert
    current elem move backward

3) recorded arr[i] copy 到 arr[insert_pos]
// Straight insertion sort: puts arr[0..n-1] into ascending order, in place.
// Before pass i the prefix arr[0..i-1] is already sorted; the pass inserts
// arr[i] into that prefix. Equal keys keep their relative order.
// n is the number of elements to sort; n <= 1 leaves the array untouched.
void
direct_insert(int arr[], int n)
{
    for (int i = 1; i < n; i++)
    {
        // Not smaller than its left neighbour: already in place.
        if (arr[i] >= arr[i - 1])
            continue;

        // Lift the element out, which leaves a hole at index i.
        const int key = arr[i];
        int hole = i;

        // Shift every larger prefix element one slot right. The hole moves
        // left until it reaches index 0 or sits after an element <= key.
        while (hole > 0 && arr[hole - 1] > key)
        {
            arr[hole] = arr[hole - 1];
            hole--;
        }

        // Drop the saved element into the hole.
        arr[hole] = key;
    }
}

1.2 half insert

是 对 direct insert 的 改进

direct insert: `边 比较 边 移动 elem`
1. 思想

`分离 比较 和 移动:`

(1) 先 折半 find insert_pos, 
(2) 再 统一 移动 insert_pos 及其 之后 elem
// Binary insertion sort: puts arr[0..n-1] into ascending order, in place.
// Unlike straight insertion it separates searching from moving: the
// insertion slot is located by binary search first, then the elements
// from that slot onward are shifted in one sweep.
void
half_sort(int arr[], int n)
{
    for (int i = 1; i < n; i++)
    {
        // Element to insert into the sorted prefix arr[0..i-1].
        const int key = arr[i];

        // Binary search over the half-open window [lo, hi) for the first
        // index whose element is strictly greater than key:
        //   arr[mid] >  key  -> the slot is at mid or to its left
        //   arr[mid] <= key  -> the slot is to the right of mid
        // Landing after equal keys keeps the sort stable.
        int lo = 0;
        int hi = i;
        while (lo < hi)
        {
            const int mid = lo + (hi - lo) / 2;

            if (arr[mid] > key)
                hi = mid;
            else
                lo = mid + 1;
        }

        // lo is the insertion slot; shift arr[lo..i-1] one step right.
        for (int k = i; k > lo; k--)
            arr[k] = arr[k - 1];

        arr[lo] = key;
    }
}

2 swap sort

交换: 据 两 element key 的 compare result 
      来 交换 这 两个 record 的 position

2.1 bubble sort

1. 思想

假设 待 sort sequence len = n

1) `一趟排序:`
从后往前 / 从前往后 `两两比较 相邻元素`,
若 `逆序, 则 交换`; until 比较完 最后 2 个, 
result 是 min_elem 冒到 first position (从后往前) / max_elem 冒到 last position (从前往后),

2) `下一趟, 前面已冒出 elem 不参与`,
待 sort 序列 少 1 个

3) `最多 n - 1 趟` 就能 sort 好 所有 elem
// Exchanges the two ints that v1 and v2 point to.
// Also correct when both pointers refer to the same int.
void
swap(int *v1, int *v2)
{
    const int first = *v1;
    const int second = *v2;

    *v1 = second;
    *v2 = first;
}

// Bubble sort: puts arr[0..n-1] into ascending order, in place.
// Each pass scans from the back towards the front, exchanging adjacent
// elements that are out of order, so the smallest remaining element
// surfaces at index `settled`. At most n - 1 passes are needed.
void
bubble_sort(int arr[], int n)
{
    // `settled` counts the smallest elements already fixed at the front.
    for (int settled = 0; settled < n - 1; settled++)
    {
        // Scan the unsettled tail back to front.
        for (int j = n - 1; j > settled; j--)
        {
            if (arr[j] < arr[j - 1])
                swap(&arr[j - 1], &arr[j]);
        }
    }
}
2. 改进: 若 某趟 中无交换 => sort 提前完成
    use a swap_flag
// Bubble sort with early exit: puts arr[0..n-1] into ascending order.
// Each pass scans from the back towards the front, exchanging adjacent
// out-of-order elements. A pass with no exchange means the array is
// already sorted, so the remaining passes are skipped.
void
bubble_sort(int arr[], int n)
{
    // `settled` counts the smallest elements already fixed at the front.
    for (int settled = 0; settled < n - 1; settled++)
    {
        // Number of exchanges performed during this pass.
        int exchanges = 0;

        for (int j = n - 1; j > settled; j--)
        {
            if (arr[j] >= arr[j - 1])
                continue;

            swap(&arr[j - 1], &arr[j]);
            exchanges++;
        }

        // A quiet pass: everything is in order, stop early.
        if (exchanges == 0)
            break;
    }
}

2.2 quick sort

是对 bubble sort 的 一种改进

1. 思想: `分治`

(1) 划分
1) 待 sort sequence a[1..n]  中 
    任取 一 elem 作 pivot (枢轴 / 中心)

2) 用 `1 趟 快排` 将 a[1..n] 划分为 two parts: 
    a[1...pivot_pos - 1] 和 a[pivot_pos + 1.. n],
     分别 < 和 >= pivot

即 找 pivot_pos

3) `pivot_pos 即 当前序列 pivot 的 最终排序 pos`

(2) 分别对 left / right 2 个 sub_sequence 
    递归 上述过程
// One quick-sort pass over a[left..right] (both bounds inclusive).
// Uses a[left] as the pivot and rearranges the range so that
//   a[left .. pivot_pos - 1]  <  pivot
//   a[pivot_pos + 1 .. right] >= pivot
// Returns pivot_pos, which is the pivot's final sorted position.
int
partition(int a[], int left, int right)
{
    // Taking the pivot out leaves a "hole" at index lo.
    const int pivot = a[left];
    int lo = left;
    int hi = right;

    // The two cursors close in on each other, and the hole alternates
    // between them. The loop ends when they meet.
    while (lo < hi)
    {
        // From the right: skip elements that belong on the right side,
        // then move the offender (< pivot) into the hole at lo.
        for (; lo < hi && a[hi] >= pivot; hi--)
        {
        }
        a[lo] = a[hi];

        // From the left: skip elements that belong on the left side,
        // then move the offender (>= pivot) into the hole at hi.
        for (; lo < hi && !(a[lo] >= pivot); lo++)
        {
        }
        a[hi] = a[lo];
    }

    // lo == hi is the last hole: the pivot's final position.
    a[lo] = pivot;

    return lo;
}

// Quick sort (divide and conquer): puts a[left..right], both bounds
// inclusive, into ascending order, in place.
void
quick_sort(int a[], int left, int right)
{
    // Base case: a range of zero or one element is already sorted.
    if (left >= right)
        return;

    // Divide: after partitioning, the pivot is at its final position.
    const int pivot_pos = partition(a, left, right);

    // Conquer: sort the two sides of the pivot independently. A side with
    // a single element falls straight into the base case.
    quick_sort(a, left, pivot_pos - 1);
    quick_sort(a, pivot_pos + 1, right);
}

2. 空间效率

`递归 => 用 递归工作栈: 栈 容量 与 递归最大深度 一致:`

最好情况 ceil(log_2(n + 1) )
最坏情况: n - 1 次 递归 => 栈 深度 O(n)
平均: 栈深度 O(log_2 n)

`最坏/平均 O(n) / O(log_2 n)`


3. 时间效率

与 划分 是否对称(划分算法) 有关
最坏: two parts 分别 n - 1 / 0 个 elem
    -> 这种 最大程度的不对称 发生在 每层递归上
    -> O(n^2)

改进: 
(1) sub_sequence 较小时, no longer 递归调 快排, 
    而是用 direct insert sort

(2) 尽量选 1 个 可将 sequence 中分 的 pivot

eg: 取 head/tail/mid elem -> 再取 3 者 的 middle 

4. 稳定性: 不稳定

划分 中, 若 right range 有 2 key 相等, 且 < pivot
=> 交换到 left range 后, 2 key 相对位置 会变

seq = {3, 2, `2`} -> 1趟 快排后 -> {`2`, 2, 3}

3 select sort

1. 思想

第 i = 1, 2, ..., n-1 趟, 
    在后面 n - i + 1 个 待 sort elems 中 
    select key 最小者, 作 已 sorted sub_sequence ith elem

until 第 n - 1 趟 end, 待 sort elem 只剩 1 个, 不必再 sort

3.1 simple select sort

1. 思想

第 i = 1, 2, ..., n - 1 趟
    `select key 最小者 与 ith elem a[i - 1] swap`

(times+1)th 趟:  times = 0..n-2
    walk & cmp *walk with *min_elem_pos
        `min_elem_pos pointer` 初始化为 times,
        `walk index pointer` 从 times + 1 开始,
    if < 
        则 update min_elem_pos
// Simple selection sort: puts a[0..n-1] into ascending order, in place.
// Pass `times` (0-based) finds the smallest element of a[times..n-1] and
// exchanges it with a[times]. After n - 1 passes only one element is left
// and it is necessarily the largest, so no n-th pass is needed.
// Not stable: the exchange can jump an element over an equal key.
void 
select_sort(int a[], int n)
{
    // n - 1 passes; for n <= 1 the loop body never runs.
    for (int times = 0; times < n - 1; times++)
    {
        // Index of the smallest element seen so far in a[times..n-1].
        int min_elem_pos = times;
        for (int walk = times + 1; walk < n; walk++)
        {
            if (a[walk] < a[min_elem_pos])
                min_elem_pos = walk;
        }

        // Exchange only when the minimum is not already in place.
        if (min_elem_pos != times)
        {
            const int tmp = a[times];
            a[times] = a[min_elem_pos];
            a[min_elem_pos] = tmp;
        }
    }
}
2. 空间效率 O(1): 仅用 常数个 辅助变量

3. 时间效率 O( n*(n - 1) / 2 ) = O(n^2)

4. 稳定性: 不稳定

seq = {2, `2`, 1} -> 1 趟 sort 后 -> {1, `2`, 2}

3.2 heap sort

https://www.jianshu.com/p/ab2d9c54eb49

4 归并 sort

1. 思想

归并:
把 >= 2 个 有序表 合并为 1个 新有序表

2 路归并: n 个 records 待 sort 表
(1) 视为 n 个 len = 1 的 有序子表,
    两两归并, 得 ceil(n / 2) 个 len = 2 or 1 有序表

(2) 再 两两归并... 重复之

(3) until 归并后 新表 len = n
// Scratch buffer used by merge(). It is created by malloc_aux_array() and
// released by free_aux_array(); NULL whenever no buffer is allocated.
int *aux = NULL;

// Allocates the scratch buffer needed to merge-sort the inclusive index
// range [left, right], i.e. right - left + 1 ints.
// An empty range (right < left) leaves the current state untouched.
// A buffer left over from an earlier call is released first. If the
// allocation fails, aux is NULL afterwards.
void
malloc_aux_array(int left, int right)
{
    if (right < left)
        return;

    free(aux);

    const size_t count = (size_t)(right - left) + 1;
    // The cast keeps the snippet valid when it is compiled as C++.
    aux = (int *)malloc(count * sizeof *aux);
}

// Releases the scratch buffer. Safe to call repeatedly, and safe to call
// when nothing was allocated, because aux is reset to NULL.
void
free_aux_array()
{
    free(aux);
    aux = NULL;
}

// Merges the two adjacent sorted runs a[left..mid] and a[mid+1..right]
// into one sorted run a[left..right].
// Precondition: aux holds at least right - left + 1 ints.
// The runs are copied to aux[0..right-left], so aux is indexed relative
// to `left`. The required buffer size therefore depends only on the
// length of the range, not on where it starts.
void
merge(int a[], int left, int mid, int right)
{
    //(1) snapshot a[left..right] into aux[0..len-1]
    const int len = right - left + 1;
    for (int k = 0; k < len; k++)
        aux[k] = a[left + k];

    //(2) two read cursors in aux[], one write cursor in a[]
    const int run1_last = mid - left;    // run 1: aux[0 .. run1_last]
    const int run2_last = right - left;  // run 2: aux[run1_last+1 .. run2_last]
    int run1 = 0;
    int run2 = run1_last + 1;
    int out = left;

    // Copy the smaller head each time; `<=` prefers run 1 on ties, which
    // keeps the sort stable.
    while (run1 <= run1_last && run2 <= run2_last)
    {
        if (aux[run1] <= aux[run2])
            a[out++] = aux[run1++];
        else
            a[out++] = aux[run2++];
    }

    //(3) exactly one run may still have elements left; copy them over
    while (run1 <= run1_last)
        a[out++] = aux[run1++];

    while (run2 <= run2_last)
        a[out++] = aux[run2++];
}

// Two-way merge sort (divide and conquer): puts a[left..right], both
// bounds inclusive, into ascending order.
// Precondition: malloc_aux_array() succeeded for a range at least as long
// as [left, right].
void
merge_sort(int a[], int left, int right)
{
    // Base case: zero or one element is already sorted.
    if (left >= right)
        return;

    // Midpoint written so that left + right cannot overflow.
    const int mid = left + (right - left) / 2;

    // Divide: sort each half on its own.
    merge_sort(a, left, mid);
    merge_sort(a, mid + 1, right);

    // Combine: merge the two sorted halves.
    merge(a, left, mid, right);
}

5 test

// sort.cpp
#include <cstdio>
#include <cstdlib> // malloc / free

#include "sort.h"

//----------1. direct_insert
// Straight insertion sort: puts arr[0..n-1] into ascending order, in place.
// Before pass i the prefix arr[0..i-1] is already sorted; the pass inserts
// arr[i] into that prefix. Equal keys keep their relative order.
// n is the number of elements to sort; n <= 1 leaves the array untouched.
void
direct_insert(int arr[], int n)
{
    for (int i = 1; i < n; i++)
    {
        // Not smaller than its left neighbour: already in place.
        if (arr[i] >= arr[i - 1])
            continue;

        // Lift the element out, which leaves a hole at index i.
        const int key = arr[i];
        int hole = i;

        // Shift every larger prefix element one slot right. The hole moves
        // left until it reaches index 0 or sits after an element <= key.
        while (hole > 0 && arr[hole - 1] > key)
        {
            arr[hole] = arr[hole - 1];
            hole--;
        }

        // Drop the saved element into the hole.
        arr[hole] = key;
    }
}

//----------2. half_sort
// Binary insertion sort: puts arr[0..n-1] into ascending order, in place.
// Unlike straight insertion it separates searching from moving: the
// insertion slot is located by binary search first, then the elements
// from that slot onward are shifted in one sweep.
void
half_sort(int arr[], int n)
{
    for (int i = 1; i < n; i++)
    {
        // Element to insert into the sorted prefix arr[0..i-1].
        const int key = arr[i];

        // Binary search over the half-open window [lo, hi) for the first
        // index whose element is strictly greater than key:
        //   arr[mid] >  key  -> the slot is at mid or to its left
        //   arr[mid] <= key  -> the slot is to the right of mid
        // Landing after equal keys keeps the sort stable.
        int lo = 0;
        int hi = i;
        while (lo < hi)
        {
            const int mid = lo + (hi - lo) / 2;

            if (arr[mid] > key)
                hi = mid;
            else
                lo = mid + 1;
        }

        // lo is the insertion slot; shift arr[lo..i-1] one step right.
        for (int k = i; k > lo; k--)
            arr[k] = arr[k - 1];

        arr[lo] = key;
    }
}

//----------3. bubble_sort
// Exchanges the two ints that v1 and v2 point to.
// Also correct when both pointers refer to the same int.
void
swap(int *v1, int *v2)
{
    const int first = *v1;
    const int second = *v2;

    *v1 = second;
    *v2 = first;
}

// Bubble sort with early exit: puts arr[0..n-1] into ascending order.
// Each pass scans from the back towards the front, exchanging adjacent
// out-of-order elements. A pass with no exchange means the array is
// already sorted, so the remaining passes are skipped.
void
bubble_sort(int arr[], int n)
{
    // `settled` counts the smallest elements already fixed at the front.
    for (int settled = 0; settled < n - 1; settled++)
    {
        // Number of exchanges performed during this pass.
        int exchanges = 0;

        for (int j = n - 1; j > settled; j--)
        {
            if (arr[j] >= arr[j - 1])
                continue;

            swap(&arr[j - 1], &arr[j]);
            exchanges++;
        }

        // A quiet pass: everything is in order, stop early.
        if (exchanges == 0)
            break;
    }
}

//----------4. quick_sort
// One quick-sort pass over a[left..right] (both bounds inclusive).
// Uses a[left] as the pivot and rearranges the range so that
//   a[left .. pivot_pos - 1]  <  pivot
//   a[pivot_pos + 1 .. right] >= pivot
// Returns pivot_pos, which is the pivot's final sorted position.
int
partition(int a[], int left, int right)
{
    // Taking the pivot out leaves a "hole" at index lo.
    const int pivot = a[left];
    int lo = left;
    int hi = right;

    // The two cursors close in on each other, and the hole alternates
    // between them. The loop ends when they meet.
    while (lo < hi)
    {
        // From the right: skip elements that belong on the right side,
        // then move the offender (< pivot) into the hole at lo.
        for (; lo < hi && a[hi] >= pivot; hi--)
        {
        }
        a[lo] = a[hi];

        // From the left: skip elements that belong on the left side,
        // then move the offender (>= pivot) into the hole at hi.
        for (; lo < hi && !(a[lo] >= pivot); lo++)
        {
        }
        a[hi] = a[lo];
    }

    // lo == hi is the last hole: the pivot's final position.
    a[lo] = pivot;

    return lo;
}

// Quick sort (divide and conquer): puts a[left..right], both bounds
// inclusive, into ascending order, in place.
void
quick_sort(int a[], int left, int right)
{
    // Base case: a range of zero or one element is already sorted.
    if (left >= right)
        return;

    // Divide: after partitioning, the pivot is at its final position.
    const int pivot_pos = partition(a, left, right);

    // Conquer: sort the two sides of the pivot independently. A side with
    // a single element falls straight into the base case.
    quick_sort(a, left, pivot_pos - 1);
    quick_sort(a, pivot_pos + 1, right);
}

//----------5. select_sort

// Simple selection sort: puts a[0..n-1] into ascending order, in place.
// Pass `times` (0-based) finds the smallest element of a[times..n-1] and
// exchanges it with a[times]. After n - 1 passes only one element is left
// and it is necessarily the largest, so no n-th pass is needed.
// Not stable: the exchange can jump an element over an equal key.
void 
select_sort(int a[], int n)
{
    // n - 1 passes; for n <= 1 the loop body never runs.
    for (int times = 0; times < n - 1; times++)
    {
        // Index of the smallest element seen so far in a[times..n-1].
        int min_elem_pos = times;
        for (int walk = times + 1; walk < n; walk++)
        {
            if (a[walk] < a[min_elem_pos])
                min_elem_pos = walk;
        }

        // Exchange only when the minimum is not already in place.
        if (min_elem_pos != times)
        {
            const int tmp = a[times];
            a[times] = a[min_elem_pos];
            a[min_elem_pos] = tmp;
        }
    }
}

//----------6. merge_sort
// Scratch buffer used by merge(). It is created by malloc_aux_array() and
// released by free_aux_array(); NULL whenever no buffer is allocated.
int *aux = NULL;

// Allocates the scratch buffer needed to merge-sort the inclusive index
// range [left, right], i.e. right - left + 1 ints.
// An empty range (right < left) leaves the current state untouched.
// A buffer left over from an earlier call is released first. If the
// allocation fails, aux is NULL afterwards.
void
malloc_aux_array(int left, int right)
{
    if (right < left)
        return;

    free(aux);

    const size_t count = (size_t)(right - left) + 1;
    // The cast keeps the snippet valid when it is compiled as C++.
    aux = (int *)malloc(count * sizeof *aux);
}

// Releases the scratch buffer. Safe to call repeatedly, and safe to call
// when nothing was allocated, because aux is reset to NULL.
void
free_aux_array()
{
    free(aux);
    aux = NULL;
}

// Merges the two adjacent sorted runs a[left..mid] and a[mid+1..right]
// into one sorted run a[left..right].
// Precondition: aux holds at least right - left + 1 ints.
// The runs are copied to aux[0..right-left], so aux is indexed relative
// to `left`. The required buffer size therefore depends only on the
// length of the range, not on where it starts.
void
merge(int a[], int left, int mid, int right)
{
    //(1) snapshot a[left..right] into aux[0..len-1]
    const int len = right - left + 1;
    for (int k = 0; k < len; k++)
        aux[k] = a[left + k];

    //(2) two read cursors in aux[], one write cursor in a[]
    const int run1_last = mid - left;    // run 1: aux[0 .. run1_last]
    const int run2_last = right - left;  // run 2: aux[run1_last+1 .. run2_last]
    int run1 = 0;
    int run2 = run1_last + 1;
    int out = left;

    // Copy the smaller head each time; `<=` prefers run 1 on ties, which
    // keeps the sort stable.
    while (run1 <= run1_last && run2 <= run2_last)
    {
        if (aux[run1] <= aux[run2])
            a[out++] = aux[run1++];
        else
            a[out++] = aux[run2++];
    }

    //(3) exactly one run may still have elements left; copy them over
    while (run1 <= run1_last)
        a[out++] = aux[run1++];

    while (run2 <= run2_last)
        a[out++] = aux[run2++];
}

// Two-way merge sort (divide and conquer): puts a[left..right], both
// bounds inclusive, into ascending order.
// Precondition: malloc_aux_array() succeeded for a range at least as long
// as [left, right].
void
merge_sort(int a[], int left, int right)
{
    // Base case: zero or one element is already sorted.
    if (left >= right)
        return;

    // Midpoint written so that left + right cannot overflow.
    const int mid = left + (right - left) / 2;

    // Divide: sort each half on its own.
    merge_sort(a, left, mid);
    merge_sort(a, mid + 1, right);

    // Combine: merge the two sorted halves.
    merge(a, left, mid, right);
}
// sort.h
// Guard name has no leading underscore: identifiers that start with an
// underscore followed by an uppercase letter are reserved.
#ifndef SORT_H
#define SORT_H

// Straight insertion sort of arr[0..n-1], ascending, in place.
void
direct_insert(int arr[], int n);

// Binary insertion sort of arr[0..n-1], ascending, in place.
void
half_sort(int arr[], int n);

// Bubble sort of arr[0..n-1], ascending, in place; stops early once a
// pass performs no exchange.
void 
bubble_sort(int arr[], int n);

// Quick sort of a[left..right] (both bounds inclusive), ascending.
void
quick_sort(int a[], int left, int right);

// Simple selection sort of a[0..n-1], ascending, in place.
void 
select_sort(int a[], int n);

//--- merge_sort
// Allocates the scratch buffer for merge-sorting the index range
// [left, right]. Call it before merge_sort().
void
malloc_aux_array(int left, int right);

// Releases the buffer obtained with malloc_aux_array().
void
free_aux_array();

// Two-way merge sort of a[left..right] (both bounds inclusive), ascending.
// Uses the scratch buffer set up by malloc_aux_array().
void
merge_sort(int a[], int left, int right);

#endif // SORT_H
// sort_test.cpp
#include <cstdio>
#include "sort.h"

// Returns 1 when v[0..n-1] is in non-decreasing order, 0 otherwise.
static int
is_sorted(const int v[], int n)
{
    for (int i = 1; i < n; i++)
    {
        if (v[i - 1] > v[i])
            return 0;
    }
    return 1;
}

// Runs every sort on its own copy of the same input and checks that each
// result is in order. Returns 0 when all sorts succeed, 1 otherwise.
int main()
{
    // One length shared by all calls, so every sort handles the full array.
    enum { N = 5 };

    int a1[N] = {3, 1, 5, 2, 6};
    int a2[N] = {3, 1, 5, 2, 6};
    int a3[N] = {3, 1, 5, 2, 6};
    int a4[N] = {3, 1, 5, 2, 6};
    int a5[N] = {3, 1, 5, 2, 6};
    int a6[N] = {3, 1, 5, 2, 6};
    
    direct_insert(a1, N);
    
    half_sort(a2, N);
    
    bubble_sort(a3, N);
    
    quick_sort(a4, 0, N - 1);
    
    select_sort(a5, N);
    
    //--- merge sort needs its scratch buffer allocated first
    malloc_aux_array(0, N - 1);
    
    merge_sort(a6, 0, N - 1);

    free_aux_array();

    //--- verify: report each sort whose output is not in order
    const int *results[] = {a1, a2, a3, a4, a5, a6};
    const char *names[] = {"direct_insert", "half_sort", "bubble_sort",
                           "quick_sort", "select_sort", "merge_sort"};
    int failures = 0;
    for (int k = 0; k < 6; k++)
    {
        if (!is_sorted(results[k], N))
        {
            fprintf(stderr, "%s: result is not sorted\n", names[k]);
            failures++;
        }
    }

    return failures != 0;
}
举报

相关推荐

0 条评论