PostgreSQL中的分区表

PostgreSQL分区的意思是把逻辑上的一个大表分割成物理上的几块。分区不仅能带来访问速度的提升，更重要的是，它能带来管理和维护上的方便。

分区表的具体好处：

某些类型的查询性能可以得到极大提升
更新的性能也可以得到提升，因为每个分区上的索引都比整个数据集上的索引小。如果索引不能全部放在内存里，那么对索引的读和写都会产生更多的磁盘访问
批量删除可以用简单的删除某个分区来实现
可以将很少用的数据移动到便宜、转速慢的存储介质上

PG10版本后可以通过声明式分区进行创建分区表，就是通过相应的DDL语句来创建分区表。

注意：

主表和分区表分别单独创建
范围分区支持多个字段组成的KEY
列表分区的KEY只能有一个字段

创建主表的语法

CREATE TABLE 表名( [列名称 数据类型]...)
PARTITION BY RANGE ([列名称]...)
--范围分区的KEY值可由多个字段组成（最多32个字段）。

创建分区表语法：

CREATE TABLE 表名 PARTITION of 主表 FOR VALUES 
FROM (表达式) TO (表达式)

范围分区

create table tbp(n int, t text) partition by range(n);
create table tbp_1 partition of tbp for values from (MINVALUE) to (10);
create table tbp_2 partition of tbp for values from (10) to (100);
create table tbp_3 partition of tbp for values from (100) to (1000);
create table tbp_4 partition of tbp for values from (1000) to (MAXVALUE);

postgres=# \d
               List of relations
 Schema | Name  |       Type        |  Owner   
--------+-------+-------------------+----------
 public | tbp   | partitioned table | postgres
 public | tbp_1 | table             | postgres
 public | tbp_2 | table             | postgres
 public | tbp_3 | table             | postgres
 public | tbp_4 | table             | postgres
(5 rows)

postgres=# select * from pg_partitioned_table;
 partrelid | partstrat | partnatts | partdefid | partattrs | partclass | partcollation | partexprs 
-----------+-----------+-----------+-----------+-----------+-----------+---------------+-----------
     16384 | r         |         1 |         0 | 1         | 1978      | 0             | 
(1 row)

postgres=# \d+ tbp
                              Partitioned table "public.tbp"
 Column |  Type   | Collation | Nullable | Default | Storage  | Stats target | Description 
--------+---------+-----------+----------+---------+----------+--------------+-------------
 n      | integer |           |          |         | plain    |              | 
 t      | text    |           |          |         | extended |              | 
Partition key: RANGE (n)
Partitions: tbp_1 FOR VALUES FROM (MINVALUE) TO (10),
            tbp_2 FOR VALUES FROM (10) TO (100),
            tbp_3 FOR VALUES FROM (100) TO (1000),
            tbp_4 FOR VALUES FROM (1000) TO (MAXVALUE)
-- 查询分区类型和KEY
postgres=# SELECT pg_get_partkeydef('tbp'::regclass); 
 pg_get_partkeydef 
-------------------
 RANGE (n)
(1 row)

-- 获取分区范围
postgres=# SELECT pg_get_partition_constraintdef('tbp_1'::regclass) ;
 pg_get_partition_constraintdef 
--------------------------------
 ((n IS NOT NULL) AND (n < 10))
(1 row)

多个KEY

postgres=# create table test(n1 int, n2 int) partition by range(n1, n2);
CREATE TABLE
postgres=# create table test_1 partition of test for values from (0, 0) to (10, 100);
CREATE TABLE
postgres=# \d+ test_1
                                  Table "public.test_1"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 n1     | integer |           |          |         | plain   |              | 
 n2     | integer |           |          |         | plain   |              | 
Partition of: test FOR VALUES FROM (0, 0) TO (10, 100)
Partition constraint: ((n1 IS NOT NULL) AND (n2 IS NOT NULL) AND ((n1 > 0) OR ((n1 = 0) AND (n2 >= 0))) AND ((n1 < 10) OR ((n1 = 10) AND (n2 < 100))))
Access method: heap

列表分区

postgres=# CREATE TABLE sales (product_id int, saleroom int, province text) PARTITION BY LIST(province);
CREATE TABLE
postgres=# CREATE TABLE sales_east PARTITION OF sales FOR VALUES IN ('山东','江苏','上海');
CREATE TABLE
postgres=# CREATE TABLE sales_west PARTITION OF sales FOR VALUES IN ('山西','陕西','四川');
CREATE TABLE
postgres=# CREATE TABLE sales_north PARTITION OF sales FOR VALUES IN ('北京','河北','辽宁');
CREATE TABLE
postgres=# CREATE TABLE sales_south PARTITION OF sales FOR VALUES IN ('广东','福建');
CREATE TABLE
postgres=# \dt
                  List of relations
 Schema |    Name     |       Type        |  Owner   
--------+-------------+-------------------+----------
 public | sales       | partitioned table | postgres
 public | sales_east  | table             | postgres
 public | sales_north | table             | postgres
 public | sales_south | table             | postgres
 public | sales_west  | table             | postgres
(5 rows)
postgres=# insert into sales values(1,1,'山东'),(2,2,'山西'),(3,3,'北京'),(4,4,'广东');
INSERT 0 4
postgres=# select * from sales;
 product_id | saleroom | province 
------------+----------+----------
          1 |        1 | 山东
          3 |        3 | 北京
          2 |        2 | 山西
          4 |        4 | 广东
(4 rows)

postgres=# select * from sales_east;
 product_id | saleroom | province 
------------+----------+----------
          1 |        1 | 山东
(1 row)

postgres=# select * from sales_west;
 product_id | saleroom | province 
------------+----------+----------
          2 |        2 | 山西
(1 row)

绑定分区

分区表支持把普通表绑定（ATTACH）成父表的一个分区，也支持把分区解除绑定（DETACH），使其还原为普通表。

若普通表中有数据，ATTACH操作时，默认会做数据校验。

postgres=# CREATE TABLE sales_foreign (like sales);
CREATE TABLE
-- 绑定分区
postgres=# ALTER TABLE sales ATTACH PARTITION sales_foreign FOR VALUES IN('美国','日本');
ALTER TABLE
postgres=# \d+ sales
                               Partitioned table "public.sales"
   Column   |  Type   | Collation | Nullable | Default | Storage  | Stats target | Description 
------------+---------+-----------+----------+---------+----------+--------------+-------------
 product_id | integer |           |          |         | plain    |              | 
 saleroom   | integer |           |          |         | plain    |              | 
 province   | text    |           |          |         | extended |              | 
Partition key: LIST (province)
Partitions: sales_east FOR VALUES IN ('山东', '江苏', '上海'),
            sales_foreign FOR VALUES IN ('美国', '日本'),
            sales_north FOR VALUES IN ('北京', '河北', '辽宁'),
            sales_south FOR VALUES IN ('广东', '福建'),
            sales_west FOR VALUES IN ('山西', '陕西', '四川')

-- 解除绑定
postgres=# ALTER TABLE sales DETACH PARTITION sales_foreign;
ALTER TABLE
postgres=# \d+ sales
                               Partitioned table "public.sales"
   Column   |  Type   | Collation | Nullable | Default | Storage  | Stats target | Description 
------------+---------+-----------+----------+---------+----------+--------------+-------------
 product_id | integer |           |          |         | plain    |              | 
 saleroom   | integer |           |          |         | plain    |              | 
 province   | text    |           |          |         | extended |              | 
Partition key: LIST (province)
Partitions: sales_east FOR VALUES IN ('山东', '江苏', '上海'),
            sales_north FOR VALUES IN ('北京', '河北', '辽宁'),
            sales_south FOR VALUES IN ('广东', '福建'),
            sales_west FOR VALUES IN ('山西', '陕西', '四川')

查询分区表

获取系统信息（系统表）

pg_partitioned_table记录主表信息的系统表

名称	类型	引用	描述
`partrelid`	`oid`	`pg_class.oid`	这个分区表的`pg_class`项的OID
`partstrat`	`char`		分区策略；`h` = 哈希分区表，`l` = 列表分区表，`r` = 范围分区表
`partnatts`	`int2`		分区键中的列数
`partdefid`	`oid`	`pg_class.oid`	这个分区表的默认分区的`pg_class`项的OID，如果这个分区表没有默认分区则为零。
`partattrs`	`int2vector`	`pg_attribute.attnum`	这是一个长度为`partnatts`值的数组，它指示哪些表列是分区键的组成部分。例如，值`1 3`表示第一个和第三个表列组成了分区键。这个数组中的零表示对应的分区键列是一个表达式而不是简单的列引用。
`partclass`	`oidvector`	`pg_opclass.oid`	对于分区键中的每一个列，这个域包含要使用的操作符类的OID。详见`pg_opclass`。
`partcollation`	`oidvector`	`pg_collation.oid`	对于分区键中的每一个列，这个域包含要用于分区的排序规则的OID，如果该列不是一种可排序数据类型则为零。
`partexprs`	`pg_node_tree`		非简单列引用的分区键列的表达式树（以`nodeToString()`的表达方式）。这是一个列表，`partattrs`中每一个零项都有一个元素。如果所有分区键列都是简单列引用，则这个域为空。

分区的信息记录在pg_class相关的字段中

名称	类型	引用	描述
relispartition	boolean		表是否为分区（TRUE 是）
relpartbound	pg_node_tree		分区边界（partition bound）的内部表示，仅当relispartition为TRUE时有值

hash分区

hash分区语法

--主表
CREATE TABLE TABLE_NAME (COLUMN_NAME DATA_TYPE)
PARTITION BY HASH (column_name)

--分区表
CREATE TABLE table_name
PARTITION OF parent_table
FOR VALUES WITH (modulus numeric_literal, REMAINDER numeric_literal)

哈希分区支持多列分区。示例：

create table test_hash_key(x int, y int) 
  partition by hash(x,y);
create table test_hash_key_1 partition of test_hash_key 
  for values with(modulus 2, remainder 0);
create table test_hash_key_2 partition of test_hash_key 
  for values with(modulus 2, remainder 1);
postgres=# \d+ test_hash_key
                         Partitioned table "public.test_hash_key"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 x      | integer |           |          |         | plain   |              | 
 y      | integer |           |          |         | plain   |              | 
Partition key: HASH (x, y)
Partitions: test_hash_key_1 FOR VALUES WITH (modulus 2, remainder 0),
            test_hash_key_2 FOR VALUES WITH (modulus 2, remainder 1)

postgres=# 
postgres=# \d+ test_hash_key_1
                              Table "public.test_hash_key_1"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 x      | integer |           |          |         | plain   |              | 
 y      | integer |           |          |         | plain   |              | 
Partition of: test_hash_key FOR VALUES WITH (modulus 2, remainder 0)
Partition constraint: satisfies_hash_partition('16450'::oid, 2, 0, x, y)
Access method: heap

默认分区

PG11新特性，防止插入失败，对于不符合任何分区约束的数据会插入到默认分区。目前range/list分区支持默认分区，hash分区不支持。

语法：

CREATE TABLE table_name
	PARTITION OF parent_table
	DEFAULT

ATTACH/DETACH分区

语法：

ALTER TABLE name     
	ATTACH PARTITION partition_name { FOR VALUES partition_bound_spec | DEFAULT }  

ALTER TABLE name    
	DETACH PARTITION partition_name

示例

create table test_hash(id int, date date)   partition by hash(id);   
create table test_hash_1 partition of test_hash   for values with(modulus 2, remainder 0);   
create table test_hash_2 partition of test_hash   for values with(modulus 2, remainder 1);
postgres=# \d+ test_hash
                           Partitioned table "public.test_hash"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 id     | integer |           |          |         | plain   |              | 
 date   | date    |           |          |         | plain   |              | 
Partition key: HASH (id)
Partitions: test_hash_1 FOR VALUES WITH (modulus 2, remainder 0),
            test_hash_2 FOR VALUES WITH (modulus 2, remainder 1)

postgres=# alter table test_hash detach partition test_hash_2;
ALTER TABLE
postgres=# \d+ test_hash
                           Partitioned table "public.test_hash"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 id     | integer |           |          |         | plain   |              | 
 date   | date    |           |          |         | plain   |              | 
Partition key: HASH (id)
Partitions: test_hash_1 FOR VALUES WITH (modulus 2, remainder 0)

postgres=# create table test_hash_attach (id int, date date);
CREATE TABLE
postgres=# alter table test_hash attach partition test_hash_attach   for values with (modulus 2, remainder 1);
ALTER TABLE
postgres=# \d+ test_hash
                           Partitioned table "public.test_hash"
 Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
--------+---------+-----------+----------+---------+---------+--------------+-------------
 id     | integer |           |          |         | plain   |              | 
 date   | date    |           |          |         | plain   |              | 
Partition key: HASH (id)
Partitions: test_hash_1 FOR VALUES WITH (modulus 2, remainder 0),
            test_hash_attach FOR VALUES WITH (modulus 2, remainder 1)

分区管理

删除分区

直接删除对应的分区表

drop table tbl_partition_202110;

另一个方法是先将该分区从主表中移除，但保留其中的数据，使其仍然可以被访问

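-- 注意：NO INHERIT 仅适用于基于继承实现的分区；对声明式分区执行会报错 "cannot change inheritance of a partition"
-- 声明式分区请使用：alter table tbl_partition detach partition tbl_partition_202110;
alter table tbl_partition_202110 no inherit tbl_partition;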

和直接DROP相比，该方式仅仅是使子表脱离了原有的主表，而存储在子表中的数据仍然可以访问，因为此时该表已经被还原成一个普通的数据表。在这时就可以对该表进行必要的维护操作。如数据清理、归档等。然后可以考虑是直接删除该表，还是先清空该表，然后再重新继承主表。

-- 声明式分区请使用：alter table tbl_partition attach partition tbl_partition_202110 for values ...;
alter table tbl_partition_202110 inherit tbl_partition;

增加分区

1、可以通过新建分区表来增加一个分区

2、可以直接让普通表绑定主表，从而达到新增分区的目的