数据类型 | 描述 | 语法示例 |
---|---|---|
STRUCT(结构体)对象 | 和C语言中的struct类似,可以通过“点”符号访问元素内容。例如,如果某个列的数据类型是STRUCT{first STRING, last STRING},那么第1个元素可以通过字段名.first来引用。 | struct() |
MAP 映射 | MAP是一组键-值对元组集合,使用数组表示法可以访问数据。例如,如果某个列的数据类型是MAP,其中键->值对是'first'->'John'和'last'->'Doe',那么可以通过字段名['last']获取键'last'对应的值(即'Doe') | map() |
ARRAY 数组 | 数组是一组具有相同类型和名称的变量的集合。这些变量称为数组的元素,每个数组元素都有一个编号,编号从零开始。例如,数组值为['John', 'Doe'],那么第2个元素可以通过数组名[1]进行引用 | Array() |
Hive有三种复杂数据类型ARRAY、MAP 和 STRUCT。ARRAY和MAP与Java中的Array和Map类似,而STRUCT与C语言中的Struct类似,它封装了一个命名字段集合,复杂数据类型允许任意层次的嵌套。
**数据结构**
{
"name": "songsong",
"friends": ["bingbing" , "lili"] , //列表Array,
"children": { //键值Map,
"xiao song": 18 ,
"xiaoxiao song": 19
},
"address": { //结构Struct,
"street": "hui long guan" ,
"city": "beijing"
}
}
本地数据
songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,hui long guan_beijing
yangyang,caicai_susu,xiao yang:18_xiaoxiao yang:19,chao yang_beijing
操作
-- Demo table for the Hive complex types ARRAY, MAP and STRUCT.
-- Table and column names (tb_dt, fs, chs, addr) match the queries and result grids in the rest of this file.
create table tb_dt(
    name string,                                -- plain scalar column
    fs array<string>,                           -- friends, e.g. bingbing_lili in the data file
    chs map<string, int>,                       -- children, e.g. xiao song:18_xiaoxiao song:19
    addr struct<street:string, city:string>     -- address, e.g. hui long guan_beijing
)
row format delimited fields terminated by ','   -- separator between columns
collection items terminated by '_'              -- separator between elements of an array, map or struct
map keys terminated by ':'                      -- separator between a map key and its value
lines terminated by '\n';                       -- one record per line
-- 语法解释
row format delimited fields terminated by ',' -- 列分隔符
collection items terminated by '_' -- ARRAY、MAP、STRUCT 中各元素之间的分隔符(MAP 的每个键值对算一个元素)
map keys terminated by ':' -- MAP中的key与value的分隔符
+-------------+----------------------+--------------------------------------+----------------------------------------------+
| tb_dt.name | tb_dt.fs | tb_dt.chs | tb_dt.addr |
+-------------+----------------------+--------------------------------------+----------------------------------------------+
| songsong | ["bingbing","lili"] | {"xiao song":18,"xiaoxiao song":19} | {"street":"hui long guan","city":"beijing"} |
| yangyang | ["caicai","susu"] | {"xiao yang":18,"xiaoxiao yang":19} | {"street":"chao yang","city":"beijing"} |
+-------------+----------------------+--------------------------------------+----------------------------------------------+
-- 操作数组
- arr[index] 取值
- size(arr) 长度
- 数组角标越界返回NULL
- 数组的定义
- array(e,e2,e3...)
- split(str , regex)
- collect_set collect_list
- explode(arr)
-- Read array elements by zero-based index.
select
    name,
    fs[0],   -- first element
    fs[1]    -- second element
from tb_dt;
+-----------+-----------+-------+
| name | _c1 | _c2 |
+-----------+-----------+-------+
| songsong | bingbing | lili |
| yangyang | caicai | susu |
+-----------+-----------+-------+
-- Indexing past the end of an array yields NULL instead of raising an error.
select
    name,
    fs[0],
    fs[1],
    fs[3]    -- out of range for the two-element sample arrays, so NULL
from tb_dt;
+-----------+-----------+-------+-------+
| name | _c1 | _c2 | _c3 |
+-----------+-----------+-------+-------+
| songsong | bingbing | lili | NULL |
| yangyang | caicai | susu | NULL |
+-----------+-----------+-------+-------+
-- Fetch the last element of an array by computing its index from size().
select
    name,
    fs[0],
    fs[1],
    fs[size(fs)-1]   -- last element (index = length - 1), not NULL for the sample rows
from tb_dt;
--操作map
map
map_keys
map_values
-- Return the keys and the values of each map as two separate arrays.
select
    name,
    map_keys(chs) as keys,
    map_values(chs) as vs
from tb_dt;
+-----------+--------------------------------+----------+
| name | keys | vs |
+-----------+--------------------------------+----------+
| songsong | ["xiao song","xiaoxiao song"] | [18,19] |
| yangyang | ["xiao yang","xiaoxiao yang"] | [18,19] |
+-----------+--------------------------------+----------+
-- Look up a map value by key. Rows whose map lacks the key return NULL.
select
    name,
    chs['xiao song']
from tb_dt;
+-----------+-------+
| name | _c1 |
+-----------+-------+
| songsong | 18 |
| yangyang | NULL |
+-----------+-------+
-- Number of entries in each map
select
    size(chs)
from tb_dt;
-- Build a map from a string: the second argument separates the entries, the third separates each key from its value
select
    str_to_map('zss:23,lss:22', ',', ':');
+--------------------------+
| _c0 |
+--------------------------+
| {"zss":"23","lss":"22"} |
+--------------------------+
-- Expand every map entry into its own row.
select
    explode(chs) as (k, v)   -- aliases: k is the key column, v is the value column
from tb_dt;
+----------------+--------+
| k | v |
+----------------+--------+
| xiao song | 18 |
| xiaoxiao song | 19 |
| xiao yang | 18 |
| xiaoxiao yang | 19 |
+----------------+--------+
--操作struct
addr.street
addr.city
-- Read individual struct fields with dot notation.
select
    name,
    addr.city,
    addr.street
from tb_dt;
+-----------+----------+----------------+
| name | city | street |
+-----------+----------+----------------+
| songsong | beijing | hui long guan |
| yangyang | beijing | chao yang |
+-----------+----------+----------------+