文章目录
Hive数据仓库——Hive SQL练习
练习前准备
1、启动hive
hive
2、在hive中创建test1数据库
-- Create the practice database.
-- The if-not-exists guard lets this setup step be re-run without an already-exists error.
create database if not exists test1;
3、切换test1数据库
-- Make test1 the current database so that unqualified table names resolve to it.
use test1;
4、创建students表
-- Student information table backed by comma-delimited text.
-- The if-not-exists guard keeps the statement re-runnable.
-- The storage format is spelled out so a non-text hive.default.fileformat
-- setting cannot change how the delimited rows are stored and read.
create table if not exists students (
    id     bigint comment '学生id',
    name   string comment '学生姓名',
    age    int    comment '学生年龄',
    gender string comment '学生性别',
    clazz  string comment '学生班级'
) comment '学生信息表'
row format delimited fields terminated by ','
stored as textfile;
5、创建score表
-- Student score table backed by comma-delimited text.
-- Per the column comments, id is the student id and score_id is the subject id.
-- The if-not-exists guard keeps the statement re-runnable, and the storage
-- format is explicit so it does not depend on hive.default.fileformat.
create table if not exists score (
    id       bigint comment '学生id',
    score_id bigint comment '科目id',
    score    int    comment '学生成绩'
) comment '学生成绩表'
row format delimited fields terminated by ','
stored as textfile;
6、查看表信息
-- Show the columns, types and column comments of the two tables created so far.
describe students;
describe score;
练习
1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据
-- Subject lookup table, mapping a subject id to its name.
-- Data files are placed into the table directory with hdfs dfs -put, so the
-- table must be plain comma-delimited text. The format is stated explicitly
-- instead of relying on hive.default.fileformat.
-- The if-not-exists guard keeps the statement re-runnable.
create table if not exists subject (
    subject_id   bigint comment '科目id',
    subject_name string comment '科目名称'
) comment '科目表'
row format delimited fields terminated by ','
stored as textfile;
2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)
-- Per-subject scores for each student, limited to 10 rows as a preview.
-- Both joins are left joins, so students without score rows and scores whose
-- subject id has no match in subject are still returned, with nulls.
select
    stu.id,
    stu.name,
    stu.clazz,
    sc.score_id,
    sub.subject_name,
    sc.score
from students as stu
left join score as sc
    on stu.id = sc.id
left join subject as sub
    on sc.score_id = sub.subject_id
limit 10;
3、查询学生总分(输出:学号,姓名,班级,总分)
第一种方式
-- Total score per student, approach 1: join first, then aggregate.
-- Grouping by id, name and clazz allows all three to be selected next to the sum.
-- A student with no score rows gets a null total. Output is limited to 10 rows.
select
    stu.id,
    stu.name,
    stu.clazz,
    sum(sc.score) as sum_score
from students as stu
left join score as sc
    on stu.id = sc.id
group by
    stu.id,
    stu.name,
    stu.clazz
limit 10;
第二种方式
-- Total score per student, approach 2: aggregate score first, then join.
-- total_by_student holds one row per id found in score.
with total_by_student as (
    select
        id,
        sum(score) as sum_score
    from score
    group by id
)
select
    stu.id,
    stu.name,
    stu.clazz,
    tot.sum_score
from students as stu
left join total_by_student as tot
    on stu.id = tot.id
limit 10;
4、查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)
-- Top three students in the whole grade by total score.
-- The student id is a secondary sort key so that students with equal totals
-- come back in a stable order. Sorting on the total alone lets the engine
-- pick any of the tied rows for the limit.
-- NOTE(review): students without scores have a null total. This relies on Hive
-- sorting nulls last for a descending sort, so confirm on the target version.
select
    stu.id,
    stu.name,
    stu.clazz,
    tot.sum_score
from students as stu
left join (
    -- one row per student id with the sum of all of that student scores
    select
        id,
        sum(score) as sum_score
    from score
    group by id
) as tot
    on stu.id = tot.id
order by
    tot.sum_score desc,
    stu.id
limit 3;
5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)
-- Top ten students of class 文科一班 by total score.
-- The filter is on the students side of the left join, so it does not turn
-- the outer join into an inner join.
-- The student id is a secondary sort key so that equal totals are ordered
-- deterministically before the limit is applied.
select
    stu.id,
    stu.name,
    stu.clazz,
    tot.sum_score
from students as stu
left join (
    -- one row per student id with the sum of all of that student scores
    select
        id,
        sum(score) as sum_score
    from score
    group by id
) as tot
    on stu.id = tot.id
where stu.clazz = '文科一班'
order by
    tot.sum_score desc,
    stu.id
limit 10;
优化:先在子查询中过滤出文科一班的学生,再关联总分,减少参与关联的数据量
-- Top ten students of class 文科一班 by total score, filtering before the join.
-- class_students keeps only the id and name of students in the class, so fewer
-- rows and columns take part in the join. The class name is re-emitted as a
-- constant because every remaining row belongs to that class.
-- The student id is a secondary sort key so that equal totals are ordered
-- deterministically before the limit is applied.
with class_students as (
    select
        id,
        name
    from students
    where clazz = '文科一班'
),
total_by_student as (
    select
        id,
        sum(score) as sum_score
    from score
    group by id
)
select
    cs.id,
    cs.name,
    '文科一班' as clazz,
    tot.sum_score
from class_students as cs
left join total_by_student as tot
    on cs.id = tot.id
order by
    tot.sum_score desc,
    cs.id
limit 10;
6、查询每个班级学生总分的平均成绩(输出:班级,平均分)
-- Average of the per-student total score for each class, rounded to 2 decimals.
-- avg skips nulls, so students without any score rows do not lower the average.
-- The output alias was misspelled as socre and is corrected to score here.
select
    stu.clazz,
    round(avg(tot.sum_score), 2) as clazz_avg_sum_score
from students as stu
left join (
    -- one row per student id with the sum of all of that student scores
    select
        id,
        sum(score) as sum_score
    from score
    group by id
) as tot
    on stu.id = tot.id
group by stu.clazz;
7、查询每个班级的最高总分(输出:班级,总分)
-- Highest per-student total score within each class.
-- max skips nulls, so students without any score rows are ignored unless the
-- whole class has no scores, in which case the result is null.
-- The output alias was misspelled as socre and is corrected to score here.
select
    stu.clazz,
    max(tot.sum_score) as clazz_max_sum_score
from students as stu
left join (
    -- one row per student id with the sum of all of that student scores
    select
        id,
        sum(score) as sum_score
    from score
    group by id
) as tot
    on stu.id = tot.id
group by stu.clazz;
8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)
- 使用窗口函数(开窗函数):row_number()
-- Top three students of every class by total score, using row_number.
-- Step 1, student_total: total score per student, null when there are no scores.
-- Step 2, ranked: number the students inside each class from highest total down.
--   The student id is a secondary sort key so that equal totals always receive
--   the same row numbers. Without it the engine may number tied rows in any order.
-- Step 3: keep the rows numbered 1 to 3 in each class.
-- row_number never repeats a number, so a tie at third place keeps only one of
-- the tied students. Use rank or dense_rank instead if ties should all be kept.
with student_total as (
    select
        stu.id,
        stu.name,
        stu.clazz,
        sum(sc.score) as sum_score
    from students as stu
    left join score as sc
        on stu.id = sc.id
    group by
        stu.id,
        stu.name,
        stu.clazz
),
ranked as (
    select
        id,
        name,
        clazz,
        sum_score,
        row_number() over (
            partition by clazz
            order by sum_score desc, id
        ) as rk
    from student_total
)
select
    id,
    name,
    clazz,
    sum_score,
    rk
from ranked
where rk <= 3;