0
点赞
收藏
分享

微信扫一扫

01、Hive数据仓库——Hive SQL练习

曾宝月 2022-02-18 阅读 83

文章目录

Hive数据仓库——Hive SQL练习

练习前准备

1、启动hive

# Start the Hive command-line client.
hive

2、在hive中创建test1数据库

-- Create the practice database.
-- IF NOT EXISTS makes the setup re-runnable instead of failing when test1 is already there.
create database if not exists test1;

3、切换test1数据库

-- Switch the session to test1 so the unqualified table names that follow refer to it.
use test1;

4、创建students表

-- Student information table: id, name, age, gender and class.
-- Fields in the underlying data are separated by commas.
-- IF NOT EXISTS lets the setup be replayed without an "already exists" error.
-- Note that it leaves an existing students table untouched even if its definition differs.
create table if not exists students(
    id bigint comment '学生id',
    name string comment '学生姓名',
    age int comment '学生年龄',
    gender string comment '学生性别',
    clazz string comment '学生班级'
) comment '学生信息表' 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

5、创建score表

-- Student score table: student id, subject id and the score obtained.
-- Despite its name, score_id holds the subject id (it is joined to subject.subject_id).
-- Fields in the underlying data are separated by commas.
-- IF NOT EXISTS lets the setup be replayed without an "already exists" error.
-- Note that it leaves an existing score table untouched even if its definition differs.
create table if not exists score(
    id bigint comment '学生id',
    score_id bigint  comment '科目id',
    score int comment '学生成绩'
) comment '学生成绩表'  
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

6、查看表信息

-- Print the column definitions of both tables.
describe students;
describe score;


练习

1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据

-- Subject lookup table: subject id and subject name.
-- Fields in the underlying data are separated by commas.
-- IF NOT EXISTS lets the setup be replayed without an "already exists" error.
-- Note that it leaves an existing subject table untouched even if its definition differs.
create table if not exists subject(
    subject_id bigint comment '科目id',
    subject_name string comment '科目名称'
) comment '科目表'  
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)

-- Per-subject scores for each student, with the subject name resolved from subject.
-- Both joins are left joins, so a student without score rows is still returned
-- (score_id, subject_name and score are NULL for that row).
-- LIMIT 10 only previews the result and there is no ORDER BY, so which rows come back is not fixed.
select
    stu.id,
    stu.name,
    stu.clazz,
    sc.score_id,
    subj.subject_name,
    sc.score
from students stu
left join score sc
    on sc.id = stu.id
left join subject subj
    on subj.subject_id = sc.score_id
limit 10;

3、查询学生总分(输出:学号,姓名,班级,总分)

第一种方式

-- Total score per student, variant 1: join first, then aggregate.
-- name and clazz are part of the grouping key only so they can be selected next to the sum.
-- A student without score rows is kept by the left join and gets a NULL total.
select
    stu.id,
    stu.name,
    stu.clazz,
    sum(sc.score) as sum_score
from students stu
left join score sc
    on sc.id = stu.id
group by
    stu.id,
    stu.name,
    stu.clazz
limit 10;

第二种方式

-- Total score per student, variant 2: aggregate score by student id first,
-- then attach the pre-computed total to each student row.
-- A student with no rows in score is kept by the left join with a NULL total.
select
    stu.id,
    stu.name,
    stu.clazz,
    totals.sum_score
from students stu
left join (
    select
        id,
        sum(score) as sum_score
    from score
    group by id
) totals
    on totals.id = stu.id
limit 10;

4、查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)

-- Top three students of the whole grade by total score.
-- The subquery pre-aggregates one total per student id before the join.
-- Sorting by the total alone leaves the cut at LIMIT 3 arbitrary when totals tie,
-- so the student id is added as a secondary sort key to make the result repeatable.
select t1.id
       ,t1.name
       ,t1.clazz
       ,t2.sum_score 
from students t1
left join (
    select id
       ,sum(score) as sum_score
    from score
    group by id
) t2 on t1.id = t2.id
order by t2.sum_score desc, t1.id
limit 3;

5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)

-- Top ten students of class '文科一班' by total score.
-- The subquery pre-aggregates one total per student id, and the class filter is applied after the join.
-- Sorting by the total alone leaves the cut at LIMIT 10 arbitrary when totals tie,
-- so the student id is added as a secondary sort key to make the result repeatable.
select t1.id
       ,t1.name
       ,t1.clazz
       ,t2.sum_score 
from students t1
left join (
    select id
       ,sum(score) as sum_score
    from score
    group by id
) t2 on t1.id = t2.id
where t1.clazz = '文科一班'
order by t2.sum_score desc, t1.id
limit 10;

优化(先在子查询中过滤出文科一班的学生,再与总分关联,减少参与 join 的数据量)

-- Top ten students of class '文科一班' by total score, filtering before the join.
-- The students subquery keeps only that class, so the class name is emitted as a literal.
-- Sorting by the total alone leaves the cut at LIMIT 10 arbitrary when totals tie,
-- so the student id is added as a secondary sort key to make the result repeatable.
select t1.id
       ,t1.name
       ,'文科一班' as clazz
       ,t2.sum_score 
from (
    select id
           ,name
    from students
    where clazz = '文科一班'
) t1 left join (
    select id
           ,sum(score) as sum_score
    from score
    group by id
) t2 on t1.id = t2.id
order by t2.sum_score desc, t1.id
limit 10;

6、查询每个班级学生总分的平均成绩(输出:班级,平均分)

-- Average of the students' total scores per class, rounded to two decimals.
-- The subquery yields one total per student id, and the outer query averages those totals by class.
-- Fixes the misspelled output alias ("socre" -> "score").
select t1.clazz
       ,round(avg(t2.sum_score),2) as clazz_avg_sum_score
from students t1
left join (
    select id
           ,sum(score) as sum_score
    from score
    group by id
) t2 on t1.id = t2.id
group by t1.clazz;

7、查询每个班级的最高总分(输出:班级,总分)

-- Highest student total score per class.
-- The subquery yields one total per student id, and the outer query takes the maximum by class.
-- Fixes the misspelled output alias ("socre" -> "score").
select t1.clazz
       ,max(t2.sum_score) as clazz_max_sum_score
from students t1
left join (
    select id
           ,sum(score) as sum_score
    from score
    group by id
) t2 on t1.id = t2.id
group by t1.clazz;

8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)

  • 窗口函数(开窗函数):row_number(),按班级分区、按总分降序编号,再取编号不超过 3 的行
-- Top three students in every class by total score.
-- student_totals: one row per (id, name, clazz) with the summed score.
--   The left join keeps students that have no score rows, with a NULL total.
-- ranked: numbers the students inside each class by total, highest first.
--   row_number() hands out distinct numbers, so tied totals still get different ranks.
-- The final select keeps ranks 1 to 3 and also returns the rank column rk.
with student_totals as (
    select
        stu.id,
        stu.name,
        stu.clazz,
        sum(sc.score) as sum_score
    from students stu
    left join score sc
        on sc.id = stu.id
    group by
        stu.id,
        stu.name,
        stu.clazz
),
ranked as (
    select
        id,
        name,
        clazz,
        sum_score,
        row_number() over (partition by clazz order by sum_score desc) as rk
    from student_totals
)
select
    id,
    name,
    clazz,
    sum_score,
    rk
from ranked
where rk <= 3;

举报

相关推荐

0 条评论