Hive SQL 多表练习
准备原始数据集
学生表 student.csv
001,彭于晏,1995-05-16,男
002,胡歌,1994-03-20,男
003,周杰伦,1995-04-30,男
004,刘德华,1998-08-28,男
005,唐国强,1993-09-10,男
006,陈道明,1992-11-12,男
007,陈坤,1999-04-09,男
008,吴京,1994-02-06,男
009,郭德纲,1992-12-05,男
010,于谦,1998-08-23,男
011,潘长江,1995-05-27,男
012,杨紫,1996-12-21,女
013,蒋欣,1997-11-08,女
014,赵丽颖,1990-01-09,女
015,刘亦菲,1993-01-14,女
016,周冬雨,1990-06-18,女
017,范冰冰,1992-07-04,女
018,李冰冰,1993-09-24,女
019,邓紫棋,1994-08-31,女
020,宋丹丹,1991-03-01,女
讲师表 teacher.csv
1001,张高数
1002,李体音
1003,王子文
1004,刘丽英
课程表 course.csv
01,语文,1003
02,数学,1001
03,英语,1004
04,体育,1002
05,音乐,1002
分数表 score.csv
001,01,94
002,01,74
004,01,85
005,01,64
006,01,71
007,01,48
008,01,56
009,01,75
010,01,84
011,01,61
012,01,44
013,01,47
014,01,81
015,01,90
016,01,71
017,01,58
018,01,38
019,01,46
020,01,89
001,02,63
002,02,84
004,02,93
005,02,44
006,02,90
007,02,55
008,02,34
009,02,78
010,02,68
011,02,49
012,02,74
013,02,35
014,02,39
015,02,48
016,02,89
017,02,34
018,02,58
019,02,39
020,02,59
001,03,79
002,03,87
004,03,89
005,03,99
006,03,59
007,03,70
008,03,39
009,03,60
010,03,47
011,03,70
012,03,62
013,03,93
014,03,32
015,03,84
016,03,71
017,03,55
018,03,49
019,03,93
020,03,81
001,04,54
002,04,100
004,04,59
005,04,85
007,04,63
009,04,79
010,04,34
013,04,69
014,04,40
016,04,94
017,04,34
020,04,50
005,05,85
007,05,63
009,05,79
015,05,59
018,05,87
创建数据库和数据表
-- Create the practice database only when it is missing so the script can be
-- re-run without failing, then make it the current database.
create database if not exists chap05;
use chap05;
-- Student table: external, comma-delimited text files stored under /quiz03/student.
-- `if not exists` keeps the script re-runnable.
create external table if not exists student (
    stu_id   string comment '学生ID',
    stu_name string comment '学生姓名',
    birthday string comment '出生日期',
    gender   string comment '学生性别'
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/quiz03/student';

-- Replace the table contents with the local CSV file.
load data local inpath '/root/data/data02/student.csv' overwrite into table student;

-- Quick check of the loaded rows.
select * from student;
-- Teacher table: external, comma-delimited text files stored under /quiz03/teacher.
-- The column comments previously described course columns; they now describe
-- the teacher columns they are attached to.
create external table if not exists teacher (
    tea_id   string comment '讲师ID',
    tea_name string comment '讲师姓名'
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/quiz03/teacher';

-- Replace the table contents with the local CSV file.
load data local inpath '/root/data/data02/teacher.csv' overwrite into table teacher;

-- Quick check of the loaded rows.
select * from teacher;
-- Course table: external, comma-delimited text files stored under /quiz03/course.
-- tea_id refers to the teacher who gives the course.
-- `if not exists` keeps the script re-runnable.
create external table if not exists course (
    course_id   string comment '课程ID',
    course_name string comment '课程名称',
    tea_id      string comment '讲师ID'
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/quiz03/course';

-- Replace the table contents with the local CSV file.
load data local inpath '/root/data/data02/course.csv' overwrite into table course;

-- Quick check of the loaded rows.
select * from course;
-- Score table: one row per (student, course) with an integer score.
-- External, comma-delimited text files stored under /quiz03/score.
-- `if not exists` keeps the script re-runnable.
create external table if not exists score (
    stu_id    string comment '学生ID',
    course_id string comment '课程ID',
    score     int    comment '成绩'
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/quiz03/score';

-- Replace the table contents with the local CSV file.
load data local inpath '/root/data/data02/score.csv' overwrite into table score;
SQL练习
-- Every student row.
select *
from student;

-- Students whose name begins with '周'.
select *
from student
where stu_name like '周%';

-- How many students have a name beginning with '周'.
select count(*)
from student
where stu_name like '周%';
-- Courses in which student '004' scored above 85.
-- stu_id is a string column, so it is compared with a string literal: the
-- numeric literal 004 relied on implicit casting and would also match ids
-- such as '4' or '04'. Row filters live in WHERE, join keys in ON.
select
    stu.stu_id,
    stu.stu_name,
    stu.birthday,
    stu.gender,
    s.course_id,
    s.score
from student stu
inner join score s
    on stu.stu_id = s.stu_id
where stu.stu_id = '004'
  and s.score > 85;

-- All scores of student '004', highest first.
select
    stu.stu_id,
    stu.stu_name,
    stu.birthday,
    stu.gender,
    s.course_id,
    s.score
from student stu
inner join score s
    on stu.stu_id = s.stu_id
where stu.stu_id = '004'
order by s.score desc;
-- Students who scored below 60 in the course named '数学', lowest score first.
-- Each join now carries its own ON clause: previously the first join had no
-- condition, which is a cartesian product (rejected when Hive runs with strict
-- checks) until the second ON filtered it. Row filters are moved to WHERE.
select
    stu.stu_id,
    stu.stu_name,
    stu.birthday,
    stu.gender,
    c.course_name,
    s.score
from student stu
inner join score s
    on stu.stu_id = s.stu_id
inner join course c
    on s.course_id = c.course_id
where c.course_name = '数学'
  and s.score < 60
order by s.score;
-- Number of students per gender.
select
    gender,
    count(*)
from student
group by gender;

-- Average score of course '02', rounded to two decimals.
-- course_id is a string column, so it is compared with a string literal
-- instead of the numeric literal 02 (which depended on implicit casting).
select round(avg(score), 2)
from score
where course_id = '02';

-- Average score of every course, rounded to two decimals.
select
    course_id,
    round(avg(score), 2)
from score
group by course_id;
-- Number of distinct students that have at least one non-negative score.
select count(distinct stu_id)
from score
where score is not null
  and score >= 0;

-- Same count, obtained by grouping on stu_id first and counting the groups.
select count(stu_id)
from (
    select stu_id
    from score
    where score is not null
      and score >= 0
    group by stu_id
) t;

-- Number of non-negative score rows per course.
select
    course_id,
    count(stu_id)
from score
where score is not null
  and score >= 0
group by course_id;
-- Students who have not taken every course in the course table.
-- The course count per student uses count(s.course_id) rather than count(*):
-- after the left join a student without any score still produces one row, so
-- count(*) would report 1 course for them instead of 0.
select *
from student
where stu_id in (
    select stu.stu_id
    from student stu
    left join score s
        on stu.stu_id = s.stu_id
    group by stu.stu_id
    having count(s.course_id) != (select count(*) from course)
);
-- Students that have score rows for at least four courses.
select
    stu_id,
    count(course_id) course_count
from score
group by
    stu_id
having
    course_count >= 4;
-- First characters of stu_name that are shared by more than one student,
-- together with how many students share each of them.
select
    first_name,
    count(*) first_name_count
from (
    select
        stu_id,
        stu_name,
        substr(stu_name, 1, 1) as first_name
    from student
) ts
group by
    ts.first_name
having
    first_name_count > 1;
-- Average score per course, ordered by average ascending; ties are ordered by
-- course_id descending.
select
    course_id,
    avg(score) avg_score
from score
group by
    course_id
order by
    avg_score asc,
    course_id desc;
-- Courses that have more than 15 score rows.
select
    course_id,
    count(*) as stu_count
from score
group by
    course_id
having
    stu_count > 15;

-- Total score per student, highest total first.
select
    stu_id,
    sum(score) sum_score
from score
group by
    stu_id
order by
    sum_score desc;
-- One row per student: scores for 语文 / 数学 / 英语 (0 when the student has no
-- row for that course), number of courses taken and average score, ordered by
-- average score descending.
-- The query is driven from score and inner-joined to course: the previous
-- `course left join score` emitted an extra group with a NULL stu_id for any
-- course that has no score rows. `if` is written without backticks, matching
-- the other queries in this file.
select
    s.stu_id,
    sum(if(c.course_name = '语文', s.score, 0)) as `语文`,
    sum(if(c.course_name = '数学', s.score, 0)) as `数学`,
    sum(if(c.course_name = '英语', s.score, 0)) as `英语`,
    count(s.course_id) as `选课数`,
    avg(s.score) as `平均成绩`
from score s
inner join course c
    on c.course_id = s.course_id
group by s.stu_id
order by `平均成绩` desc;
-- Students who have a score in course "01" and at least three score rows
-- overall: returns their id and name.
select
    s.stu_id,
    s.stu_name
from (
    select
        t1.stu_id,
        count(t1.course_id) course_count
    from (
        select
            stu_id,
            course_id
        from score
        where stu_id in (select stu_id from score where course_id = "01")
    ) t1
    group by
        t1.stu_id
    having
        course_count >= 3
) t2
join student s
    on t2.stu_id = s.stu_id;

-- The same query again (the original notes list it twice).
select
    s.stu_id,
    s.stu_name
from (
    select
        t1.stu_id,
        count(t1.course_id) course_count
    from (
        select
            stu_id,
            course_id
        from score
        where stu_id in (
            select stu_id
            from score
            where course_id = "01"
        )
    ) t1
    group by
        t1.stu_id
    having
        course_count >= 3
) t2
join student s
    on t2.stu_id = s.stu_id;
-- Students with more than one score below 60, with their overall average score.
select
    failing.stu_id,
    averages.avg_score
from (
    -- number of sub-60 scores per student, keeping students with more than one
    select
        stu_id,
        sum(case when score < 60 then 1 else 0 end) as fail_count
    from score
    group by
        stu_id
    having
        fail_count > 1
) failing
left join (
    -- average score per student
    select
        stu_id,
        avg(score) as avg_score
    from score
    group by
        stu_id
) averages
    on failing.stu_id = averages.stu_id;
-- Every student (including those without scores) with the number of courses
-- taken and the total score; the total is 0 when there are no score rows.
select
    stu.stu_id,
    stu.stu_name,
    count(s.course_id) count_course,
    nvl(sum(s.score), 0) total_score
from student stu
left join score s
    on stu.stu_id = s.stu_id
group by
    stu.stu_id,
    stu.stu_name
order by
    stu.stu_id;
-- Students whose average score is above 85 (a student without scores counts
-- as average 0), ordered by student id.
-- The statement is now terminated with ';' so it no longer runs into the next
-- query when the file is executed as a script.
select
    stu.stu_id,
    stu.stu_name,
    nvl(avg(s.score), 0) as `avg_score`
from student stu
left join score s
    on stu.stu_id = s.stu_id
group by
    stu.stu_id,
    stu.stu_name
having nvl(avg(s.score), 0) > 85
order by stu.stu_id;
-- Course selection: one row per score row with the student's id and name and
-- the course's id and name.
-- Written as left joins driven from score (equivalent to the former
-- `student right join score`) and terminated with the ';' that was missing.
select
    student.stu_id,
    student.stu_name,
    c.course_id,
    c.course_name
from score s
left join student
    on student.stu_id = s.stu_id
left join course c
    on s.course_id = c.course_id;
-- Students that have no score row at all.
-- After the left join such students carry a NULL course_id. The former join
-- to course contributed no column or filter to the result and was removed;
-- the missing ';' terminator was added.
select
    stu_id,
    stu_name
from (
    select
        student.stu_id,
        student.stu_name,
        s.course_id
    from student
    left join score s
        on student.stu_id = s.stu_id
) t
where course_id is null;
-- Per course: number of scores >= 60 (pass) and number of scores < 60 (fail).
-- Terminated with the ';' that was missing.
select
    c.course_id,
    c.course_name,
    t.pass,
    t.fail
from course c
join (
    select
        course_id,
        sum(if(score >= 60, 1, 0)) as `pass`,
        sum(if(score < 60, 1, 0)) as `fail`
    from score
    group by course_id
) t
    on c.course_id = t.course_id;
-- Students who scored above 80 in course '03', with student and course names.
-- Terminated with the ';' that was missing.
select
    t1.stu_id,
    s.stu_name,
    t1.course_id,
    c.course_name,
    t1.score
from (
    select *
    from score
    where course_id = '03'
      and score > 80
) t1
left join student s
    on s.stu_id = t1.stu_id
left join course c
    on t1.course_id = c.course_id;
-- Students whose score in the course named '语文' is below that course's overall
-- average, together with the name of the teacher of the course.
-- Reading from the inside out:
--   * innermost query: average score per (stu_id, course_id), restricted to the
--     course named '语文' by right-joining score to the matching course_id(s);
--   * the WHERE clause keeps rows whose average is below the overall average of
--     all scores of that course (scalar subquery);
--   * t2 is joined to student for stu_name and to course for tea_id (-> t3);
--   * t3 is joined to teacher for tea_name.
-- Note that the alias t1 is reused at several nesting levels.
-- NOTE(review): depends on scalar-subquery support in WHERE — confirm the Hive
-- version in use supports it.
select t3.stu_id,t3.stu_name,t3.`avg_score`,t.tea_name from
(select t2.stu_id,t2.`avg_score`,s.stu_name,t2.course_id,c.tea_id from
(select t1.stu_id,t1.course_id,t1.`avg_score` from
(select stu_id,s.course_id, avg(score) as `avg_score` from score s right join
(select course_id from course where course_name = '语文') t1 on t1.course_id = s.course_id
group by stu_id,s.course_id) t1
where t1.`avg_score` < (select avg(score) as `avg_score` from score s right join (select course_id from course where course_name = '语文') t1 on t1.course_id = s.course_id)
) t2 left join student s on t2.stu_id = s.stu_id
left join course c on t2.course_id = c.course_id
)t3 left join teacher t on t3.tea_id = t.tea_id;
-- Per course, the share of scores in four bands, formatted as percentages:
--   a: score >= 85, b: 75-84, c: 60-74, d: below 60.
-- The course table is aliased `co` and the band columns are qualified with
-- `t.`: previously the output column `c` had the same name as the table alias
-- `c`, which made the reference ambiguous. Output column names are unchanged.
select
    co.course_id,
    co.course_name,
    t.a,
    t.b,
    t.c,
    t.d
from course co
left join (
    select
        course_id,
        concat(round((sum(if(score >= 85, 1, 0)) / count(*)) * 100, 2), '%') as a,
        concat(round((sum(if(score between 75 and 84, 1, 0)) / count(*)) * 100, 2), '%') as b,
        concat(round((sum(if(score between 60 and 74, 1, 0)) / count(*)) * 100, 2), '%') as c,
        concat(round((sum(if(score < 60, 1, 0)) / count(*)) * 100, 2), '%') as d
    from score
    group by course_id
) t
    on t.course_id = co.course_id;
-- Per course: highest, lowest and average score (two decimals) plus the share
-- of scores in five bands (>= 90, 80-89, 70-79, 60-69, below 60) as percentages.
select
    c.course_id,
    course_name,
    `最高分`,
    `最低分`,
    `平均分`,
    `优秀率`,
    `优良率`,
    `中等率`,
    `中下率`,
    `芸芸众生`
from course c
left join (
    select
        course_id,
        max(score) `最高分`,
        min(score) `最低分`,
        round(avg(score), 2) `平均分`,
        concat(round((sum(if(score >= 90, 1, 0)) / count(*)) * 100, 2), '%') as `优秀率`,
        concat(round((sum(if(score between 80 and 89, 1, 0)) / count(*)) * 100, 2), '%') as `优良率`,
        concat(round((sum(if(score between 70 and 79, 1, 0)) / count(*)) * 100, 2), '%') as `中等率`,
        concat(round((sum(if(score between 60 and 69, 1, 0)) / count(*)) * 100, 2), '%') as `中下率`,
        concat(round((sum(if(score < 60, 1, 0)) / count(*)) * 100, 2), '%') as `芸芸众生`
    from score
    group by
        course_id
) t
    on t.course_id = c.course_id;
-- Per course and gender: that gender's share of the course's score rows,
-- formatted as a percentage with two decimals.
select
    by_gender.course_id,
    by_gender.gender,
    concat(round((count_gender / count_course_student) * 100, 2), '%') as proportion
from (
    -- score rows per (course, gender)
    select
        course_id,
        gender,
        count(*) count_gender
    from score s1
    inner join student s2
        on s1.stu_id = s2.stu_id
    group by
        course_id,
        gender
) by_gender
inner join (
    -- score rows per course
    select
        course_id,
        count(*) count_course_student
    from score s1
    inner join student s2
        on s1.stu_id = s2.stu_id
    group by
        course_id
) by_course
    on by_course.course_id = by_gender.course_id;
-- Per course: average score of male and of female students (rounded to two
-- decimals), a label saying which average is higher, and the students holding
-- the highest and the lowest score of the course.
--   * battle_t: joins the per-course male average (male_t) with the per-course
--     female average (female_t) and derives the label '男性优秀' / '女性优秀' /
--     '势均力敌' from comparing them. Being an inner join, only courses that
--     have both male and female score rows appear.
--   * info: max_t lists the student(s) whose score equals the course maximum,
--     min_t the student(s) whose score equals the course minimum; they are
--     full-joined on course_id only, so several top or bottom students in one
--     course yield one row per (max holder, min holder) pair.
--   * battle_t and info are finally inner-joined on course_id.
select battle_t.course_id,male_avg_score, female_avg_score, battle, max_stu_id,min_stu_id, max_score, min_score from
(select male_t.course_id,round(male_avg_score,2) male_avg_score,round(female_avg_score,2) female_avg_score,
case
when male_avg_score > female_avg_score then '男性优秀'
when male_avg_score < female_avg_score then '女性优秀'
else '势均力敌'
end battle
from
(select course_id,avg(score) male_avg_score from score s1 inner join student s2 on s1.stu_id = s2.stu_id and gender = '男' group by course_id) male_t
inner join
(select course_id,avg(score) female_avg_score from score s1 inner join student s2 on s1.stu_id = s2.stu_id and gender = '女' group by course_id) female_t
on male_t.course_id = female_t.course_id) battle_t
inner join
(select max_t.course_id,max_t.stu_id max_stu_id,max_score,min_t.stu_id min_stu_id,min_score from
(select stu_id, s.course_id, max_score from score s
inner join
(select course_id, max(score) max_score from score group by course_id) t
on s.course_id = t.course_id and max_score = score) max_t
full join
(select stu_id, s.course_id, min_score from score s
inner join
(select course_id,min(score) min_score from score group by course_id) t
on s.course_id = t.course_id and min_score = score) min_t
on max_t.course_id = min_t.course_id) info
on battle_t.course_id = info.course_id;
-- Every score below 60 with the student's details, highest failing score first.
select
    s.stu_id,
    stu.stu_name,
    stu.birthday,
    stu.gender,
    s.score
from score s
join student stu
    on s.stu_id = stu.stu_id
where
    s.score < 60
order by
    s.score desc;
-- Name, course name and score for students all of whose scores are >= 70,
-- ordered by score ascending.
select
    stu.stu_name,
    c.course_name,
    s2.score
from student stu
join (
    -- students with zero scores that fail the ">= 70" test
    select
        s.stu_id,
        sum(case when s.score >= 70 then 0 else 1 end) as `below_70_count`
    from score s
    group by
        s.stu_id
    having
        below_70_count = 0
) qualified
    on stu.stu_id = qualified.stu_id
left join score s2
    on stu.stu_id = s2.stu_id
left join course c
    on s2.course_id = c.course_id
order by
    s2.score;
-- Students who obtained the same score in two different courses: the courses
-- involved (as a list) and the repeated score value(s) (as a set).
select
    s1.stu_id,
    collect_list(s1.course_id) as course_id,
    collect_set(s1.score) as score
from score s1
join score s2
    on  s1.stu_id = s2.stu_id
    and s1.course_id != s2.course_id
    and s1.score = s2.score
group by
    s1.stu_id;
分组排序取TopN
-- Top scores per course via a self join: each score row is paired with every
-- row of the same course whose score is greater than or equal to it, and rows
-- with at most five such pairings are kept.
select
    cur.course_id,
    cur.stu_id,
    cur.score
from score cur
left join score peer
    on  cur.course_id = peer.course_id
    and cur.score <= peer.score
group by
    cur.stu_id,
    cur.course_id,
    cur.score
having
    count(cur.stu_id) <= 5
order by
    cur.course_id,
    cur.score desc;
-- Top 5 scores per course via a correlated count: a row is kept when fewer
-- than five rows of the same course have a strictly higher score.
-- The threshold was `<= 5`, which allowed five strictly higher scores and so
-- returned the top six, disagreeing with the other TopN queries in this file.
select
    s1.course_id,
    s1.stu_id,
    s1.score
from score s1
where (
    select count(*)
    from score s2
    where s2.course_id = s1.course_id
      and s2.score > s1.score
) < 5
order by s1.course_id, s1.score desc;
row_number
-- Top 5 per course with row_number(): exactly five rows per course at most,
-- ties are numbered consecutively.
select *
from (
    select
        course_id,
        stu_id,
        score,
        row_number() over (partition by course_id order by score desc) as mum
    from score
) t
where mum <= 5;
rank
-- Top 5 per course with rank(): tied scores share a rank and the following
-- rank is skipped.
select *
from (
    select
        course_id,
        stu_id,
        score,
        rank() over (partition by course_id order by score desc) as mum
    from score
) t
where mum <= 5;
dense_rank
-- Top 5 per course with dense_rank(): tied scores share a rank and no rank is
-- skipped afterwards.
select *
from (
    select
        course_id,
        stu_id,
        score,
        dense_rank() over (partition by course_id order by score desc) as mum
    from score
) t
where mum <= 5;