# 窗口函数笔记:来自宋红康老师视频
# MySQL数据库教程天花板,mysql安装到mysql高级,强!硬!_哔哩哔哩_bilibili
# 2022-04-03: MySQL new features.
# Create a scratch database and a small per-district sales table.
CREATE DATABASE dbtest18;
USE dbtest18;

# sales_value carries an explicit precision and scale: a bare DECIMAL means
# DECIMAL(10,0) in MySQL, which rounds away any fractional amount on insert.
CREATE TABLE sales (
    id          INT PRIMARY KEY AUTO_INCREMENT,
    city        VARCHAR(15),
    county      VARCHAR(15),
    sales_value DECIMAL(10,2)
);

INSERT INTO sales (city, county, sales_value)
VALUES
    ('北京', '海淀', 10.00),
    ('北京', '朝阳', 20.00),
    ('上海', '黄埔', 30.00),
    ('上海', '长宁', 10.00);
# Requirement: compute the sales total per city, the nationwide sales total,
# and each district's share of both its city's total and the nationwide total.
# Approach 1: temporary tables.

# Step 1: store the nationwide total in temporary table a.
CREATE TEMPORARY TABLE a AS
SELECT SUM(sales_value) AS sales_value
FROM sales;

SELECT sales_value
FROM a;

# Step 2: store the per-city totals in temporary table b.
CREATE TEMPORARY TABLE b AS
SELECT
    city,
    SUM(sales_value) AS sales_value
FROM sales
GROUP BY city;

SELECT
    city,
    sales_value
FROM b;

# Step 3: put each district next to its city total and the nationwide total,
# and derive the two ratios.
SELECT
    s.city                        AS 城市,
    s.county                      AS 区,
    s.sales_value                 AS 区销售额,
    b.sales_value                 AS 市销售额,
    s.sales_value / b.sales_value AS 市比率,
    a.sales_value                 AS 总销售额,
    s.sales_value / a.sales_value AS 总比率
FROM sales AS s
INNER JOIN b
    ON b.city = s.city  -- per-city totals
CROSS JOIN a            -- single-row nationwide total
ORDER BY s.city, s.county;
# Approach 2: window functions. Same result as the temporary-table approach,
# computed in a single pass without collapsing the district rows.
SELECT
    city                                      AS 城市,
    county                                    AS 区,
    sales_value                               AS 区销售额,
    SUM(sales_value) OVER city_w              AS 市销售额,  -- city total
    sales_value / SUM(sales_value) OVER city_w AS 市比率,
    SUM(sales_value) OVER all_w               AS 总销售额,  -- nationwide total
    sales_value / SUM(sales_value) OVER all_w AS 总比率
FROM sales
WINDOW
    city_w AS (PARTITION BY city),
    all_w  AS ()
ORDER BY city, county;
# Product catalogue used by the window-function examples:
# two categories with six products each.
CREATE TABLE goods (
    id          INT PRIMARY KEY AUTO_INCREMENT,
    category_id INT,
    category    VARCHAR(15),
    NAME        VARCHAR(30),
    price       DECIMAL(10,2),
    stock       INT,
    upper_time  DATETIME
);

INSERT INTO goods (category_id, category, NAME, price, stock, upper_time)
VALUES
    (1, '女装/女士精品', 'T恤',       39.90,   1000, '2020-11-10 00:00:00'),
    (1, '女装/女士精品', '连衣裙',    79.90,   2500, '2020-11-10 00:00:00'),
    (1, '女装/女士精品', '卫衣',      89.90,   1500, '2020-11-10 00:00:00'),
    (1, '女装/女士精品', '牛仔裤',    89.90,   3500, '2020-11-10 00:00:00'),
    (1, '女装/女士精品', '百褶裙',    29.90,   500,  '2020-11-10 00:00:00'),
    (1, '女装/女士精品', '呢绒外套',  399.90,  1200, '2020-11-10 00:00:00'),
    (2, '户外运动',      '自行车',    399.90,  1000, '2020-11-10 00:00:00'),
    (2, '户外运动',      '山地自行车', 1399.90, 2500, '2020-11-10 00:00:00'),
    (2, '户外运动',      '登山杖',    59.90,   1500, '2020-11-10 00:00:00'),
    (2, '户外运动',      '骑行装备',  399.90,  3500, '2020-11-10 00:00:00'),
    (2, '户外运动',      '运动外套',  799.90,  500,  '2020-11-10 00:00:00'),
    (2, '户外运动',      '滑板',      499.90,  1200, '2020-11-10 00:00:00');

# Show the freshly loaded rows.
SELECT
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    upper_time
FROM goods;
# Ranking functions
# 1. ROW_NUMBER(): numbers the rows of each partition consecutively
#    (1, 2, 3, 4, ...). The two products priced 89.90 get row_num 2 and 3.
# PARTITION BY only places rows of the same category together and keeps every
# row, whereas GROUP BY merges each group into a single row.
# Example: list the products of each category ordered by price, highest first.
SELECT
    ROW_NUMBER() OVER price_desc_w AS row_num,
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    upper_time
FROM goods
WINDOW price_desc_w AS (PARTITION BY category_id ORDER BY price DESC);

# Example: the three most expensive products of each category.
# The row number is computed in a CTE first, because a window function result
# cannot be filtered in the WHERE clause of the same query level.
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY category_id ORDER BY price DESC) AS row_num,
        id,
        category_id,
        category,
        NAME,
        price,
        stock,
        upper_time
    FROM goods
)
SELECT *
FROM numbered
WHERE row_num <= 3;
# 2. RANK(): tied rows share a rank and the next rank is skipped (1, 2, 2, 4).
#    The two products priced 89.90 both get 2; the 79.90 product gets 4.
# Example: rank the products of each category by price, highest first.
SELECT
    RANK() OVER price_desc_w AS row_num,
    id,
    category_id,
    category,
    NAME,
    price,
    stock
FROM goods
WINDOW price_desc_w AS (PARTITION BY category_id ORDER BY price DESC);

# Example: products of category 1 ("女装/女士精品") whose price rank is 4 or better.
WITH ranked AS (
    SELECT
        RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS row_num,
        id,
        category_id,
        category,
        NAME,
        price,
        stock
    FROM goods
)
SELECT *
FROM ranked
WHERE category_id = 1
  AND row_num <= 4;
# 3. DENSE_RANK(): tied rows share a rank and no rank is skipped (1, 2, 2, 3).
# Example: densely rank the products of each category by price, highest first.
SELECT
    DENSE_RANK() OVER price_desc_w AS row_num,
    id,
    category_id,
    category,
    NAME,
    price,
    stock
FROM goods
WINDOW price_desc_w AS (PARTITION BY category_id ORDER BY price DESC);
# 2. Distribution functions
# 1. PERCENT_RANK(): the rank expressed as a fraction, computed as
#      (rank - 1) / (rows - 1)
#    where rank is the value RANK() yields and rows is the number of rows in
#    the current window. RANK() is selected alongside it for comparison.
# Example: PERCENT_RANK of the products in category 1 ("女装/女士精品").
# Approach 1: repeat the window definition inline in each OVER clause.
SELECT
    RANK()         OVER (PARTITION BY category_id ORDER BY price DESC) AS r,
    PERCENT_RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS pr,
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    upper_time
FROM goods
WHERE category_id = 1;

# Approach 2: declare the window once in a WINDOW clause and reference it by name.
SELECT
    RANK()         OVER price_desc_w AS r,
    PERCENT_RANK() OVER price_desc_w AS pr,
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    upper_time
FROM goods
WHERE category_id = 1
WINDOW price_desc_w AS (PARTITION BY category_id ORDER BY price DESC);
# 2. CUME_DIST(): the fraction of rows in the partition whose value is less
#    than or equal to the current row's value.
# Example: for each product, the share of products in its category priced at
# or below it.
SELECT
    CUME_DIST() OVER price_asc_w AS cd,
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    upper_time
FROM goods
WINDOW price_asc_w AS (PARTITION BY category_id ORDER BY price);
# 3. Offset functions
# 1. LAG(expr, n) returns expr from the row n rows before the current row.
# Example: difference between each product's price and the previous product's
# price, within its category and ordered by ascending price.
# The named window is declared inside the derived table in the FROM clause,
# so the outer query can do arithmetic on pre_price.
# The first row of each category has no previous row, so its pre_price and
# diff_price are NULL.
SELECT
    id,
    category,
    NAME,
    price,
    pre_price,
    price - pre_price AS diff_price
FROM (
    SELECT
        id,
        category,
        NAME,
        price,
        LAG(price, 1) OVER w AS pre_price
    FROM goods
    WINDOW w AS (PARTITION BY category_id ORDER BY price)
) t;
# The window expression inside the derived table is equivalent to the inline
# form below. It is kept as a comment because a bare expression is not a
# statement and would make the script fail to parse.
#   LAG(price,1) OVER (PARTITION BY category_id ORDER BY price) AS pre_price
# 2. LEAD(expr, n) returns expr from the row n rows after the current row.
# Example: difference between the next product's price and the current
# product's price, within its category and ordered by ascending price.
# The last row of each category has no following row, so its behind_price and
# diff_price are NULL.
SELECT
    id,
    category,
    NAME,
    behind_price,
    price,
    behind_price - price AS diff_price
FROM (
    SELECT
        id,
        category,
        NAME,
        LEAD(price, 1) OVER w AS behind_price,
        price
    FROM goods
    WINDOW w AS (PARTITION BY category_id ORDER BY price)
) t;
# The window expression inside the derived table is equivalent to the inline
# form below. It is kept as a comment because a bare expression is not a
# statement and would make the script fail to parse.
#   LEAD(price,1) OVER (PARTITION BY category_id ORDER BY price) AS behind_price
# 4. First / last value functions
# 1. FIRST_VALUE(expr) returns expr from the first row of the window.
# Example: next to each product, show the first price of its category when
# ordered by ascending price.
SELECT
    id,
    category,
    NAME,
    price,
    stock,
    FIRST_VALUE(price) OVER price_asc_w AS first_price
FROM goods
WINDOW price_asc_w AS (PARTITION BY category_id ORDER BY price);
# 2. LAST_VALUE(expr) returns expr from the last row of the window frame.
# Example: next to each product, show the last price of its category when
# ordered by ascending price.
# The explicit frame is what makes this work. A window with ORDER BY and no
# frame clause defaults to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,
# so the frame ends at the current row (and its price ties) and LAST_VALUE
# just echoes the current row's own price. Extending the frame to
# UNBOUNDED FOLLOWING makes it cover the whole partition.
SELECT
    id,
    category_id,
    category,
    NAME,
    price,
    stock,
    LAST_VALUE(price) OVER w AS last_price
FROM goods
WINDOW w AS (
    PARTITION BY category_id
    ORDER BY price
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
);
# 5. Other functions
# 1. NTH_VALUE(expr, n) returns expr from the n-th row of the window frame.
# Example: next to each product, show the 2nd and 3rd lowest price of its
# category.
# The frame is spelled out to span the whole partition. With ORDER BY and no
# frame clause the default frame ends at the current row, so rows ranked
# before the n-th would get NULL because the n-th row is not yet in the frame.
# Drop the ROWS clause to get that running behaviour back.
SELECT
    id,
    category,
    NAME,
    price,
    NTH_VALUE(price, 2) OVER w AS second_price,
    NTH_VALUE(price, 3) OVER w AS third_price
FROM goods
WINDOW w AS (
    PARTITION BY category_id
    ORDER BY price
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
);
# 2. NTILE(n) splits the ordered rows of each partition into n buckets and
#    returns the bucket number of each row.
# Example: split the products of each category into 3 price groups.
# Six rows per category give bucket numbers 1, 1, 2, 2, 3, 3.
SELECT
    NTILE(3) OVER price_asc_w AS nt,
    id,
    category,
    NAME,
    price
FROM goods
WINDOW price_asc_w AS (PARTITION BY category_id ORDER BY price);

# Example: split the products of each category into 4 price groups.
# Six rows per category give bucket numbers 1, 1, 2, 2, 3, 4.
SELECT
    NTILE(4) OVER price_asc_w AS nt,
    id,
    category,
    NAME,
    price
FROM goods
WINDOW price_asc_w AS (PARTITION BY category_id ORDER BY price);
#2.5 小 结
#窗口函数的特点是可以分组,而且可以在分组内排序。另外,窗口函数不会因为分组而减少原表中的行
#数,这对我们在原表数据的基础上进行统计和排序非常有用。
#3.1 普通公用表表达式
# WITH CTE名称
# AS (子查询)
# SELECT|DELETE|UPDATE 语句;
# Example: look up the details of the departments that employees belong to.
# Preparation: copy the two source tables from the atguigudb schema into the
# current database. (This heading used to be a bare prose line, which fused
# with the CREATE TABLE below it and made the statement a syntax error.)
# NOTE(review): assumes the atguigudb schema is present on this server.
CREATE TABLE departments
AS
SELECT * FROM atguigudb.departments;

CREATE TABLE employees
AS
SELECT * FROM atguigudb.employees;
# Approach 1: subquery. Keep every department that at least one employee
# row refers to.
SELECT d.*
FROM departments AS d
WHERE EXISTS (
    SELECT 1
    FROM employees AS e
    WHERE e.department_id = d.department_id
);
# Approach 2: common table expression. The CTE is treated like a table that
# holds the distinct department ids found in employees, and is joined to
# departments.
WITH cte_emp AS (
    SELECT DISTINCT department_id
    FROM employees
)
SELECT *
FROM departments AS d
INNER JOIN cte_emp AS e
    ON e.department_id = d.department_id;
#3.2 递归公用表表达式 ???
# WITH RECURSIVE CTE名称
# AS (子查询)
# SELECT|DELETE|UPDATE 语句;
# Example: find every employee at least two levels below employee 100,
# i.e. the subordinates of that employee's subordinates and deeper.
# n is the depth in the reporting chain: employee 100 is level 1, direct
# reports are level 2, and the final filter keeps level 3 and beyond.
WITH RECURSIVE reporting_chain AS (
    -- Seed query: the top of the chain.
    SELECT
        employee_id,
        last_name,
        manager_id,
        1 AS n
    FROM employees
    WHERE employee_id = 100

    UNION ALL

    -- Recursive query: employees whose manager is already in the chain.
    SELECT
        e.employee_id,
        e.last_name,
        e.manager_id,
        boss.n + 1
    FROM employees AS e
    INNER JOIN reporting_chain AS boss
        ON e.manager_id = boss.employee_id
)
SELECT
    employee_id,
    last_name
FROM reporting_chain
WHERE n >= 3;
# New-feature exercises
# 1. Create the students table.
CREATE TABLE students (
    id      INT PRIMARY KEY AUTO_INCREMENT,
    student VARCHAR(15),
    points  TINYINT
);

# 2. Load the sample scores.
INSERT INTO students (student, points)
VALUES
    ('张三', 89),
    ('李四', 77),
    ('王五', 88),
    ('赵六', 90),
    ('孙七', 90),
    ('周八', 88);
# 3. Show the students' scores in descending order using RANK(), DENSE_RANK()
#    and ROW_NUMBER() side by side.
# There is no grouping here, so the window needs only ORDER BY and no
# PARTITION BY.
SELECT
    student,
    points,
    RANK()       OVER points_desc_w AS 排序1,
    DENSE_RANK() OVER points_desc_w AS 排序2,
    ROW_NUMBER() OVER points_desc_w AS 排序3
FROM students
WINDOW points_desc_w AS (ORDER BY points DESC);