import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import os
导入数据
# Folder that holds the Kaggle "store sales" CSV files.
DATA_DIR = r'D:\机器学习\kaggle预测\商店销售'


def load_csv_tables(data_dir):
    """Read every CSV file found under *data_dir* into a DataFrame.

    Directories and files are visited in sorted name order so that the
    returned lists have a reproducible order (the raw ``os.walk`` listing
    order is arbitrary). Files without a ``.csv`` extension are skipped
    instead of being passed to ``pd.read_csv``.

    Returns a ``(names, frames)`` pair of parallel lists: the bare file
    names and the DataFrames read from them.
    """
    names = []
    frames = []
    for root, subdirs, files in os.walk(data_dir):
        subdirs.sort()  # sorting in place steers the order os.walk descends
        for name in sorted(files):
            if not name.lower().endswith('.csv'):
                continue
            names.append(name)
            frames.append(pd.read_csv(os.path.join(root, name)))
    return names, frames


filename_, filename_data = load_csv_tables(DATA_DIR)
filename_
['holidays_events.csv',
'oil.csv',
'sample_submission.csv',
'stores.csv',
'test.csv',
'train.csv',
'transactions.csv']
# Merge the auxiliary tables onto the training data.
# Look the tables up by file name rather than by list position, so the merge
# does not silently pick the wrong table if the directory listing changes.
tables = dict(zip(filename_, filename_data))

# NOTE(review): if holidays_events.csv can contain more than one row per date,
# this left merge duplicates the matching training rows and will skew the
# averages computed later — confirm against the data.
df_train1 = (
    tables['train.csv']
    .merge(tables['holidays_events.csv'], on='date', how='left')
    .merge(tables['oil.csv'], on='date', how='left')
    .merge(tables['stores.csv'], on='store_nbr', how='left')
    .merge(tables['transactions.csv'], on=['date', 'store_nbr'], how='left')
    # Both the holiday table and the store table carry a "type" column; the
    # merge suffixes them _x / _y, so give them meaningful names.
    .rename(columns={'type_x': 'holiday_type', 'type_y': 'store_type'})
)
df_train1.head()
id | date | store_nbr | family | sales | onpromotion | holiday_type | locale | locale_name | description | transferred | dcoilwtico | city | state | store_type | cluster | transactions | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN |
1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN |
2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN |
3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN |
4 | 4 | 2013-01-01 | 1 | BOOKS | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN |
# Parse the date column and derive calendar features from it.
df_train1['date'] = pd.to_datetime(df_train1['date'])
date_parts = df_train1['date'].dt
df_train1 = df_train1.assign(
    year=date_parts.year,
    month=date_parts.month,
    week=date_parts.isocalendar().week,  # ISO week number
    quarter=date_parts.quarter,
    day_of_week=date_parts.day_name(),
)
df_train1.head()
id | date | store_nbr | family | sales | onpromotion | holiday_type | locale | locale_name | description | ... | city | state | store_type | cluster | transactions | year | month | week | quarter | day_of_week | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday |
1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday |
2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday |
3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday |
4 | 4 | 2013-01-01 | 1 | BOOKS | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday |
5 rows × 22 columns
store_type、family、cluster汇总
# Mean sales per store type, highest first.
store_type_means = df_train1.groupby('store_type')['sales'].mean()
df_st_sa = store_type_means.reset_index().sort_values(by='sales', ascending=False)
df_st_sa
store_type | sales | |
---|---|---|
0 | A | 708.378165 |
3 | D | 352.084510 |
1 | B | 328.275233 |
4 | E | 270.285490 |
2 | C | 197.790647 |
# Mean sales per product family; keep the ten best-selling families.
family_means = df_train1.groupby('family')['sales'].mean().reset_index()
df_fa_sa = family_means.sort_values(by='sales', ascending=False).head(10)
df_fa_sa
family | sales | |
---|---|---|
12 | GROCERY I | 3790.432797 |
3 | BEVERAGES | 2394.912701 |
30 | PRODUCE | 1355.373698 |
7 | CLEANING | 1074.171518 |
8 | DAIRY | 711.175991 |
5 | BREAD/BAKERY | 464.150612 |
28 | POULTRY | 351.078816 |
24 | MEATS | 341.965905 |
25 | PERSONAL CARE | 271.192381 |
9 | DELI | 265.629746 |
# Mean sales per cluster. Rows stay in cluster-id order (the groupby key
# order); they are not sorted by sales.
df_cl_sa = df_train1.groupby('cluster')['sales'].mean().reset_index()
df_cl_sa.head()
cluster | sales | |
---|---|---|
0 | 1 | 327.022808 |
1 | 2 | 261.025731 |
2 | 3 | 194.926534 |
3 | 4 | 297.537877 |
4 | 5 | 1120.118405 |
from matplotlib.gridspec import GridSpec

# 2x2 grid: top-left = top families, top-right = store types,
# bottom row (full width) = clusters.
plt.figure(figsize=(12, 8))
gs = GridSpec(2, 2)

# Bottom row: mean sales per cluster.
ax = plt.subplot(gs[1:, :2])
# Place each bar at its real cluster id; positioning bars at 0..n-1 made the
# tick labels disagree with the cluster numbers.
ax.bar(df_cl_sa['cluster'], df_cl_sa['sales'], width=0.5)
ax.set_xticks(df_cl_sa['cluster'])
ax.set_title('Clusters VS Sales')

# Top-left: the ten product families with the highest mean sales.
ax = plt.subplot(gs[:1, :1])
ax.barh(df_fa_sa['family'], df_fa_sa['sales'])
ax.set_title('Average Sales Familys')

# Top-right: share of mean sales per store type, drawn as a ring.
ax = plt.subplot(gs[:1, 1])
ax.pie(df_st_sa['sales'], wedgeprops={'width': 0.3}, labels=df_st_sa['store_type'])
ax.set_title('Highest Sales Stores')

plt.show()
月销售量汇总
def monthly_mean_sales(frame, year, column):
    """Return the mean sales per month for one year.

    *frame* must have 'year', 'month' and 'sales' columns. The result has a
    'month' column and the monthly means in a column named *column*.
    """
    rows = frame.loc[frame['year'] == year, ['month', 'sales']]
    monthly = rows.groupby('month').agg({'sales': 'mean'}).reset_index()
    return monthly.rename(columns={'sales': column})


# Monthly mean sales, one frame per year.
df_2013 = monthly_mean_sales(df_train1, 2013, 's13')
df_2014 = monthly_mean_sales(df_train1, 2014, 's14')
df_2015 = monthly_mean_sales(df_train1, 2015, 's15')
df_2016 = monthly_mean_sales(df_train1, 2016, 's16')
df_2017 = monthly_mean_sales(df_train1, 2017, 'sales')

# Pad the remaining months of 2017 with zeros so the inner merge below keeps
# all twelve months.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_2017_no = pd.DataFrame({'month': [9, 10, 11, 12], 'sales': [0, 0, 0, 0]})
df_2017 = pd.concat([df_2017, df_2017_no]).rename(columns={'sales': 's17'})

# Combine the years side by side, one row per month.
df_year = (
    df_2013
    .merge(df_2014, on='month')
    .merge(df_2015, on='month')
    .merge(df_2016, on='month')
    .merge(df_2017, on='month')
)
df_year
month | s13 | s14 | s15 | s16 | s17 | |
---|---|---|---|---|---|---|
0 | 1 | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 |
1 | 2 | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 |
2 | 3 | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 |
3 | 4 | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 |
4 | 5 | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 |
5 | 6 | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 |
6 | 7 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 |
7 | 8 | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 |
8 | 9 | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 |
9 | 10 | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 |
10 | 11 | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 |
11 | 12 | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 |
# Drop the month column, keeping only the per-year sales columns, and turn
# any missing value into 0.
year_columns = ['s13', 's14', 's15', 's16', 's17']
df_year = df_year.loc[:, year_columns].fillna(0)
df_year
s13 | s14 | s15 | s16 | s17 | |
---|---|---|---|---|---|
Jan | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 |
Feb | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 |
Mar | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 |
Apr | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 |
May | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 |
Jun | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 |
Ju1 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 |
Aug | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 |
Sep | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 |
Oct | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 |
Nov | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 |
Dec | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 |
# Label the twelve rows with month abbreviations.
# 'Jul' was previously misspelt 'Ju1' (digit one), which showed up on the plots.
df_year.index = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
y_data = df_2013['month'].tolist()  # month numbers as a plain list
df_year
s13 | s14 | s15 | s16 | s17 | |
---|---|---|---|---|---|
Jan | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 |
Feb | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 |
Mar | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 |
Apr | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 |
May | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 |
Jun | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 |
Ju1 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 |
Aug | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 |
Sep | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 |
Oct | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 |
Nov | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 |
Dec | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 |
# Stacked horizontal bars: mean sales per month, one segment per year.
plt.figure(figsize=(12, 8))

# (column position in df_year, legend label, x position of the year caption)
year_layers = [
    (0, '2013', 100),
    (1, '2014', 500),
    (2, '2015', 1000),
    (3, '2016', 1300),
    (4, '2017', 1700),
]

stack_start = None  # running sum of the segments already drawn
for col_pos, year_label, caption_x in year_layers:
    segment = df_year.iloc[:, col_pos]
    if stack_start is None:
        plt.barh(df_year.index, segment, label=year_label)
        stack_start = segment
    else:
        plt.barh(df_year.index, segment, left=stack_start, label=year_label)
        stack_start = stack_start + segment
    # Caption written just above the top bar.
    plt.text(caption_x, 12.5, year_label)

plt.title('Avg Sales for Each Year', loc='left', y=1.08, fontsize=15)
plt.legend()
plt.show()
月、季、周、星期几汇总
# Mean sales per month, per ISO week and per quarter.
import calendar

df_m_sa = df_train1.groupby('month').agg({'sales': 'mean'}).reset_index()
df_m_sa['sales'] = df_m_sa['sales'].round(2)  # two decimals for display
# Month number -> abbreviated month name.
df_m_sa['month_text'] = df_m_sa['month'].apply(lambda x: calendar.month_abbr[x])
# Bar caption of the form "Jan-341.92".
df_m_sa['text'] = df_m_sa['month_text'] + '-' + df_m_sa['sales'].astype(str)

df_w_sa = df_train1.groupby('week').agg({'sales': 'mean'}).reset_index()
df_q_sa = df_train1.groupby('quarter').agg({'sales': 'mean'}).reset_index()

# head() must be called on all three frames; the last one used to be the bare
# method object (`df_q_sa.head`) instead of its result.
df_m_sa.head(), df_w_sa.head(), df_q_sa.head()
( month sales month_text text
0 1 341.92 Jan Jan-341.92
1 2 320.93 Feb Feb-320.93
2 3 352.01 Mar Mar-352.01
3 4 341.17 Apr Apr-341.17
4 5 345.65 May May-345.65,
week sales
0 1 409.099519
1 2 347.534643
2 3 338.142199
3 4 329.186258
4 5 344.195233,
quarter sales
0 1 338.825392
1 2 346.546038
2 3 359.334098
3 4 399.229622)
from matplotlib.gridspec import GridSpec

# 2x2 grid: top-left = months, top-right = quarters, bottom row = weeks.
plt.figure(figsize=(12, 8))
gs = GridSpec(2, 2)
print(gs[:2, :2])

# Top-left: mean sales per month as horizontal bars.
month_ax = plt.subplot(gs[:1, :1])
month_ax.barh(df_m_sa['month_text'], df_m_sa['sales'])
# Each bar needs its own text call, so write the captions in a loop.
for bar_pos, (mean_sales, caption) in enumerate(zip(df_m_sa['sales'], df_m_sa['text'])):
    month_ax.text(mean_sales - 100, bar_pos, caption)
month_ax.set_title('month wise avg sales analysis')

# Top-right: share of mean sales per quarter, drawn as a ring.
quarter_ax = plt.subplot(gs[:1, 1])
quarter_ax.pie(
    df_q_sa['sales'],
    wedgeprops={'width': 0.3},
    labels=df_q_sa['quarter'],
    autopct='%1.2f%%',
    pctdistance=1.25,
)
quarter_ax.set_title('Quarter wise Avg Sales Analy')

# Bottom row: mean sales per week as a filled line.
week_ax = plt.subplot(gs[1:, :2])
week_numbers = df_w_sa['week']
week_means = df_w_sa['sales']
week_ax.fill_between(week_numbers, week_means, alpha=0.6)
week_ax.plot(week_numbers, week_means, marker='o')
week_ax.set_title('Week wise Avg Sales Analysis')

plt.show()
GridSpec(2, 2)[0:2, 0:2]
# Mean sales per day of the week, rounded to two decimals.
# Rows come out in alphabetical day-name order (the groupby key order).
df_dw_sa = df_train1.groupby('day_of_week')['sales'].mean().reset_index()
df_dw_sa['sales'] = df_dw_sa['sales'].round(2)
df_dw_sa
day_of_week | sales | |
---|---|---|
0 | Friday | 326.73 |
1 | Monday | 348.16 |
2 | Saturday | 434.79 |
3 | Sunday | 464.74 |
4 | Thursday | 286.57 |
5 | Tuesday | 319.92 |
6 | Wednesday | 330.77 |
# Horizontal bars of mean sales per weekday, each captioned with its value.
plt.barh(df_dw_sa['day_of_week'], df_dw_sa['sales'])
# Each bar needs its own text call, so write the captions in a loop.
for bar_pos, mean_sales in enumerate(df_dw_sa['sales']):
    plt.text(mean_sales - 50, bar_pos, df_dw_sa['sales'].iloc[bar_pos])
plt.title('Avg Sales VS Day of Week')
plt.show()
store_type与holiday_type关联
# Mean sales for every (store type, holiday type) pair, two decimals.
df_st_ht = df_train1.groupby(['store_type', 'holiday_type'])['sales'].mean().reset_index()
df_st_ht['sales'] = df_st_ht['sales'].round(2)
df_st_ht.head()
store_type | holiday_type | sales | |
---|---|---|---|
0 | A | Additional | 957.70 |
1 | A | Bridge | 969.82 |
2 | A | Event | 813.56 |
3 | A | Holiday | 723.28 |
4 | A | Transfer | 984.63 |
# Bubble chart: store type on x, holiday type on y; bubble size and colour
# both encode the mean sales.
pair_sales = df_st_ht['sales']
plt.scatter(
    df_st_ht['store_type'],
    df_st_ht['holiday_type'],
    s=pair_sales,
    c=pair_sales,
    cmap='plasma',
)
plt.colorbar()
plt.text(4.7, 5.5, 'sales')
plt.xlim(-0.5, 4.5)
plt.ylim(-0.5, 5.5)
plt.title('Average Sales:Store Type vs holiday type ')
plt.show()
# Mean sales per (year, month, store type), two decimals, with the month
# number replaced by its abbreviated name.
df_y_m_st = df_train1.groupby(['year', 'month', 'store_type'])['sales'].mean().reset_index()
df_y_m_st['sales'] = df_y_m_st['sales'].round(2)
df_y_m_st['month'] = df_y_m_st['month'].apply(lambda month_no: calendar.month_abbr[month_no])
df_y_m_st.head()
year | month | store_type | sales | |
---|---|---|---|---|
0 | 2013 | Jan | A | 392.85 |
1 | 2013 | Jan | B | 155.11 |
2 | 2013 | Jan | C | 109.06 |
3 | 2013 | Jan | D | 191.16 |
4 | 2013 | Jan | E | 60.52 |
280 rows × 4 columns
# a=df_y_m_st['year']==2013
# df_y_m_st.loc[a,'month']
month与store_type汇总(不同年份)
# One bubble-chart panel per year: month on x, store type on y; bubble size
# and colour both encode the mean sales.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
panel_years = [2013, 2014, 2015, 2016, 2017]

# Use one colour scale for every panel so that the single shared colorbar is
# valid for all of them, not only for the panel it was created from.
sales_min = df_y_m_st['sales'].min()
sales_max = df_y_m_st['sales'].max()

fig, ax = plt.subplots(len(panel_years), 1, figsize=(24, 20))
for i, year in enumerate(panel_years):
    # x, y and sales all come from the same year's rows (the 2014-2016 panels
    # used to take x/y from the 2013 rows).
    rows = df_y_m_st[df_y_m_st['year'] == year]
    bubbles = ax[i].scatter(
        rows['month'], rows['store_type'],
        s=rows['sales'], c=rows['sales'],
        cmap='plasma', vmin=sales_min, vmax=sales_max,
    )

    is_last = i == len(panel_years) - 1
    # rotation must be numeric; the string '270' is rejected by current Matplotlib.
    ax[i].text(11.5 if is_last else 12, 0.01, f'year={year}', size=20, rotation=270)
    # Only the bottom panel shows month ticks.
    ax[i].set_xticks(month_labels if is_last else [])

    for side in ('top', 'right', 'bottom', 'left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5, 5])
    ax[i].set_xlim([-0.5, 15])
    ax[i].tick_params(axis='both', which='major', labelsize=25)  # larger tick labels

# Reuse an existing scatter as the colorbar source instead of plotting the
# 2013 data a second time.
cbar = fig.colorbar(bubbles, ax=list(ax), shrink=0.9)
cbar.ax.set_title('sales', size=20)
plt.show()
month与holiday_type汇总
# Mean sales per (month, holiday type), two decimals, month shown by name.
df_m_ht = df_train1.groupby(['month', 'holiday_type'])['sales'].mean().reset_index()
df_m_ht['sales'] = df_m_ht['sales'].round(2)
df_m_ht['month'] = df_m_ht['month'].apply(lambda month_no: calendar.month_abbr[month_no])

# Bubble chart: month on x, holiday type on y; bubble size and colour both
# encode the mean sales.
month_holiday_sales = df_m_ht['sales']
plt.scatter(
    df_m_ht['month'],
    df_m_ht['holiday_type'],
    s=month_holiday_sales,
    c=month_holiday_sales,
    cmap='plasma',
)
plt.colorbar()
plt.text(12.5, 6, 'sales')
plt.xlim(-1, 12)
plt.ylim(-0.5, 5.5)
plt.title('Average Sales:Month vs holiday type ')
plt.show()
# Mean sales per (year, month, holiday type), two decimals, with the month
# number replaced by its abbreviated name.
df_y_m_ht = df_train1.groupby(['year', 'month', 'holiday_type'])['sales'].mean().reset_index()
df_y_m_ht['sales'] = df_y_m_ht['sales'].round(2)
df_y_m_ht['month'] = df_y_m_ht['month'].apply(lambda month_no: calendar.month_abbr[month_no])
df_y_m_ht.head()
year | month | holiday_type | sales | |
---|---|---|---|---|
0 | 2013 | Jan | Holiday | 1.41 |
1 | 2013 | Jan | Work Day | 247.08 |
2 | 2013 | Feb | Holiday | 164.82 |
3 | 2013 | Mar | Holiday | 307.44 |
4 | 2013 | Apr | Holiday | 228.52 |
96 rows × 4 columns
month与holiday_type汇总(不同年份)
# One bubble-chart panel per year: month on x, holiday type on y; bubble size
# and colour both encode the mean sales.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
panel_years = [2013, 2014, 2015, 2016, 2017]

# Use one colour scale for every panel so that the single shared colorbar is
# valid for all of them, not only for the panel it was created from.
sales_min = df_y_m_ht['sales'].min()
sales_max = df_y_m_ht['sales'].max()

fig, ax = plt.subplots(len(panel_years), 1, figsize=(24, 20))
for i, year in enumerate(panel_years):
    rows = df_y_m_ht[df_y_m_ht['year'] == year]
    bubbles = ax[i].scatter(
        rows['month'], rows['holiday_type'],
        s=rows['sales'], c=rows['sales'],
        cmap='plasma', vmin=sales_min, vmax=sales_max,
    )

    is_last = i == len(panel_years) - 1
    # rotation must be numeric; the string '270' is rejected by current Matplotlib.
    ax[i].text(11.5 if is_last else 12, 0.01, f'year={year}', size=20, rotation=270)
    # Only the bottom panel shows month ticks. The list used to have 'Oct' and
    # 'Sep' swapped and 'Nov' misspelt as 'Nve'.
    ax[i].set_xticks(month_labels if is_last else [])

    for side in ('top', 'right', 'bottom', 'left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5, 6])
    ax[i].set_xlim([-0.5, 12])
    ax[i].tick_params(axis='both', which='major', labelsize=25)  # larger tick labels

# Reuse an existing scatter as the colorbar source instead of plotting the
# 2013 data a second time.
cbar = fig.colorbar(bubbles, ax=list(ax), shrink=0.9)
cbar.ax.set_title('sales', size=20)
plt.show()