Pandas 停车场数据分析与数据可视化实战练习
数据表
停车时间数据分析
import pandas as pd
import matplotlib.pyplot as plt
# Parking-duration distribution: count vehicles per length-of-stay bucket
# and draw the counts as an annotated bar chart.
data = pd.read_excel('停车场信息表.xlsx')

# Exclude vehicles that entered but have not left yet (timeout stored as 0).
# .copy() detaches the filtered frame so the column assignments below write
# to a real frame instead of a view (avoids SettingWithCopyWarning).
data = data.loc[data['timeout'] != 0].copy()
data['timein'] = pd.to_datetime(data['timein'])
data['timeout'] = pd.to_datetime(data['timeout'])
data['h'] = data['timeout'] - data['timein']

# Select a CJK-capable font before any text object is created.
plt.rcParams['font.sans-serif'] = ['FangSong']
plt.title('停车时间分布图')

# Bucket the stays: <=1h, (1h, 2h], (2h, 3h], (3h, 4h], >4h.
# Explicit Timedelta edges replace implicit string-to-timedelta coercion.
hour = pd.Timedelta(hours=1)
stay = data['h']
x = ['1小时', '2小时', '3小时', '4小时', '4小时以上']
y = [
    int((stay <= hour).sum()),
    int(((stay > hour) & (stay <= 2 * hour)).sum()),
    int(((stay > 2 * hour) & (stay <= 3 * hour)).sum()),
    int(((stay > 3 * hour) & (stay <= 4 * hour)).sum()),
    int((stay > 4 * hour).sum()),
]
plt.bar(x, y)

# Write each count above its bar. Dedicated loop names are used so the
# plotted x / y lists are not overwritten by the loop variables.
for position, count in enumerate(y):
    plt.text(position, count + 20, f'{count}台', ha='center')
plt.show()
停车高峰期时间占比
import pandas as pd
import matplotlib.pyplot as plot
# Share of arrivals in each six-hour window of the day, drawn as a pie chart.
data = pd.read_excel('停车场信息表.xlsx')

# Parse the entry timestamp once and take its hour of day (0-23) instead of
# running 24 separate substring scans over the column.
entry_hour = pd.to_datetime(data['timein']).dt.hour

# Integer division by 6 maps hours 0-5 -> 0, 6-11 -> 1, 12-17 -> 2, 18-23 -> 3.
window = entry_hour // 6
y = [int((window == index).sum()) for index in range(4)]

# Labels describe the half-open windows actually counted above
# (e.g. an 06:30 arrival belongs to the second window).
labels = ['0-6点', '6-12点', '12-18点', '18-24点']

plot.rcParams['font.sans-serif'] = ['FangSong']
plot.title('停车高峰期时间占比')
# Pass the raw counts and let pie() normalise them. Hand-computed fractions
# combined with normalize=False raise ValueError whenever float rounding
# pushes their sum above 1.
plot.pie(y, labels=labels, normalize=True)
plot.show()
停车星期比
import pandas as pd
import matplotlib.pyplot as plot
# Share of arrivals per day of the week, drawn as a pie chart.
data = pd.read_excel('停车场信息表.xlsx')

# Keep only rows whose state equals 1.
# NOTE(review): presumably these are completed visits - confirm against the sheet.
ds = data.loc[data['state'] == 1].copy()
ds['timein'] = pd.to_datetime(ds['timein'])

# dayofweek is 0 for Monday through 6 for Sunday, matching the label order.
ds['timeinweek'] = ds['timein'].dt.dayofweek
labels = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期天']

# groupby().size() omits weekdays with no rows, which would leave fewer than
# seven wedges for seven labels. Reindexing over 0..6 keeps every weekday
# present (as 0) and aligned with its label.
weeksuns = (
    ds.groupby('timeinweek')
    .size()
    .reindex(range(len(labels)), fill_value=0)
)

plot.rcParams['font.sans-serif'] = ['FangSong']
plot.pie(weeksuns, labels=labels, autopct='%.2f%%')
plot.title('停车星期比')
plot.show()
每日接待车辆统计
import datetime
import pandas as pd
import matplotlib.pyplot as plot
# Number of vehicles received per calendar day over a fixed date range,
# drawn as a line chart.
data = pd.read_excel('停车场信息表.xlsx')

# Keep only rows whose state equals 1.
# NOTE(review): presumably these are completed visits - confirm against the sheet.
data = data.loc[data['state'] == 1]
iin = '2018-01-01'
end = '2018-03-31'

# Every calendar day in the inclusive range [iin, end].
days = pd.date_range(iin, end, freq='D')

# Count entries per day in a single pass: parse the entry timestamp, truncate
# it to midnight, tally, then reindex so days without any entry appear as 0.
# This replaces one full substring scan of the column per day, which also
# relied on a two-digit-year pattern matching inside the four-digit date.
entry_day = pd.to_datetime(data['timein']).dt.normalize()
per_day = entry_day.value_counts().reindex(days, fill_value=0)

x = [day.strftime('%y-%m-%d') for day in days]
y = per_day.tolist()

plot.rcParams['font.sans-serif'] = ['SimHei']
plot.plot(x, y)
# Roughly ninety tick labels would overlap, so hide them; the axis label
# states the covered range instead.
plot.xticks([])
plot.title('每日接待车辆统计')
plot.xlabel('2018-01-01到2018-03-31')
plot.show()
车辆归属地词云图
import pandas as pd
from pyecharts.charts import WordCloud
import pyecharts.options as opts
# Word cloud of vehicle counts keyed by the first two characters of 'cn'.
data = pd.read_excel('停车场信息表.xlsx')

# Take the two-character prefix of each 'cn' value.
# NOTE(review): 'cn' looks like the licence plate and the prefix its region - confirm.
data['cnn'] = data['cn'].apply(lambda plate: plate[:2])

# Group the rows by that prefix.
df1 = data.groupby('cnn')

# Rows per prefix, with every count converted to its string form.
df2 = df1.size().apply(str)

# Build the chart step by step: add the series, set the title, then render.
cloud = WordCloud()
cloud = cloud.add(series_name='车牌', data_pair=zip(df2.index, df2.values))
cloud = cloud.set_global_opts(title_opts=opts.TitleOpts(title='车辆归属地'))

# c holds the value returned by render().
c = cloud.render()
资源文件
资源文件