数据准备
 
import pandas as pd
 
# Load the CSV into a DataFrame (the path is relative to the working directory;
# the file name suggests Beijing weather records for 2018).
df = pd.read_csv('data/pandas/beijing_tianqi/beijing_tianqi_2018.csv')
df.head()

# The two temperature columns are read as text containing "℃"; remove the
# symbol and store the values as 32-bit integers.  regex=False makes the
# replacement a plain literal match, because the default value of `regex`
# is not the same across pandas versions.
df["bWendu"] = df["bWendu"].str.replace("℃", "", regex=False).astype("int32")
df["yWendu"] = df["yWendu"].str.replace("℃", "", regex=False).astype("int32")
df.head()
 
常用汇总函数
 
# Descriptive statistics (count, mean, std, min, quartiles, max) in one table.
df.describe()

# The same kind of aggregate, computed one at a time for the "bWendu"
# temperature column.
df["bWendu"].mean()
df["bWendu"].max()
df["bWendu"].min()
 
重复项判断，按值计数
 
# Distinct values of the "fengxiang" column, in order of first appearance.
df["fengxiang"].unique()

# Number of rows for each "fengxiang" value, most frequent first.
df["fengxiang"].value_counts()

# True when at least one value in the column occurs more than once.
df["bWendu"].duplicated().any()
df["ymd"].duplicated().any()

# The column with repeated values dropped (the first occurrence is kept).
df["bWendu"].drop_duplicates()

# Rows whose "bWendu" value already appeared in an earlier row.  duplicated()
# already returns a boolean mask, so it is used directly instead of being
# compared with True.
df[df["bWendu"].duplicated()]
 
相关系数和协方差
 
# Pairwise covariance and correlation between columns.
# numeric_only=True restricts the computation to numeric columns.  Since
# pandas 2.0 the default is numeric_only=False, so any column still holding
# strings would make these calls raise instead of being skipped silently.
# NOTE: the numeric_only keyword on cov()/corr() requires pandas >= 1.5.
df.cov(numeric_only=True)

df.corr(numeric_only=True)