pandas是Python的第三方库,提供高性能、易用的数据类型和分析工具。
pandas基于NumPy实现,常与NumPy和Matplotlib一同使用。
更多学习,请参考pandas中文网:https://www.pypandas.cn/
目录
import pandas as pd
import numpy as np
# A Series built without an explicit index gets the default 0..n-1 integer index.
s = pd.Series(['a', 'b', 'c', 'd', 'e'])
print(s)

# Unlike a dict, a Series allows duplicate index labels.
s = pd.Series(['a', 'b', 'c', 'd', 'e'], index=[100, 200, 100, 400, 500])
print(s)

# A Series can also be built from a dict: the keys become the index labels.
# The result is printed directly (and not bound to `s`) so that the lookups
# below keep operating on the integer-labelled Series defined above.
d = {'b': 1, 'a': 0, 'c': 2}
print(pd.Series(d))

print(s)
print(s.values)
print(s.index)

# Unlike a plain NumPy array, single values or groups of values can be
# selected from a Series by index label.
print(s[100])  # label 100 occurs twice, so this yields a two-element Series
print(s[[400, 500]])
# Integer-valued Series 1..5 labelled 'a' through 'e'.
s = pd.Series(np.arange(1, 6), index=list('abcde'))
print(s)
# Adding two Series sums the elements that share an index label.
print(s + s)
# Multiplying by a scalar scales every element.
print(s * 3)
# Two Series whose index labels only partly overlap.
obj1 = pd.Series(
    {
        "Ohio": 35000,
        "Oregon": 16000,
        "Texas": 71000,
        "Utah": 5000,
    }
)
print(obj1)
obj2 = pd.Series(
    {
        "California": np.nan,
        "Ohio": 35000,
        "Oregon": 16000,
        "Texas": 71000,
    }
)
print(obj2)
# Arithmetic aligns on index labels; a label missing from either operand
# (or holding NaN) produces NaN in the result.
print(obj1 + obj2)
# Start again from the Series 1..5 labelled 'a' through 'e'.
s = pd.Series(np.array([1, 2, 3, 4, 5]), index=list('abcde'))
# Positional slices: everything but the first element, then everything but the last.
print(s.iloc[1:])
print(s.iloc[:-1])
# The sum aligns on labels, not positions: 'a' and 'e' each appear in only
# one operand, so they come out as NaN.
print(s.iloc[1:] + s.iloc[:-1])
# Column-oriented input: each key is a column name, each list holds that column's values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = pd.DataFrame(data)
print(frame)

# Explicit column order and row labels; 'debt' is not a key of `data`,
# so that column is created filled with missing values.
frame2 = pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
print(frame2)

# A DataFrame column can be retrieved as a Series using dict-like notation
# (or attribute access); the returned Series keeps the DataFrame's index.
print(frame2['state'])
# Building a DataFrame from a dict of Series: the row index is the union of
# the Series' labels, and labels absent from a Series become NaN in its column.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}
print(pd.DataFrame(d))
# Assigning a scalar to a column broadcasts it to every row.
frame2['debt'] = 16.5
print(frame2)
# A new column can be derived element-wise from existing columns.
frame2['new'] = frame2['debt'] * frame2['pop']
print(frame2)
# Assigning an array replaces the column's values; the array length must
# match the number of rows in the DataFrame.
frame2['debt'] = np.arange(5.)
print(frame2)