数据增加和删除
- 在数据中,直接添加列
- 使用df.insert方法在数据中添加一列
- drop(labels, axis, inplace=True)方法(删除)
- labels表示要删除的行或列的标签,axis表示作用轴,inplace表示是否对原数据生效(inplace=True时直接修改原数据,否则返回新的对象、原数据不变)
- axis=0按行操作,axis=1按列操作
- 使用del语句直接删除其中一列
# Standard library first, then third-party packages.
import os

import numpy as np  # the "np" alias was missing; np.where is used when the new column is built
import pandas as pd
# Show the current working directory before switching to the data folder.
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据'
# Make the data folder the working directory so the CSV can be opened by its bare file name.
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')
# Load the trade history; user_id is read as str instead of being parsed as a number.
df = pd.read_csv('baby_trade_history.csv', encoding='utf-8', dtype={'user_id':str})
# Add a column by direct assignment: '高' where buy_mount > 3, otherwise '低'.
df['购买量'] = np.where(df['buy_mount']>3, '高', '低')
# Preview the first five rows; the new column shows up as the last column.
df.head(5)
user_id | auction_id | cat_id | cat1 | property | buy_mount | day | 购买量 | |
0 | 786295544 | 41098319944 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
1 | 532110457 | 17916191097 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
2 | 249013725 | 21896936223 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
3 | 917056007 | 12515996043 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
4 | 444069173 | 20487688075 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Move the second column (auction_id) to the first position.
# Save the column in a variable first, then remove it from df with the del statement.
auction_id = df['auction_id']
del df['auction_id']
# Preview: auction_id is no longer among the columns.
df.head(5)
user_id | cat_id | cat1 | property | buy_mount | day | 购买量 | |
0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Re-insert the saved auction_id data as the first column, under a new label.
# df.insert arguments, in order:
#   1st: position to insert at (0 = first column)
#   2nd: label of the new column
#   3rd: the data for the column
df.insert(0, 'auction_id_new', auction_id)
df.head(5)
auction_id_new | user_id | cat_id | cat1 | property | buy_mount | day | 购买量 | |
0 | 41098319944 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
1 | 17916191097 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
2 | 21896936223 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
3 | 12515996043 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
4 | 20487688075 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Drop two columns (axis=1). inplace is not passed, so the result is a new
# DataFrame that is only previewed here; df itself is not modified.
df.drop(labels=['auction_id_new', '购买量'], axis=1).head(5)
user_id | cat_id | cat1 | property | buy_mount | day | |
0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 |
4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 |
# Look at df again: the columns that were just dropped are still present,
# because that drop was not applied to the original data.
df.head(5)
auction_id_new | user_id | cat_id | cat1 | property | buy_mount | day | 购买量 | |
0 | 41098319944 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
1 | 17916191097 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
2 | 21896936223 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
3 | 12515996043 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
4 | 20487688075 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Drop the same two columns, this time with inplace=True so that df itself is modified.
# NOTE(review): the KeyError recorded below reports both labels as already missing,
# which suggests this cell was re-run after a first successful in-place drop — confirm.
df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-30-acf2a75acaf3> in <module>
----> 1 df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)
D:\Anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3938 index=index, columns=columns,
3939 level=level, inplace=inplace,
-> 3940 errors=errors)
3941
3942 @rewrite_axis_style_signature('mapper', [('copy', True),
D:\Anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3778 for axis, labels in axes.items():
3779 if labels is not None:
-> 3780 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3781
3782 if inplace:
D:\Anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
3810 new_axis = axis.drop(labels, level=level, errors=errors)
3811 else:
-> 3812 new_axis = axis.drop(labels, errors=errors)
3813 result = self.reindex(**{axis_name: new_axis})
3814
D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
4963 if errors != 'ignore':
4964 raise KeyError(
-> 4965 '{} not found in axis'.format(labels[mask]))
4966 indexer = indexer[~mask]
4967 return self.delete(indexer)
KeyError: "['auction_id_new' '购买量'] not found in axis"
# Preview df: auction_id_new and 购买量 are no longer among its columns.
df.head(5)
user_id | cat_id | cat1 | property | buy_mount | day | |
0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 |
4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 |
# Drop the rows whose index labels are 3 and 4 (axis=0), modifying df in place.
df.drop(labels=[3,4], axis=0, inplace=True)
df.head(5)
user_id | cat_id | cat1 | property | buy_mount | day | |
0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
5 | 152298847 | 121394024 | 50008168 | 21458:3408353;13023209:727117752;22009:2741771... | 1 | 20141103 |
6 | 513441334 | 50010557 | 50008168 | 25935:21991;1628665:29784;22019:34731;22019:20... | 1 | 20121212 |
# Drop the rows labelled 0, 1 and 2 (range(0,3) stops before 3), again in place.
df.drop(labels=range(0,3), axis=0, inplace=True)
df.head(5)
user_id | cat_id | cat1 | property | buy_mount | day | |
5 | 152298847 | 121394024 | 50008168 | 21458:3408353;13023209:727117752;22009:2741771... | 1 | 20141103 |
6 | 513441334 | 50010557 | 50008168 | 25935:21991;1628665:29784;22019:34731;22019:20... | 1 | 20121212 |
7 | 297411659 | 50010542 | 50008168 | 21458:60020529;25935:31381;1633959:27247291;16... | 1 | 20121212 |
8 | 82830661 | 50013874 | 28 | 21458:11580;21475:137325 | 1 | 20121101 |
9 | 475046636 | 203527 | 28 | 22724:40168;22729:40278;21458:21817;2770200:24... | 1 | 20121101 |