0
点赞
收藏
分享

微信扫一扫

数据清洗之 数据增加和删除


数据增加和删除

  • 在数据中,直接添加列
  • 使用df.insert方法在数据中添加一列
  • drop(labels, axis, inplace=True)方法(删除)
  • labels表示要删除的行或列的标签,axis表示作用轴,inplace表示是否对原数据生效(inplace=True时直接修改原数据;默认为False,返回删除后的新数据,原数据不变)
  • axis=0按行操作,axis=1按列操作
  • 使用del语句直接删除其中一列(例如 del df['列名'])

import pandas as pd
import os
import numpy as np

os.getcwd()

'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据'

os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')

df = pd.read_csv('baby_trade_history.csv', encoding='utf-8', dtype={'user_id':str})

df['购买量'] = np.where(df['buy_mount']>3, '高', '低')

df.head(5)



user_id

auction_id

cat_id

cat1

property

buy_mount

day

购买量

0

786295544

41098319944

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919


1

532110457

17916191097

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011


2

249013725

21896936223

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011


3

917056007

12515996043

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023


4

444069173

20487688075

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103


# 将第二列(auction_id)移到第一列:先把该列保存到变量中,再从df中删除,之后用insert插入到第一列的位置
auction_id = df['auction_id']

del df['auction_id']

df.head(5)



user_id

cat_id

cat1

property

buy_mount

day

购买量

0

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919


1

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011


2

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011


3

917056007

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023


4

444069173

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103


# 第一个参数:插入位置
# 第二个参数:标签名称
# 第三个参数:数据
df.insert(0, 'auction_id_new', auction_id)

df.head(5)



auction_id_new

user_id

cat_id

cat1

property

buy_mount

day

购买量

0

41098319944

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919


1

17916191097

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011


2

21896936223

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011


3

12515996043

917056007

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023


4

20487688075

444069173

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103


# 删除两列数据
df.drop(labels=['auction_id_new', '购买量'], axis=1).head(5)



user_id

cat_id

cat1

property

buy_mount

day

0

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919

1

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011

2

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011

3

917056007

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023

4

444069173

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103

# 再次查看df,发现刚才删除的数据仍然存在
# 因为drop默认inplace=False,只返回删除后的新数据,没有对原数据生效
df.head(5)



auction_id_new

user_id

cat_id

cat1

property

buy_mount

day

购买量

0

41098319944

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919


1

17916191097

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011


2

21896936223

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011


3

12515996043

917056007

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023


4

20487688075

444069173

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103


df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)

---------------------------------------------------------------------------

KeyError Traceback (most recent call last)

<ipython-input-30-acf2a75acaf3> in <module>
----> 1 df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)


D:\Anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3938 index=index, columns=columns,
3939 level=level, inplace=inplace,
-> 3940 errors=errors)
3941
3942 @rewrite_axis_style_signature('mapper', [('copy', True),


D:\Anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3778 for axis, labels in axes.items():
3779 if labels is not None:
-> 3780 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3781
3782 if inplace:


D:\Anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
3810 new_axis = axis.drop(labels, level=level, errors=errors)
3811 else:
-> 3812 new_axis = axis.drop(labels, errors=errors)
3813 result = self.reindex(**{axis_name: new_axis})
3814


D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
4963 if errors != 'ignore':
4964 raise KeyError(
-> 4965 '{} not found in axis'.format(labels[mask]))
4966 indexer = indexer[~mask]
4967 return self.delete(indexer)


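KeyError: "['auction_id_new' '购买量'] not found in axis"

(注:这里的 KeyError 应是该单元格被重复执行所致——第一次执行时已通过 inplace=True 将这两列从 df 中删除,再次执行时在列标签中找不到它们,于是报错。从下面 df.head(5) 的输出可以看到,这两列确实已经被删除。)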

df.head(5)



user_id

cat_id

cat1

property

buy_mount

day

0

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919

1

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011

2

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011

3

917056007

50018831

50014815

21458:15841995;21956:3494076;27000458:59723383...

2

20141023

4

444069173

50013636

50008168

21458:30992;13658074:3323064;1628665:3233941;1...

1

20141103

# 删除行标签为3和4的两行数据(axis=0表示按行删除)
df.drop(labels=[3,4], axis=0, inplace=True)

df.head(5)



user_id

cat_id

cat1

property

buy_mount

day

0

786295544

50014866

50022520

21458:86755362;13023209:3593274;10984217:21985...

2

20140919

1

532110457

50011993

28

21458:11399317;1628862:3251296;21475:137325;16...

1

20131011

2

249013725

50012461

50014815

21458:30992;1628665:92012;1628665:3233938;1628...

1

20131011

5

152298847

121394024

50008168

21458:3408353;13023209:727117752;22009:2741771...

1

20141103

6

513441334

50010557

50008168

25935:21991;1628665:29784;22019:34731;22019:20...

1

20121212

df.drop(labels=range(0,3), axis=0, inplace=True)

df.head(5)



user_id

cat_id

cat1

property

buy_mount

day

5

152298847

121394024

50008168

21458:3408353;13023209:727117752;22009:2741771...

1

20141103

6

513441334

50010557

50008168

25935:21991;1628665:29784;22019:34731;22019:20...

1

20121212

7

297411659

50010542

50008168

21458:60020529;25935:31381;1633959:27247291;16...

1

20121212

8

82830661

50013874

28

21458:11580;21475:137325

1

20121101

9

475046636

203527

28

22724:40168;22729:40278;21458:21817;2770200:24...

1

20121101


举报

相关推荐

0 条评论