0
点赞
收藏
分享

微信扫一扫

数据分析——从入门到精通(十一)

Python芸芸 2022-04-13 阅读 40

删除数据

  • drop(labels,axis=0,index,columns) 删除在axis轴上的指定labels索引标签的数据
    • 删除多行或多列
  • dropna(axis=0,how=‘any|all’)删除存在NaN值的行或列(how=‘any’:只要有一个NaN就删除;how=‘all’:全部为NaN才删除)
  • drop_duplicates(keep=‘first|last’)删除重复行的数据,keep指定保留重复行中第一次出现的行还是最后一次出现的行
import numpy as np
import pandas as pd 
from pandas import DataFrame,Series
# 创建10位同学的五门课程成绩
df = DataFrame(np.random.randint(1,151,size=(10,5)),
              columns=['MySQL','PostgreSQL','Oracle','MongoDB','SQLite'])
df

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
315010647461
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
# 构造两行数据
df.loc[10] = df.loc[6]
df.loc[11] = df.loc[7]
df

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
315010647461
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
# 删除"PostgreSQL","MongoDB"两列,同时删除3,4两行
# 只删除列
# axis : {0 or 'index', 1 or 'columns'}, default 0
df.drop(labels=['PostgreSQL','MongoDB'],axis='columns')  # 也可以写成axis=1

MySQLOracleSQLite
0136108132
15313714
261132103
3150461
450135140
57062128
6125138135
74671117
85059130
91222916
10125138135
114671117
# 只删除行
df.drop(labels=[3,4],axis=0)

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
315010647461
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
# 同时删除行和列
df.drop(index=[3,4],columns=['PostgreSQL','MongoDB'],inplace=False)  # inplace=False 表示不在原本中删除

MySQLOracleSQLite
0136108132
15313714
261132103
57062128
6125138135
74671117
85059130
91222916
10125138135
114671117
df

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
315010647461
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
# axis=0 默认删除行索引及数据
df.drop([1,3])

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
2615913274103
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
01362810818132
153121374514
2615913274103
315010647461
4509513578140
5701126262128
612513313870135
746397150117
8501195989130
912293295616
1012513313870135
1146397150117
# 修改第三个索引位置,MySQL的值改为NAN
df.loc[3,'MySQL'] = np.nan
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 删除有NAN值的行
df.dropna(axis=0,how='any')  # 当前行只要有一个NAN值都会删除

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.dropna(axis=0,how='all')  # 当前行所有的值是NAN才会删除

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 删除重复行
df.drop_duplicates()  # 默认保留第一次出现的行

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 删除重复行
df.drop_duplicates(keep="last")  # keep="last" retains the final occurrence of each duplicated row (the default is keep="first")

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 查询重复行的,返回的是布尔值
df.duplicated()
0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10     True
11     True
dtype: bool
df[df.duplicated()]

MySQLPostgreSQLOracleMongoDBSQLite
10125.013313870135
1146.0397150117
df[df.duplicated()].index
Int64Index([10, 11], dtype='int64')
# 删除10,11行
# 内容为bool值的Series,只能作为索引切片使用
df.drop(df[df.duplicated()].index)

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616

数据映射

  • rename() 针对索引标签的重命名
  • replace() 针对数据,替换数据
  • map() 针对数据,根据数据可以映射成新的数据
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 修改MySQL和PostgreSQL的列名分别为MySQL1m和PostgreSQL11m
"""
df.rename(
    mapper=None,    其实是个字典
    index=None,     替换行标签的内容
    columns=None,   替换列标签的内容
    axis=None,      0——行标签;1——列标签
    copy=True,
    inplace=False,  是否在原本上替换,False表示不在原本上替换
    level=None,    # 指定更改的索引层数
    errors='ignore',
)
"""
# 修改列名时必须指定axis=1(或使用columns参数);不指定axis时默认作用于行索引,列名不会被更改
df.rename({'MySQL':'MySQL1m','PostgreSQL':'PostgreSQL11m'},axis=1)

MySQL1mPostgreSQL11mOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 把1映射为A,把0映射为B
df.rename({1:'A',0:'B'}) # 修改行时,不用加axis,默认的

MySQLPostgreSQLOracleMongoDBSQLite
B136.02810818132
A53.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.rename(columns={'MySQL':'MySQL1m','PostgreSQL':'PostgreSQL11m'})

MySQL1mPostgreSQL11mOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
#  mapper\index\columns的类型都是字典类型  rename进行修改时,如果在原本里找不到要替换的值,不会报错,只是不修改
# 将14的成绩,修改为140,将4分的成绩修改为180
"""
df.replace(
    to_replace=None,  被替换的值,类型可以是单个值、列表、元组或字典
    value=None,       值
    inplace=False,
    limit=None,
    regex=False,
    method='pad',
)
"""
df.replace(14,140)

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.01213745140
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 将14的成绩,修改为140,将4分的成绩修改为180
df.replace((14,4),(140,180))

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.01213745140
261.05913274103
3NaN1061807461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.replace({14:140,4:180})

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.01213745140
261.05913274103
3NaN1061807461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# method : {'pad', 'ffill', 'bfill', `None`}     使用method参数时,value参数必须是None(注:pandas 2.1起replace的method参数已弃用,填充缺失值建议直接使用ffill()/bfill())
# 将nan值使用列后的数值填充
df.replace(np.nan,method='bfill')

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
350.010647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# 将nan值使用列前的数值填充
df.replace(np.nan,method='ffill')

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
361.010647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.replace(np.nan,method='pad')

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
361.010647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.replace(1,method='ffill')

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.replace(1,method='bfill')

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
# map函数针对一个Series对象
df['DB'] = df['Oracle'].map({108:90,132:100})
df

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.0281081813290.0
153.0121374514NaN
261.05913274103100.0
3NaN10647461NaN
450.09513578140NaN
570.01126262128NaN
6125.013313870135NaN
746.0397150117NaN
850.01195989130NaN
9122.093295616NaN
10125.013313870135NaN
1146.0397150117NaN
# 根据Oracle的成绩,设置A,B,C三个等级:A级大于80,B级大于60且不超过80,C级不超过60
df['DB'] =df['Oracle'].map(lambda item: 'A' if item>80 else 'B' if item > 60 else 'C')
df

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.02810818132A
153.0121374514A
261.05913274103A
3NaN10647461C
450.09513578140A
570.01126262128B
6125.013313870135A
746.0397150117B
850.01195989130C
9122.093295616C
10125.013313870135A
1146.0397150117B

数据统计信息

  • describe()
df.describe()  # 此时不包含NAN值

MySQLPostgreSQLOracleMongoDBSQLite
count11.00000012.00000012.00000012.00000012.000000
mean80.36363680.66666790.33333361.333333102.333333
std37.74989543.02078447.20618918.93689745.976938
min46.00000012.0000004.00000018.00000014.000000
25%50.00000039.00000061.25000050.00000092.500000
50%61.00000094.00000089.50000066.000000122.500000
75%123.500000113.750000135.50000074.000000132.750000
max136.000000133.000000138.00000089.000000140.000000
# include 用于按数据类型筛选要统计的列:这里只统计整数类型的列;MySQL列因含NaN被转为float类型,所以不在结果中
df.describe(include=[np.integer])  # np.int was removed in NumPy 1.24; np.integer selects every integer-typed column

PostgreSQLOracleMongoDBSQLite
count12.00000012.00000012.00000012.000000
mean80.66666790.33333361.333333102.333333
std43.02078447.20618918.93689745.976938
min12.0000004.00000018.00000014.000000
25%39.00000061.25000050.00000092.500000
50%94.00000089.50000066.000000122.500000
75%113.750000135.50000074.000000132.750000
max133.000000138.00000089.000000140.000000
df.std()  # 默认axis=0
MySQL         37.749895
PostgreSQL    43.020784
Oracle        47.206189
MongoDB       18.936897
SQLite        45.976938
dtype: float64
df.std(axis=1)
0     57.173420
1     50.790747
2     31.236197
3     42.594014
4     38.187694
5     31.003226
6     28.472794
7     31.627520
8     35.359581
9     44.144082
10    28.472794
11    31.627520
dtype: float64
df.std(axis=1).sort_values()
6     28.472794
10    28.472794
5     31.003226
2     31.236197
7     31.627520
11    31.627520
8     35.359581
4     38.187694
3     42.594014
9     44.144082
1     50.790747
0     57.173420
dtype: float64
# 查看第0行索引
df.loc[0]
MySQL         136
PostgreSQL     28
Oracle        108
MongoDB        18
SQLite        132
DB              A
Name: 0, dtype: object
# 查看第6行索引
df.loc[6]
MySQL         125
PostgreSQL    133
Oracle        138
MongoDB        70
SQLite        135
DB              A
Name: 6, dtype: object

take排序索引标签

df

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.02810818132A
153.0121374514A
261.05913274103A
3NaN10647461C
450.09513578140A
570.01126262128B
6125.013313870135A
746.0397150117B
850.01195989130C
9122.093295616C
10125.013313870135A
1146.0397150117B
# 把"DB"列移动到"Oracle"前列
df.take([0,1,5,2,3,4],axis=1)  # 方法一

MySQLPostgreSQLDBOracleMongoDBSQLite
0136.028A10818132
153.012A1374514
261.059A13274103
3NaN106C47461
450.095A13578140
570.0112B6262128
6125.0133A13870135
746.039B7150117
850.0119C5989130
9122.093C295616
10125.0133A13870135
1146.039B7150117
# 把"DB"列移动到"Oracle"前列
df.take([0,1,-1,2,3,4],axis=1)  # 方法二

MySQLPostgreSQLDBOracleMongoDBSQLite
0136.028A10818132
153.012A1374514
261.059A13274103
3NaN106C47461
450.095A13578140
570.0112B6262128
6125.0133A13870135
746.039B7150117
850.0119C5989130
9122.093C295616
10125.0133A13870135
1146.039B7150117
df2 = df.take([0,1,-1,2,3,4],axis=1)
df2

MySQLPostgreSQLDBOracleMongoDBSQLite
0136.028A10818132
153.012A1374514
261.059A13274103
3NaN106C47461
450.095A13578140
570.0112B6262128
6125.0133A13870135
746.039B7150117
850.0119C5989130
9122.093C295616
10125.0133A13870135
1146.039B7150117
# 索引的随机排列
np.random.permutation(df.index)
array([ 5,  2,  7,  8,  1,  3,  6,  0,  4, 11, 10,  9], dtype=int64)
# 行索引标签随机排列
df.take(np.random.permutation(len(df)))  # default axis=0; take() is positional, so shuffle row positions rather than index labels

MySQLPostgreSQLOracleMongoDBSQLiteDB
1146.0397150117B
450.09513578140A
9122.093295616C
153.0121374514A
6125.013313870135A
746.0397150117B
10125.013313870135A
570.01126262128B
261.05913274103A
850.01195989130C
3NaN10647461C
0136.02810818132A
# 随机排列列索引标签
np.random.permutation(df.columns)
array(['Oracle', 'MongoDB', 'SQLite', 'DB', 'PostgreSQL', 'MySQL'],
      dtype=object)
column_map = dict.fromkeys(df.columns,0)
column_map
{'MySQL': 0, 'PostgreSQL': 0, 'Oracle': 0, 'MongoDB': 0, 'SQLite': 0, 'DB': 0}
column_map = dict(enumerate(df.columns))
column_map
{0: 'MySQL', 1: 'PostgreSQL', 2: 'Oracle', 3: 'MongoDB', 4: 'SQLite', 5: 'DB'}
# Map each column label to its integer position (label -> position).
column_map = {label: position for position, label in enumerate(df.columns)}
column_map
{'MySQL': 0, 'PostgreSQL': 1, 'Oracle': 2, 'MongoDB': 3, 'SQLite': 4, 'DB': 5}
# 列的随机排列
random_columns = np.random.permutation(df.columns)
column_index = [column_map[k] for k in random_columns]

df.take(column_index,axis=1)

OraclePostgreSQLMongoDBSQLiteMySQLDB
01082818132136.0A
113712451453.0A
2132597410361.0A
341067461NaNC
4135957814050.0A
5621126212870.0B
613813370135125.0A
771395011746.0B
8591198913050.0C
929935616122.0C
1013813370135125.0A
1171395011746.0B
df.take(column_index,axis=1)

OraclePostgreSQLMongoDBSQLiteMySQLDB
01082818132136.0A
113712451453.0A
2132597410361.0A
341067461NaNC
4135957814050.0A
5621126212870.0B
613813370135125.0A
771395011746.0B
8591198913050.0C
929935616122.0C
1013813370135125.0A
1171395011746.0B

数据分类处理

  • groupby([‘列名’,…])返回DataFrameGroupBy
  • 分组之后,可以针对某一数值列进行聚合操作(sum,mean,max,min,std等)
  • 可以自定义聚合函数,使用transform或apply
df

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.02810818132A
153.0121374514A
261.05913274103A
3NaN10647461C
450.09513578140A
570.01126262128B
6125.013313870135A
746.0397150117B
850.01195989130C
9122.093295616C
10125.013313870135A
1146.0397150117B
df.groupby('DB')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x08A144D0>
df.groupby('DB')['SQLite'].sum()  # 对SQLite这一列求和
DB
A    659
B    362
C    207
Name: SQLite, dtype: int32
df.groupby('DB')['SQLite'].mean()   # 对SQLite这一列求平均值
DB
A    109.833333
B    120.666667
C     69.000000
Name: SQLite, dtype: float64
df.groupby('DB')['SQLite'].count()   # 对SQLite这一列求出现的次数
DB
A    6
B    3
C    3
Name: SQLite, dtype: int64
# 对两列进行聚合
df.groupby('DB')[['SQLite','PostgreSQL']].mean()  # select several columns with a list; tuple-style keys were removed in pandas 2.0

SQLitePostgreSQL
DB
A109.83333376.666667
B120.66666763.333333
C69.000000106.000000
# 针对所有进行聚合——求平均值
df.groupby('DB').mean()

MySQLPostgreSQLOracleMongoDBSQLite
DB
A91.66666776.666667131.33333359.166667109.833333
B54.00000063.33333368.00000054.000000120.666667
C86.000000106.00000030.66666773.00000069.000000
# transform()  聚合后的结果是不去重的
df.groupby('DB').transform('sum')  # use the string name: passing the builtin sum is deprecated and would stop skipping NaN

MySQLPostgreSQLOracleMongoDBSQLite
0550.0460788355659
1550.0460788355659
2550.0460788355659
3172.031892219207
4550.0460788355659
5162.0190204162362
6550.0460788355659
7162.0190204162362
8172.031892219207
9172.031892219207
10550.0460788355659
11162.0190204162362
# apply()  聚合后的结果是去重的
df.groupby('DB').apply(lambda group: group.sum())  # call the pandas sum explicitly; relying on the builtin sum being remapped is deprecated

MySQLPostgreSQLOracleMongoDBSQLiteDB
DB
A550.0460788355659AAAAAA
B162.0190204162362BBB
C172.031892219207CCC
# 练习:根据DB分组,并计算出所有学科的总成绩
def sum_data(item):
    """Print the type of the object passed in, then return ``item.sum()``.

    Used with ``groupby(...).apply`` to inspect what each call receives.
    """
    received_type = type(item)
    print(received_type)
    totals = item.sum()
    return totals
df.groupby('DB').apply(sum_data)
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>

MySQLPostgreSQLOracleMongoDBSQLiteDB
DB
A550.0460788355659AAAAAA
B162.0190204162362BBB
C172.031892219207CCC
def sum_data(item):
    """Show the object passed in, then return ``item.sum()``.

    ``display`` is not defined in this file; it is presumably the
    IPython/Jupyter notebook helper -- confirm before running elsewhere.
    """
    display(item)
    totals = item.sum()
    return totals
df.groupby('DB').apply(sum_data)

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.02810818132A
153.0121374514A
261.05913274103A
450.09513578140A
6125.013313870135A
10125.013313870135A

MySQLPostgreSQLOracleMongoDBSQLiteDB
570.01126262128B
746.0397150117B
1146.0397150117B

MySQLPostgreSQLOracleMongoDBSQLiteDB
3NaN10647461C
850.01195989130C
9122.093295616C

MySQLPostgreSQLOracleMongoDBSQLiteDB
DB
A550.0460788355659AAAAAA
B162.0190204162362BBB
C172.031892219207CCC
df.sum()
MySQL                  884
PostgreSQL             968
Oracle                1084
MongoDB                736
SQLite                1228
DB            AAACABABCCAB
dtype: object
df.iloc[:,:-1]

MySQLPostgreSQLOracleMongoDBSQLite
0136.02810818132
153.0121374514
261.05913274103
3NaN10647461
450.09513578140
570.01126262128
6125.013313870135
746.0397150117
850.01195989130
9122.093295616
10125.013313870135
1146.0397150117
df.iloc[:,:-1].sum()
MySQL          884.0
PostgreSQL     968.0
Oracle        1084.0
MongoDB        736.0
SQLite        1228.0
dtype: float64
df.iloc[:,:-1].values
array([[136.,  28., 108.,  18., 132.],
       [ 53.,  12., 137.,  45.,  14.],
       [ 61.,  59., 132.,  74., 103.],
       [ nan, 106.,   4.,  74.,  61.],
       [ 50.,  95., 135.,  78., 140.],
       [ 70., 112.,  62.,  62., 128.],
       [125., 133., 138.,  70., 135.],
       [ 46.,  39.,  71.,  50., 117.],
       [ 50., 119.,  59.,  89., 130.],
       [122.,  93.,  29.,  56.,  16.],
       [125., 133., 138.,  70., 135.],
       [ 46.,  39.,  71.,  50., 117.]])
df.iloc[:,:-1].values.sum()
nan
df.iloc[:,:-1].fillna(0).values.sum()
4900.0
def sum_data(item):
    """Return the grand total of all numeric cells in ``item``.

    Missing values count as 0. Columns are chosen by dtype rather than by
    position, so the result stays correct when the non-numeric label column
    is not the last column, or is absent altogether.

    Args:
        item: DataFrame holding one group's rows.

    Returns:
        A scalar: the sum over every numeric column and every row.
    """
    # Previously `iloc[:, :-1]` assumed the label column was always last.
    numeric = item.select_dtypes(include='number')
    return numeric.fillna(0).to_numpy().sum()
df.groupby('DB').apply(sum_data)
DB
A    2812.0
B    1080.0
C    1008.0
dtype: float64
def sum_data(item):
    """Return a one-entry Series holding the grand total of ``item``.

    Missing values count as 0. Columns are chosen by dtype rather than by
    position, so the result stays correct when the non-numeric label column
    is not the last column, or is absent altogether.

    Args:
        item: DataFrame holding one group's rows.

    Returns:
        Series with the single key '总成绩' mapped to the sum of all
        numeric cells; returning a Series makes ``groupby.apply`` produce a
        DataFrame with that key as its column name.
    """
    # Previously `iloc[:, :-1]` assumed the label column was always last.
    numeric = item.select_dtypes(include='number')
    return Series({'总成绩': numeric.fillna(0).to_numpy().sum()})
df.groupby('DB').apply(sum_data)

总成绩
DB
A2812.0
B1080.0
C1008.0
df

MySQLPostgreSQLOracleMongoDBSQLiteDB
0136.02810818132A
153.0121374514A
261.05913274103A
3NaN10647461C
450.09513578140A
570.01126262128B
6125.013313870135A
746.0397150117B
850.01195989130C
9122.093295616C
10125.013313870135A
1146.0397150117B
# 计算MySQL总成绩,PostgreSQL总成绩,Oracle+MongoDB+SQLite的总成绩
def sum_data(item):
    """Summarise one group as three totals.

    Returns a Series with, in order:
      'DBS'        -- sum of the Oracle, MongoDB and SQLite columns, with
                      missing values counted as 0;
      'MySQL'      -- sum of the MySQL column;
      'PostgreSQL' -- sum of the PostgreSQL column.
    """
    combined_columns = ['Oracle', 'MongoDB', 'SQLite']
    combined_total = item[combined_columns].fillna(0).values.sum()
    totals = {
        'DBS': combined_total,
        'MySQL': item['MySQL'].sum(),
        'PostgreSQL': item['PostgreSQL'].sum(),
    }
    return Series(totals)
df.groupby('DB').apply(sum_data)

DBSMySQLPostgreSQL
DB
A1802.0550.0460.0
B728.0162.0190.0
C518.0172.0318.0
举报

相关推荐

0 条评论