import numpy as np
import pandas as pd
# Build a throwaway array holding the two imported modules; the result is
# discarded.
# NOTE(review): presumably only here so both imports are referenced — confirm
# whether this line is still needed.
np.array((np, pd))
class JsonObject:
    """Attribute-style, read-only view over a mapping.

    ``obj.name`` returns ``items.get(name)``, so a key that is absent yields
    ``None`` instead of raising ``AttributeError``.

    Args:
        items: Mapping providing the values (anything with a ``get`` method).
            ``None`` is treated as an empty mapping.
    """

    def __init__(self, items):
        # Guard against None so that later lookups do not fail on ``None.get``.
        self.items = {} if items is None else items

    def __getattribute__(self, name: str):
        """Return the mapping entry for ``name`` (``None`` when missing).

        Dunder names that really exist on the object (``__class__``,
        ``__dict__``, ...) are resolved normally so that introspection,
        copying and debugging tools keep working; every other name, including
        ``items`` itself, is looked up in the wrapped mapping.
        """
        if name.startswith('__') and name.endswith('__'):
            try:
                return object.__getattribute__(self, name)
            except AttributeError:
                # Not a real special attribute: fall back to the mapping.
                pass
        return object.__getattribute__(self, 'items').get(name)
def print_data(o):
    """Print a short structural summary of ``o`` and then render ``o`` itself.

    The summary lists the object's type followed by the attributes ``shape``,
    ``size``, ``index``, ``columns``, ``dtype`` and ``dtypes``. An attribute
    the object does not have (or whose access fails) is reported as ``None``.
    When ``dtypes`` is mapping-like (exposes ``items()``), it is shown as a
    ``{name: str(dtype)}`` dict.

    Args:
        o: Any object; typically a NumPy array or a pandas Series/DataFrame.
    """
    attr_names = ('shape', 'size', 'index', 'columns', 'dtype', 'dtypes')
    print('type\t: %s' % (type(o),))
    for attr in attr_names:
        # Plain attribute access instead of eval(); a failing property is
        # reported as None rather than aborting the whole summary.
        try:
            value = getattr(o, attr)
        except Exception:
            value = None
        # Only mapping-like dtypes can be expanded per entry; a single dtype
        # or a missing attribute is printed as-is.
        if attr == 'dtypes' and hasattr(value, 'items'):
            value = {name: str(dtype) for name, dtype in value.items()}
        print('%s\t: %s' % (attr, value))
    print()
    # ``display`` is only available inside IPython/Jupyter; fall back to
    # ``print`` when running as a plain script.
    try:
        show = display
    except NameError:
        show = print
    show(o)
def get_new_file_path(file_path, string, file_ext=None):
    """Return ``file_path`` with ``-<string>`` inserted before the extension.

    Args:
        file_path: Original path, e.g. ``'data/report.csv'``.
        string: Text appended to the file stem after a ``'-'``.
        file_ext: Optional replacement extension, with or without a leading
            dot. When falsy, the extension of ``file_path`` is kept.

    Returns:
        The new path as a string, e.g. ``'data/report-<string>.csv'``. If
        neither the path nor ``file_ext`` supplies an extension, no trailing
        dot is added.
    """
    import os

    # splitext only looks at the final path component, so dots in directory
    # names and extension-less files are handled correctly.
    stem, current_ext = os.path.splitext(file_path)
    ext = (file_ext or current_ext).lstrip('.')
    new_path = stem + '-' + string
    return new_path + '.' + ext if ext else new_path
# Number of letters in each alphabet column.
r_rang = 26
# Lookup table of letters: column 'b' holds the upper-case letters (code
# points starting at 65), column 's' the lower-case ones (starting at 97).
# Rows are labelled 1..r_rang.
r_chr = pd.DataFrame(
    {
        key: [chr(start + offset) for offset in range(r_rang)]
        for key, start in (('b', 65), ('s', 97))
    },
    index=range(1, r_rang + 1),
)
def get_chr_items(k, f):
    """Return the letters of one ``r_chr`` column from its start up to ``f``.

    Args:
        k: Column selector, compared case-insensitively; ``'b'`` picks the
            upper-case column, anything else is used as the column name after
            lower-casing.
        f: Last letter to include; its case is adjusted to match the column.

    Returns:
        A tuple of letters ending with (and including) ``f``.

    Raises:
        KeyError: If the lower-cased ``k`` is not a column of ``r_chr``.
        ValueError: If the adjusted ``f`` is not present in the column.
    """
    column = k.lower()
    last = f.upper() if column == 'b' else f.lower()
    letters = list(r_chr[column])
    stop = letters.index(last) + 1
    return tuple(letters[:stop])
# Render the letter lookup table.
display(r_chr)
# 10x10 frame of random integers in [0, 100); rows are labelled with the
# first ten upper-case letters and columns with the first ten lower-case ones.
r_df = pd.DataFrame(
    data=np.random.randint(0, 100, size=(10, 10)),
    index=tuple(r_chr['b'].iloc[:10]),
    columns=tuple(r_chr['s'].iloc[:10]),
)
# Synthetic data set of 300 rows: a 0/1 'sex' flag, a 'cls' number in 1..8
# and five integer columns (P, K, J, T, C) with values in 0..150.
# The random draws happen in column order: sex, cls, P, K, J, T, C.
df_01 = pd.DataFrame({
    'sex': np.random.randint(0, 2, 300),
    'cls': np.random.randint(1, 9, 300),
    **{subject: np.random.randint(0, 151, 300) for subject in 'PKJTC'},
})
# Replace the numeric sex flag with its text label.
df_01['sex'] = df_01['sex'].map({0: '男', 1: '女'})
df_01.head()
# Split the frame into one sub-frame per distinct value of the 'sex' column.
g_01 = df_01.groupby('sex')
# Number of groups (the value is neither stored nor printed).
sum(1 for _ in g_01)
# Show every group key followed by the first rows of its sub-frame.
for a, b in g_01:
    print(a)
    display(b.head())
# Group by the combination of 'cls' and 'sex'; each group key is a tuple.
g_02 = df_01.groupby(['cls', 'sex'])
# Print how many groups there are.
print(sum(1 for _ in g_02))
# Print every group key.
for a, b in g_02:
    print(a)
# Group only the 'P' column, using the 'cls' column as the grouping key.
g_03 = df_01['P'].groupby(df_01['cls'])
print('长度', sum(1 for _ in g_03))
# For each class, print the key, a label and the first P values as an array.
for a, b in g_03:
    print(a, '\b班P学科成绩', b.head().to_numpy())
# Group only the 'K' column by the (cls, sex) pair.
g_04 = df_01['K'].groupby([df_01['cls'], df_01['sex']])
print('长度', sum(1 for _ in g_04))
# Each key is a (cls, sex) tuple; print a label built from it together with
# the first K values of the group.
for a, b in g_04:
    print('%s班%s生K学科成绩' % a[:2], b.head().to_numpy())
# Group the *columns* (axis=1) by their dtype, so each group is the sub-frame
# of all columns sharing one dtype.
# NOTE(review): `axis=1` in DataFrame.groupby is reported as deprecated in
# recent pandas releases — confirm against the installed version.
g_05 = df_01.groupby(by=df_01.dtypes, axis=1)
print('长度', sum(1 for _ in g_05))
# Print each dtype with the shape and size of its sub-frame, then show the
# first rows.
for a, b in g_05:
    print('组名(数据类型)', a, b.shape, b.size)
    display(b.head())
# Group the columns (axis=1) through an explicit column -> group-name mapping:
# 'sex' and 'cls' go to 'category', the five score columns go to 'IT'.
# NOTE(review): `axis=1` in DataFrame.groupby is reported as deprecated in
# recent pandas releases — confirm against the installed version.
g_06 = df_01.groupby(
    {
        **dict.fromkeys(('sex', 'cls'), 'category'),
        **dict.fromkeys(('P', 'K', 'T', 'J', 'C'), 'IT'),
    },
    axis=1,
)
print('长度', sum(1 for _ in g_06))
# Print each group name with the shape and size of its sub-frame, then show
# the first rows.
for a, b in g_06:
    print('组名', a, b.shape, b.size)
    display(b.head())
# Built-in aggregations on grouped data. Every statement below is a bare
# expression: its result is not stored.

# Mean of the remaining columns per 'sex' group, rounded to 2 decimals.
df_01.groupby('sex').mean().round(2)
# Count of values per column for each 'sex' group.
df_01.groupby('sex').count()
# Column sums per 'sex' group.
df_01.groupby('sex').sum()
# Maximum of P and K for every (cls, sex) combination.
df_01.groupby(['cls', 'sex'])[['P','K']].max()
# Same result, transposed.
df_01.groupby(['cls', 'sex'])[['P','K']].max().T
# Number of rows in every (cls, sex) group.
df_01.groupby(['cls','sex']).size()
# Summary statistics produced by describe() for every (cls, sex) group.
df_01.groupby(['cls', 'sex']).describe()
# Group by (cls, sex) and print each key with the shape of its sub-frame.
g_07 = df_01.groupby(['cls', 'sex'])
for a, b in g_07:
    print(a, b.shape)
# apply(): call np.mean once per group on the selected P/K columns.
# NOTE(review): np.mean on a whole sub-frame may reduce over both axes in
# newer pandas — confirm that per-column means are still produced.
g_07[['P', 'K']].apply(np.mean).round(2)
df_01[['sex', 'cls', 'P', 'K']].head()
# transform(): the result keeps the row count of df_01, with every row
# receiving the mean of its own group.
df_02 = g_07[['P', 'K']].transform(np.mean).round(2)
print(df_02.shape)
df_02.head()
# Same apply() as above, written with an explicit lambda.
g_07[['P','J']].apply(lambda _: np.mean(_)).round(2)


def func_01(values):
    """Min-max scale ``values`` to the range [0, 1].

    Computes ``(values - min) / (max - min)``. The subtraction is
    parenthesised explicitly: without the parentheses the division binds
    tighter and the expression becomes ``values - min / (max - min)``.
    A group whose values are all equal has ``max == min`` and therefore
    divides by zero.
    """
    return (values - values.min()) / (values.max() - values.min())


# Scale P and J inside every (cls, sex) group, once through apply() and once
# through transform().
g_07[['P','J']].apply(func_01).round(2)
g_07[['P','J']].transform(func_01).round(2)
# agg() with a list: apply every listed function to both selected columns.
g_07[['T', 'K']].agg([np.min, np.max, pd.Series.count])
# agg() with a dict: choose the functions per column; each (label, function)
# tuple sets the label of the resulting output column.
# The ('最大值', np.max) entry was listed twice for P; the duplicate is
# removed so every output label is unique.
# NOTE(review): older pandas versions are believed to reject duplicate labels
# outright — confirm if the second entry was meant to be another statistic.
g_07[['P','K']].agg(dict(
    P=[('最大值', np.max), ('最小值', np.min), ('平均值', np.mean)],
    K=[('计数', pd.Series.count), ('中位数', np.median)]
))
# Pivot table over (cls, sex) with a different set of labelled aggregations
# for each of the P, K and T columns; the result is rounded to 0 decimals and
# not stored.
df_01.pivot_table(
    values=['P', 'K', 'T'],
    index=['cls', 'sex'],
    aggfunc={
        'P': [('最小值', np.min)],
        'K': [('最大值', np.max), ('中位数', np.median)],
        'T': [('最小值', np.min), ('平均值', np.mean), ('计数', len)],
    },
).round(0)
