0
点赞
收藏
分享

微信扫一扫

Python 办公自动化012 —— csv、excel 文件按列分表、数据透视

一脸伟人痣 2022-04-04 阅读 50

csv 文件按列分表、数据透视

1. 单个 Excel(或 CSV)文件按列分表


import pandas as pd
import xlsxwriter
import glob

# Directory that holds the source file, and directory that receives the workbook.
input_path = "/Users/libin/Desktop/libin/"
ouput_path = "/Users/libin/Desktop/libin/"
file_name = "test.csv"

# Read the CSV: skiprows=4 skips the first 4 lines of the file and nrows=1000
# limits how many data rows are loaded.
df = pd.read_csv(input_path + file_name, skiprows=4, nrows=1000)
# To split an Excel workbook instead, load it with pd.read_excel:
# df = pd.read_excel("/Users/libin/Desktop/libin/bbb.xlsx")

# Output workbook name: the input name with its extension replaced by ".xlsx".
# Splitting on the last dot works for any extension length, not just 3 letters.
xlsx_path = ouput_path + file_name.rsplit(".", 1)[0] + ".xlsx"

# The with-block saves and closes the workbook on exit, even if a write fails.
# (ExcelWriter.save() is deprecated since pandas 1.5 and removed in pandas 2.0.)
with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as writer:
    # "要分组的列名" is a placeholder ("name of the column to group by");
    # replace it with a real column name. One sheet is written per distinct value.
    for name_sheet, group_sheet in df.groupby("要分组的列名"):
        # Group keys may be numbers or dates; sheet names must be strings.
        # NOTE: Excel limits sheet names to 31 characters and rejects []:*?/\
        group_sheet.to_excel(writer, sheet_name=str(name_sheet), index=False)

2. 目录下所有 CSV 文件按列分表

import pandas as pd
import xlsxwriter
import glob

import os

# Directory scanned for CSV files, and directory that receives the workbooks.
input_path = "/Users/libin/Desktop/libin/"
ouput_path = "/Users/libin/Desktop/libin/"

# Convert every CSV found in input_path. The pattern is anchored to input_path
# so the script no longer depends on the current working directory.
for csv_path in glob.glob(os.path.join(input_path, "*.csv")):
    file_name = os.path.basename(csv_path)

    # Read the CSV; skiprows=4 skips the first 4 lines of the file.
    df = pd.read_csv(csv_path, skiprows=4)

    # Output workbook: same base name as the CSV, with an ".xlsx" extension.
    xlsx_path = os.path.join(ouput_path, os.path.splitext(file_name)[0] + ".xlsx")

    # One writer per input file. The with-block saves and closes each workbook
    # before the next file is processed, so every workbook is written to disk
    # (not only the last one). ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as writer:
        # "要分组的列名" is a placeholder ("name of the column to group by");
        # replace it with a real column name. One sheet per distinct value.
        for name_sheet, group_sheet in df.groupby("要分组的列名"):
            # Group keys may be numbers or dates; sheet names must be strings.
            # NOTE: Excel limits sheet names to 31 characters and rejects []:*?/\
            group_sheet.to_excel(writer, sheet_name=str(name_sheet), index=False)


3. 目录下所有 CSV 文件按列分表,并创建数据透视表

import pandas as pd
import numpy as np
import xlsxwriter
import glob

import os

# Directory scanned for CSV files, and directory that receives the workbooks.
input_path = "/Users/libin/Desktop/libin/"
ouput_path = "/Users/libin/Desktop/libin/"

# Build one pivot-table workbook for every CSV found in input_path. The pattern
# is anchored to input_path so the script no longer depends on the current
# working directory.
for csv_path in glob.glob(os.path.join(input_path, "*.csv")):
    file_name = os.path.basename(csv_path)

    # Read the CSV; skiprows=4 skips the first 4 lines of the file.
    df = pd.read_csv(csv_path, skiprows=4)

    # Output workbook: same base name as the CSV, with an ".xlsx" extension.
    xlsx_path = os.path.join(ouput_path, os.path.splitext(file_name)[0] + ".xlsx")

    # One writer per input file. The with-block saves and closes each workbook
    # before the next file is processed, so every workbook is written to disk
    # (not only the last one). ExcelWriter.save() was removed in pandas 2.0.
    with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as writer:
        # One sheet per distinct value of the "Specie" column.
        for name_sheet, group_sheet in df.groupby("Specie"):
            # index   -> column(s) used as the row labels of the pivot table.
            # columns -> column(s) whose values become the column labels.
            # values  -> column(s) to aggregate (pivot_table averages by default).
            # dropna=False keeps columns whose entries are all NaN; pandas
            # drops them by default.
            df_table = group_sheet.pivot_table(
                index=["City"],
                columns=["Date"],
                values=["median"],
                dropna=False,
            )
            # The index is kept here because it carries the City labels.
            # Group keys may not be strings; sheet names must be.
            # NOTE: Excel limits sheet names to 31 characters and rejects []:*?/\
            df_table.to_excel(writer, sheet_name=str(name_sheet))

举报

相关推荐

0 条评论