0
点赞
收藏
分享

微信扫一扫

Python 读取 CSV 文件和 XLS 文件

AbrahamW 2022-11-21 阅读 115


import os
# Directory that holds the data file; test() joins it with DATAFILE.
DATADIR = ""
# File name of the CSV data set that test() parses.
DATAFILE = "beatles-diskography.csv"
def parse_file(datafile, max_rows=10):
    """Read the leading rows of a comma-separated file into dictionaries.

    The first line of the file is treated as the header. Every following
    line becomes a dict that maps each header name to the value in the same
    position, with surrounding whitespace stripped from names and values.
    Lines are split on every comma, so quoted fields that contain commas
    are not supported.

    Args:
        datafile: Path of the file to read.
        max_rows: Maximum number of data rows to return (default 10).
            Pass None to read every row.

    Returns:
        A list of dicts, one per data row, in file order.
    """
    data = []
    with open(datafile, "r") as ff:
        # Clean the column names once instead of once per cell.
        header = [name.strip() for name in ff.readline().split(",")]
        for line in ff:
            if max_rows is not None and len(data) >= max_rows:
                break
            fields = line.split(",")
            # zip() stops at the shorter sequence, so a row with more values
            # than the header no longer raises IndexError; short rows simply
            # yield fewer keys.
            data.append({name: value.strip()
                         for name, value in zip(header, fields)})
    return data


def test():
    """Check parse_file against the expected first and tenth data rows.

    Builds the input path from DATADIR and DATAFILE, parses it, and asserts
    that rows 0 and 9 equal the hard-coded dictionaries below.

    Raises:
        AssertionError: If either row differs from its expected dictionary.
    """
    # a simple test of your implementation
    datafile = os.path.join(DATADIR, DATAFILE)
    d = parse_file(datafile)
    firstline = {'Title': 'Please Please Me', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)',
                 'Released': '22 March 1963', 'US Chart Position': '-', 'RIAA Certification': 'Platinum',
                 'BPI Certification': 'Gold'}
    tenthline = {'Title': '', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '10 July 1964',
                 'US Chart Position': '-', 'RIAA Certification': '', 'BPI Certification': 'Gold'}

    assert d[0] == firstline
    assert d[9] == tenthline


# Run the self-check as soon as this snippet is executed.
test()

pip install xlrd

import xlrd

# File name of the Excel workbook passed to parse_file.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def parse_file(datafile):
    """Open an Excel workbook, print a tour of xlrd's cell API, return all cells.

    Loads the first sheet of the workbook, copies every cell value into a
    nested list, and prints a series of examples: indexing the nested list,
    reading one row cell by cell, querying row counts, cell types and column
    slices, and converting an Excel date float into a tuple.

    Args:
        datafile: Path of the workbook to open.

    Returns:
        A list with one entry per sheet row; each entry is the list of that
        row's cell values.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    data = [[sheet.cell_value(r, col)
             for col in range(sheet.ncols)]
            for r in range(sheet.nrows)]

    # Every print call receives a single expression, which parses the same
    # way as a Python 2 print statement and as a Python 3 function call.
    print("\nList Comprehension")
    print("data[3][2]: %s" % (data[3][2],))

    print("\nCells in a nested loop:")
    # Only row 50 is shown, so address it directly instead of visiting every
    # cell of the sheet; a sheet with fewer than 51 rows prints nothing here.
    if sheet.nrows > 50:
        print(" ".join("%s" % (sheet.cell_value(50, col),)
                       for col in range(sheet.ncols)))

    ### other useful methods:
    print("\nROWS, COLUMNS, and CELLS:")
    print("Number of rows in the sheet: %s" % (sheet.nrows,))
    print("Type of data in cell (row 3, col 2): %s" % (sheet.cell_type(3, 2),))
    print("Value in cell (row 3, col 2): %s" % (sheet.cell_value(3, 2),))
    print("Get a slice of values in column 3, from rows 1-3:")
    print(sheet.col_values(3, start_rowx=1, end_rowx=4))

    print("\nDATES:")
    print("Type of data in cell (row 1, col 0): %s" % (sheet.cell_type(1, 0),))
    exceltime = sheet.cell_value(1, 0)
    print("Time in Excel format: %s" % (exceltime,))
    # Use the workbook's own date system rather than assuming the
    # 1900-based one (datemode 0).
    print("Convert time to a Python datetime tuple, from the Excel float: %s"
          % (xlrd.xldate_as_tuple(exceltime, workbook.datemode),))

    return data

# Run the tour immediately and keep the returned grid of cell values.
data = parse_file(datafile)

 

#!/usr/bin/env python
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples

Please see the test function for the expected return format

"""

import xlrd
from zipfile import ZipFile
# Workbook file name; open_zip() looks for an archive with this name plus ".zip".
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

def open_zip(datafile):
    """Extract every member of ``<datafile>.zip`` into the working directory.

    Args:
        datafile: Archive path without its trailing ``.zip`` suffix.
    """
    archive_path = '{0}.zip'.format(datafile)
    archive = ZipFile(archive_path, 'r')
    # The context manager closes the archive even if extraction fails.
    with archive:
        archive.extractall()

def parse_file(datafile):
    """Summarise the load values in column 1 of a workbook's first sheet.

    Reads column 1 below the header row (the COAST region, per the exercise
    description -- NOTE(review): confirm the column layout against the
    workbook), finds its minimum, maximum and mean, and looks up the
    timestamp in column 0 of the rows holding the minimum and maximum.

    Args:
        datafile: Path of the workbook to open.

    Returns:
        A dict with the keys 'maxtime', 'maxvalue', 'mintime', 'minvalue'
        and 'avgcoast'. The two time entries are tuples as produced by
        xlrd.xldate_as_tuple.

    Raises:
        ValueError: If the sheet has no rows below the header.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # start_rowx=1 skips the header row.
    values = sheet.col_values(1, start_rowx=1, end_rowx=None)
    if not values:
        # max()/min() would raise ValueError anyway; say why instead.
        raise ValueError("no data rows found in {0!r}".format(datafile))

    maxval = max(values)
    minval = min(values)

    def time_of(value):
        # +1 maps the position within the header-less column back to the
        # sheet row. The workbook's own datemode is used so files saved with
        # the 1904 date system convert correctly as well.
        row = values.index(value) + 1
        return xlrd.xldate_as_tuple(sheet.cell_value(row, 0),
                                    workbook.datemode)

    return {
        'maxtime': time_of(maxval),
        'maxvalue': maxval,
        'mintime': time_of(minval),
        'minvalue': minval,
        # float() keeps the division exact under Python 2 as well.
        'avgcoast': sum(values) / float(len(values)),
    }

def test():
    """Unpack the workbook archive, parse it, and verify the maximum entry.

    Raises:
        AssertionError: If the maximum's timestamp or value (compared after
            rounding to 10 decimal places) differs from the expected one.
    """
    open_zip(datafile)
    summary = parse_file(datafile)

    expected_maxtime = (2013, 8, 13, 17, 0, 0)
    assert summary['maxtime'] == expected_maxtime

    # Both sides are rounded so float noise beyond 10 decimals is ignored.
    expected_maxvalue = round(18779.02551, 10)
    assert round(summary['maxvalue'], 10) == expected_maxvalue

# Run the self-check as soon as this snippet is executed.
test()


举报

相关推荐

0 条评论