import urllib.request
import json
import re
import xlwt
import xlrd
from xlutils.copy import copy
def main():
    """Parse the saved Douban response file and append its rows to the workbook.

    Reads ``豆瓣异步加载0.html`` (UTF-8), extracts the rows with ``getData``
    and hands them to ``xls_append`` together with the target ``.xls`` path.
    """
    path = r'豆瓣异步爬取数据.xls'
    # The context manager closes the input file even if parsing raises;
    # the original left the handle open for the lifetime of the process.
    with open(r'豆瓣异步加载0.html', 'r', encoding='utf-8') as file:
        dataList = getData(file)
    xls_append(path, dataList)
def xls_append(path, value):
    """Append the rows in ``value`` below the existing rows of an .xls file.

    The workbook at ``path`` is opened with xlrd, the row count of its first
    sheet is taken as the starting offset, and a writable copy (xlutils) is
    filled with ``value[i][j]`` at row ``offset + i``, column ``j``. The copy
    is then saved back to ``path`` and a confirmation message is printed.

    Args:
        path: Path of the existing .xls workbook to extend.
        value: Sequence of rows, each row a sequence of cell values.
    """
    source_book = xlrd.open_workbook(path)
    first_sheet_name = source_book.sheet_names()[0]
    existing_rows = source_book.sheet_by_name(first_sheet_name).nrows

    # xlrd workbooks are read-only; writing happens on the xlutils copy.
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)
    for row_offset, record in enumerate(value):
        for col_idx, cell in enumerate(record):
            target_sheet.write(existing_rows + row_offset, col_idx, cell)

    writable_book.save(path)
    print("追加数据成功")
def savaData(path, dataList):
    """Write ``dataList`` to a new .xls workbook with a header row.

    A sheet named ``sheet2`` is created; row 0 receives the three column
    titles and row ``j + 1`` receives ``dataList[j]``. The workbook is saved
    to ``path`` and a confirmation message is printed.

    Args:
        path: Destination path of the .xls file.
        dataList: Sequence of rows; each row is indexed at positions 0..2.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('sheet2', cell_overwrite_ok=True)
    headers = ('电影名', '评分', '链接')

    # Cells are written column by column: the title first, then that
    # column's value from every row.
    for col_idx, title in enumerate(headers):
        worksheet.write(0, col_idx, title)
        for row_idx, record in enumerate(dataList, start=1):
            worksheet.write(row_idx, col_idx, record[col_idx])

    workbook.save(path)
    print("异步数据爬取保存成功!")
def getData(file):
    """Extract ``[title, rate, url]`` rows from a saved Douban JSON response.

    The text read from ``file`` is searched for a JSON object that starts
    with the ``"subjects"`` key; that object is decoded and one row is built
    per entry of its ``subjects`` array.

    Args:
        file: Open text-mode file object providing ``readlines()``.

    Returns:
        A list of ``[title, rate, url]`` lists, one per subject. An empty
        list is returned when no ``{"subjects": ...}`` object is present.

    Raises:
        json.JSONDecodeError: If the located object is not valid JSON.
        KeyError: If a subject lacks ``title``, ``rate`` or ``url``.
    """
    # Join the raw lines rather than taking str() of the list: the list repr
    # doubles backslashes and adds quoting, which corrupts JSON escapes such
    # as "\/" in URLs and breaks on apostrophes or multi-line input.
    text = ''.join(file.readlines())

    start = re.search(r'\{\s*"subjects"\s*:', text)
    if start is None:
        return []

    # raw_decode parses exactly one JSON value starting at the given index
    # and ignores whatever follows it (e.g. trailing markup or newlines).
    payload, _ = json.JSONDecoder().raw_decode(text, start.start())
    return [
        [item['title'], item['rate'], item['url']]
        for item in payload['subjects']
    ]
def askURL(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    The request carries a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (raised by ``urlopen``).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
    }
    req = urllib.request.Request(url=url, headers=headers)
    # Close the response deterministically instead of leaving the
    # connection open until garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')
# Run the import job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()