import base64
import os.path
from urllib import request
import openpyxl
from openpyxl.worksheet.hyperlink import Hyperlink
from openpyxl.drawing.image import Image
from pyquery import PyQuery as pq
# Page that the script scrapes.
url = 'https://www.xiachufang.com/explore'

# Request headers carrying a desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.54 Safari/537.36'
    ),
}

sep = '/'

# Everything in `url` before its last '/'-separated segment, i.e. the
# scheme and host without the trailing "/explore".
urlSame = url.rsplit(sep, 1)[0]
# Download the page and select the <div> inside every <li> of the
# ".normal-recipe-list" element.
pqa = pq(url=url, encoding='utf-8', headers=headers)
pqb = pqa('.normal-recipe-list li > div')

# One tuple per successfully downloaded entry:
# (local image path, absolute link, name, ingredients text, author).
items = []

# Directory that receives the downloaded images.
path = '下厨房的本周最受欢迎'
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(path, exist_ok=True)

for k, node in enumerate(pqb):
    card = pq(node)
    src = card('img').attr('data-src')
    href = card('a').attr('href')
    if not src or not href:
        # attr() yields None when the attribute is absent; without both
        # values the entry can neither be downloaded nor linked.
        print('skip entry %d: missing image source or link' % k)
        continue

    name = card('p.name a').text()
    meta = card('p.ing').text()
    author = card('p.author a').text()

    # Zero-padded index keeps the image files in listing order.
    imgPath = os.path.join(os.getcwd(), path, str(k).zfill(2) + '.jpg')

    try:
        # Send the same browser-like headers as the page request (some
        # servers may reject urllib's default agent), bound the wait with
        # a timeout, and close the response as soon as it has been read.
        imgReq = request.Request(src, headers=headers)
        with request.urlopen(imgReq, timeout=30) as imgResp:
            content = imgResp.read()
    except (OSError, ValueError) as err:
        # URLError/HTTPError and socket timeouts derive from OSError;
        # Request raises ValueError for a malformed URL.  A single failed
        # download should not abort the whole run.
        print('skip entry %d: image download failed (%s)' % (k, err))
        continue

    if not content:
        # An empty body would leave no image file on disk, so the entry
        # is dropped instead of recording a path to a missing file.
        print('skip entry %d: empty image response' % k)
        continue

    with open(imgPath, 'wb') as f:
        f.write(content)
    items.append((imgPath, urlSame + href, name, meta, author))
# Each entry occupies six consecutive rows: image, name, author,
# ingredients, link, and one blank spacer row.
ROWS_PER_ITEM = 6
# Images are embedded at half their native pixel size.
IMAGE_SCALE = 0.50
# Image sizes are in pixels, but Excel measures row height in points and
# column width in characters of the default font.  0.75 pt/px holds at
# 96 DPI; ~7 px per character is an approximation for the default font.
POINTS_PER_PIXEL = 0.75
PIXELS_PER_CHAR = 7
IMAGE_COLUMN = 'B'

ow = openpyxl.Workbook()
sheet = ow.active
sheet.title = path

widestImage = 0
for index, (imagePath, link, name, meta, author) in enumerate(items):
    # append() advances one row per call (even for an empty list), so on
    # this freshly created sheet the entry starts at this row.
    imageRow = index * ROWS_PER_ITEM + 1

    # Escape double quotes so the link cannot terminate the formula string.
    safeLink = link.replace('"', '""')

    sheet.append(['图片'])
    sheet.append(['菜名', name])
    sheet.append(['作者', author])
    sheet.append(['材料', meta])
    sheet.append(['链接', '=HYPERLINK("' + safeLink + '","点击访问")'])
    sheet.append([])

    img = Image(imagePath)
    img.width = img.width * IMAGE_SCALE
    img.height = img.height * IMAGE_SCALE
    # Make the image row tall enough for the scaled picture.
    sheet.row_dimensions[imageRow].height = img.height * POINTS_PER_PIXEL
    widestImage = max(widestImage, img.width)
    sheet.add_image(img, IMAGE_COLUMN + str(imageRow))

if widestImage:
    # Size the image column once, to fit the widest picture, instead of
    # letting each image overwrite the previous width.
    sheet.column_dimensions[IMAGE_COLUMN].width = widestImage / PIXELS_PER_CHAR

# The workbook file is named after the image directory / sheet title.
ow.save(path + '.xlsx')
ow.close()
