一、xpath解析
from lxml import etree
# Build an lxml element tree from the HTML text of `resp` (defined elsewhere).
e = etree.HTML(resp.text)

# Every column is read from the rows of the table whose class is "players_table".
row_path = '//table[@class="players_table"]//tr'

# Column 1: only the text nodes that are direct children of the first cell.
no = e.xpath(row_path + '/td[1]/text()')
# Columns 2-4: every text node anywhere inside the cell (note the `//text()`).
names = e.xpath(row_path + '/td[2]//text()')
teames = e.xpath(row_path + '/td[3]//text()')
scores = e.xpath(row_path + '/td[4]//text()')
# NOTE(review): presumably rank / player / team / score, judging by the
# variable names -- confirm against the actual page.
# print(no, names, teames, scores)
二、txt文件格式保存
# with open('5-NBA.txt', 'w', encoding='utf-8') as f:
#     for n, name, team, score in zip(no, names, teames, scores):
#         f.write(f'{n}{name}{team}{score}\n')
三、excel保存
import openpyxl
# Create a new workbook and write into its active worksheet.
wk = openpyxl.Workbook()
sheet = wk.active
sheet.title = "NBA数据2022-11-29"

# Each element of `lst` (built elsewhere) is appended as one worksheet row.
# The loop body must be indented, otherwise Python raises IndentationError.
for row in lst:
    sheet.append(row)

# Write the workbook to disk in the current working directory.
wk.save("05-NBA11.xlsx")
四、完整实例
import requests
from lxml import etree
import openpyxl
# Complete example: download the page, extract four table columns with XPath,
# and save them row by row into an Excel workbook.
url = "https://nba.hupu.com/stats/players"

# Send a well-formed, browser-like User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}

# requests has no default timeout, so set one to avoid hanging forever.
# raise_for_status() aborts on an HTTP error instead of parsing an error page.
resp = requests.get(url=url, headers=headers, timeout=10)
resp.raise_for_status()

# Parse the HTML and pull each column out of the "players_table" rows.
e = etree.HTML(resp.text)
# Column 1 takes only direct text children; columns 2-4 take all nested text.
no = e.xpath('//table[@class="players_table"]//tr/td[1]/text()')
names = e.xpath('//table[@class="players_table"]//tr/td[2]//text()')
teames = e.xpath('//table[@class="players_table"]//tr/td[3]//text()')
scores = e.xpath('//table[@class="players_table"]//tr/td[4]//text()')
# print(no, names, teames, scores)

# Optional plain-text export (loop names must not shadow the lists above):
# with open('5-NBA.txt', 'w', encoding='utf-8') as f:
#     for n, name, team, score in zip(no, names, teames, scores):
#         f.write(f'{n}{name}{team}{score}\n')

# The columns are queried independently, so they only line up row by row when
# every query returned the same number of text nodes. Fail loudly otherwise
# rather than hitting an IndexError or writing misaligned rows.
if not (len(no) == len(names) == len(teames) == len(scores)):
    raise ValueError(
        "column lengths differ: "
        f"no={len(no)}, names={len(names)}, "
        f"teames={len(teames)}, scores={len(scores)}"
    )

# One list per table row: [no, name, team, score].
lst = [list(row) for row in zip(no, names, teames, scores)]
# for j in lst:
#     print(j)

# Write every row into the active sheet of a new workbook and save it.
wk = openpyxl.Workbook()
sheet = wk.active
sheet.title = "NBA数据2022-11-29"
for row in lst:
    sheet.append(row)
wk.save("05-NBA11.xlsx")
print("保存完毕")