0
点赞
收藏
分享

微信扫一扫

量化交易之 Python 篇 - requests - 网络爬虫(糗事百科)

小编 2023-03-01 阅读 55


import requests

class QiubaiSpider:
    """Fetch listing pages from qiushibaike.com's text section and save them.

    Each page ``n`` is requested from
    ``https://www.qiushibaike.com/text/page/n/`` and written to
    ``qiushi-第n页.html`` in the current working directory.

    Note that constructing the spider with ``pages > 0`` immediately performs
    the crawl, because ``__init__`` calls :meth:`run`.
    """

    # Seconds to wait on the server before giving up; without a timeout
    # ``requests.get`` may block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    def __init__(self, pages=0):
        """Create the spider and crawl ``pages`` pages (none by default).

        Args:
            pages: Number of pages to download right away. Values below
                zero are treated as zero.
        """
        self._url_list = []
        # The headers must be assigned before run() is invoked: run() calls
        # _parse_url(), which reads self.headers.
        self.headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, '
                                      'like Gecko) Chrome/85.0.4183.83 Safari/537.36'}
        self.run(pages=pages)

    def _get_url_list(self, pages=0):
        """Rebuild ``self._url_list`` with the URLs of pages 1..``pages``.

        Args:
            pages: Number of page URLs to generate. Negative values are
                clamped to zero, leaving the list empty.
        """
        page_count = max(pages, 0)

        # Mutate the existing list in place so its identity is preserved.
        self._url_list.clear()
        self._url_list.extend(
            "https://www.qiushibaike.com/text/page/{}/".format(number)
            for number in range(1, page_count + 1)
        )

    def _parse_url(self, url):
        """Request ``url`` with the spider's headers and return the body text.

        Args:
            url: Address to fetch.

        Returns:
            The response body decoded with ``bytes.decode()``'s default
            encoding (UTF-8).

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        response = requests.get(url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        return response.content.decode()

    @staticmethod
    def save_html_string(html_string, page_number):
        """Write ``html_string`` to ``qiushi-第<page_number>页.html`` as UTF-8.

        The file is created (or overwritten) relative to the current working
        directory.

        Args:
            html_string: Text to store.
            page_number: Page number embedded in the file name.
        """
        file_path = "{}-第{}页.html".format("qiushi", page_number)
        with open(file_path, "w", encoding="UTF-8") as file:
            file.write(html_string)

    def run(self, pages=0):
        """Download the first ``pages`` pages and save each one to disk.

        Args:
            pages: Number of pages to fetch; zero or less fetches nothing.
        """
        # 1. Build the list of page URLs.
        self._get_url_list(pages)

        # 2. Request every URL in order and persist the response body.
        #    enumerate() yields the 1-based page number directly, avoiding a
        #    linear list.index() lookup on every iteration.
        for page_number, url_name in enumerate(self._url_list, start=1):
            html_string = self._parse_url(url=url_name)
            self.save_html_string(html_string=html_string, page_number=page_number)


if __name__ == "__main__":
    # Constructing with the default page count fetches nothing; the explicit
    # run() call below downloads and saves pages 1 through 9.
    spider = QiubaiSpider()
    spider.run(pages=9)

举报

相关推荐

0 条评论