Python Web Scraping - Chapter 4-4 - Scraping Beijing Xinfadi Market Data Efficiently with Thread Pools and Process Pools

Key concepts:

   Thread pool

       A batch of threads is created up front; callers submit tasks directly to the pool, and the pool takes care of scheduling those tasks onto its threads.

# Demo Describe: thread pool and process pool

from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


# start--------1. Thread pool----------------------
def fn(name):
    for i in range(1000):
        print(name, i)


if __name__ == '__main__':
    # Open 50 threads in the pool to handle the tasks
    with ThreadPoolExecutor(50) as t:
        for i in range(100):
            t.submit(fn, name=f'Thread {i}')
    print('All done')  # Only runs after every task in the pool has finished; the with block waits for them

# end--------1. Thread pool----------------------

# start--------2. Process pool----------------------
def fn(name):
    for i in range(1000):
        print(name, i)


if __name__ == '__main__':
    # Open 50 processes in the pool to handle the tasks
    with ProcessPoolExecutor(50) as t:
        for i in range(100):
            t.submit(fn, name=f'Process {i}')
    print('All done')  # Only runs after every task in the pool has finished; the with block waits for them

# end--------2. Process pool----------------------
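In both demos the return value of fn is thrown away. submit actually returns a Future object, and results can be collected as the workers finish via as_completed. A minimal sketch (the square function and the input range are just illustrative, not part of the demo above):

from concurrent.futures import ThreadPoolExecutor, as_completed


def square(n):
    return n * n


if __name__ == '__main__':
    with ThreadPoolExecutor(8) as pool:
        # submit() returns a Future for every task handed to the pool
        futures = [pool.submit(square, n) for n in range(20)]
        # as_completed() yields each Future as soon as its task finishes
        for fut in as_completed(futures):
            print(fut.result())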

Scraping case: scraping Beijing Xinfadi market price data

import requests
import csv
from concurrent.futures import ThreadPoolExecutor
from fake_useragent import UserAgent

'''
This section:
A multi-threaded page-scraping example
Target site: http://www.xinfadi.com.cn/priceDetail.html
'''


# start------------------------------
# Reptile - the scraper function
def dataReptile(currentPage):
    url = 'http://www.xinfadi.com.cn/getPriceData.html'
    data = {
        'limit': '20',
        'current': currentPage,  # page number (pagination)
        'pubDateStartTime': '',
        'pubDateEndTime': '',
        'prodPcatid': '',
        'prodCatid': '',
        'prodName': '',
    }
    ua = UserAgent()
    user_agent = ua.random
    headers = {'user-agent': user_agent}
    resp = requests.post(url, data=data, headers=headers)
    dataLst = resp.json()['list']
    # newline='' keeps the csv module from inserting blank lines on Windows
    with open('../FileForDemo/P4Demo_threadPoolCase.csv', mode='a+', encoding='UTF-8', newline='') as file:
        csvWriter = csv.writer(file)
        for i in dataLst:
            dataInfo = 'Product: ' + i['prodName'] + ', ' \
                       + 'Lowest price: ' + i['lowPrice'] + ', ' \
                       + 'Average price: ' + i['avgPrice'] + ', ' \
                       + 'Highest price: ' + i['highPrice'] + ', ' \
                       + 'Origin: ' + i['place'] + ', ' \
                       + 'Unit: ' + i['unitInfo'] + ', ' \
                       + 'Publish date: ' + i['pubDate']
            csvWriter.writerow([dataInfo])
    resp.close()
    print(f'Page {currentPage} downloaded!')


if __name__ == '__main__':
    with ThreadPoolExecutor(50) as t:
        for i in range(1, 500):
            t.submit(dataReptile, i)
    print('All data downloaded!!!')

# end------------------------------
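A caveat with the case above: 50 threads all append to the same CSV file at once, so rows from different pages can interleave. A common remedy is to serialize the writes with a lock. The sketch below assumes the same dataLst structure returned by the Xinfadi API; the csv_lock and write_rows helper are illustrative additions, not part of the original code:

import csv
import threading

csv_lock = threading.Lock()  # shared by all worker threads


def write_rows(dataLst):
    # Only one thread at a time may append to the file
    with csv_lock:
        with open('../FileForDemo/P4Demo_threadPoolCase.csv', mode='a+',
                  encoding='UTF-8', newline='') as file:
            csvWriter = csv.writer(file)
            for i in dataLst:
                # one column per field instead of a single concatenated string
                csvWriter.writerow([i['prodName'], i['lowPrice'], i['avgPrice'],
                                    i['highPrice'], i['place'], i['unitInfo'],
                                    i['pubDate']])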
