Windows平台下
import time,os
from multiprocessing import Pool
import requests
# Host names (no scheme) of the sites whose home pages are downloaded.
wangzhan = [
    'www.sina.com',
    'www.163.com',
    'www.iciba.com',
    'www.cnblogs.com',
    'www.qq.com',
    'www.douban.com',
]
def foo(url):
    """Download the home page of *url* and save it as a UTF-8 text file.

    The page is requested from ``http://<url>`` and written to
    ``D:\\wangzhan\\<url>.txt``.  Progress and the final outcome are
    printed to stdout.

    Args:
        url: Host name without scheme, e.g. ``'www.qq.com'``.

    Returns:
        ``'success'`` if the page was fetched and written, ``'failed'``
        if the request or the file write raised ``OSError``.
    """
    # NOTE(review): this hard-coded Cookie is sent to every site in the list;
    # it looks like a captured browser session -- confirm it is really needed.
    headers = {
        'Cookie': '_iuqxldmzr_=32; _ntes_nnid=ae3b5b26314cae4ba35657b357d06348,1640060772701; _ntes_nuid=ae3b5b26314cae4ba35657b357d06348; NMTID=00OFTzvJD26ltfT4kPLk7b6nPCn51oAAAF92z1lmg; WNMCID=xyhddx.1640060773918.01.0; WEVNSM=1.0.0; WM_TID=i2DIsKQNGhZAUABUFBIq86abC%2BMqTYA3; JSESSIONID-WYYY=w0%5CqDCxwTCSuV6TSo71m4xqKq7x9%2F%2FhSWX04%5Ce%5CEREabbI%5CDvvCOFCt6GMD9UUeC2tamR6NwUGM9X9h%2Bmugrqq5u92NSN2hoycyrjb5ldDxE8cPwGcey7bDQvNynG6230m3Saux4JblnRBF%2BchXEp7%2FRNT96%5CHUEk%5CG3geohb6jo6q5p%3A1645762943177; WM_NI=PH%2BSXDfhBlYnm2%2BWub53aGvl0VFZ%2BjlmccwSVtT5KsJUNj33VSMU7BuSHREMe3IPpsTymGnW7W3G4xmYipYo786C8z5c2UE8Mpsu8vBVsbVBmnkWmD69VHJ2XXacu5rNZG8%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eea8eb3abae9979bf37b98b48fa6d85a879f9aaff821a9b2e5b3d8749bbfc0d7ce2af0fea7c3b92a9498e5b4db50a6b88e8ddc66afeee5b8f86af7edafa8f568f48a8abbaa62b0b3ad84f65996ac9cd6ca798a8b828bb239968d83b6b64ab78d88b3c847a7ed87b1f37998a7a3a8ed5df589fda8b774af87a791dc65f5e8faa4bb72f7bdfeb6c8348fb0bd96bb7e828abd83d54aafb2a5b8f050f6e79aa9c93a9c88af84f83fb29296a8bb37e2a3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    }
    print('Run task %s (pid:%s)...' % (url, os.getpid()))
    time.sleep(2)
    try:
        # Fetch before opening the file so a failed request does not leave an
        # empty file behind.  The timeout keeps a dead host from blocking the
        # worker forever.
        text = requests.get('http://' + url, headers=headers, timeout=10).text
        # Backslashes are escaped explicitly; '\w' is not a valid escape.
        with open('D:\\wangzhan\\' + url + '.txt', 'w', encoding='utf-8') as f:
            f.write(text)
        print('http://' + url + '下载完成')
        result = 'success'
    except OSError:
        # requests' exceptions derive from IOError (an alias of OSError), so
        # this covers both network failures and file-system errors.
        print('http://' + url + '对应的网站不存在或出现问题')
        result = 'failed'
    # Two placeholders need a two-element tuple; passing the bare string
    # raised TypeError, which apply_async swallowed silently.
    print('Task %s result is: %s' % (url, result))
    return result
if __name__ == '__main__':
    # Entry point: download every site in `wangzhan` using a pool of workers.
    p = Pool(3)  # number of worker processes
    for url in wangzhan:
        # Queue one asynchronous download task per site.
        p.apply_async(foo, args=(url,))
    print('Waiting for all subprocesses done...')
    # Without close() + join() the parent can exit before the queued tasks
    # have run; close() stops new submissions, join() waits for the workers.
    p.close()
    p.join()
    print('All subprocesses done.')
在上面的代码中,Pool 用于生成进程池,对 Pool 对象调用 apply_async 方法可以使每个进程异步执行任务,也就是说不用等上一个任务执行完才执行下一个任务。close 方法用于关闭进程池,确保不再有新的任务提交到进程池;join 方法会等待所有子进程执行完毕,并且必须在 close 之后调用。
Linux平台下
from multiprocessing import Pool
import subprocess
import time,os
# Host names (no scheme) of the sites whose home pages are downloaded.
wangzhan = [
    'www.sina.com',
    'www.163.com',
    'www.iciba.com',
    'www.cnblogs.com',
    'www.qq.com',
    'www.douban.com',
]
def foo(url):
    """Download the home page of *url* with ``wget`` into ``/tmp/<url>.html``.

    Progress and the final outcome are printed to stdout.

    Args:
        url: Host name without scheme, e.g. ``'www.qq.com'``.

    Returns:
        ``'success'`` if wget exited with status 0, ``'failed'`` if it
        exited non-zero or could not be started.
    """
    print('Run task %s (pid:%s)...' % (url, os.getpid()))
    time.sleep(2)
    try:
        # An argument list avoids shell parsing of `url`, unlike a
        # concatenated command string run with shell=True.
        returncode = subprocess.call(['wget', url, '-O', '/tmp/' + url + '.html'])
    except OSError:
        # wget is missing or not executable; report it as a failed download.
        returncode = None
    if returncode == 0:
        print('http://' + url + '下载完成')
        result = 'success'
    else:
        print('http://' + url + '对应的网站不存在或出现问题')
        result = 'failed'
    # Two placeholders need a two-element tuple; passing the bare string
    # raised TypeError, which apply_async swallowed silently.
    print('Task %s result is: %s' % (url, result))
    return result
if __name__ == '__main__':
    # Entry point: hand every site to a pool of three worker processes,
    # then wait until all of them have finished.
    worker_count = 3
    pool = Pool(worker_count)
    for site in wangzhan:
        # Schedule foo(site) without waiting for earlier tasks to complete.
        pool.apply_async(foo, (site,))
    print('Waiting for all subprocesses done...')
    pool.close()  # no further tasks may be submitted
    pool.join()   # block until every worker has exited
    print('All subprocesses done.')