0
点赞
收藏
分享

微信扫一扫

爬虫实战:从入门到精通——用正则表达式爬取某某网络的壁纸


import os
import re

import requests
# Scrape every lazily-loaded wallpaper referenced on one listing page and
# store the images as images/0.jpg, images/1.jpg, ...

url = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # target page URL, deliberately redacted by the author

headers = {
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}

OUTPUT_DIR = "images"
REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

# The attribute value on the page looks like
#   https://anchorpost.msstatic.com/cdnimage/anchorpost/1032/33/<name>.jpg?imageview/4/0/w/338/h/190/blur/1/format/webp
# and the image URL we want is everything before the "?":
#   https://anchorpost.msstatic.com/cdnimage/anchorpost/1032/33/<name>.jpg
#
# Negated character classes ([^>], [^"?], [^"]) keep each match inside a
# single <img> tag and a single attribute value. The earlier greedy ".*"
# version merged several tags on the same line into one bogus capture.
# Dots in the host name are escaped so that only the literal host matches.
IMG_URL_PATTERN = re.compile(
    r'<img\b[^>]*?\sdata-original="'
    r'(https://anchorpost\.msstatic\.com/cdnimage/anchorpost/[^"?]+)'
    r'\?[^"]*"[^>]*?\ssrc='
)


def extract_image_urls(html: str) -> list:
    """Return the image URLs found in *html*, in document order.

    A URL is reported for every ``<img>`` tag whose ``data-original``
    attribute points at the anchorpost CDN, carries a query string, and is
    followed by a ``src`` attribute in the same tag. The query string is
    stripped from the returned URL. Duplicates are kept.
    """
    return IMG_URL_PATTERN.findall(html)


def main() -> None:
    """Fetch the listing page and download every image it references.

    Raises:
        requests.HTTPError: if the listing page answers with an error status.
        UnicodeDecodeError: if the listing page is not valid UTF-8.
    """
    # open() does not create missing parent directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with requests.Session() as session:
        # Send the browser user-agent on every request, image downloads included.
        session.headers.update(headers)

        page = session.get(url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
        html = page.content.decode("utf-8")
        # Debugging hint: dump `html` to a file when the pattern needs tuning.

        for index, image_url in enumerate(extract_image_urls(html)):
            print(image_url)
            response = session.get(image_url, timeout=REQUEST_TIMEOUT)
            if not response.ok:
                # Do not save an error page under a .jpg name; keep going.
                print(f"skipped {image_url}: HTTP {response.status_code}")
                continue
            with open(os.path.join(OUTPUT_DIR, f"{index}.jpg"), "wb") as f:
                f.write(response.content)


if __name__ == "__main__":
    main()


举报

相关推荐

0 条评论