Python Learning Diary, August 21, 2023

软件共享软件 2023-08-21

import requests
import os
import re
from time import sleep
from bs4 import BeautifulSoup

# list page of the "mei" category on netbian.com
url = 'http://www.netbian.com/mei/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}
resp = requests.get(url, headers=headers)
resp.encoding = 'gbk'  # the site is served as gbk; set it so image titles decode correctly
resp_html = resp.text
soup = BeautifulSoup(resp_html, 'lxml')
urls_a = soup.find_all('img')
for url_a in urls_a:
    # thumbnail images have 'small' in their src; other <img> tags are skipped
    if 'small' in url_a['src']:
        picture_url = url_a['src']
        picture_title = url_a['alt']
        # split on digit runs so the trailing number ends up in picture_url_list[-2]
        picture_url_list = re.split(r'(\d+)', picture_url)
        # drop 'small' and the trailing number to build the full-size image URL
        picture_url = picture_url.replace('small', '')
        picture_url = picture_url.replace(picture_url_list[-2], '')
        # print(picture_title, picture_url)
        picture = requests.get(picture_url, headers=headers).content
        with open(picture_title + '.jpg', 'wb') as f:
            f.write(picture)
        print('Image ' + picture_title + ' downloaded')
        sleep(1)  # brief pause between downloads to go easy on the server
    # try:
    #     href = url_a['href']
    #     title = url_a['title']
    #     if '/desk' in href:
    #         # print(url_a[1])
    #         # href_url = f'http://www.netbian.com{href}'
    #         # picture = requests.get(href_url,headers=headers).content
    #         # # print(picture)
    #         # with open(title+'.jpg','wb')as f:
    #         #     f.write(picture)       
    # except:
    #     continue
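For reference, the URL rewrite in the loop assumes that thumbnail src values embed the word 'small' plus a trailing run of digits just before the extension. The made-up URL below is only a sketch of that assumption, not an actual path taken from the site:

import re

# hypothetical thumbnail URL, for illustration only
thumb = 'http://img.netbian.com/file/2023/0821/small0a1b2c3d1692576000.jpg'
parts = re.split(r'(\d+)', thumb)  # capture digit runs; parts[-2] is the trailing number
full = thumb.replace('small', '').replace(parts[-2], '')
print(full)  # http://img.netbian.com/file/2023/0821/0a1b2c3d.jpg

If the site ever changes this naming scheme, the string surgery will quietly produce a broken link, so the commented-out print of picture_title and picture_url is a handy sanity check before downloading.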

Today I redid the image download; I'll continue tomorrow.
