0
点赞
收藏
分享

微信扫一扫

requests爬取ip代理

alanwhy 2022-04-07 阅读 95
python爬虫

1、确定需求,找到URL

现在很多网站给我们显示的页面是通过两个网页来显示的,因此会有两个URL

一个是页面框架,一个是里面的数据,我们可以用谷歌浏览器进入页面

=》右击 =》检查 =》选择 Network =》按 Ctrl + F 搜索页面中我们需要的数据(例如 95.0.66.86)=》点击对应的资源包 =》选择 Headers =》其中的 Request URL 就是存储数据的 URL

2、进行数据的爬取

import json
import os
import time

import requests

start = time.time()  # wall-clock start, used to report the total run time
url = 'http://proxylist.fatezero.org/proxy.list'  # endpoint that serves the raw proxy data
# Browser-like User-Agent header sent with the download request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}

# Download the proxy list. The timeout keeps the script from hanging forever
# on an unresponsive server, and raise_for_status() stops early on an HTTP
# error instead of passing an error page on to the parsing code.
list_reply = requests.get(url=url, headers=headers, timeout=10)
list_reply.raise_for_status()
response = list_reply.text  # body of the reply as text

def check_ip(proxies_list, timeout=2):
    """Return the proxies from *proxies_list* that answer a probe request.

    Each item is a mapping in the form requests accepts for ``proxies``,
    e.g. ``{'http': '1.2.3.4:80'}``. A proxy is kept when a GET through it
    returns HTTP 200 within *timeout* seconds.

    Args:
        proxies_list: iterable of proxy mappings to probe.
        timeout: per-request timeout in seconds (default 2).

    Returns:
        A list of the mappings that passed, in their original order.
    """
    can_use = []
    for proxy in proxies_list:
        # requests chooses a proxy by the scheme of the target URL, so the
        # probe URL must use the same scheme as the key of the mapping;
        # otherwise the proxy is bypassed and the check passes trivially.
        scheme = next(iter(proxy), 'http')
        probe_url = '%s://www.baidu.com' % scheme
        try:
            response = requests.get(url=probe_url, proxies=proxy, timeout=timeout)
        except requests.RequestException:
            # Only request failures are treated as "proxy unusable";
            # KeyboardInterrupt and SystemExit still propagate.
            print('当前代理:%s响应超时,不合格'%proxy)
            continue
        if response.status_code == 200:
            can_use.append(proxy)
            print('当前代理:%s,---检测通过---' % proxy)
    return can_use

proxy_list = []  # proxy mappings built from the downloaded text

# Each non-blank line is expected to hold one JSON object with 'host', 'port'
# and 'type' keys (NOTE(review): confirm the feed is strict JSON).
# json.loads is used instead of eval so that text fetched from the network is
# only parsed as data and never executed as code.
for line in response.splitlines():
    line = line.strip()
    if not line:
        continue  # blank line, e.g. produced by a trailing newline
    try:
        record = json.loads(line)
        host = str(record['host'])
        port = str(record['port'])
        scheme = record['type']
    except (ValueError, KeyError, TypeError):
        # Malformed or incomplete record: skip it rather than abort the run
        continue
    proxy_dict = {scheme: host + ':' + port}  # mapping in the form requests expects
    proxy_list.append(proxy_dict)
    print('爬取ip代理 {0} 成功,第 {1} 个'.format(proxy_dict, len(proxy_list)))
count = len(proxy_list)  # total number of proxies collected
print(proxy_list)  # show the full list
can_use = check_ip(proxy_list)  # keep only the proxies that pass the check

# Make sure the output directory exists. makedirs also creates missing parent
# directories, and exist_ok=True avoids an error when it is already there.
os.makedirs('D:/studySpider/proxys', exist_ok=True)
# Save the usable proxies; the with-block closes the file on exit
with open('D:/studySpider/proxys/proxy1.txt', "w") as f:
    f.write('{0}'.format(can_use))
print('保存高质量代理成功:{0}'.format(can_use))
end = time.time()  # wall-clock end
# Report the size of proxy_list itself so the total always matches the number
# of proxies that were actually collected.
print('ip代理爬取成功,一共爬取 {0} 个,高质量代理 {1} 个 爬取时间 {2}秒'.format(len(proxy_list), len(can_use), end-start))

爬取到的代理ip列表

[{'http': '64.225.8.192:80'}, {'http': '168.8.209.253:80'}, {'http': '51.81.80.44:80'}, {'http': '3.14.49.83:80'}, {'http': '3.211.17.212:80'}, {'http': '169.57.1.85:8123'}, {'http': '78.47.182.10:80'}, {'http': '205.155.45.139:3128'}, {'http': '71.19.144.122:80'}, {'http': '158.255.215.50:9005'}, {'http': '194.5.193.183:80'}, {'http': '158.255.212.55:10434'}, {'http': '51.91.157.66:80'}, {'http': '107.151.182.247:80'}, {'http': '190.26.201.194:8080'}, {'http': '20.47.108.204:8888'}, {'http': '209.165.163.187:3128'}, {'http': '13.232.64.230:80'}, {'http': '41.237.141.23:1981'}, {'http': '80.48.119.28:8080'}, {'http': '154.236.189.22:1981'}, {'http': '47.96.226.137:3128'}, {'http': '120.52.180.137:800'}, {'http': '222.186.62.55:9015'}, {'http': '120.52.180.139:800'}, {'http': '133.130.91.24:3389'}, {'http': '114.67.124.25:9015'}, {'http': '114.230.138.55:9015'}, {'http': '222.186.62.47:9015'}, {'http': '123.138.34.17:800'}, {'http': '165.16.60.1:8080'}, {'http': '188.225.85.65:8888'}, {'http': '43.243.245.10:9015'}, {'http': '61.139.244.47:9015'}, {'http': '222.186.53.54:9015'}, {'http': '27.128.217.55:9015'}, {'https': '190.26.201.194:8080'}, {'http': '61.139.244.46:9015'}, {'http': '120.253.244.81:800'}, {'http': '122.193.131.166:9015'}, {'http': '101.70.153.3:9015'}, {'http': '61.162.227.82:9015'}, {'http': '118.193.35.175:80'}, {'http': '222.186.142.10:9015'}, {'http': '119.36.50.21:9015'}, {'http': '222.186.62.54:9015'}, {'http': '114.67.124.34:9015'}, {'http': '222.186.62.43:9015'}, {'http': '114.67.124.3:9015'}, {'http': '114.67.124.40:9015'}, {'http': '114.230.138.54:9015'}, {'http': '61.162.227.87:9015'}, {'http': '114.67.124.27:9015'}, {'http': '101.70.153.4:9015'}, {'http': '222.186.62.25:9015'}, {'http': '119.36.50.14:9015'}, {'http': '119.36.50.18:9015'}, {'http': '176.48.51.184:8081'}, {'http': '222.186.62.48:9015'}, {'http': '111.31.94.112:9015'}, {'http': '222.186.142.150:9015'}, {'http': '61.162.227.86:9015'}, {'http': '114.67.162.18:9015'}, 
{'http': '114.230.138.52:9015'}, {'http': '121.46.230.129:800'}, {'http': '114.67.124.28:9015'}, {'http': '222.186.62.41:9015'}, {'http': '119.36.50.19:9015'}, {'http': '114.67.162.43:9015'}, {'http': '113.106.100.179:9015'}, {'http': '114.67.124.36:9015'}, {'http': '61.162.227.83:9015'}, {'http': '113.194.140.213:8085'}, {'http': '222.186.62.42:9015'}, {'http': '111.31.94.111:9015'}, {'http': '114.67.162.16:9015'}, {'http': '114.67.162.44:9015'}, {'http': '121.226.246.76:9015'}, {'http': '111.31.94.75:9015'}, {'http': '101.72.199.62:9015'}, {'http': '114.230.138.61:9015'}, {'http': '61.139.244.41:9015'}, {'http': '111.31.94.77:9015'}, {'http': '111.31.94.115:9015'}, {'http': '121.226.246.77:9015'}, {'http': '111.31.94.2:9015'}, {'http': '27.128.217.41:9015'}, {'http': '43.243.245.24:9015'}, {'http': '114.67.162.14:9015'}, {'http': '124.90.209.215:8085'}, {'http': '222.186.142.149:9015'}, {'http': '119.36.50.11:9015'}, {'http': '116.30.5.254:7890'}, {'http': '114.230.138.66:9015'}, {'http': '121.226.246.74:9015'}, {'http': '114.67.162.3:9015'}, {'http': '114.67.162.41:9015'}, {'http': '113.106.100.181:9015'}, {'http': '43.243.245.13:9015'}, {'http': '61.139.244.49:9015'}, {'http': '8.210.83.33:80'}, {'http': '27.128.217.57:9015'}, {'http': '43.243.245.7:9015'}, {'http': '43.243.245.5:9015'}, {'http': '114.230.138.51:9015'}, {'http': '43.243.245.3:9015'}, {'http': '111.31.94.78:9015'}, {'http': '61.139.244.44:9015'}, {'http': '43.243.245.20:9015'}, {'http': '61.139.244.42:9015'}, {'http': '43.243.245.23:9015'}, {'http': '111.31.94.113:9015'}, {'http': '1.180.17.173:9015'}, {'http': '223.99.254.153:9015'}, {'http': '43.243.245.18:9015'}, {'http': '171.38.91.79:8085'}, {'http': '111.31.94.79:9015'}, {'http': '223.99.254.133:9015'}, {'http': '112.6.117.178:8085'}, {'http': '111.31.94.120:9015'}, {'http': '43.243.245.26:9015'}, {'http': '111.31.94.72:9015'}, {'http': '223.99.254.150:9015'}, {'http': '43.243.245.6:9015'}, {'http': '49.85.99.254:10081'}, {'http': 
'171.38.27.229:8085'}, {'http': '27.128.217.44:9015'}, {'http': '61.139.244.48:9015'}, {'http': '114.230.138.58:9015'}, {'http': '27.128.217.51:9015'}, {'http': '222.186.62.44:9015'}, {'http': '61.139.244.45:9015'}, {'http': '61.150.74.55:9015'}, {'http': '1.180.17.151:9015'}, {'http': '183.224.18.53:9015'}, {'http': '61.139.244.50:9015'}, {'http': '43.243.245.11:9015'}, {'http': '27.128.217.60:9015'}, {'http': '27.128.217.61:9015'}, {'http': '119.36.50.12:9015'}, {'http': '124.225.199.49:9015'}, {'http': '111.31.94.73:9015'}, {'http': '190.106.114.36:999'}, {'http': '114.230.138.56:9015'}, {'http': '114.230.138.65:9015'}, {'http': '182.106.172.51:9015'}, {'http': '61.139.244.43:9015'}, {'http': '194.233.86.75:45232'}, {'http': '47.91.149.178:8443'}, {'http': '47.241.17.77:3128'}, {'http': '114.230.138.50:9015'}, {'http': '222.186.62.45:9015'}, {'http': '27.128.217.56:9015'}, {'http': '120.39.198.45:9015'}, {'http': '222.186.184.38:9015'}, {'http': '220.168.132.28:9015'}, {'http': '111.31.94.114:9015'}, {'http': '43.243.245.8:9015'}, {'http': '121.226.246.79:9015'}, {'http': '119.123.101.111:12345'}, {'http': '121.226.246.71:9015'}, {'http': '110.43.80.147:800'}, {'http': '114.230.138.57:9015'}, {'http': '114.230.138.60:9015'}, {'http': '49.86.47.35:9015'}, {'http': '111.31.94.118:9015'}, {'http': '27.128.217.52:9015'}, {'http': '120.39.198.51:9015'}, {'http': '27.128.217.58:9015'}, {'https': '119.123.101.111:12345'}, {'http': '1.180.17.154:9015'}, {'http': '61.7.131.157:8080'}, {'http': '122.241.6.65:8888'}, {'http': '114.230.138.59:9015'}, {'https': '194.233.86.75:45232'}, {'http': '186.125.235.101:999'}, {'http': '61.150.74.59:9015'}, {'http': '122.228.93.72:9015'}, {'http': '110.43.80.146:800'}, {'http': '103.154.230.78:5678'}, {'https': '116.30.5.254:7890'}, {'http': '171.88.8.104:7890'}, {'https': '122.241.6.65:8888'}, {'http': '154.236.184.77:1981'}, {'http': '94.158.177.185:1080'}, {'http': '103.73.194.2:80'}, {'http': '167.172.239.13:3128'}, {'http': 
'156.214.145.147:1981'}, {'http': '156.193.79.94:8080'}, {'http': '121.235.66.58:8118'}, {'http': '14.17.106.202:3128'}, {'http': '114.55.84.12:30001'}, {'http': '117.20.216.218:8080'}, {'http': '194.233.69.90:443'}, {'http': '200.71.110.233:999'}, {'http': '197.33.152.12:8080'}, {'http': '103.213.213.22:83'}, {'https': '14.17.106.202:3128'}, {'http': '41.254.53.70:1981'}, {'http': '103.84.235.2:8080'}, {'http': '137.59.12.41:8089'}, {'http': '180.243.180.87:8080'}, {'http': '187.142.14.161:999'}, {'http': '43.255.113.232:8082'}, {'http': '187.217.54.84:80'}, {'http': '103.28.113.83:3128'}, {'http': '95.0.66.86:8080'}, {'http': '177.93.50.94:999'}, {'http': '177.234.230.236:8588'}, {'http': '213.230.121.63:8080'}, {'http': '176.222.63.242:8080'}, {'http': '181.224.255.39:8080'}, {'http': '158.140.169.101:10000'}, {'http': '24.152.53.60:999'}, {'http': '193.19.96.171:8080'}, {'http': '89.208.35.81:3128'}, {'http': '112.6.117.135:8085'}, {'https': '193.19.96.171:8080'}]

打卡第67天,对python大数据感兴趣的朋友欢迎一起讨论、交流,请多指教!

举报

相关推荐

0 条评论