0
点赞
收藏
分享

微信扫一扫

python语言使用隧道爬虫ip代码示例

做过大数据抓取的技术员应该都知道,正常市面上的爬虫ip只分为两种,一种是API提取式的,还有一种是账密形式的隧道模式。那么在做数据抓取业务的时候,用python语言如何使用隧道爬虫ip?需要学习爬虫的小白可以过来看看。

import base64

import time

import requests

from requests.adapters import HTTPAdapter

# Credentials for the tunnel proxy.
auth_key = "B8998D3E"  # AuthKey of the tunnel proxy
password = "xxxx"  # AuthPwd of the tunnel proxy

# Endpoints.
tunnel_server = "http://jshk.com.cn"  # address of the tunnel proxy
target_url = "https://d.qg.net/ip"  # target address to request

# Extra headers to send to the proxy, keyed by proxy URL. Starts empty and is
# rebuilt by reset_tunnel_proxy_headers().
proxy_headers = {}

# Both plain-HTTP and HTTPS requests go through the same tunnel server.
proxy = dict.fromkeys(("http", "https"), tunnel_server)

def encode_authorization(key, passwd):
    """Build the value of an HTTP ``Basic`` authorization header.

    Args:
        key: The user-name part of the credentials.
        passwd: The password part of the credentials.

    Returns:
        The string ``"Basic "`` followed by the base64 encoding of
        ``"<key>:<passwd>"``.
    """
    # base64 operates on bytes. Encode as UTF-8 instead of ASCII so that
    # credentials containing non-ASCII characters no longer raise
    # UnicodeEncodeError; pure-ASCII credentials yield exactly the same bytes.
    credentials = ("%s:%s" % (key, passwd)).encode("utf-8")
    # b64encode also returns bytes (always ASCII), so decode back to str.
    return "Basic %s" % base64.b64encode(credentials).decode("ascii")

def reset_tunnel_proxy_headers():
    """Rebuild the global proxy-header table from scratch.

    After this call the table holds a single entry, keyed by
    ``tunnel_server``, whose only header is ``Proxy-Authorization`` computed
    from ``auth_key`` and ``password``. Any headers added earlier are dropped.
    """
    global proxy_headers
    authorization = encode_authorization(auth_key, password)
    proxy_headers = {tunnel_server: {"Proxy-Authorization": authorization}}

def update_tunnel_proxy_headers(key, val):
    """Set one header in the tunnel server's entry of the global header table.

    Args:
        key: Header name to add or overwrite.
        val: Header value to store.

    Raises:
        KeyError: If the table has no entry for ``tunnel_server`` yet, i.e.
            reset_tunnel_proxy_headers() has not been called.
    """
    # The module-level dict is only mutated, never rebound, so no ``global``
    # declaration is required here.
    tunnel_headers = proxy_headers[tunnel_server]
    tunnel_headers[key] = val

def new_session():
    """Create a ``requests`` session that routes through TunnelProxyAdapter.

    Returns:
        A new ``requests.Session`` with one shared TunnelProxyAdapter mounted
        for both the ``https://`` and ``http://`` prefixes.
    """
    session = requests.Session()
    tunnel_adapter = TunnelProxyAdapter()
    # Same adapter instance for both schemes, mounted in the original order.
    for prefix in ('https://', 'http://'):
        session.mount(prefix, tunnel_adapter)
    return session

class TunnelProxyAdapter(requests.adapters.HTTPAdapter):
    """HTTP adapter that supplies custom headers for the tunnel proxy."""

    def proxy_headers(self, p):
        """Return the headers to send to the proxy identified by *p*.

        Args:
            p: The proxy URL, used as the key into the module-level
                ``proxy_headers`` table.

        Returns:
            The header dict registered for *p* in the module-level table, or
            the base adapter's default proxy headers when *p* has no entry
            (previously ``None`` was returned in that case).
        """
        # Inside this method the bare name ``proxy_headers`` resolves to the
        # module-level dict, not to this method.
        tunnel_headers = proxy_headers.get(p)
        if tunnel_headers is None:
            # Unknown proxy: keep the stock behaviour instead of returning
            # None, so the method always yields a header mapping.
            return super().proxy_headers(p)
        print("session with headers:", tunnel_headers)
        return tunnel_headers

def normal_tunnel():
    """Send a single request through the tunnel with only the auth header.

    The proxy headers are reset first, so the only proxy header in the table
    is ``Proxy-Authorization``.

    Sample output:

        request on normal mode
        session with headers: {'Proxy-Authorization': 'Basic xxxx'}
        request id: 1, code: 200, result: 140.250.149.229
    """
    reset_tunnel_proxy_headers()
    print("request on normal mode")
    # Use the session as a context manager so it is closed after the request
    # instead of being left open.
    with new_session() as se:
        resp = se.get(target_url, proxies=proxy)
    print("request id: 1, code: %s, result: %s" % (resp.status_code, resp.text))

def mark_tunnel():
    """Send repeated requests through one marked tunnel channel.

    The proxy headers are reset and then extended with ``Proxy-TunnelID``
    ("channel-1") and ``Proxy-TTL`` (10) before the session is created. One
    session is then reused for every request, with a one-second pause after
    each request.

    NOTE(review): the loop issues 11 requests (``range(1, 12)``) while the
    sample below lists 10 -- confirm which count is intended.
    NOTE(review): ``Proxy-TTL`` is presumably the lifetime of the channel's IP
    in seconds -- confirm against the proxy provider's documentation.

    Sample output:

        request with mark
        session with headers: {'Proxy-Authorization': 'Basic xxxx', 'Proxy-TunnelID': 'channel-1', 'Proxy-TTL': 10}
        request id: 1 , code: 200, result: 183.166.118.48
        request id: 2 , code: 200, result: 183.166.118.48
        request id: 3 , code: 200, result: 183.166.118.48
        request id: 4 , code: 200, result: 183.166.118.48
        request id: 5 , code: 200, result: 183.166.118.48
        request id: 6 , code: 200, result: 183.166.118.48
        request id: 7 , code: 200, result: 183.142.59.203
        request id: 8 , code: 200, result: 183.142.59.203
        request id: 9 , code: 200, result: 183.142.59.203
        request id: 10, code: 200, result: 123.54.235.89
    """
    reset_tunnel_proxy_headers()
    update_tunnel_proxy_headers("Proxy-TunnelID", "channel-1")
    update_tunnel_proxy_headers("Proxy-TTL", 10)

    # The context manager closes the session once the loop is done; the
    # original never closed it.
    with new_session() as se:
        print("request with mark")
        for i in range(1, 12):
            resp = se.get(target_url, proxies=proxy, headers={"Connection": "close"})
            print("request id: %-2s, code: %s, result: %s" % (i, resp.status_code, resp.text))
            time.sleep(1)

def _request_each_channel(channel_count=10):
    """Send one request per channel "channel-1" .. "channel-<channel_count>"."""
    for i in range(1, channel_count + 1):
        chan_id = "channel-%s" % i
        update_tunnel_proxy_headers("Proxy-TunnelID", chan_id)
        # A fresh session per channel, closed as soon as its request is done
        # (the original left every session open).
        with new_session() as se:
            resp = se.get(target_url, proxies=proxy, headers={"Connection": "close"})
        print("request id: %-2s, channel id: %s, code: %s, result: %s" % (i, chan_id, resp.status_code, resp.text))


def multi_channel_tunnel():
    """Request through ten tunnel channels, then through the same ten again.

    The proxy headers are reset once. Two identical passes over the channels
    "channel-1" to "channel-10" are made, separated by a 10-second sleep; each
    request uses its own session with ``Proxy-TunnelID`` set to the channel id.

    Sample output:

        request on multi channel
        request id: 1 , channel id: channel-1, code: 200, result: 183.155.88.224
        request id: 2 , channel id: channel-2, code: 200, result: 125.112.38.153
        request id: 3 , channel id: channel-3, code: 200, result: 183.155.89.125
        request id: 4 , channel id: channel-4, code: 200, result: 49.71.121.169
        request id: 5 , channel id: channel-5, code: 200, result: 115.210.67.220
        request id: 6 , channel id: channel-6, code: 200, result: 36.25.41.178
        request id: 7 , channel id: channel-7, code: 200, result: 180.125.162.116
        request id: 8 , channel id: channel-8, code: 200, result: 140.250.150.158
        request id: 9 , channel id: channel-9, code: 200, result: 121.227.102.227
        request id: 10, channel id: channel-10, code: 200, result: 49.88.106.198
        request id: 1 , channel id: channel-1, code: 200, result: 183.155.88.224
        request id: 2 , channel id: channel-2, code: 200, result: 125.112.38.153
        request id: 3 , channel id: channel-3, code: 200, result: 183.155.89.125
        request id: 4 , channel id: channel-4, code: 200, result: 49.71.121.169
        request id: 5 , channel id: channel-5, code: 200, result: 115.210.67.220
        request id: 6 , channel id: channel-6, code: 200, result: 36.25.41.178
        request id: 7 , channel id: channel-7, code: 200, result: 180.125.162.116
        request id: 8 , channel id: channel-8, code: 200, result: 140.250.150.158
        request id: 9 , channel id: channel-9, code: 200, result: 121.227.102.227
        request id: 10, channel id: channel-10, code: 200, result: 49.88.106.198
    """
    print("request on multi channel")
    reset_tunnel_proxy_headers()

    _request_each_channel()
    time.sleep(10)
    # The channel lifetime is fixed at 1 minute, so reusing an existing channel
    # within that minute still yields the same IP as before.
    _request_each_channel()

if __name__ == "__main__":
    # Run the three demos in order: plain tunnel, single marked channel,
    # then multiple channels.
    for demo in (normal_tunnel, mark_tunnel, multi_channel_tunnel):
        demo()

举报

相关推荐

0 条评论