0
点赞
收藏
分享

微信扫一扫

Selenium无头模式被检测

松鼠树屋 2022-04-29 阅读 76

防检测

# coding: utf-8
import time
import re
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Page to load. Replace with the site actually being scraped.
url = 'https://www.baidu.com'

# Build the Chrome options: a headless session configured to hide the usual
# automation markers that sites use to detect Selenium.
option = webdriver.ChromeOptions()
option.add_argument('--headless')
option.add_argument('--disable-gpu')
option.add_argument('--no-sandbox')
option.add_argument('--disable-dev-shm-usage')
option.add_argument('log-level=3')
# Remove Chrome's automation trace.
option.add_argument('--disable-blink-features=AutomationControlled')
option.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
# Explicit window size: workaround for headless mode, where the window
# cannot be maximized.
option.add_argument("--window-size=1920,1050")
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)

# Start the browser with the options above.
driver = webdriver.Chrome(options=option)
try:
    driver.get(url)

    # Poll every 0.5 s, for at most 10 s, until an element with class
    # "news-list" is present. until() either returns the element or raises
    # TimeoutException, so no truthiness check on its result is needed.
    # NOTE(review): confirm that "news-list" actually exists on the target
    # page; otherwise this wait always times out.
    WebDriverWait(driver, 10, 0.5).until(
        EC.presence_of_element_located((By.CLASS_NAME, "news-list"))
    )

    pageSource = driver.page_source
    print(pageSource)

    # A screenshot is the only way to actually see the headless result:
    # driver.save_screenshot('./ch.png')
finally:
    # Always shut the browser down, even when the page load or the wait
    # fails, so no headless Chrome / chromedriver process is left behind.
    driver.quit()

举报

相关推荐

0 条评论