import time

from lxml.html import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
# Request headers with a placeholder User-Agent value; not referenced by the
# code visible in this file.
headers = {'User-Agent': 'USER-AGENT'}
def test_requests(url):
    """Load a search page, click every result link, and print each opened page's title.

    Relies on two module-level globals assigned by the script entry point:
    ``browser`` (a Selenium WebDriver instance) and ``send_content`` (the
    query text appended verbatim to ``url``).

    Args:
        url: Search URL prefix, ending where the query text should go.

    Returns:
        None. The clicked elements, the number of extra windows, and the
        extracted titles are printed to stdout.
    """
    browser.get(url=f'{url}{send_content}')
    # Remember the results window so windows opened by the clicks can be
    # told apart from it afterwards.
    results_window = browser.current_window_handle

    # The find_element_by_* helpers were removed in Selenium 4.3; the
    # By-based calls below work on both Selenium 3.x and 4.x.
    news_list = browser.find_element(By.CLASS_NAME, 'news-list')
    for heading in news_list.find_elements(By.TAG_NAME, 'h3'):
        for link in heading.find_elements(By.TAG_NAME, 'a'):
            print(link)
            time.sleep(1)  # pause briefly between clicks
            link.click()

    # WebDriver does not guarantee the order of window handles, so exclude
    # the results window by identity rather than by slicing off index 0.
    opened_windows = [
        handle for handle in browser.window_handles
        if handle != results_window
    ]
    print(len(opened_windows))

    for handle in opened_windows:
        browser.switch_to.window(handle)
        tree = etree.HTML(browser.page_source)
        # Absolute path to the <h1> text; yields an empty list when the page
        # layout does not match.
        title = tree.xpath('/html/body/div[2]/div[2]/div[2]/div/div[1]/h1/text()')
        print(title)
if __name__ == '__main__':
    # These names are intentionally module-level: test_requests() reads
    # `browser` and `send_content` as globals.
    first_url = 'https://weixin.sogou.com/weixin?ie=utf8&s_from=input&_sug_=n&_sug_type_=&type=2&query='
    send_content = '企业预警通·每日风险预警早报2024年8月13日星期二'
    browser = webdriver.Chrome()
    try:
        test_requests(first_url)
    finally:
        # quit() closes every window and ends the driver session, even when
        # the scrape raises; close() would only close the focused window.
        browser.quit()