from lxml import etree
import time
import random
import requests
# Request headers sent with every page fetch. The User-Agent imitates a
# desktop Chrome browser; the product token must be spelled "Mozilla" and
# the parts separated as browsers format them, otherwise the string is
# trivially recognizable as hand-written.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.61 Safari/537.36'
    ),
}
def proprcessing(strs):
    """Concatenate text fragments with all whitespace removed.

    Each item of *strs* is split on whitespace (``str.split()`` with no
    argument, so spaces, newlines, tabs and other Unicode whitespace are
    all dropped) and the remaining pieces are glued back together; the
    cleaned items are then joined in order into one string.

    Args:
        strs: An iterable of strings, e.g. the list returned by an XPath
            ``text()`` query.

    Returns:
        A single string containing no whitespace; ``''`` for empty input.
    """
    # One join over a generator instead of repeated ``s = s + n``, which
    # copies the accumulated string on every iteration.
    return ''.join(''.join(fragment.split()) for fragment in strs)
def get_movie_info(url):
    """Fetch one listing page and print the details of every movie on it.

    The page at *url* is downloaded with the module-level ``headers``,
    parsed as HTML, and each ``<div class="info">`` element is treated as
    one movie entry. For every entry the name, director/cast text, score,
    number of ratings and one-line summary are printed to stdout.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. Results are written to stdout only.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            does not complete within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url=url, headers=headers, timeout=10)
    html = etree.HTML(response.text)

    for div in html.xpath('//div[@class="info"]'):
        # The title is spread over several <span> nodes; merge them.
        names = div.xpath('./div[@class="hd"]/a//span/text()')
        name = proprcessing(names)

        # ``text()`` (with parentheses) selects text nodes; a bare ``text``
        # would look for child elements named <text> and match nothing.
        infos = div.xpath('./div[@class="bd"]/p/text()')
        info = proprcessing(infos)

        # These three stay as the raw lists returned by xpath(), so an
        # entry that lacks the node prints ``[]`` instead of failing.
        score = div.xpath('./div[@class="bd"]/div/span[2]/text()')
        evaluation = div.xpath('./div[@class="bd"]/div/span[4]/text()')
        summary = div.xpath('./div[@class="bd"]/p[@class="quote"]/span/text()')

        print('电影名称:', name)
        print('导演与演员:', info)
        print('电影评分:', score)
        print('评价人数:', evaluation)
        print('电影总结:', summary)
        print('_________________')
# Script entry point. The guard must compare against '__main__' -- the
# value Python assigns to __name__ when a file is executed directly.
if __name__ == '__main__':
    # The listing is paginated 25 entries per page; walk start=0..225.
    for i in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={page}&filter='.format(page=i)
        get_movie_info(url)
        # Random 1-3 second pause between pages to avoid hammering the site.
        time.sleep(random.randint(1, 3))