import requests
from lxml import etree
class Sougou_Spider(object):
    """Fetch one listing index page and print selected fields of each entry.

    The page URL and the request headers are fixed in ``__init__``; ``run``
    downloads the page and hands the HTML text to ``parse_data_index``.
    """

    def __init__(self):
        # NOTE: the attribute is spelled ``uel`` (not ``url``); the spelling is
        # kept because code outside this class may read it.
        self.uel = "https://cs.lianjia.com/ershoufang/"
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0 "
        }

    def get_data_index(self, timeout=10):
        """Download the index page at ``self.uel``.

        Args:
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.
                Without one, a stalled connection would block indefinitely.

        Returns:
            The response body as text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: Not caught here; connection errors
                and timeouts propagate to the caller.
        """
        response = requests.get(url=self.uel, headers=self.headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None

    # Parse the data
    def parse_data_index(self, response):
        """Parse the index page HTML and print one line per listing.

        Args:
            response: HTML text as returned by ``get_data_index``. ``None``
                or an empty string (a failed download) is accepted and
                results in nothing being printed.

        Returns:
            None. Each listing's fields are printed as xpath result lists
            separated by ``----``.
        """
        # A failed download yields None; handing that to the parser would raise.
        if not response:
            return
        html = etree.HTML(response)
        # NOTE(review): the parser may hand back None for unparsable input
        # depending on the lxml version - guard before calling xpath on it.
        if html is None:
            return
        for item in html.xpath('//ul[@class="sellListContent"]//li'):
            title = item.xpath("./div/div/a/text()")  # title
            location = item.xpath('./div[1]/div[2]/div[1]/a/text()')  # location
            area = item.xpath('./div[1]/div[3]/div[1]/text()')  # house layout and area
            pay_close = item.xpath('./div[1]/div[4]/text()')  # number of followers
            advantage = item.xpath('./div[1]/div[5]/span/text()')  # advantages
            price = item.xpath('./div[1]/div[6]/div[1]/span/text()')  # total price
            square_metre = item.xpath('./div[1]/div[6]/div[2]/span//text()')  # price per square metre
            print(title, location, area, pay_close, advantage, price, square_metre, sep="----")

    def run(self):
        """Download the index page and print its listings.

        When the download does not return a 200 response, nothing is printed.
        """
        response = self.get_data_index()
        self.parse_data_index(response)
# Script entry point: build the spider and run one fetch-and-parse pass.
if __name__ == "__main__":
    Sougou_Spider().run()