0
点赞
收藏
分享

微信扫一扫

Python 爬虫：XPath 实战爬取房价

import requests
from lxml import etree


class Sougou_Spider(object):
    """Fetch one listing page and print selected fields of every listing.

    The page at ``self.uel`` is downloaded with ``requests`` and parsed with
    ``lxml``; each ``<li>`` under ``ul.sellListContent`` is printed as one line.
    """

    def __init__(self):
        # NOTE: the attribute is spelled ``uel`` (sic); the name is kept so
        # that existing code reading it keeps working.
        self.uel = "https://cs.lianjia.com/ershoufang/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0 "
        }
        # Seconds to wait for the server; without a timeout ``requests`` can
        # block indefinitely on a stalled connection.
        self.timeout = 10

    def get_data_index(self):
        """Download the index page.

        Returns:
            The response body as text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: on connection errors or timeout.
        """
        response = requests.get(
            url=self.uel, headers=self.headers, timeout=self.timeout
        )
        if response.status_code == 200:
            return response.text
        return None

    def parse_data_index(self, response):
        """Parse the page HTML and print one line per listing.

        Args:
            response: HTML text of the index page. ``None`` or an empty
                string (e.g. after a failed download) is ignored.
        """
        # get_data_index() returns None on a non-200 status; there is nothing
        # to parse in that case, so bail out instead of crashing in lxml.
        if not response:
            return

        html = etree.HTML(response)
        if html is None:
            # lxml may yield no tree for input without any markup.
            return

        data_list = html.xpath('//ul[@class="sellListContent"]//li')
        for data in data_list:
            title = data.xpath("./div/div/a/text()")  # title
            mojor = data.xpath('./div[1]/div[2]/div[1]/a/text()')  # location
            area = data.xpath('./div[1]/div[3]/div[1]/text()')  # layout and floor area
            pay_close = data.xpath('./div[1]/div[4]/text()')  # number of followers
            advantage = data.xpath('./div[1]/div[5]/span/text()')  # selling points
            price = data.xpath('./div[1]/div[6]/div[1]/span/text()')  # total price
            square_metre = data.xpath('./div[1]/div[6]/div[2]/span//text()')  # price per square metre
            print(title, mojor, area, pay_close, advantage, price, square_metre, sep="----")

    def run(self):
        """Download the index page and print its listings."""
        response = self.get_data_index()
        self.parse_data_index(response)


if __name__ == '__main__':
    # Script entry point: build the spider and do one fetch-and-print pass.
    Sougou_Spider().run()

举报

相关推荐

0 条评论