Python 爬虫 XPath 实战爬取房价 - CFANZ编程社区

Python 爬虫：XPath 实战爬取房价
import requests
from lxml import etree


class Sougou_Spider(object):
    """Fetch one listing page and print the fields extracted from each entry.

    The page at ``self.uel`` is downloaded with ``requests``, parsed with
    ``lxml`` and every ``<li>`` under ``<ul class="sellListContent">`` is
    printed as one ``----``-separated line.
    """

    def __init__(self, timeout=10):
        """Set up the target URL, request headers and network timeout.

        Args:
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                argument, so a stalled connection cannot hang the script
                forever.
        """
        # NOTE: the attribute name ``uel`` (sic) is kept as-is because it is
        # part of the instance's public surface.
        self.uel = "https://cs.lianjia.com/ershoufang/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0 "
        }
        self.timeout = timeout

    def get_data_index(self):
        """Download the listing page.

        Returns:
            The response body as text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: Propagated unchanged on network
                failures, including the timeout expiring.
        """
        response = requests.get(
            url=self.uel, headers=self.headers, timeout=self.timeout
        )
        if response.status_code == 200:
            return response.text
        return None

    # Parse the data
    def parse_data_index(self, response):
        """Extract the listing fields from the page HTML and print them.

        Args:
            response: HTML text of the listing page. ``None`` or an empty
                string (for example after a non-200 download) is skipped
                silently instead of being handed to the parser.
        """
        # get_data_index() returns None on a non-200 status; parsing that
        # would raise, so there is simply nothing to print.
        if not response:
            return
        html = etree.HTML(response)
        # Defensive: skip if the parser produced no document tree.
        if html is None:
            return
        data_list = html.xpath('//ul[@class="sellListContent"]//li')
        for data in data_list:
            title = data.xpath("./div/div/a/text()")                          # title
            mojor = data.xpath('./div[1]/div[2]/div[1]/a/text()')             # location
            area = data.xpath('./div[1]/div[3]/div[1]/text()')                # layout and floor area
            pay_close = data.xpath('./div[1]/div[4]/text()')                  # number of followers
            advantage = data.xpath('./div[1]/div[5]/span/text()')             # selling points
            price = data.xpath('./div[1]/div[6]/div[1]/span/text()')          # total price
            square_metre = data.xpath('./div[1]/div[6]/div[2]/span//text()')  # price per square metre
            # Each xpath() call yields a list of text nodes; they are printed
            # as lists, one listing per line.
            print(title, mojor, area, pay_close, advantage, price, square_metre, sep="----")

    def run(self):
        """Download the page and print its parsed listings."""
        response = self.get_data_index()
        self.parse_data_index(response)


# Script entry point: build the spider and run one fetch-and-print pass.
if __name__ == "__main__":
    Sougou_Spider().run()
0 条评论