0
点赞
收藏
分享

微信扫一扫

BeautifulSoup框架常用方法

耳一文 2022-08-02 阅读 3

#__author__ = 'DouYunQian'

# NOTE(review): an encoding declaration is only honored on the first or second
# line of a file (PEP 263); at this position the line below is a plain comment.
#coding=utf-8

# Sample HTML document parsed by the BeautifulSoup examples in this script.
# It holds a <title>, one "title" paragraph, one "story" paragraph with three
# "sister" links (ids link1..link3; the first link contains only an HTML
# comment), and one "story2" paragraph. The <body> and <html> tags are left
# unclosed.
html = """

<html><head><title>The Dormouse's story</title></head>

<body>

<p class="title" name="dromouse"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were

<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,

<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and

<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;

and they lived at the bottom of a well.</p>

<p class="story2">...</p>

"""

import re

from bs4 import BeautifulSoup



# Parse the sample document with the standard-library HTML parser backend.
soup = BeautifulSoup(html, "html.parser")

# --- Attribute-style navigation: soup.<tag> is the first tag with that name.
print(soup.title)  # <title>The Dormouse's story</title>
print(soup.title.string)  # The Dormouse's story
print(soup.title.parent)  # <head><title>The Dormouse's story</title></head>
print(soup.p)  # first <p> tag: <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
print(soup.a)  # <a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>
print(soup.p['class'])  # ['title']

# --- Searching with find_all() / find().
print(soup.find_all("a"))  # list of every <a> tag in the document

# find() returns None when nothing matches; these lookups rely on id="link2"
# being present in the sample document above.
link2 = soup.find(id="link2")
print(link2)  # <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
print(link2.string)  # Lacie -- .string is unreliable once the tag contains nested tags
print(link2.get_text())  # Lacie

print(soup.find("p", class_="title"))  # <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
print(soup.find("p", {"class": "story2"}))  # <p class="story2">...</p>
# get_text() collects all text inside the tag, however many nested tags it has.
print(soup.find("p", {"class": "story"}).get_text())

print("===================")
# A compiled regex filters on tag names: every tag whose name starts with "b".
for tag in soup.find_all(re.compile("^b")):
    # The body must be indented under the loop; without the indent the script
    # does not compile.
    print(tag.name)

print("=============")  # find all tags whose attribute matches a pattern
all_href = soup.find_all(href=re.compile("http://example.com/.+"))
# Show the result so this section of the demo produces output like the others.
print(all_href)

举报

相关推荐

0 条评论