0
点赞
收藏
分享

微信扫一扫

Soup爬百科并写入数据库

穆风1818 2022-08-02 阅读 98

# coding=utf-8

from bs4 import BeautifulSoup

from urllib import request

import re,pymysql

# Scrape every internal "/wiki" link from the English Wikipedia main page,
# print it, and store the (link text, absolute URL) pair in the MySQL table
# `weike` (columns `urlname`, `urlhref`).

url = "https://en.wikipedia.org/wiki/Main_Page"

# No trailing slash here: every matched href already starts with "/wiki",
# so appending it directly yields a well-formed URL (no "//wiki/...").
site_root = "https://en.wikipedia.org"

# Compiled once, outside the loop. Raw strings keep "\." a regex escape
# rather than an (invalid) Python string escape.
wiki_href = re.compile(r"^/wiki")
jpg_href = re.compile(r"\.(jpg|JPG)$")

sql = "insert into weike(urlname,urlhref) values(%s,%s)"

# Close the HTTP response as soon as the body has been read.
with request.urlopen(url) as response:
    res = response.read().decode("utf-8")

soup = BeautifulSoup(res, "html.parser")

# One connection for the whole run instead of reconnecting for every link.
connection = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    passwd="",
    db="test",
    charset="utf8mb4",
)
try:
    for line in soup.find_all("a", href=wiki_href):
        href = line["href"]
        # Skip links that point at JPEG files.
        if jpg_href.search(href):
            continue

        name = line.get_text()
        link = site_root + href
        print(name, "<---->", link)

        # Each row is its own transaction: a failed insert is reported and
        # rolled back without discarding rows that were already committed.
        try:
            # The cursor is closed by the context manager while the
            # connection is still open.
            with connection.cursor() as cours:
                cours.execute(sql, (name, link))
            connection.commit()
        except pymysql.MySQLError as e:
            print(e)
            connection.rollback()
finally:
    connection.close()

举报

相关推荐

0 条评论