# coding=utf-8
from bs4 import BeautifulSoup
from urllib import request
import re,pymysql
# Scrape the Wikipedia main page and store every internal "/wiki" link
# (anchor text + absolute URL) in the MySQL table `weike`.
#
# NOTE(review): the original file had lost its indentation; the nesting below
# is reconstructed on the assumption that the print + insert ran once for
# every non-image link -- confirm against the author's intent.

url = "https://en.wikipedia.org/wiki/Main_Page"
# Site root WITHOUT a trailing slash: the hrefs matched below already start
# with "/wiki", so appending them to "https://en.wikipedia.org/" produced
# URLs of the form "https://en.wikipedia.org//wiki/...".
base_url = "https://en.wikipedia.org"

# Patterns are compiled once, as raw strings, instead of per iteration.
wiki_href = re.compile(r"^/wiki")
image_href = re.compile(r"\.(jpg|JPG)$")

sql = "insert into weike(urlname,urlhref) values(%s,%s)"

# Close the HTTP response deterministically once the body has been read.
with request.urlopen(url) as response:
    res = response.read().decode("utf-8")
soup = BeautifulSoup(res, "html.parser")

# One connection for the whole run rather than one per link.
connection = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    passwd="",
    db="test",
    charset="utf8mb4",
)
try:
    for line in soup.find_all("a", href=wiki_href):
        href = line["href"]
        # Skip links that point at JPEG files.
        if image_href.search(href):
            continue

        link_text = line.get_text()
        link_url = base_url + href
        print(link_text, "<---->", link_url)

        try:
            # The cursor is released by the `with` block before the
            # connection is touched again, and is never referenced unbound.
            with connection.cursor() as cours:
                cours.execute(sql, (link_text, link_url))
            connection.commit()
        except Exception as e:
            # Deliberate best-effort: report the failed row, undo it, and
            # carry on with the remaining links.
            print(e)
            connection.rollback()
finally:
    connection.close()