根据 ISBN 从豆瓣提取书籍信息,基于 Python 3.8
#python 3.8
import requests
import re
def find3(txt, str1, str2, str3, str4):
    """Extract the text enclosed by two delimiters inside a ``find2`` snippet.

    The page text is first narrowed with ``find2(txt, str1, str2)``; the
    result is then trimmed to the part that follows the first occurrence of
    ``str3`` and precedes the next occurrence of ``str4``.

    Args:
        txt: Full text to search.
        str1: Start marker forwarded to ``find2``.
        str2: End marker forwarded to ``find2``.
        str3: Opening delimiter inside the snippet; it is not included in
            the result.
        str4: Closing delimiter, searched for after ``str3``; it is not
            included in the result.

    Returns:
        The enclosed text. A delimiter that is not present is simply not
        applied, so nothing is cut off on that side.
    """
    snippet = find2(txt, str1, str2)

    opening = snippet.find(str3)
    if opening != -1:
        # Skip the whole delimiter, not just its first character.
        snippet = snippet[opening + len(str3):]

    closing = snippet.find(str4)
    if closing != -1:
        # Guarded because slicing with find()'s -1 would silently drop the
        # last character of the snippet.
        snippet = snippet[:closing]

    return snippet
def find2(txt, str1, str2, offset=11, window=200):
    """Return the text that follows marker ``str1`` up to terminator ``str2``.

    Args:
        txt: Full text to search.
        str1: Start marker. It is used as a regular-expression pattern
            (passed to ``re.search``); only its first match is considered.
        str2: Terminator, searched for as a plain substring inside the
            window that follows the marker.
        offset: Number of characters to skip, counted from the start of the
            marker match, before the result begins. Defaults to 11, the
            value that used to be hard-coded.
        window: End of the inspected window, also counted from the start of
            the marker match. Defaults to 200, the value that used to be
            hard-coded.

    Returns:
        The slice of the window before ``str2``; the whole window when
        ``str2`` does not occur in it; ``""`` when ``str1`` does not match.
    """
    match = re.search(str1, txt)
    if match is None:
        # Missing marker: report "nothing found" instead of an IndexError.
        return ""

    anchor = match.start()
    snippet = txt[anchor + offset:anchor + window]

    end = snippet.find(str2)
    if end == -1:
        # Slicing with -1 would drop the last character of the window.
        return snippet
    return snippet[:end]
def ifind(somestr, sub):
    """Return the start index of every match of ``sub`` in ``somestr``.

    Args:
        somestr: Text to search.
        sub: Pattern to look for. It is passed to ``re.finditer`` unchanged,
            so it is interpreted as a regular expression, not as a literal
            substring.

    Returns:
        A list of start offsets in left-to-right order; empty when there is
        no match.
    """
    return [match.start() for match in re.finditer(sub, somestr)]
def jwrite(out_file, str0):
    """Write ``str0`` to two text files, one UTF-8 and one GBK encoded.

    Args:
        out_file: Path prefix. The UTF-8 copy goes to ``out_file + '.txt'``
            and the GBK copy to ``out_file + '_gbk.txt'``. Existing files
            are overwritten.
        str0: Text to write.

    Characters that cannot be represented in GBK are dropped from the GBK
    copy (``errors='ignore'``).
    """
    # ``with`` closes each file even if the write raises.
    with open(out_file + '.txt', 'w', encoding='utf-8') as utf8_file:
        utf8_file.write(str0)

    # Encode explicitly and write bytes so the 'ignore' error policy applies.
    with open(out_file + '_gbk.txt', 'wb') as gbk_file:
        gbk_file.write(str0.encode('gbk', 'ignore'))
def jfind(str0, str1, str2):
    """Return the part of ``str0`` from ``str1`` up to the following ``str2``.

    Args:
        str0: Text to search.
        str1: Start marker; it is included at the beginning of the result.
        str2: End marker, searched for only after the end of ``str1``; it is
            not included in the result.

    Returns:
        The slice starting at ``str1`` and ending just before ``str2``;
        everything from ``str1`` to the end of ``str0`` when ``str2`` does
        not follow; ``""`` when ``str1`` is not present.
    """
    start = str0.find(str1)
    if start == -1:
        return ""

    # Look for the end marker only after the start marker, so an earlier
    # occurrence of str2 cannot produce an empty or reversed slice.
    end = str0.find(str2, start + len(str1))
    if end == -1:
        return str0[start:]
    return str0[start:end]
# Read the ISBN to look up from the first line of ./ISBN.txt.
with open("./ISBN.txt") as f:
    # readline() keeps the trailing newline; strip it so it does not become
    # part of the request URL.
    isbn = f.readline().strip()
# Example value for a quick manual run: isbn = '9787111608974'
if not isbn:
    raise SystemExit("ISBN.txt does not contain an ISBN on its first line")

# NOTE: despite its name, ``html`` holds the lookup URL, not page markup.
html = "https://douban.com/isbn/" + isbn
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.67'
}
# A timeout keeps the script from hanging forever on an unresponsive server.
h = requests.get(html, headers=headers, timeout=10)
# Fail with a clear HTTP error instead of scraping an error page.
h.raise_for_status()
h.encoding = 'utf-8'
txt = h.text
# Uncomment to dump the raw page for debugging: jwrite('out', txt)

# Each field is printed and also saved to <name>.txt / <name>_gbk.txt.

# Title: the double-quoted value between 'og:title' and 'og:description'.
a = find3(txt, 'og:title', 'og:description', '"', '"')
print('书名:' + a)
jwrite('title', a)

# Author: the text between the first '>' and the next '<' after the label.
a = find3(txt, '作者</span>', '<br', '>', '<')
print('作者:' + a)
jwrite('author', a)

# Publisher: extracted the same way as the author.
a = find3(txt, '出版社:</span>', '<br', '>', '<')
print('出版社:' + a)
jwrite('publisher', a)

# Publication year: the text after the label, up to the next '<br'.
a = find2(txt, '出版年:</span>', '<br')
print('出版年:' + a)
jwrite('publish_year', a)

# Price: the text after the label, up to the next '<br'.
a = find2(txt, '定价:</span>', '<br')
print('价格:' + a)
jwrite('price', a)