0
点赞
收藏
分享

微信扫一扫

Docker-compose部署 gitlab-server

小安子啊 04-09 13:30 阅读 3
python

下面展示一些 内联代码片

import requests
from lxml import etree
from urllib import parse
from pprint import pprint
from tqdm import tqdm

在这里插入图片描述

class PythonBook:
    """Crawl the jb51.net book listing (list476) and download linked archives.

    The crawl walks listing pages 1-10, follows every book link found in the
    ``softlist`` section of each page, takes the first link inside the
    ``downlink`` element of the book page and streams that file to the
    local ``python_pdf2文件`` directory.
    """

    def __init__(self, timeout=30):
        """Set up the base URLs and the HTTP headers sent with every request.

        Args:
            timeout: Seconds to wait for the server before a request is
                abandoned; passed straight to ``requests.get``.
        """
        self.url = "https://m.jb51.net/books/list476_1.html"
        # Base used to turn the relative hrefs on listing pages into
        # absolute URLs.
        self.url_page = "https://m.jb51.net/"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
            # NOTE(review): placeholder token copied from the original
            # snippet; replace or remove it before real use.
            'Authorization': 'Bearer your_token',
        }
        self.timeout = timeout

    def get_book(self, url):
        """Fetch *url* and return the response body decoded as UTF-8 text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        req = requests.get(url=url, headers=self.headers, timeout=self.timeout)
        # Fail loudly instead of parsing an error page as if it were content.
        req.raise_for_status()
        req.encoding = "utf-8"
        return req.text

    def get_book_rar(self, url):
        """Open a streaming download of *url*.

        Returns:
            The response's bound ``iter_content`` method; call it with a
            ``chunk_size`` to iterate over the body in chunks.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        req = requests.get(
            url=url, headers=self.headers, stream=True, timeout=self.timeout
        )
        # Without this check an error page would be saved as the archive.
        req.raise_for_status()
        return req.iter_content

    def etree_xpath(self, req_text, xpath):
        """Parse *req_text* as HTML and return the matches for *xpath*."""
        html = etree.HTML(req_text)
        return html.xpath(xpath)

    def page_url(self):
        """Return the URLs of listing pages 1 through 10."""
        return [
            f"https://m.jb51.net/books/list476_{i}.html" for i in range(1, 11)
        ]

    def parse_urljoin(self, list_url):
        """Resolve every URL in *list_url* against ``self.url_page``.

        Relative links become absolute; links that are already absolute are
        returned unchanged.
        """
        return [parse.urljoin(self.url_page, url) for url in list_url]

    def flies_book(self, book_pdf, pdf_content, chunk_size=1042):
        """Stream a download into ``python_pdf2文件/<book_pdf>``.

        Args:
            book_pdf: File name to write inside the output directory.
            pdf_content: Callable accepting ``chunk_size`` and returning an
                iterable of byte chunks (as returned by ``get_book_rar``).
            chunk_size: Number of bytes requested per chunk.
        """
        import os

        # The original failed when the directory had not been created by hand.
        os.makedirs("python_pdf2文件", exist_ok=True)
        # Open once and truncate: reopening in append mode for every chunk
        # made a re-run append to (and corrupt) an existing file.
        with open(f"python_pdf2文件/{book_pdf}", "wb") as f:
            for conte in tqdm(pdf_content(chunk_size=chunk_size)):
                if conte:
                    f.write(conte)
        print(f"下载完成:{book_pdf}")

    def main(self):
        """Crawl every listing page and download each book's first link.

        The crawl is best-effort: a page or book that fails is reported on
        stdout and skipped, and the remaining items are still processed.
        """
        section = '//section[@class="softlist"]/a/@href'
        rar = '//div[@id="downlink"]/a/@href'
        for item in self.page_url():
            try:
                req = self.get_book(item)
                list_text = self.parse_urljoin(self.etree_xpath(req, section))
            except Exception as exc:
                print(f"skipping listing page {item}: {exc!r}")
                continue
            for book in list_text:
                # Handle failures per book so one bad entry does not abort
                # the rest of the page.
                try:
                    reqs = self.get_book(book)
                    links = self.etree_xpath(reqs, rar)
                    if not links:
                        print(f"no download link found on {book}")
                        continue
                    list_rar = links[0]
                    # The saved file is named after the last URL segment.
                    file_name = list_rar.split('/')[-1]
                    self.flies_book(file_name, self.get_book_rar(list_rar))
                except Exception as exc:
                    print(f"skipping book {book}: {exc!r}")

if __name__ == "__main__":
    # Start the crawl only when the file is executed as a script, so that
    # importing the module does not trigger network downloads.
    pythonbook = PythonBook()
    pythonbook.main()
仅供学习使用。

举报

相关推荐

0 条评论