通过PyPDF4模块,实现Parse Text、Remove pages 等常用功能。具体实现如下:
# PDF Editor
# pip install PyPDf4
import PyPDF4
# Parse the Text from PDF
def parse_text(pdf_file):
reader = PyPDF4.PdfFileReader(pdf_file)
for page in reader.pages:
print(page.extractText())
# Remove Page from PDF
def remove_page(pdf_file, page_numbers):
filer = PyPDF4.PdfReader('source.pdf', 'rb')
out = PyPDF4.PdfWriter()
for index in page_numbers:
page = filer.pages[index]
out.add_page(page)
with open('rm.pdf', 'wb') as f:
out.write(f)
# Add Blank Page to PDF
def add_page(pdf_file, page_number):
reader = PyPDF4.PdfFileReader(pdf_file)
writer = PyPDF4.PdfWriter()
writer.addPage()
with open('add.pdf', 'wb') as f:
writer.write(f)
# Rotate Pages
def rotate_page(pdf_file):
reader = PyPDF4.PdfFileReader(pdf_file)
writer = PyPDF4.PdfWriter()
for page in reader.pages:
page.rotateClockwise(90)
writer.addPage(page)
with open('rotate.pdf', 'wb') as f:
writer.write(f)
# Merge PDFs
def merge_pdfs(pdf_file1, pdf_file2):
pdf1 = PyPDF4.PdfFileReader(pdf_file1)
pdf2 = PyPDF4.PdfFileReader(pdf_file2)
writer = PyPDF4.PdfWriter()
for page in pdf1.pages:
writer.addPage(page)
for page in pdf2.pages:
writer.addPage(page)
with open('merge.pdf', 'wb') as f:
writer.write(f)