文章目录
PaddleOCR V4
PaddleOCR 的中英文识别模型已更新到 v4。本文下载项目和模型,并将其部署为一个 API 服务。
快捷使用方式:https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/quickstart.md
安装PaddlePaddle
直接使用官网提供的镜像:
docker pull paddlepaddle/paddle:2.5.1-gpu-cuda11.7-cudnn8.4-trt8.4
安装PaddleOCR whl包(在下文 docker run 启动的容器内执行)
pip install paddleocr
docker run -it --gpus device=3 paddlepaddle/paddle:2.5.1-gpu-cuda11.7-cudnn8.4-trt8.4 bash
模型列表
https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/models_list.md
Python调用测试
from paddleocr import PaddleOCR
# Inference settings for the quick test. 'lang' is not set here; the model
# directories are passed explicitly instead.
params = dict(
    det_model_dir='/home/models/ch_PP-OCRv4_det_server_infer',  # detection model directory
    rec_model_dir='/home/models/ch_PP-OCRv4_rec_server_infer',  # recognition model directory
    cls_model_dir='/home/models/ch_ppocr_mobile_v2.0_cls_infer',  # angle-classifier model directory
    use_gpu=True,  # run inference on the GPU
    use_angle_cls=True,  # enable text-orientation classification
)

# Build the OCR engine from the settings above.
ocr = PaddleOCR(**params)

# Run OCR on a sample image and dump the raw result.
img_path = '/home/ppocr_img/ppocr_img/imgs/11.jpg'
result = ocr.ocr(img_path, cls=True)
print(result)
转fastapi服务
pip install fastapi uvicorn python-multipart
接口服务:
import os
import time
import traceback
import cv2
import numpy as np
import requests
from fastapi import FastAPI, File, UploadFile
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from paddleocr import PaddleOCR, draw_ocr
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import paddle
# Create the FastAPI application (title/description/version are shown in the
# generated OpenAPI docs).
app = FastAPI(
    title='PaddleOCR API',
    description='基于 PaddleOCR 的 OCR 服务 API 接口',
    version='v4',
)

# Allow cross-origin requests from any origin, with any method and header.
# NOTE(review): the FastAPI CORS docs discourage combining wildcard origins
# with allow_credentials=True -- confirm whether credentials are needed and,
# if so, list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# OCR settings used when a GPU is available: explicit PP-OCRv4 server models.
params1 = {
    # 'lang' is not set; the model directories below are used instead.
    'det_model_dir': '/home/models/ch_PP-OCRv4_det_server_infer',  # detection model directory
    'rec_model_dir': '/home/models/ch_PP-OCRv4_rec_server_infer',  # recognition model directory
    'cls_model_dir': '/home/models/ch_ppocr_mobile_v2.0_cls_infer',  # angle-classifier model directory
    'use_gpu': True,  # run inference on the GPU
    'use_angle_cls': True,  # enable text-orientation classification
}

# OCR settings used on CPU: no model directories are given here.
params2 = {
    'use_gpu': False,  # run inference on the CPU
    'use_angle_cls': True,  # enable text-orientation classification
}

# Pick the configuration based on what the installed PaddlePaddle supports.
# Every branch assigns `ocr`, so the request handlers never hit an undefined
# global when running on a CPU-only build.
if paddle.device.is_compiled_with_cuda():
    # Number of GPUs visible to Paddle.
    gpu_count = paddle.device.cuda.device_count()
    if gpu_count > 0:
        print(f"有 {gpu_count} 个可用的GPU")
        ocr = PaddleOCR(**params1)
    else:
        print("没有可用的GPU")
        ocr = PaddleOCR(**params2)
else:
    print("PaddlePaddle未使用CUDA编译")
    # Previously `ocr` was left undefined on this path; fall back to CPU.
    ocr = PaddleOCR(**params2)
@app.post("/image_ocr_bytes")
def upload_image(image: UploadFile = File(...)):
    """Run OCR on an image uploaded as multipart form data.

    Args:
        image: The uploaded file; its bytes are decoded with OpenCV.

    Returns:
        A dict with ``code``, ``data`` and ``message``. On success ``data``
        holds ``ocr_result`` (the raw ``ocr.ocr`` output) and ``use_time``
        (elapsed seconds, rounded to 3 decimals). On failure ``data`` is empty
        and ``message`` describes the error.
    """
    try:
        t1 = time.time()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        raw = np.frombuffer(image.file.read(), np.uint8)
        img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode yields None when the bytes are not a decodable image.
            return {
                "code": 400,
                "data": {},
                "message": "error:unable to decode the uploaded file as an image",
            }
        result = ocr.ocr(img, cls=True)
        t2 = time.time()
        ret = {
            "ocr_result": result,
            "use_time": round(t2 - t1, 3),
        }
        return {"code": 200, "data": ret, "message": "success"}
    except Exception:
        # Report the failure in the response envelope instead of crashing;
        # `except Exception` (not bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return {"code": 500, "data": {}, "message": f"error:{traceback.format_exc()}"}
class ImageURL(BaseModel):
    """Request body for the URL-based OCR endpoints."""

    # Location of the image to download and run OCR on.
    url: str
@app.post("/image_ocr_url")
def upload_image_url(image: ImageURL):
    """Download an image from a URL and run OCR on it.

    Args:
        image: Request body carrying the image ``url``.

    Returns:
        A dict with ``code``, ``data`` and ``message``. On success ``data``
        holds ``ocr_result`` (the raw ``ocr.ocr`` output) and ``use_time``
        (elapsed seconds, rounded to 3 decimals).

    Raises:
        HTTPException: 400 when the URL cannot be fetched with status 200 or
            the downloaded bytes are not a decodable image.
    """
    try:
        t1 = time.time()
        # NOTE(review): the URL is caller-supplied and fetched server-side;
        # consider restricting allowed schemes/hosts.
        # A timeout keeps a stalled remote host from blocking the worker forever.
        response = requests.get(image.url, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=400, detail="Invalid URL or unable to fetch image.")
        # Decode the downloaded bytes into an OpenCV image.
        img_array = np.frombuffer(response.content, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode yields None when the bytes are not a decodable image.
            raise HTTPException(status_code=400, detail="Downloaded content is not a valid image.")
        result = ocr.ocr(img, cls=True)
        t2 = time.time()
        ret = {
            "ocr_result": result,
            "use_time": round(t2 - t1, 3),
        }
        return {"code": 200, "data": ret, "message": "success"}
    except HTTPException:
        # Let deliberate HTTP errors reach the client; the generic handler
        # below used to swallow them and report code 500.
        raise
    except Exception:
        return {"code": 500, "data": {}, "message": f"error:{traceback.format_exc()}"}
@app.post("/image_ocr_onlytext_url")
def upload_image_url(image: ImageURL):
    """Download an image from a URL and return only the recognised text.

    NOTE(review): this handler reuses the name ``upload_image_url``; the route
    is still registered by the decorator, but a distinct name would be clearer.

    Args:
        image: Request body carrying the image ``url``.

    Returns:
        A dict with ``code``, ``data`` and ``message``. On success ``data``
        holds ``ocr_result`` (recognised lines joined with newlines; empty
        when nothing was recognised) and ``use_time`` (elapsed seconds,
        rounded to 3 decimals).

    Raises:
        HTTPException: 400 when the URL cannot be fetched with status 200 or
            the downloaded bytes are not a decodable image.
    """
    try:
        t1 = time.time()
        # NOTE(review): the URL is caller-supplied and fetched server-side;
        # consider restricting allowed schemes/hosts.
        # A timeout keeps a stalled remote host from blocking the worker forever.
        response = requests.get(image.url, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=400, detail="Invalid URL or unable to fetch image.")
        # Decode the downloaded bytes into an OpenCV image.
        img_array = np.frombuffer(response.content, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode yields None when the bytes are not a decodable image.
            raise HTTPException(status_code=400, detail="Downloaded content is not a valid image.")
        result = ocr.ocr(img, cls=True)
        # Keep only the text of each detected line, joined with newlines.
        # The first entry may be None/empty when no text is detected
        # (presumably PaddleOCR's behaviour for blank images -- confirm), so
        # guard before iterating.
        lines = result[0] if result and result[0] else []
        result = "\n".join(item[1][0] for item in lines)
        t2 = time.time()
        ret = {
            "ocr_result": result,
            "use_time": round(t2 - t1, 3),
        }
        return {"code": 200, "data": ret, "message": "success"}
    except HTTPException:
        # Let deliberate HTTP errors reach the client; the generic handler
        # below used to swallow them and report code 500.
        raise
    except Exception:
        # Format the traceback once, then log it and return it to the caller.
        error_text = f"error:{traceback.format_exc()}"
        print(error_text)
        return {"code": 500, "data": {}, "message": error_text}
if __name__ == '__main__':
    # Build the "<module>:app" import string from this file's name, taking
    # the text before the first dot.
    module_name = os.path.basename(__file__).partition(".")[0]
    # Serve on all interfaces, port 7860, single worker, no auto-reload.
    uvicorn.run(
        f'{module_name}:app',
        host='0.0.0.0',
        port=7860,
        reload=False,
        workers=1,
    )
请求
用字节流图片访问:
import requests
# Image to send and the endpoint that accepts uploaded image bytes.
img_path = '/home/ppocr_img/ppocr_img/imgs/11.jpg'
url = 'http://localhost:7860/image_ocr_bytes'

# Upload the file as the multipart field "image" while it is still open.
with open(img_path, 'rb') as img_file:
    response = requests.post(url, files={'image': img_file})

# Check the HTTP status first, then the application-level code in the body.
if response.status_code != 200:
    print("Failed to reach the server. Status code:", response.status_code)
else:
    result = response.json()
    if result['code'] != 200:
        print("Error Message:", result['message'])
    else:
        print("OCR Result:", result['data'])
打包
docker commit b99168362ee9 kevinchina/deeplearning:ocrapi-v4
# Base image: the container committed above.
FROM kevinchina/deeplearning:ocrapi-v4
# Port the API service listens on.
EXPOSE 7860
# Exec form runs python directly instead of under `/bin/sh -c`, so it
# receives stop signals from `docker stop`.
ENTRYPOINT ["python", "/home/mainapi.py"]
docker build . -t kevinchina/deeplearning:ocrapi-v4-api
docker run -d --gpus all -p 7889:7860 kevinchina/deeplearning:ocrapi-v4-api
docker push kevinchina/deeplearning:ocrapi-v4-api