基於PyTesseract與PyQt5 的中文OCR識別應用程式設計

L...P發表於2020-11-13

Tesseract:主要用於進行OCR識別
PyQt: 主要用於介面設計,以及將各項功能與介面控制元件連結起來(透過訊號與槽)
主要介面:
在這裡插入圖片描述
功能實現介面:
在這裡插入圖片描述
在這裡插入圖片描述

# -*- coding: utf-8 -*-
import os
import sys
import pytesseract
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtWidgets import *
from PIL import Image, ImageGrab
import numpy as np
import pyperclip
import time


class Ui_MainWindow(object):
    """Main-window UI plus the OCR logic wired to its buttons.

    ``setupUi``/``retranslateUi`` build the widgets (layout originally
    generated from a Qt Designer ``.ui`` file); the remaining methods are the
    button slots: load an image from the clipboard or from disk, run
    Tesseract on it, copy the recognised text and save the current image.
    """

    def __init__(self):
        # Path returned by the last "open image" dialog ("" when cancelled).
        self.file_name = ""
        # Pixmap of the currently loaded image; a null pixmap until one is loaded.
        self.Pixmap = QPixmap()
        # Pixel array handed to the OCR engine; None until an image is loaded.
        self.ocr_img = None

    # ------------- UI layout section (designed with Qt Designer) -------------
    # Converted from the .ui file with the pyuic5 tool.
    # Run in a terminal: pyuic5 -o my_ui.py my_ui.ui
    # The generated .py file is written next to the .ui file.

    def setupUi(self, MainWindow):
        """Create all widgets on *MainWindow* and connect the button slots."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1170, 812)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        # Left side: label used as the image preview area.
        self.image = QtWidgets.QLabel(self.centralwidget)
        self.image.setGeometry(QtCore.QRect(10, 10, 740, 791))
        self.image.setMinimumSize(QtCore.QSize(500, 600))
        self.image.setStyleSheet("background-color: rgb(150, 150, 150);")
        self.image.setText("")
        self.image.setObjectName("image")
        # Right side: container with a grid of tips, buttons and the result box.
        self.widget = QtWidgets.QWidget(self.centralwidget)
        self.widget.setGeometry(QtCore.QRect(760, 8, 402, 791))
        self.widget.setObjectName("widget")
        self.gridLayout = QtWidgets.QGridLayout(self.widget)
        self.gridLayout.setContentsMargins(0, 0, 0, 0)
        self.gridLayout.setObjectName("gridLayout")
        self.Tip_label = QtWidgets.QLabel(self.widget)
        self.Tip_label.setMaximumSize(QtCore.QSize(400, 16))
        self.Tip_label.setAlignment(QtCore.Qt.AlignLeading | QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop)
        self.Tip_label.setObjectName("Tip_label")
        self.gridLayout.addWidget(self.Tip_label, 0, 0, 1, 2)
        self.Tip_label_2 = QtWidgets.QLabel(self.widget)
        self.Tip_label_2.setMaximumSize(QtCore.QSize(400, 16))
        self.Tip_label_2.setAlignment(QtCore.Qt.AlignLeading | QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop)
        self.Tip_label_2.setObjectName("Tip_label_2")
        self.gridLayout.addWidget(self.Tip_label_2, 1, 0, 1, 2)
        self.read_file = QtWidgets.QPushButton(self.widget)
        self.read_file.setMinimumSize(QtCore.QSize(151, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.read_file.setFont(font)
        self.read_file.setStyleSheet("background-color: rgb(52, 52, 52);\n"
                                     "color: rgb(255, 255, 255);")
        self.read_file.setObjectName("read_file")
        self.gridLayout.addWidget(self.read_file, 2, 0, 1, 1)
        self.read_scr = QtWidgets.QPushButton(self.widget)
        self.read_scr.setMinimumSize(QtCore.QSize(151, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.read_scr.setFont(font)
        self.read_scr.setStyleSheet("background-color: rgb(52, 52, 52);\n"
                                    "color: rgb(255, 255, 255);")
        self.read_scr.setObjectName("read_scr")
        self.gridLayout.addWidget(self.read_scr, 2, 1, 1, 1)
        self.ocr_start = QtWidgets.QPushButton(self.widget)
        self.ocr_start.setMinimumSize(QtCore.QSize(400, 31))
        font = QtGui.QFont()
        font.setPointSize(11)
        self.ocr_start.setFont(font)
        self.ocr_start.setStyleSheet("background-color: rgb(140, 132, 198);")
        self.ocr_start.setObjectName("ocr_start")
        self.gridLayout.addWidget(self.ocr_start, 3, 0, 1, 2)
        self.ocr_result = QtWidgets.QTextEdit(self.widget)
        self.ocr_result.setMinimumSize(QtCore.QSize(300, 300))
        self.ocr_result.setObjectName("ocr_result")
        self.gridLayout.addWidget(self.ocr_result, 4, 0, 1, 2)
        self.save_image = QtWidgets.QPushButton(self.widget)
        self.save_image.setMinimumSize(QtCore.QSize(100, 31))
        font = QtGui.QFont()
        font.setFamily("黑體")
        font.setPointSize(10)
        self.save_image.setFont(font)
        self.save_image.setStyleSheet("background-color: rgb(96, 141, 110);")
        self.save_image.setObjectName("save_image")
        self.gridLayout.addWidget(self.save_image, 5, 0, 1, 1)
        self.copy = QtWidgets.QPushButton(self.widget)
        self.copy.setMinimumSize(QtCore.QSize(100, 31))
        font = QtGui.QFont()
        font.setFamily("黑體")
        font.setPointSize(10)
        self.copy.setFont(font)
        self.copy.setStyleSheet("background-color: rgb(96, 141, 110);")
        self.copy.setObjectName("copy")
        self.gridLayout.addWidget(self.copy, 5, 1, 1, 1)
        MainWindow.setCentralWidget(self.centralwidget)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        # Wire each button to its handler below.
        self.read_scr.clicked.connect(self.read_cap)
        self.read_file.clicked.connect(self.read_img)
        self.copy.clicked.connect(self.copy_result)
        self.save_image.clicked.connect(self.save_file)
        self.ocr_start.clicked.connect(self.start_OCR)

    def retranslateUi(self, MainWindow):
        """Set the window title and the visible text of every widget."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "中文OCR"))
        self.Tip_label.setText(_translate("MainWindow", " 提示:window截圖(win鍵 + Shift + s 或PrtSc鍵)"))
        # save_file() writes .png files, so the tip advertises *.png.
        self.Tip_label_2.setText(_translate("MainWindow", " 提示:儲存路徑(C:\\image_save\\*.png)"))
        self.read_file.setText(_translate("MainWindow", "讀取影像檔案"))
        self.read_scr.setText(_translate("MainWindow", "讀取截圖"))
        self.ocr_start.setText(_translate("MainWindow", "開始識別"))
        self.save_image.setText(_translate("MainWindow", "儲存截圖"))
        self.copy.setText(_translate("MainWindow", "一鍵複製"))

    # ------------- end of the UI layout section -------------

    # ------------- back-end handlers -------------
    # ----- Load the screenshot currently held in the clipboard -----
    def read_cap(self):
        """Load an image from the system clipboard and show it in the preview.

        Shows an error dialog and leaves the current state untouched when the
        clipboard does not hold an image.
        """
        scr = ImageGrab.grabclipboard()
        # grabclipboard() yields None or a list of file names when the
        # clipboard holds no bitmap; only a PIL image is usable here.
        if not isinstance(scr, Image.Image):
            QtWidgets.QMessageBox.critical(None, "錯誤", "請先進行截圖")
            return

        # Normalise to RGBA so the buffer really has the 4 bytes per pixel
        # that Format_RGBA8888 makes Qt read, whatever mode the clipboard
        # image came in.
        self.ocr_img = np.ascontiguousarray(scr.convert("RGBA"))
        height, width = self.ocr_img.shape[:2]
        label_image = QImage(self.ocr_img.data, width, height,
                             self.ocr_img.strides[0], QImage.Format_RGBA8888)
        # Build the pixmap while self.ocr_img (the QImage's backing buffer)
        # is alive.
        self.Pixmap = QPixmap.fromImage(label_image)
        self.image.setPixmap(show_image(self.Pixmap))  # fit to the preview area

    # ----- Load an existing image file -----
    def read_img(self):
        """Ask the user for an image file, load it and show it in the preview.

        Does nothing when the dialog is cancelled; shows an error dialog when
        the chosen file cannot be decoded as an image.
        """
        # Adjacent literals are concatenated, so no stray whitespace ends up
        # inside the filter names.
        self.file_name, _ = QtWidgets.QFileDialog.getOpenFileName(
            None, "選擇圖片", "C:\\",
            "Image (*.jpg *.gif *.png *.jpeg *.pgm *.pbm *.ppm *.xpm);;"
            "JPEG Files(*.jpg);;PNG Files(*.png);;PGM Files(*.pgm)")
        if not self.file_name:  # dialog cancelled
            return

        pixmap = QPixmap(self.file_name)
        if pixmap.isNull():  # unreadable or unsupported file
            QtWidgets.QMessageBox.critical(None, "錯誤", "無法讀取影像檔案")
            return

        self.Pixmap = pixmap
        # NOTE(review): pixmap_to_cvimg presumably returns OpenCV-style BGR
        # channel order while PIL assumes RGB; harmless for the grayscale
        # conversion used by start_OCR, but confirm before relying on colour.
        self.ocr_img = pixmap_to_cvimg(pixmap)
        self.image.setPixmap(show_image(pixmap))  # fit to the preview area

    # ----- Run OCR on the loaded image -----
    def start_OCR(self):
        """Recognise the text in the loaded image and show it in the result box.

        The image is converted to grayscale before recognition (further
        scenario-specific preprocessing can improve accuracy). Shows an error
        dialog when no image is loaded or when Tesseract fails.
        """
        if self.ocr_img is None:
            QtWidgets.QMessageBox.critical(None, "錯誤", "請先讀取影像")
            return

        gray = Image.fromarray(self.ocr_img).convert('L')
        try:
            # 'chi_sim' selects Tesseract's Simplified Chinese language data.
            text = pytesseract.image_to_string(gray, lang='chi_sim')
        except (pytesseract.TesseractNotFoundError, pytesseract.TesseractError) as err:
            # Missing tesseract binary or language data: report it instead of
            # letting the exception escape the Qt slot.
            QtWidgets.QMessageBox.critical(None, "錯誤", str(err))
            return
        self.ocr_result.setText(text.replace("\n\n", "\n"))

    # ----- Copy the recognised text to the clipboard -----
    def copy_result(self):
        """Copy the plain text of the result box to the system clipboard."""
        pyperclip.copy(self.ocr_result.toPlainText())

    # ----- Save the current image as a PNG named after the current time -----
    def save_file(self):
        """Save the current pixmap to ``C:/image_save/<timestamp>.png``.

        The directory is created when missing. A dialog reports whether the
        image was actually written.
        """
        if self.Pixmap.isNull():
            QtWidgets.QMessageBox.critical(None, "錯誤", "沒有可儲存的影像")
            return

        now = time.strftime("%Y-%m-%d-%H_%M_%S")  # local time, used as file name
        path = "C:/image_save/"
        try:
            os.makedirs(path, exist_ok=True)
        except OSError as err:
            QtWidgets.QMessageBox.critical(None, "錯誤", str(err))
            return

        save_path = path + now + ".png"
        # QPixmap.save() returns False on failure; only claim success when it
        # actually succeeded.
        if self.Pixmap.save(save_path):
            QtWidgets.QMessageBox.information(None, "OK", "影像已儲存!")
        else:
            QtWidgets.QMessageBox.critical(None, "錯誤", "影像儲存失敗")


# ----- Scale an image proportionally so it fills the preview area -----
def show_image(image, max_width=740, max_height=780):
    """Return *image* scaled to fit the preview area, keeping its aspect ratio.

    The image is enlarged or shrunk so that it is as large as possible while
    still fitting inside ``max_width`` x ``max_height``.

    Args:
        image: Object offering ``isNull()`` and ``scaled()`` (a ``QPixmap``
            in this file).
        max_width: Width of the target area in pixels.
        max_height: Height of the target area in pixels.

    Returns:
        The scaled copy, or *image* itself when it is null (a null pixmap has
        size 0 and cannot be scaled).
    """
    if image.isNull():
        return image
    # Qt computes the aspect-preserving size itself; no manual factor needed.
    return image.scaled(max_width, max_height, Qt.KeepAspectRatio)


# ----- Convert a QPixmap into a NumPy image array -----
def pixmap_to_cvimg(qt_pixmap):
    """Copy the pixels of *qt_pixmap* into a ``(height, width, 3)`` uint8 view.

    The pixmap is converted to ``QImage.Format_RGB32`` first so that every
    pixel is guaranteed to occupy 4 bytes; the fourth byte is dropped.
    Per the Qt documentation that format stores pixels as 0xffRRGGBB, so on a
    little-endian machine the three returned channels are presumably in
    B, G, R order (OpenCV convention) -- confirm before relying on colour.

    Args:
        qt_pixmap: The ``QPixmap`` to convert.

    Returns:
        A NumPy array view of shape ``(height, width, 3)`` and dtype uint8,
        backed by a private copy of the pixel data.

    Raises:
        ValueError: If the pixmap is null (it has no pixel buffer).
    """
    qimg = qt_pixmap.toImage()
    if qimg.isNull():
        raise ValueError("cannot convert a null pixmap to an image array")

    # Force a known 32-bit layout; toImage() may otherwise return a format
    # with a different depth, which would break the reshape below.
    qimg = qimg.convertToFormat(QImage.Format_RGB32)
    height, width = qimg.height(), qimg.width()

    ptr = qimg.bits()
    ptr.setsize(qimg.byteCount())  # tell sip how many bytes the buffer holds
    # np.array copies, so the result stays valid after qimg is released.
    pixels = np.array(ptr, dtype=np.uint8).reshape(height, qimg.bytesPerLine() // 4, 4)
    # Drop any row padding and the unused fourth byte.
    return pixels[:, :width, :3]


def main():
    """Create the Qt application, show the OCR window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QApplication(sys.argv)

    # The UI object only populates a plain QMainWindow; it is not a widget itself.
    main_window = QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(main_window)
    main_window.show()

    exit_code = application.exec_()
    sys.exit(exit_code)


# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

備註:本文內容主要用於學習記錄,整體功能可以實現;若要提高識別準確度,需依據待識別影像的色彩型別先進行相應的預處理。部分內容參考自其他大佬的程式碼,但因忘記來源而未能標註。

相關文章