From 9c4db379d76b59e935a4638b4638f9ecf48d45f0 Mon Sep 17 00:00:00 2001 From: qixiaoxin Date: Fri, 29 Aug 2025 22:40:21 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(ocr):=20=E6=B7=BB=E5=8A=A0=E7=B2=BE?= =?UTF-8?q?=E5=87=86=E8=AF=86=E5=88=AB=20OCR=20=E5=8A=9F=E8=83=BD=E5=B9=B6?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=9B=B8=E5=85=B3=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 GeneralAccurateOCR 类,提供更精准的 OCR 识别功能 - 更新 .cnb.yml 文件中的制品库地址 - 修改 mzapi/tencent/ocr/__init__.py,引入新的 OCR 模块 - 更新 mzapi/__init__.py,增加新功能并修改版本号 - 调整 Dockerfile 中的安装命令 -移除 GitHub Actions 中的阿里云 PyPI 上传步骤 - 更新 setup.py 文件版本号 --- .cnb.yml | 2 +- .github/workflows/publish-to-pypi.yml | 7 -- .ide/Dockerfile | 2 +- mzapi/__init__.py | 4 +- mzapi/tencent/ocr/GeneralAccurateOCR.py | 110 ++++++++++++++++++++++++ mzapi/tencent/ocr/GeneralBasicOCR.py | 32 +++---- mzapi/tencent/ocr/__init__.py | 3 +- setup.py | 2 +- 8 files changed, 133 insertions(+), 29 deletions(-) create mode 100644 mzapi/tencent/ocr/GeneralAccurateOCR.py diff --git a/.cnb.yml b/.cnb.yml index 4e6c804..9ae120f 100644 --- a/.cnb.yml +++ b/.cnb.yml @@ -38,6 +38,6 @@ master: - export TWINE_PASSWORD=${CNB_TOKEN} # 制品库地址 # 示例: export TWINE_REPOSITORY_URL=https://pypi.cnb.cool/cnb-demo/pypi-demo/-/packages/simple - - export TWINE_REPOSITORY_URL=https://pypi.cnb.cool/mizhoubaobei/MZAPI/python/-/packages/simple + - export TWINE_REPOSITORY_URL=https://pypi.cnb.cool/mizhoubaobei/ku/MZAPI-python/-/packages/simple - python -m build - twine upload dist/* \ No newline at end of file diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 3871706..e8a3e41 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -65,13 +65,6 @@ jobs: run: | python -m pip install twine - - name: Upload package to Aliyun PyPI repository - run: | - twine upload --repository-url https://packages.aliyun.com/686a57a36024b2147d89fbc0/pypi/repo-ssctu -u ${{secrets.USERNAME }} -p ${{ secrets.PASSWORD }} dist/mzapi_python-${{ env.version }}-py3-none-any.whl - env: - ALIYUN_PYPI_USERNAME: ${{ secrets.USERNAME }} - ALIYUN_PYPI_PASSWORD: ${{ secrets.PASSWORD }} - - name: Create GitHub release uses: softprops/action-gh-release@v2 with: diff --git a/.ide/Dockerfile b/.ide/Dockerfile index da97ede..3478d1d 100644 --- a/.ide/Dockerfile +++ b/.ide/Dockerfile @@ -2,7 +2,7 @@ FROM docker.cnb.cool/examples/language/python-3:latest # 安装 curl 和 apt-get 环境 -RUN apt-get update && apt-get install -y curl wget unzip openssh-server sduo +RUN apt-get update && apt-get install -y curl wget unzip openssh-server sudo # 安装 code-server 和 vscode 常用插件 RUN curl -fsSL https://code-server.dev/install.sh | sh \ diff --git a/mzapi/__init__.py b/mzapi/__init__.py index 754bd20..d1619b8 100644 --- a/mzapi/__init__.py +++ b/mzapi/__init__.py @@ -7,7 +7,7 @@ 一个MZAPI的python的SDK """ -__version__ = "0.0.4" +__version__ = "0.0.5" __author__ = "祁潇潇" __email__ = "qixiaoxin@stu.sqxy.edu.cn" @@ -25,4 +25,4 @@ def get_email(): return __email__ from .tencent import * -__all__ = [GeneralBasicOCR] +__all__ = [GeneralBasicOCR,GeneralAccurateOCR] diff --git a/mzapi/tencent/ocr/GeneralAccurateOCR.py b/mzapi/tencent/ocr/GeneralAccurateOCR.py new file mode 100644 index 0000000..3314604 --- /dev/null +++ b/mzapi/tencent/ocr/GeneralAccurateOCR.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- + +import json +import logging + +from tencentcloud.common import credential +from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException +from tencentcloud.common.profile.client_profile import ClientProfile +from tencentcloud.common.profile.http_profile import HttpProfile +from tencentcloud.ocr.v20181119 import ocr_client, models + +from ...utlis.ImageValidator import ImageValidator + + +class GeneralAccurateOCR: + def __init__(self, secret_id=None, secret_key=None, token=None, log_level=None): + """初始化腾讯云OCR客户端 + + Args: + secret_id: 腾讯云SecretId + secret_key: 腾讯云SecretKey + token: 临时密钥Token(可选) + log_level: 日志级别,默认为None(不输出日志) + - logging.DEBUG: 详细调试信息 + - logging.INFO: 一般信息 + - logging.WARNING: 警告信息 + - logging.ERROR: 错误信息 + - logging.CRITICAL: 严重错误 + - None: 不输出日志(默认) + + Raises: + TencentCloudSDKException: 初始化失败时抛出 + """ + self.logger = logging.getLogger(__name__) + if log_level is not None: + self.logger.setLevel(log_level) + # 只在没有处理器时添加处理器 + if not self.logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + self.logger.addHandler(handler) + else: + # 确保现有处理器的格式一致 + for h in self.logger.handlers: + h.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + self.logger.info("初始化腾讯云OCR客户端,日志级别: %s", logging.getLevelName(log_level)) + try: + # 实例化认证对象 + self.cred = credential.Credential(secret_id, secret_key, token) + self.logger.debug("认证对象创建成功") + # 配置HTTP和客户端选项 + http_profile = HttpProfile() + http_profile.endpoint = "ocr.tencentcloudapi.com" + client_profile = ClientProfile() + client_profile.http_profile = http_profile + self.client = ocr_client.OcrClient(self.cred, "", client_profile) + self.validate_url = ImageValidator() + self.logger.info("OCR客户端初始化完成") + except Exception as e: + self.logger.error(f"初始化失败: {str(e)}") + raise TencentCloudSDKException("初始化失败", str(e)) + + def recognize(self,ImageBase64,ImageUrl,IsWords,EnableDetectSplit,IsPdf,PdfPageNumber,EnableDetectText,ConfigID): + """' + :param ImageBase64: 图片/PDF的 Base64 值。要求图片经Base64编码后不超过 10M,分辨率建议600*800以上,支持PNG、JPG、JPEG、BMP、PDF格式。图片的 ImageUrl、ImageBase64 必须提供一个,如果都提供,只使用 ImageUrl。 + :param ImageUrl: 图片/PDF的 Url 地址。要求图片经Base64编码后不超过10M,分辨率建议600*800以上,支持PNG、JPG、JPEG、BMP、PDF格式。图片下载时间不超过 3 秒。图片存储于腾讯云的 Url 可保障更高的下载速度和稳定性,建议图片存储于腾讯云。非腾讯云存储的 Url 速度和稳定性可能受一定影响。 + :param IsWords: 是否返回单字信息,默认关 + :param EnableDetectSplit: 是否开启原图切图检测功能,开启后可提升“整图面积大,但单字符占比面积小”(例如:试卷)场景下的识别效果,默认关 + :param IsPdf: 是否开启PDF识别,默认值为false,开启后可同时支持图片和PDF的识别。 + :param PdfPageNumber: 需要识别的PDF页面的对应页码,仅支持PDF单页识别,当上传文件为PDF且IsPdf参数值为true时有效,默认值为1。 + :param EnableDetectText: 文本检测开关,默认为true。设置为false可直接进行单行识别,适用于仅包含正向单行文本的图片场景。 + :param ConfigID: 配置ID支持: OCR -- 通用场景 MulOCR--多语种场景 + """ + try: + self.logger.info("开始执行OCR识别") + self.logger.debug(f"输入参数: ImageBase64={ImageBase64}, ImageUrl={ImageUrl}, IsWords={IsWords}, EnableDetectSplit={EnableDetectSplit}, IsPdf={IsPdf}, PdfPageNumber={PdfPageNumber}, EnableDetectText={EnableDetectText}, ConfigID={ConfigID}") + + if ImageBase64 is None and ImageUrl is None: + error_msg = "ImageBase64和ImageUrl必须提供一个" + self.logger.error(error_msg) + raise ValueError(error_msg) + + if ImageUrl: + self.logger.debug(f"验证图片URL: {ImageUrl}") + self.validate_url.validate_url(ImageUrl, ["png", "jpg", "jpeg", "bmp", "pdf"]) + self.logger.debug("图片URL验证通过") + req = models.GeneralAccurateOCRRequest() + params = { + "ImageBase64": ImageBase64, + "ImageUrl":ImageUrl, + "IsWords":IsWords, + "EnableDetectSplit":EnableDetectSplit, + "IsPdf":IsPdf, + "PdfPageNumber":PdfPageNumber, + "EnableDetectText":EnableDetectText, + "ConfigID":ConfigID + } + req.from_json_string(json.dumps(params)) + self.logger.info("正在向腾讯云OCR API发送请求...") + resp = self.client.GeneralAccurateOCR(req) + self.logger.info("OCR识别请求成功完成") + self.logger.debug(f"响应数据: {resp.to_json_string()}") + return resp.to_json_string() + + except TencentCloudSDKException as err: + self.logger.error(f"OCR识别失败: {str(err)}", exc_info=True) + raise err + except Exception as e: + self.logger.error(f"处理OCR请求时发生意外错误: {str(e)}", exc_info=True) + raise TencentCloudSDKException("OCR处理错误", str(e)) \ No newline at end of file diff --git a/mzapi/tencent/ocr/GeneralBasicOCR.py b/mzapi/tencent/ocr/GeneralBasicOCR.py index e0c9906..b3987f3 100644 --- a/mzapi/tencent/ocr/GeneralBasicOCR.py +++ b/mzapi/tencent/ocr/GeneralBasicOCR.py @@ -12,16 +12,17 @@ class GeneralBasicOCR: - def __init__(self, secret_id=None, secret_key=None, token=None, log_level=logging.INFO): + def __init__(self, secret_id=None, secret_key=None, token=None, log_level=None): """初始化腾讯云OCR客户端 Args: secret_id: 腾讯云SecretId secret_key: 腾讯云SecretKey token: 临时密钥Token(可选) - log_level: 日志级别,默认为logging.INFO + log_level: 日志级别,默认为None(不输出日志) + - None: 不输出日志(默认) - logging.DEBUG: 详细调试信息 - - logging.INFO: 一般信息(默认) + - logging.INFO: 一般信息 - logging.WARNING: 警告信息 - logging.ERROR: 错误信息 - logging.CRITICAL: 严重错误 @@ -30,18 +31,18 @@ def __init__(self, secret_id=None, secret_key=None, token=None, log_level=loggin TencentCloudSDKException: 初始化失败时抛出 """ self.logger = logging.getLogger(__name__) - # 确保日志级别设置正确 - self.logger.setLevel(log_level) - # 只在没有处理器时添加处理器 - if not self.logger.handlers: - handler = logging.StreamHandler() - handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) - self.logger.addHandler(handler) - else: - # 确保现有处理器的格式一致 - for h in self.logger.handlers: - h.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) - self.logger.info("初始化腾讯云OCR客户端,日志级别: %s", logging.getLevelName(log_level)) + if log_level is not None: + self.logger.setLevel(log_level) + # 只在没有处理器时添加处理器 + if not self.logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + self.logger.addHandler(handler) + else: + # 确保现有处理器的格式一致 + for h in self.logger.handlers: + h.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + self.logger.info("初始化腾讯云OCR客户端,日志级别: %s", logging.getLevelName(log_level)) try: # 实例化认证对象 self.cred = credential.Credential(secret_id, secret_key, token) @@ -128,7 +129,6 @@ def recognize(self, ImageBase64=None, ImageUrl=None, Scene=None, LanguageType=No "PdfPageNumber": PdfPageNumber, "IsWords": IsWords } - self.logger.debug(f"请求参数: {params}") req.from_json_string(json.dumps(params)) self.logger.info("正在向腾讯云OCR API发送请求...") diff --git a/mzapi/tencent/ocr/__init__.py b/mzapi/tencent/ocr/__init__.py index 6fc5849..2d2cc45 100644 --- a/mzapi/tencent/ocr/__init__.py +++ b/mzapi/tencent/ocr/__init__.py @@ -1 +1,2 @@ -from .GeneralBasicOCR import * \ No newline at end of file +from .GeneralBasicOCR import * +from .GeneralAccurateOCR import * \ No newline at end of file diff --git a/setup.py b/setup.py index 63aadc4..f72a2a3 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="mzapi-python", # 安装时使用的名称 - version="0.0.4", + version="0.0.5", author="祁潇潇", author_email="qixiaoxin@stu.sqxy.edu.cn", description="MZAPI的python的SDK", From 9f32bbb0aadfc38c2026ada5e376b910b1d3ef31 Mon Sep 17 00:00:00 2001 From: qixiaoxin Date: Fri, 29 Aug 2025 22:54:04 +0800 Subject: [PATCH 2/2] =?UTF-8?q?ci:=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=90=8C=E6=AD=A5=E5=92=8C=E5=8F=91=E5=B8=83=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改发布到 PyPI 的条件,只在 push 或合并 pull request 时执行 - 更新代码同步到多个仓库的条件,只在 push 或合并 pull request时执行 - 修复提取版本号的正则表达式 - 优化 GitHub Release 创建步骤 - 统一配置 Git 用户信息的方式 --- .github/workflows/publish-to-pypi.yml | 6 ++++-- .github/workflows/release.yml | 10 ++++++---- .github/workflows/sync-to-coding.yml | 16 ++++++++++++---- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index e8a3e41..ce4394c 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -1,11 +1,12 @@ -name: 发布包到pypi +name: 发布包到PyPI on: push: branches: - master pull_request: - branches: [ "master" ] + branches: [ "master" ] + types: [closed] permissions: contents: read @@ -37,6 +38,7 @@ jobs: pypi-publish: runs-on: ubuntu-latest needs: release-build + if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.merged) }} environment: name: pypi diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8ae6109..99e4be0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,7 +5,8 @@ on: branches: - master pull_request: - branches: [ "master" ] + branches: [ "master" ] + types: [closed] permissions: contents: read @@ -14,6 +15,7 @@ permissions: jobs: release-build: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' steps: - uses: actions/checkout@v4 @@ -32,10 +34,10 @@ jobs: id: extract_version run: | filename=$(ls dist/mzapi_python-*.whl) - version=$(echo $filename | sed -e 's/.*-\([0-9]*\.[0-9]*\.[0-9]*\)-py3-none-any\.whl/\1/') + version=$(echo $filename | sed -e 's/.*-\([0-9]\+\.[0-9]\+\.[0-9]\+\).*\.whl/\1/') echo "version=$version" >> $GITHUB_ENV - - name: Create GitHub release + - name: 创建GitHub Release uses: softprops/action-gh-release@v2 with: tag_name: v${{ env.version }} @@ -49,4 +51,4 @@ jobs: files: | dist/*.whl dist/*.tar.gz - token: ${{ secrets.TOKEN }} + token: ${{ secrets.TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sync-to-coding.yml b/.github/workflows/sync-to-coding.yml index b3c28cb..de590b5 100644 --- a/.github/workflows/sync-to-coding.yml +++ b/.github/workflows/sync-to-coding.yml @@ -1,13 +1,18 @@ +name: 同步代码到多个仓库 + on: push: branches: - master pull_request: - branches: [ "master" ] + branches: + - master + types: [closed] jobs: sync-to-gitcode: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' env: SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} GIT_USER: xiaomizhou @@ -32,7 +37,6 @@ jobs: run: | git config --global user.name "${{ secrets.GIT_USER }}" git config --global user.email "${{ secrets.GIT_EMAIL }}" - - name: Add Gitcode Remote run: git remote add gitcode git@gitcode.com:xiaomizhou/MZAPI-python.git @@ -41,10 +45,11 @@ jobs: sync-to-gitee: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' env: SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} - GITEE_USER: ${{ secrets.GITEE_USER }} - GITEE_EMAIL: ${{ secrets.GITEE_EMAIL }} + GIT_USER: ${{ secrets.GITEE_USER }} + GIT_EMAIL: ${{ secrets.GITEE_EMAIL }} steps: - name: Checkout uses: actions/checkout@v4 @@ -74,6 +79,7 @@ jobs: sync-to-codeup: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' env: SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} GIT_USER: ${{ secrets.CODEUP_USER }} @@ -107,6 +113,7 @@ jobs: sync-to-huaweicloud: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' env: SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} GIT_USER: ${{ secrets.HUAWEICLOUD_USER }} @@ -140,6 +147,7 @@ jobs: sync-to-cnb: runs-on: ubuntu-latest + if: github.event.pull_request.merged == true || github.event_name == 'push' env: SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} GIT_USER: ${{ secrets.CNB_USER }}