Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .cnb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ $:
script:
- chmod +x ./modify_apt_sources.sh
- sudo ./modify_apt_sources.sh
- name: 安装依赖
script:
- chmod +x ./install_sdk.sh
- sudo ./install_sdk.sh
master:
push:
- docker:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: 发布包到PyPI
name: 发布包到pypi

on:
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened, closed]
branches: [ "master" ]
types: [closed]

permissions:
contents: read
Expand Down Expand Up @@ -38,7 +38,7 @@ jobs:
pypi-publish:
runs-on: ubuntu-latest
needs: release-build
if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.merged) }}
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true

environment:
name: pypi
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
- master
pull_request:
branches: [ "master" ]
types: [closed]
types: [opened, synchronize, reopened, closed]

permissions:
contents: read
Expand All @@ -15,7 +15,6 @@ permissions:
jobs:
release-build:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'

steps:
- uses: actions/checkout@v4
Expand All @@ -34,10 +33,10 @@ jobs:
id: extract_version
run: |
filename=$(ls dist/mzapi_python-*.whl)
version=$(echo $filename | sed -e 's/.*-\([0-9]\+\.[0-9]\+\.[0-9]\+\).*\.whl/\1/')
version=$(echo $filename | sed -e 's/.*-\([0-9]*\.[0-9]*\.[0-9]*\)-py3-none-any\.whl/\1/')
echo "version=$version" >> $GITHUB_ENV

- name: 创建GitHub Release
- name: Create GitHub release
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ env.version }}
Expand All @@ -51,4 +50,4 @@ jobs:
files: |
dist/*.whl
dist/*.tar.gz
token: ${{ secrets.TOKEN }}
token: ${{ secrets.TOKEN }}
22 changes: 10 additions & 12 deletions .github/workflows/sync-to-coding.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
name: 同步代码到多个仓库

on:
push:
branches:
- master
pull_request:
branches:
- master
types: [closed]
branches: [ "master" ]
types: [opened, synchronize, reopened, closed]

jobs:
sync-to-gitcode:
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'
env:
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
GIT_USER: xiaomizhou
Expand All @@ -37,19 +34,20 @@ jobs:
run: |
git config --global user.name "${{ secrets.GIT_USER }}"
git config --global user.email "${{ secrets.GIT_EMAIL }}"

- name: Add Gitcode Remote
run: git remote add gitcode git@gitcode.com:xiaomizhou/MZAPI-python.git

- name: Push to Gitcode
run: git push gitcode master

sync-to-gitee:
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'
env:
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
GIT_USER: ${{ secrets.GITEE_USER }}
GIT_EMAIL: ${{ secrets.GITEE_EMAIL }}
GITEE_USER: ${{ secrets.GITEE_USER }}
GITEE_EMAIL: ${{ secrets.GITEE_EMAIL }}
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -78,8 +76,8 @@ jobs:
run: git push gitee master

sync-to-codeup:
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'
env:
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
GIT_USER: ${{ secrets.CODEUP_USER }}
Expand Down Expand Up @@ -112,8 +110,8 @@ jobs:
run: git push codeup master

sync-to-huaweicloud:
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'
env:
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
GIT_USER: ${{ secrets.HUAWEICLOUD_USER }}
Expand Down Expand Up @@ -146,8 +144,8 @@ jobs:
run: git push huaweicloud master

sync-to-cnb:
if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true || github.event_name == 'push'
env:
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
GIT_USER: ${{ secrets.CNB_USER }}
Expand Down
3 changes: 3 additions & 0 deletions .ide/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ RUN curl -fsSL https://code-server.dev/install.sh | sh \
&& echo done


# 安装python依赖
RUN pip install -r yilai.txt

# 指定字符集支持命令行输入中文(根据需要选择字符集)
ENV LANG C.UTF-8
ENV LANGUAGE C.UTF-8
57 changes: 0 additions & 57 deletions install_sdk.sh

This file was deleted.

9 changes: 7 additions & 2 deletions mzapi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
一个MZAPI的python的SDK
"""

__version__ = "0.0.5"
__version__ = "0.0.6"
__author__ = "祁潇潇"
__email__ = "qixiaoxin@stu.sqxy.edu.cn"

Expand All @@ -24,5 +24,10 @@ def get_email():
"""获取SDK作者邮箱"""
return __email__
from .tencent import *
from .baidu import *

__all__ = [GeneralBasicOCR,GeneralAccurateOCR]
__all__ = [
"GeneralBasicOCR"
,"GeneralAccurateOCR"
,"RecognizeGeneralTextImageWarn"
]
1 change: 1 addition & 0 deletions mzapi/baidu/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .nlp import *
12 changes: 12 additions & 0 deletions mzapi/baidu/authorization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import requests


def access_token(ak, sk):
    """Request a Baidu AI Cloud OAuth 2.0 access token (client-credentials grant).

    Args:
        ak: Application API Key, sent as the ``client_id`` query parameter.
        sk: Application Secret Key, sent as the ``client_secret`` query parameter.

    Returns:
        The value of the ``access_token`` field of the JSON response body, or
        ``None`` when the decoded JSON object has no such field.

    Raises:
        Whatever ``requests.get`` raises on transport failure or timeout, and
        whatever ``response.json()`` raises when the body is not valid JSON.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    # Hand the credentials to requests via ``params`` so they are
    # percent-encoded. Interpolating them into the URL by hand corrupts the
    # query string whenever a key contains characters such as ``&``, ``+``,
    # ``=`` or ``#``.
    params = {
        "grant_type": "client_credentials",
        "client_id": ak,
        "client_secret": sk,
    }
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # The original passed an empty-string body (``data=""``), which sends
    # nothing on a GET request, so it is omitted here.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    return response.json().get("access_token")
File renamed without changes.
21 changes: 16 additions & 5 deletions mzapi/tencent/ocr/GeneralAccurateOCR.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from tencentcloud.ocr.v20181119 import ocr_client, models

from ...utlis.ImageValidator import ImageValidator
from ...utlis.verification import Verification


class GeneralAccurateOCR:
Expand All @@ -31,6 +32,7 @@ def __init__(self, secret_id=None, secret_key=None, token=None, log_level=None):
Raises:
TencentCloudSDKException: 初始化失败时抛出
"""
self.sanitize_log_data = Verification
self.logger = logging.getLogger(__name__)
if log_level is not None:
self.logger.setLevel(log_level)
Expand Down Expand Up @@ -72,18 +74,27 @@ def recognize(self,ImageBase64,ImageUrl,IsWords,EnableDetectSplit,IsPdf,PdfPageN
:param ConfigID: 配置ID支持: OCR -- 通用场景 MulOCR--多语种场景
"""
try:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): We've found these issues:

self.logger.info("开始执行OCR识别")
self.logger.debug(f"输入参数: ImageBase64={ImageBase64}, ImageUrl={ImageUrl}, IsWords={IsWords}, EnableDetectSplit={EnableDetectSplit}, IsPdf={IsPdf}, PdfPageNumber={PdfPageNumber}, EnableDetectText={EnableDetectText}, ConfigID={ConfigID}")

if ImageBase64 is None and ImageUrl is None:
if (ImageBase64 is None or str(ImageBase64).strip() == "") and (ImageUrl is None or str(ImageUrl).strip() == ""):
error_msg = "ImageBase64和ImageUrl必须提供一个"
self.logger.error(error_msg)
raise ValueError(error_msg)

if ImageUrl:
self.logger.debug(f"验证图片URL: {ImageUrl}")
self.logger.debug("验证图片URL: %s", ImageUrl)
self.validate_url.validate_url(ImageUrl, ["png", "jpg", "jpeg", "bmp", "pdf"])
self.logger.debug("图片URL验证通过")
self.logger.debug("图片Base64验证通过")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

这行日志 “图片Base64验证通过” 具有误导性。代码中只对 ImageUrl 进行了验证,但并没有对 ImageBase64 的内容或格式进行任何验证。建议移除这行日志,或者实现一个真正的Base64验证函数。

self.logger.info("开始执行OCR识别")
self.logger.debug("输入参数: ImageBase64=%s, ImageUrl=%s, IsWords=%s, EnableDetectSplit=%s, IsPdf=%s, PdfPageNumber=%s, EnableDetectText=%s, ConfigID=%s",
self.sanitize_log_data.sanitize_log_data(ImageBase64,100),
ImageUrl,
IsWords,
EnableDetectSplit,
IsPdf,
PdfPageNumber,
EnableDetectText,
ConfigID)
req = models.GeneralAccurateOCRRequest()
params = {
"ImageBase64": ImageBase64,
Expand All @@ -99,7 +110,7 @@ def recognize(self,ImageBase64,ImageUrl,IsWords,EnableDetectSplit,IsPdf,PdfPageN
self.logger.info("正在向腾讯云OCR API发送请求...")
resp = self.client.GeneralAccurateOCR(req)
self.logger.info("OCR识别请求成功完成")
self.logger.debug(f"响应数据: {resp.to_json_string()}")
self.logger.debug("响应数据: %s", self.sanitize_log_data.sanitize_log_data(resp.to_json_string(),50))
return resp.to_json_string()

except TencentCloudSDKException as err:
Expand Down
19 changes: 15 additions & 4 deletions mzapi/tencent/ocr/GeneralBasicOCR.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tencentcloud.ocr.v20181119 import ocr_client, models

from ...utlis.ImageValidator import ImageValidator
from ...utlis.verification import Verification


class GeneralBasicOCR:
Expand All @@ -30,6 +31,7 @@ def __init__(self, secret_id=None, secret_key=None, token=None, log_level=None):
Raises:
TencentCloudSDKException: 初始化失败时抛出
"""
self.sanitize_log_data = Verification
self.logger = logging.getLogger(__name__)
if log_level is not None:
self.logger.setLevel(log_level)
Expand Down Expand Up @@ -106,15 +108,23 @@ def recognize(self, ImageBase64=None, ImageUrl=None, Scene=None, LanguageType=No
"""
try:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): Explicitly raise from a previous error (raise-from-previous-error)

self.logger.info("开始执行OCR识别")
self.logger.debug(f"输入参数: ImageUrl={ImageUrl}, LanguageType={LanguageType}, IsPdf={IsPdf}")
safe_base64 = self.sanitize_log_data.sanitize_log_data(ImageBase64,100)
self.logger.debug("输入参数: ImageBase64=%s, ImageUrl=%s, Scene=%s, LanguageType=%s, IsPdf=%s, PdfPageNumber=%s, IsWords=%s",
safe_base64,
ImageUrl,
Scene,
LanguageType,
IsPdf,
PdfPageNumber,
IsWords)

if ImageBase64 is None and ImageUrl is None:
error_msg = "ImageBase64和ImageUrl必须提供一个"
self.logger.error(error_msg)
raise ValueError(error_msg)

if ImageUrl:
self.logger.debug(f"验证图片URL: {ImageUrl}")
self.logger.debug("验证图片URL: %s", ImageUrl)
self.validate_url.validate_url(ImageUrl, ["png", "jpg", "jpeg", "bmp", "pdf"])
self.logger.debug("图片URL验证通过")

Expand All @@ -136,7 +146,8 @@ def recognize(self, ImageBase64=None, ImageUrl=None, Scene=None, LanguageType=No
# 执行OCR识别
resp = self.client.GeneralBasicOCR(req)
self.logger.info("OCR识别请求成功完成")
self.logger.debug(f"响应数据: {resp.to_json_string()}") # 只记录前200字符避免日志过大
resp_json = resp.to_json_string()
self.logger.debug("响应数据: %s", self.sanitize_log_data.sanitize_log_data(resp_json,50))

return resp.to_json_string()

Expand All @@ -145,4 +156,4 @@ def recognize(self, ImageBase64=None, ImageUrl=None, Scene=None, LanguageType=No
raise err
except Exception as e:
self.logger.error(f"处理OCR请求时发生意外错误: {str(e)}", exc_info=True)
raise TencentCloudSDKException("OCR处理错误", str(e))
raise TencentCloudSDKException("OCR处理错误", str(e))
Loading