From 0365f215a3c685d2c14ff6537a644240dc75a372 Mon Sep 17 00:00:00 2001 From: hhhhsc <1710496817@qq.com> Date: Fri, 16 Jan 2026 14:33:14 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=B8=85?= =?UTF-8?q?=E6=B4=97=E8=A1=A8=E6=A0=BC=E6=A0=87=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../application/SysParamApplicationService.java | 17 ++++++++++------- .../Create/components/ParamConfig.tsx | 13 +++++++------ .../img_similar_images_cleaner/process.py | 4 ++-- .../full_width_characters_cleaner/process.py | 2 +- .../ops/mapper/html_tag_cleaner/metadata.yml | 9 +++++++++ runtime/ops/mapper/html_tag_cleaner/process.py | 12 ++++++++++-- .../ops/mapper/img_direction_correct/process.py | 4 ++-- .../mapper/img_enhanced_saturation/process.py | 4 ++-- runtime/ops/mapper/xml_tag_cleaner/process.py | 4 ++-- scripts/db/data-operator-init.sql | 4 ++-- 10 files changed, 47 insertions(+), 26 deletions(-) diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/SysParamApplicationService.java b/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/SysParamApplicationService.java index dcf2f6d2a..54b465c01 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/SysParamApplicationService.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/SysParamApplicationService.java @@ -12,6 +12,7 @@ import java.util.Comparator; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; /** * 系统参数应用服务 @@ -25,6 +26,7 @@ public class SysParamApplicationService { private final SysParamRepository sysParamRepository; private final RedisClient redisClient; + private final AtomicBoolean redisEnable = new AtomicBoolean(true); /** * 列表查询系统参数 @@ -59,17 +61,18 @@ public void deleteParamById(String paramKey) { } public String getParamByKey(String paramId) { - boolean redisEnable = false; String value = null; - try { - value = redisClient.getParamWithThrow(paramId); - redisEnable = true; - } catch (Exception e) { - log.warn(e.getMessage()); + if (redisEnable.get()) { + try { + value = redisClient.getParamWithThrow(paramId); + } catch (Exception e) { + redisEnable.set(false); + log.warn(e.getMessage()); + } } if (value == null) { SysParam sysParam = sysParamRepository.getById(paramId); - if (sysParam != null && redisEnable) { + if (sysParam != null) { value = sysParam.getParamValue(); } } diff --git a/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx b/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx index dc8a79db0..860a89730 100644 --- a/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx +++ b/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx @@ -8,6 +8,7 @@ import { InputNumber, Slider, Space, + Switch, } from "antd"; import { ConfigI, OperatorI } from "@/pages/OperatorMarket/operator.model"; @@ -215,12 +216,12 @@ const ParamConfig: React.FC = ({ tooltip={param.description} key={paramKey} > - updateValue(e.target.checked)} - > - {param.name} - + updateValue(checked)} + /> ); case "multiple": diff --git a/runtime/ops/filter/img_similar_images_cleaner/process.py b/runtime/ops/filter/img_similar_images_cleaner/process.py index 9121818b8..1aaea841d 100644 --- a/runtime/ops/filter/img_similar_images_cleaner/process.py +++ b/runtime/ops/filter/img_similar_images_cleaner/process.py @@ -139,8 +139,8 @@ def get_orb_similarity(self, des_matrix: np.ndarray, des_matrix_history: np.ndar orb_similarity = count / len(matches) return orb_similarity except Exception as e: - logger.exception(f"taskId: {self.task_uuid}, failed to compare the similarity between " - f"{file_name} and {file_name_history}: {e}") + logger.exception(f"taskId: {self.task_uuid}, failed to compare the similarity between " + f"{file_name} and {file_name_history}: {e}") return 0.0 def execute_sql(self, p_hash: str, des_matrix: np.ndarray, file_name: str, diff --git a/runtime/ops/mapper/full_width_characters_cleaner/process.py b/runtime/ops/mapper/full_width_characters_cleaner/process.py index 9637de40a..9dbc7e4a7 100644 --- a/runtime/ops/mapper/full_width_characters_cleaner/process.py +++ b/runtime/ops/mapper/full_width_characters_cleaner/process.py @@ -29,7 +29,7 @@ def __init__(self, *args, **kwargs): '`': '`', 'a': 'a', 'b': 'b', 'c': 'c', 'd': 'd', 'e': 'e', 'f': 'f', 'g': 'g', 'h': 'h', 'i': 'i', 'j': 'j', 'k': 'k', 'l': 'l', 'm': 'm', 'n': 'n', 'o': 'o', 'p': 'p', 'q': 'q', 'r': 'r', 's': 's', 't': 't', 'u': 'u', 'v': 'v', 'w': 'w', - 'x': 'x', 'y': 'y', 'z': 'z', '{': '{', '|': '|', '}': '}', '~': '~' + 'x': 'x', 'y': 'y', 'z': 'z', '{': '{', '|': '|', '}': '}', '~': '~' } def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: diff --git a/runtime/ops/mapper/html_tag_cleaner/metadata.yml b/runtime/ops/mapper/html_tag_cleaner/metadata.yml index ac862bd8a..38b889396 100644 --- a/runtime/ops/mapper/html_tag_cleaner/metadata.yml +++ b/runtime/ops/mapper/html_tag_cleaner/metadata.yml @@ -14,3 +14,12 @@ effect: after: '机器学习是人工智能的一个分支。' inputs: 'text' outputs: 'text' +settings: + removeTableTags: + name: '是否去除表格标签' + description: '若为是,则会去除表格标签等。' + type: 'switch' + defaultVal: 'false' + required: false + checkedLabel: '是' + unCheckedLabel: '否' diff --git a/runtime/ops/mapper/html_tag_cleaner/process.py b/runtime/ops/mapper/html_tag_cleaner/process.py index 257d33065..08931e551 100644 --- a/runtime/ops/mapper/html_tag_cleaner/process.py +++ b/runtime/ops/mapper/html_tag_cleaner/process.py @@ -34,8 +34,14 @@ class HtmlTagCleaner(Mapper): '', '