From 85d0cd823cb64bf335b0fa5e367de69de0e935a5 Mon Sep 17 00:00:00 2001 From: Zayadul-huq-afnan Date: Mon, 28 Jul 2025 15:39:00 +0600 Subject: [PATCH 1/3] Update common.py, common_functions.py, and pyproject.toml --- .../action_declarations/common.py | 1 + .../Sequential_Actions/common_functions.py | 204 ++++++++++++++++++ 2 files changed, 205 insertions(+) diff --git a/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py b/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py index b15b51621..4714a6e68 100644 --- a/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py +++ b/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py @@ -137,6 +137,7 @@ {"name": "render jinja template", "function": "render_jinja_template", "screenshot": "none" }, {"name": "download chrome extension", "function": "download_chrome_extension", "screenshot": "none" }, + {"name": "AI visual reader", "function": "AI_visual_reader", "screenshot": "none" }, ) # yapf: disable diff --git a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py index c8674a30b..939f33206 100755 --- a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py +++ b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py @@ -7107,3 +7107,207 @@ def download_chrome_extension(data_set): except Exception: return CommonUtil.Exception_Handler(sys.exc_info()) +@logger +def AI_visual_reader(data_set): + """ + This action will extract the text from images using OpenAI's vision API. This action also takes user prompt and returns + the result according to the user prompt. If the user does not give any prompt, then by default it + extracts all text from the image. + + Args: + data_set: + ------------------------------------------------------------------------------ + image | input parameter | %| image.png |% + user prompt | optional parameter | Extract invoice details + AI visual reader | common action | AI visual reader + ------------------------------------------------------------------------------ + + Return: + `passed` if success + `zeuz_failed` if fails + """ + sModuleInfo = inspect.currentframe().f_code.co_name + " : " + MODULE_NAME + + try: + import base64 + import requests + import json + import os + + user_image_path = None + user_prompt = None + + for left, mid, right in data_set: + left = left.lower().replace(" ", "") + mid = mid.lower().replace(" ", "") + right = right.strip() + + if left == 'image': + if right != '': + user_image_path = right + + if left == "userprompt": + if right != '': + user_prompt = right + + # Validate image path + if not user_image_path: + CommonUtil.ExecLog(sModuleInfo, "No image path provided. Please provide an image path.", 3) + return "zeuz_failed" + + image_path = user_image_path + CommonUtil.ExecLog(sModuleInfo, f"Processing image: {image_path}", 1) + + if not os.path.isfile(image_path): + CommonUtil.ExecLog(sModuleInfo, f"Image file not found: {image_path}", 3) + return "zeuz_failed" + + # Set default prompt if none provided + prompt = user_prompt + if not prompt: + prompt = "Extract all text from this image and return the result in JSON format." + + # Convert Image to Base64 + with open(image_path, "rb") as img_file: + base64_image = base64.b64encode(img_file.read()).decode("utf-8") + + # Load API key from .env file + try: + from dotenv import load_dotenv + framework_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + env_path = os.path.join(framework_dir, ".env") + load_dotenv(env_path) + api_key = os.getenv("OPENAI_API") + if not api_key: + CommonUtil.ExecLog(sModuleInfo, "OPENAI_API not found in .env file", 3) + return "zeuz_failed" + except Exception as e: + CommonUtil.ExecLog(sModuleInfo, f"Failed to load API key from .env: {str(e)}", 3) + return "zeuz_failed" + + # Prepare API Request + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + + payload = { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{base64_image}" + } + }, + { + "type": "text", + "text": prompt + } + ] + } + ] + } + + # Send Request + CommonUtil.ExecLog(sModuleInfo, "Analyzing image...", 1) + response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers=headers, + data=json.dumps(payload) + ) + + # Process Response + if response.status_code == 200: + response_data = response.json() + extracted_data = response_data["choices"][0]["message"]["content"] + CommonUtil.ExecLog(sModuleInfo, f"Text extracted successfully from: {image_path}", 1) + CommonUtil.ExecLog(sModuleInfo, f"Extracted content: {extracted_data}", 5) + + # Create file based on user prompt + try: + # Determine file type from prompt + prompt_lower = prompt.lower() + file_extension = None + + if "json" in prompt_lower: + file_extension = ".json" + elif "csv" in prompt_lower: + file_extension = ".csv" + elif "xml" in prompt_lower: + file_extension = ".xml" + elif "yaml" in prompt_lower or "yml" in prompt_lower: + file_extension = ".yaml" + elif "txt" in prompt_lower or "text" in prompt_lower: + file_extension = ".txt" + else: + # Default to JSON if no specific format requested + file_extension = ".json" + + # Create output directory inside Framework folder + framework_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + output_dir = os.path.join(framework_dir, "Extracted data from image") + os.makedirs(output_dir, exist_ok=True) + + # Generate filename with extracted_ prefix and _data suffix (exclude original extension) + base_filename = os.path.splitext(os.path.basename(image_path))[0] + output_filename = f"extracted_{base_filename}_data{file_extension}" + output_path = os.path.join(output_dir, output_filename) + + # Write data to file based on extension + if file_extension == ".json": + # Try to parse as JSON if it's already JSON, otherwise wrap it + try: + json.loads(extracted_data) + # It's already valid JSON + with open(output_path, 'w', encoding='utf-8') as f: + f.write(extracted_data) + except json.JSONDecodeError: + # Wrap in JSON structure + with open(output_path, 'w', encoding='utf-8') as f: + json.dump({"extracted_text": extracted_data}, f, indent=2) + + elif file_extension == ".csv": + # For CSV, we'll create a simple structure + import csv + with open(output_path, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerow(["Extracted_Text"]) + writer.writerow([extracted_data]) + + elif file_extension == ".xml": + # Create simple XML structure + xml_content = f""" + + {extracted_data} +""" + with open(output_path, 'w', encoding='utf-8') as f: + f.write(xml_content) + + elif file_extension == ".yaml": + # Create YAML structure + yaml_content = f"""extracted_data: + text: "{extracted_data}" """ + with open(output_path, 'w', encoding='utf-8') as f: + f.write(yaml_content) + + else: # .txt or default + with open(output_path, 'w', encoding='utf-8') as f: + f.write(extracted_data) + + CommonUtil.ExecLog(sModuleInfo, f"Data saved to file: {output_path}", 1) + + except Exception as file_error: + CommonUtil.ExecLog(sModuleInfo, f"Warning: Could not create file: {str(file_error)}", 2) + # Continue execution even if file creation fails + + return "passed" + else: + CommonUtil.ExecLog(sModuleInfo, f"OpenAI API error: {response.status_code} - {response.text}", 3) + return "zeuz_failed" + + except Exception: + return CommonUtil.Exception_Handler(sys.exc_info()) From 6aa281bd5dfa86dfbf17822186da9c4af67dda46 Mon Sep 17 00:00:00 2001 From: Zayadul-huq-afnan Date: Mon, 28 Jul 2025 20:08:07 +0600 Subject: [PATCH 2/3] corrected the action function --- .../Sequential_Actions/common_functions.py | 87 ++----------------- 1 file changed, 5 insertions(+), 82 deletions(-) diff --git a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py index 939f33206..4f40eea67 100755 --- a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py +++ b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py @@ -7107,12 +7107,13 @@ def download_chrome_extension(data_set): except Exception: return CommonUtil.Exception_Handler(sys.exc_info()) + @logger def AI_visual_reader(data_set): """ This action will extract the text from images using OpenAI's vision API. This action also takes user prompt and returns the result according to the user prompt. If the user does not give any prompt, then by default it - extracts all text from the image. + extracts all text from the image and returns the result in JSON format. Args: data_set: @@ -7127,13 +7128,13 @@ def AI_visual_reader(data_set): `zeuz_failed` if fails """ sModuleInfo = inspect.currentframe().f_code.co_name + " : " + MODULE_NAME + global selenium_driver try: import base64 import requests import json import os - user_image_path = None user_prompt = None @@ -7162,7 +7163,6 @@ def AI_visual_reader(data_set): CommonUtil.ExecLog(sModuleInfo, f"Image file not found: {image_path}", 3) return "zeuz_failed" - # Set default prompt if none provided prompt = user_prompt if not prompt: prompt = "Extract all text from this image and return the result in JSON format." @@ -7220,90 +7220,12 @@ def AI_visual_reader(data_set): data=json.dumps(payload) ) - # Process Response + # === 5. Process Response === if response.status_code == 200: response_data = response.json() extracted_data = response_data["choices"][0]["message"]["content"] CommonUtil.ExecLog(sModuleInfo, f"Text extracted successfully from: {image_path}", 1) CommonUtil.ExecLog(sModuleInfo, f"Extracted content: {extracted_data}", 5) - - # Create file based on user prompt - try: - # Determine file type from prompt - prompt_lower = prompt.lower() - file_extension = None - - if "json" in prompt_lower: - file_extension = ".json" - elif "csv" in prompt_lower: - file_extension = ".csv" - elif "xml" in prompt_lower: - file_extension = ".xml" - elif "yaml" in prompt_lower or "yml" in prompt_lower: - file_extension = ".yaml" - elif "txt" in prompt_lower or "text" in prompt_lower: - file_extension = ".txt" - else: - # Default to JSON if no specific format requested - file_extension = ".json" - - # Create output directory inside Framework folder - framework_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) - output_dir = os.path.join(framework_dir, "Extracted data from image") - os.makedirs(output_dir, exist_ok=True) - - # Generate filename with extracted_ prefix and _data suffix (exclude original extension) - base_filename = os.path.splitext(os.path.basename(image_path))[0] - output_filename = f"extracted_{base_filename}_data{file_extension}" - output_path = os.path.join(output_dir, output_filename) - - # Write data to file based on extension - if file_extension == ".json": - # Try to parse as JSON if it's already JSON, otherwise wrap it - try: - json.loads(extracted_data) - # It's already valid JSON - with open(output_path, 'w', encoding='utf-8') as f: - f.write(extracted_data) - except json.JSONDecodeError: - # Wrap in JSON structure - with open(output_path, 'w', encoding='utf-8') as f: - json.dump({"extracted_text": extracted_data}, f, indent=2) - - elif file_extension == ".csv": - # For CSV, we'll create a simple structure - import csv - with open(output_path, 'w', newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerow(["Extracted_Text"]) - writer.writerow([extracted_data]) - - elif file_extension == ".xml": - # Create simple XML structure - xml_content = f""" - - {extracted_data} -""" - with open(output_path, 'w', encoding='utf-8') as f: - f.write(xml_content) - - elif file_extension == ".yaml": - # Create YAML structure - yaml_content = f"""extracted_data: - text: "{extracted_data}" """ - with open(output_path, 'w', encoding='utf-8') as f: - f.write(yaml_content) - - else: # .txt or default - with open(output_path, 'w', encoding='utf-8') as f: - f.write(extracted_data) - - CommonUtil.ExecLog(sModuleInfo, f"Data saved to file: {output_path}", 1) - - except Exception as file_error: - CommonUtil.ExecLog(sModuleInfo, f"Warning: Could not create file: {str(file_error)}", 2) - # Continue execution even if file creation fails - return "passed" else: CommonUtil.ExecLog(sModuleInfo, f"OpenAI API error: {response.status_code} - {response.text}", 3) @@ -7311,3 +7233,4 @@ def AI_visual_reader(data_set): except Exception: return CommonUtil.Exception_Handler(sys.exc_info()) + From 73488896cdd52dd48904fae7f3c293ea2e8c399a Mon Sep 17 00:00:00 2001 From: Zayadul-huq-afnan Date: Wed, 8 Oct 2025 17:11:06 +0600 Subject: [PATCH 3/3] changed the action name --- .../Sequential_Actions/action_declarations/common.py | 4 ++-- .../Sequential_Actions/common_functions.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py b/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py index 4714a6e68..2deef7605 100644 --- a/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py +++ b/Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py @@ -136,8 +136,8 @@ {"name": "proxy server", "function": "proxy_server", "screenshot": "none" }, {"name": "render jinja template", "function": "render_jinja_template", "screenshot": "none" }, - {"name": "download chrome extension", "function": "download_chrome_extension", "screenshot": "none" }, - {"name": "AI visual reader", "function": "AI_visual_reader", "screenshot": "none" }, + {"name": "download chrome extension", "function": "download_chrome_extension", "screenshot": "none" }, + {"name": "AI - LLM prompt with files", "function": "AI_LLM_prompt_with_files", "screenshot": "none" }, ) # yapf: disable diff --git a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py index 4f40eea67..68b2ab49c 100755 --- a/Framework/Built_In_Automation/Sequential_Actions/common_functions.py +++ b/Framework/Built_In_Automation/Sequential_Actions/common_functions.py @@ -7109,7 +7109,7 @@ def download_chrome_extension(data_set): @logger -def AI_visual_reader(data_set): +def AI_LLM_prompt_with_files(data_set): """ This action will extract the text from images using OpenAI's vision API. This action also takes user prompt and returns the result according to the user prompt. If the user does not give any prompt, then by default it @@ -7120,7 +7120,7 @@ def AI_visual_reader(data_set): ------------------------------------------------------------------------------ image | input parameter | %| image.png |% user prompt | optional parameter | Extract invoice details - AI visual reader | common action | AI visual reader + AI - LLM prompt with files | common action | AI - LLM prompt with files ------------------------------------------------------------------------------ Return: