# NOTE: this action is registered in
# Framework/Built_In_Automation/Sequential_Actions/action_declarations/common.py
# with the entry:
#   {"name": "AI - LLM prompt with files", "function": "AI_LLM_prompt_with_files", "screenshot": "none" },
@logger
def AI_LLM_prompt_with_files(data_set):
    """
    Send an image together with a text prompt to OpenAI's chat completions API
    (model ``gpt-4o``) and log the model's reply.

    If the user does not give any prompt, the default prompt asks the model to
    extract all text from the image and return it in JSON format.

    The reply is only written to the execution log; this action does not store
    it anywhere else. The API key is read from the ``OPENAI_API`` environment
    variable, after loading the ``.env`` file located three directory levels
    above this module.

    Args:
        data_set:
          ------------------------------------------------------------------------------
            image                       | input parameter       | %| image.png |%
            user prompt                 | optional parameter    | Extract invoice details
            AI - LLM prompt with files  | common action         | AI - LLM prompt with files
          ------------------------------------------------------------------------------

    Return:
        `passed` if success
        `zeuz_failed` if fails
    """
    sModuleInfo = inspect.currentframe().f_code.co_name + " : " + MODULE_NAME

    try:
        import base64
        import mimetypes
        import os
        import requests

        # (connect, read) timeouts in seconds. Without a timeout requests waits
        # forever, so a stalled connection would hang the whole test run.
        request_timeout = (10, 120)

        user_image_path = None
        user_prompt = None

        # Only the field name (left) and the value (right) matter here; the
        # middle column is not used by this action.
        for left, _, right in data_set:
            field = left.lower().replace(" ", "")
            value = right.strip()
            if not value:
                continue
            if field == "image":
                user_image_path = value
            elif field == "userprompt":
                user_prompt = value

        # Validate image path
        if not user_image_path:
            CommonUtil.ExecLog(sModuleInfo, "No image path provided. Please provide an image path.", 3)
            return "zeuz_failed"

        image_path = user_image_path
        CommonUtil.ExecLog(sModuleInfo, f"Processing image: {image_path}", 1)

        if not os.path.isfile(image_path):
            CommonUtil.ExecLog(sModuleInfo, f"Image file not found: {image_path}", 3)
            return "zeuz_failed"

        prompt = user_prompt or "Extract all text from this image and return the result in JSON format."

        # Declare the real media type in the data URL instead of always
        # claiming PNG; fall back to PNG when the extension is unknown or is
        # not an image type.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        # Convert image to Base64
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")

        # Load API key from .env file
        try:
            from dotenv import load_dotenv
            framework_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
            env_path = os.path.join(framework_dir, ".env")
            load_dotenv(env_path)
            api_key = os.getenv("OPENAI_API")
            if not api_key:
                CommonUtil.ExecLog(sModuleInfo, "OPENAI_API not found in .env file", 3)
                return "zeuz_failed"
        except Exception as e:
            CommonUtil.ExecLog(sModuleInfo, f"Failed to load API key from .env: {str(e)}", 3)
            return "zeuz_failed"

        # Prepare API request
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}",
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt,
                        },
                    ],
                }
            ],
        }

        # Send request. Network errors and timeouts raise and are reported by
        # the outer exception handler.
        CommonUtil.ExecLog(sModuleInfo, "Analyzing image...", 1)
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=request_timeout,
        )

        # Process response
        if response.status_code != 200:
            CommonUtil.ExecLog(sModuleInfo, f"OpenAI API error: {response.status_code} - {response.text}", 3)
            return "zeuz_failed"

        response_data = response.json()
        extracted_data = response_data["choices"][0]["message"]["content"]
        CommonUtil.ExecLog(sModuleInfo, f"Text extracted successfully from: {image_path}", 1)
        CommonUtil.ExecLog(sModuleInfo, f"Extracted content: {extracted_data}", 5)
        return "passed"

    except Exception:
        return CommonUtil.Exception_Handler(sys.exc_info())