From 1237877e37aebb5012e968fbba24082ff132c6f7 Mon Sep 17 00:00:00 2001 From: Sri Laasya Nutheti Date: Fri, 7 Feb 2025 15:42:16 -0800 Subject: [PATCH 1/4] Update firecrawl tool with added features --- agentstack/_tools/firecrawl/__init__.py | 103 +++++++++++++++++++++++- agentstack/_tools/firecrawl/config.json | 11 ++- 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/agentstack/_tools/firecrawl/__init__.py b/agentstack/_tools/firecrawl/__init__.py index 1f912b31..f555e946 100644 --- a/agentstack/_tools/firecrawl/__init__.py +++ b/agentstack/_tools/firecrawl/__init__.py @@ -1,6 +1,6 @@ import os from firecrawl import FirecrawlApp - +from typing import List, Dict, Any, Optional app = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY')) @@ -38,3 +38,104 @@ def retrieve_web_crawl(crawl_id: str): will tell you if the crawl is finished. If it is not, wait some more time then try again. """ return app.check_crawl_status(crawl_id) + + +def batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']): + """ + Batch scrape multiple URLs simultaneously. + + Args: + urls: List of URLs to scrape + formats: List of desired output formats (e.g., ['markdown', 'html']) + + Returns: + Dictionary containing the batch scrape results + """ + batch_result = app.batch_scrape_urls(urls, {'formats': formats}) + return batch_result + + +def async_batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']): + """ + Asynchronously batch scrape multiple URLs. + + Args: + urls: List of URLs to scrape + formats: List of desired output formats (e.g., ['markdown', 'html']) + + Returns: + Dictionary containing the job ID and status URL + """ + batch_job = app.async_batch_scrape_urls(urls, {'formats': formats}) + return batch_job + + +def check_batch_status(job_id: str): + """ + Check the status of an asynchronous batch scrape job. + + Args: + job_id: The ID of the batch scrape job + + Returns: + Dictionary containing the current status and results if completed + """ + return app.check_batch_scrape_status(job_id) + + +def extract_data(urls: List[str], schema: Dict[str, Any] = None, prompt: str = None): + """ + Extract structured data from URLs using LLMs. + + Args: + urls: List of URLs to extract data from + schema: Optional JSON schema defining the structure of data to extract + prompt: Optional natural language prompt describing the data to extract + + Returns: + Dictionary containing the extracted structured data + """ + params = { + 'prompt': prompt + } if prompt else { + 'schema': schema + } + + data = app.extract(urls, params) + return data + + +def map_website(url: str, search: Optional[str] = None): + """ + Map a website to get all URLs, with optional search functionality. + + Args: + url: The base URL to map + search: Optional search term to filter URLs + + Returns: + Dictionary containing the list of discovered URLs + """ + params = {'search': search} if search else {} + map_result = app.map_url(url, params) + return map_result + + +def batch_extract(urls: List[str], extract_params: Dict[str, Any]): + """ + Batch extract structured data from multiple URLs. 
+ + Args: + urls: List of URLs to extract data from + extract_params: Dictionary containing extraction parameters including prompt or schema + + Returns: + Dictionary containing the extracted data from all URLs + """ + params = { + 'formats': ['extract'], + 'extract': extract_params + } + + batch_result = app.batch_scrape_urls(urls, params) + return batch_result \ No newline at end of file diff --git a/agentstack/_tools/firecrawl/config.json b/agentstack/_tools/firecrawl/config.json index 5dcf2748..42c45756 100644 --- a/agentstack/_tools/firecrawl/config.json +++ b/agentstack/_tools/firecrawl/config.json @@ -8,6 +8,15 @@ "dependencies": [ "firecrawl-py>=1.6.4" ], - "tools": ["web_scrape", "web_crawl", "retrieve_web_crawl"], + "tools": [ + "web_scrape", + "web_crawl", + "retrieve_web_crawl", + "batch_scrape", + "check_batch_status", + "extract_data", + "map_website", + "batch_extract" + ], "cta": "Create an API key at https://www.firecrawl.dev/" } \ No newline at end of file From a99694f57b1006af4ee3a3fd79854100ffd1239b Mon Sep 17 00:00:00 2001 From: Sri Laasya Nutheti Date: Thu, 13 Feb 2025 16:48:17 -0800 Subject: [PATCH 2/4] Add vision and firecrawl tools --- agentstack/_tools/agentql/__init__.py | 40 +++++----- agentstack/_tools/vision/__init__.py | 105 ++++++++++++++++++++------ agentstack/_tools/vision/config.json | 9 ++- 3 files changed, 107 insertions(+), 47 deletions(-) diff --git a/agentstack/_tools/agentql/__init__.py b/agentstack/_tools/agentql/__init__.py index 46364974..80cf6e37 100644 --- a/agentstack/_tools/agentql/__init__.py +++ b/agentstack/_tools/agentql/__init__.py @@ -17,32 +17,32 @@ def query_data(url: str, query: Optional[str], prompt: Optional[str]) -> dict: AgentQL query to scrape the url. -Here is a guide on AgentQL query syntax: + Here is a guide on AgentQL query syntax: -Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social\_media\_links" is wrongly enclosed within parenthesis `()`. + Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social/media/links" is wrongly enclosed within parenthesis `()`. -``` -( # Should be { - social_media_links(The icons that lead to Facebook, Snapchat, etc.)[] -) # Should be } -``` + ``` + ( # Should be { + social_media_links(The icons that lead to Facebook, Snapchat, etc.)[] + ) # Should be } + ``` -The following query is also invalid since its missing the curly braces `{}` + The following query is also invalid since its missing the curly braces `{}` -``` -# should include { -social_media_links(The icons that lead to Facebook, Snapchat, etc.)[] -# should include } -``` + ``` + # should include { + social_media_links(The icons that lead to Facebook, Snapchat, etc.)[] + # should include } + ``` -You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line. + You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line. 
-```
-{
-    social_media_links(The icons that lead
-    to Facebook, Snapchat, etc.)[]
-}
-```
+    ```
+    {
+        social_media_links(The icons that lead
+        to Facebook, Snapchat, etc.)[]
+    }
+    ```
     """
     payload = {
         "url": url,
diff --git a/agentstack/_tools/vision/__init__.py b/agentstack/_tools/vision/__init__.py
index e491153a..961fced0 100644
--- a/agentstack/_tools/vision/__init__.py
+++ b/agentstack/_tools/vision/__init__.py
@@ -1,51 +1,68 @@
-"""Vision tool for analyzing images using OpenAI's Vision API."""
+"""Vision tool for analyzing images using OpenAI's Vision API and Claude."""
 
 import base64
-from typing import Optional
+from typing import Optional, Literal
 
 import requests
 from openai import OpenAI
+from anthropic import Anthropic
 
 __all__ = ["analyze_image"]
 
 
-def analyze_image(image_path_url: str) -> str:
+def analyze_image(image_path_url: str, model: Literal["openai", "claude"] = "openai") -> str:
     """
-    Analyze an image using OpenAI's Vision API.
+    Analyze an image using either OpenAI's Vision API or Claude.
 
     Args:
         image_path_url: Local path or URL to the image
+        model: Which model to use ("openai" or "claude"). Defaults to "openai"
 
     Returns:
         str: Description of the image contents
     """
-    client = OpenAI()
-
     if not image_path_url:
         return "Image Path or URL is required."
 
-    if "http" in image_path_url:
-        return _analyze_web_image(client, image_path_url)
-    return _analyze_local_image(client, image_path_url)
+    if model == "openai":
+        client = OpenAI()
+        if "http" in image_path_url:
+            return _analyze_web_image_openai(client, image_path_url)
+        return _analyze_local_image_openai(client, image_path_url)
+    elif model == "claude":
+        client = Anthropic()
+        if "http" in image_path_url:
+            return _analyze_web_image_claude(client, image_path_url)
+        return _analyze_local_image_claude(client, image_path_url)
+    else:
+        raise ValueError("Model must be either 'openai' or 'claude'")
 
 
-def _analyze_web_image(client: OpenAI, image_path_url: str) -> str:
-    response = client.chat.completions.create(
-        model="gpt-4-vision-preview",
-        messages=[
-            {
+def _analyze_web_image_openai(client: OpenAI, image_url: str) -> str:
+    """Analyze a web-hosted image using OpenAI's Vision API."""
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4-vision-preview",
+            messages=[{
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": "What's in this image?"},
-                    {"type": "image_url", "image_url": {"url": image_path_url}},
-                ],
-            }
-        ],
-        max_tokens=300,
-    )
-    return response.choices[0].message.content  # type: ignore[return-value]
+                    {
+                        "type": "text",
+                        "text": "What's in this image?"
+ }, + { + "type": "image_url", + "image_url": {"url": image_url} + } + ] + }], + max_tokens=300 + ) + return response.choices[0].message.content or "No description available" + except Exception as e: + return f"Error analyzing image: {str(e)}" -def _analyze_local_image(client: OpenAI, image_path: str) -> str: +def _analyze_local_image_openai(client: OpenAI, image_path: str) -> str: base64_image = _encode_image(image_path) headers = {"Content-Type": "application/json", "Authorization": f"Bearer {client.api_key}"} payload = { @@ -65,6 +82,46 @@ def _analyze_local_image(client: OpenAI, image_path: str) -> str: return response.json()["choices"][0]["message"]["content"] +def _analyze_web_image_claude(client: Anthropic, image_path_url: str) -> str: + response = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=300, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image", "source": {"type": "url", "url": image_path_url}} + ] + }] + ) + return response.content[0].text + + +def _analyze_local_image_claude(client: Anthropic, image_path: str) -> str: + with open(image_path, "rb") as image_file: + media_data = image_file.read() + + response = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=300, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": base64.b64encode(media_data).decode() + } + } + ] + }] + ) + return response.content[0].text + + def _encode_image(image_path: str) -> str: with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") + return base64.b64encode(image_file.read()).decode("utf-8") \ No newline at end of file diff --git a/agentstack/_tools/vision/config.json b/agentstack/_tools/vision/config.json index 37963f0d..3c3f8bb3 100644 --- a/agentstack/_tools/vision/config.json +++ b/agentstack/_tools/vision/config.json @@ -6,7 +6,10 @@ }, "dependencies": [ "openai>=1.0.0", - "requests>=2.31.0" + "requests>=2.31.0", + "anthropic" ], - "tools": ["analyze_image"] -} + "tools": [ + "analyze_image" + ] +} \ No newline at end of file From f5cd1102e2ed75deea094283d0d5a823a0d05139 Mon Sep 17 00:00:00 2001 From: Braelyn Boynton Date: Fri, 14 Feb 2025 10:45:44 -0800 Subject: [PATCH 3/4] docstring fix --- agentstack/_tools/agentql/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agentstack/_tools/agentql/__init__.py b/agentstack/_tools/agentql/__init__.py index 80cf6e37..469fcfcf 100644 --- a/agentstack/_tools/agentql/__init__.py +++ b/agentstack/_tools/agentql/__init__.py @@ -19,7 +19,7 @@ def query_data(url: str, query: Optional[str], prompt: Optional[str]) -> dict: Here is a guide on AgentQL query syntax: - Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social/media/links" is wrongly enclosed within parenthesis `()`. + Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social_media_links" is wrongly enclosed within parenthesis `()`. 
``` ( # Should be { From 25e8da882a3f847b56f8413ad3149a9872537d34 Mon Sep 17 00:00:00 2001 From: Braelyn Boynton Date: Fri, 14 Feb 2025 11:27:34 -0800 Subject: [PATCH 4/4] fixed mypy type error --- agentstack/_tools/firecrawl/__init__.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/agentstack/_tools/firecrawl/__init__.py b/agentstack/_tools/firecrawl/__init__.py index f555e946..bbda5381 100644 --- a/agentstack/_tools/firecrawl/__init__.py +++ b/agentstack/_tools/firecrawl/__init__.py @@ -83,24 +83,26 @@ def check_batch_status(job_id: str): return app.check_batch_scrape_status(job_id) -def extract_data(urls: List[str], schema: Dict[str, Any] = None, prompt: str = None): +def extract_data(urls: List[str], schema: Optional[Dict[str, Any]] = None, prompt: Optional[str] = None) -> Dict[ + str, Any]: """ Extract structured data from URLs using LLMs. - + Args: urls: List of URLs to extract data from schema: Optional JSON schema defining the structure of data to extract prompt: Optional natural language prompt describing the data to extract - + Returns: Dictionary containing the extracted structured data """ - params = { - 'prompt': prompt - } if prompt else { - 'schema': schema - } - + params: Dict[str, Any] = {} + + if prompt is not None: + params['prompt'] = prompt + elif schema is not None: + params['schema'] = schema + data = app.extract(urls, params) return data
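A minimal usage sketch of the tool functions added in this series, assuming FIRECRAWL_API_KEY is set before import (the module builds its FirecrawlApp client at import time) and that the URLs and prompt below are placeholders; the shape of each returned dictionary depends on the firecrawl-py version pinned in config.json:

# Hypothetical direct call into the updated firecrawl tool module; in normal use
# these functions are exposed to agents through the "tools" list in config.json.
from agentstack._tools.firecrawl import batch_scrape, extract_data

# Scrape two pages in one request, asking for markdown output only.
pages = batch_scrape(
    ["https://example.com", "https://example.com/about"],  # placeholder URLs
    formats=["markdown"],
)

# Extract structured data with a natural-language prompt; after PATCH 4/4,
# extract_data uses `prompt` when given and falls back to `schema` otherwise.
contacts = extract_data(
    ["https://example.com/contact"],  # placeholder URL
    prompt="List any email addresses and phone numbers on the page.",
)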