Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 86 additions & 45 deletions agentstack/_tools/vision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,111 @@
"""Vision tool for analyzing images using OpenAI's Vision API."""

from typing import IO, Optional
import os
from pathlib import Path
import base64
from typing import Optional
import tempfile
import requests
from openai import OpenAI
import anthropic

__all__ = ["analyze_image"]

PROMPT = os.getenv('VISION_PROMPT', "What's in this image?")
MODEL = os.getenv('VISION_MODEL', "claude-3-5-sonnet-20241022")
MAX_TOKENS: int = int(os.getenv('VISION_MAX_TOKENS', 1024))

Check warning on line 13 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L11-L13

Added lines #L11 - L13 were not covered by tests

def analyze_image(image_path_url: str) -> str:
"""
Analyze an image using OpenAI's Vision API.
MEDIA_TYPES = {

Check warning on line 15 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L15

Added line #L15 was not covered by tests
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"png": "image/png",
"gif": "image/gif",
"webp": "image/webp",
}
ALLOWED_MEDIA_TYPES = list(MEDIA_TYPES.keys())

Check warning on line 22 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L22

Added line #L22 was not covered by tests

Args:
image_path_url: Local path or URL to the image
# image sizes that will not be resized
# TODO is there any value in resizing pre-upload?
# 1:1 1092x1092 px
# 3:4 951x1268 px
# 2:3 896x1344 px
# 9:16 819x1456 px
# 1:2 784x1568 px

Returns:
str: Description of the image contents
"""
client = OpenAI()

if not image_path_url:
return "Image Path or URL is required."
def _get_media_type(image_filename: str) -> Optional[str]:
    """
    Get the media type from an image filename, path or URL.

    The file extension is compared case-insensitively against the keys of
    `MEDIA_TYPES`. A real extension (text after the final dot) is required,
    so a name that merely ends in the letters "jpg" is not accepted.

    Args:
        image_filename: File name, local path or URL of the image.

    Returns:
        The matching media type (e.g. "image/jpeg"), or None when the
        extension is missing or not listed in `MEDIA_TYPES`.
    """
    # Try the name exactly as given first (a local file name may legitimately
    # contain "?" or "#"), then retry with any URL fragment / query string
    # removed so "https://host/cat.jpg?raw=true" is still recognised.
    without_query = image_filename.split("#", 1)[0].split("?", 1)[0]
    for candidate in (image_filename, without_query):
        extension = os.path.splitext(candidate)[1].lstrip(".").lower()
        media_type = MEDIA_TYPES.get(extension)
        if media_type:
            return media_type
    return None

Check warning on line 38 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L37-L38

Added lines #L37 - L38 were not covered by tests


if "http" in image_path_url:
return _analyze_web_image(client, image_path_url)
return _analyze_local_image(client, image_path_url)
def _encode_image(image_handle: IO) -> str:

Check warning on line 41 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L41

Added line #L41 was not covered by tests
"""Encode a file handle to base64."""
return base64.b64encode(image_handle.read()).decode("utf-8")

Check warning on line 43 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L43

Added line #L43 was not covered by tests


def _analyze_web_image(client: OpenAI, image_path_url: str) -> str:
response = client.chat.completions.create(
model="gpt-4-vision-preview",
def _make_anthropic_request(image_handle: IO, media_type: str) -> anthropic.types.Message:

Check warning on line 46 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L46

Added line #L46 was not covered by tests
"""Make a request to the Anthropic API using an image."""
client = anthropic.Anthropic()
data = _encode_image(image_handle)
return client.messages.create(

Check warning on line 50 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L48-L50

Added lines #L48 - L50 were not covered by tests
model=MODEL,
max_tokens=MAX_TOKENS,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": image_path_url}},
{ # type: ignore
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": data,
},
},
{ # type: ignore
"type": "text",
"text": PROMPT,
},
],
}
],
max_tokens=300,
)
return response.choices[0].message.content # type: ignore[return-value]


def _analyze_local_image(client: OpenAI, image_path: str) -> str:
base64_image = _encode_image(image_path)
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {client.api_key}"}
payload = {
"model": "gpt-4-vision-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
],
}
],
"max_tokens": 300,
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
return response.json()["choices"][0]["message"]["content"]
def _analyze_web_image(image_url: str, media_type: str) -> str:
    """
    Download an image from a URL and analyze it.

    Args:
        image_url: URL the image is fetched from.
        media_type: Media type passed along with the image data
            (e.g. "image/png").

    Returns:
        The text of the first content block of the model's response.

    Raises:
        requests.RequestException: If the download times out, fails, or the
            server answers with an HTTP error status.
    """
    # Download first so a failed request never creates a temp file, and fail
    # loudly on HTTP errors instead of base64-encoding an error page and
    # submitting it as image data. The timeout keeps a stalled server from
    # blocking the caller indefinitely.
    download = requests.get(image_url, timeout=30)
    download.raise_for_status()

    with tempfile.NamedTemporaryFile() as temp_file:
        temp_file.write(download.content)
        temp_file.flush()
        # Rewind so the request helper reads the image from the beginning.
        temp_file.seek(0)
        response = _make_anthropic_request(temp_file, media_type)
    return response.content[0].text  # type: ignore

Check warning on line 82 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L77-L82

Added lines #L77 - L82 were not covered by tests


def _encode_image(image_path: str) -> str:
def _analyze_local_image(image_path: str, media_type: str) -> str:

Check warning on line 85 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L85

Added line #L85 was not covered by tests
"""Analyze an image from a local file."""
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
response = _make_anthropic_request(image_file, media_type)
return response.content[0].text # type: ignore

Check warning on line 89 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L88-L89

Added lines #L88 - L89 were not covered by tests


def analyze_image(image_path_or_url: str) -> str:
    """
    Analyze an image using Anthropic's vision-capable model.

    Args:
        image_path_or_url: Local path or http(s) URL to the image.

    Returns:
        str: Description of the image contents, or a human-readable error
        message when the input is empty or the image type is unsupported.
    """
    if not image_path_or_url:
        return "Image Path or URL is required."

    media_type = _get_media_type(image_path_or_url)
    if not media_type:
        return f"Unsupported image type use {ALLOWED_MEDIA_TYPES}."

    # Only treat the input as remote when it actually starts with an http(s)
    # scheme; a substring test would misroute local paths such as
    # "/data/http_cache/cat.png" to the downloader.
    if image_path_or_url.lower().startswith(("http://", "https://")):
        return _analyze_web_image(image_path_or_url, media_type)
    return _analyze_local_image(image_path_or_url, media_type)

Check warning on line 111 in agentstack/_tools/vision/__init__.py

View check run for this annotation

Codecov / codecov/patch

agentstack/_tools/vision/__init__.py#L110-L111

Added lines #L110 - L111 were not covered by tests
7 changes: 5 additions & 2 deletions agentstack/_tools/vision/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
"name": "vision",
"category": "image-analysis",
"env": {
"OPENAI_API_KEY": null
"ANTHROPIC_API_KEY": null,
"VISION_PROMPT": null,
"VISION_MODEL": null,
"VISION_MAX_TOKENS": null
},
"dependencies": [
"openai>=1.0.0",
"anthropic>=0.45.2",
"requests>=2.31.0"
],
"tools": ["analyze_image"]
Expand Down
59 changes: 51 additions & 8 deletions docs/llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -514,10 +514,6 @@ which adheres to a common pattern or exporting your project to share.
Templates are versioned, and each previous version provides a method to convert
its content to the current version.

> TODO: Templates are currently identified as `proj_templates` since they conflict
with the templates used by `generation`. Move existing templates to be part of
the generation package.

### `TemplateConfig.from_user_input(identifier: str)`
`<TemplateConfig>` Returns a `TemplateConfig` object for either a URL, file path,
or builtin template name.
Expand Down Expand Up @@ -716,7 +712,7 @@ title: 'System Analyzer'
description: 'Inspect a project directory and improve it'
---

[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/proj_templates/system_analyzer.json)
[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/system_analyzer.json)

```bash
agentstack init --template=system_analyzer
Expand All @@ -737,7 +733,7 @@ title: 'Researcher'
description: 'Research and report result from a query'
---

[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/proj_templates/research.json)
[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/research.json)

```bash
agentstack init --template=research
Expand Down Expand Up @@ -828,7 +824,54 @@ title: 'Content Creator'
description: 'Research a topic and create content on it'
---

[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/proj_templates/content_creator.json)
[View Template](https://github.com/AgentOps-AI/AgentStack/blob/main/agentstack/templates/content_creator.json)

## frameworks/list.mdx

---
title: Frameworks
description: 'Supported frameworks in AgentStack'
icon: 'ship'
---

These are documentation links to the frameworks supported directly by AgentStack.

To start a project with one of these frameworks, use
```bash
agentstack init <project_name> --framework <framework_name>
```

## Framework Docs
<CardGroup cols={3}>
<Card
title="CrewAI"
icon="ship"
href="https://docs.crewai.com/introduction"
>
An intuitive agentic framework (recommended)
</Card>
<Card
title="LangGraph"
icon="circle-nodes"
href="https://langchain-ai.github.io/langgraph/"
>
A complex but capable framework with a _steep_ learning curve
</Card>
<Card
title="OpenAI Swarms"
icon="bee"
href="https://github.com/openai/swarm"
>
A simple framework with a cult following
</Card>
<Card
title="LlamaIndex"
icon="layer-group"
href="https://docs.llamaindex.ai/en/stable/"
>
An expansive framework with many ancillary features
</Card>
</CardGroup>

## tools/package-structure.mdx

Expand Down Expand Up @@ -1043,7 +1086,7 @@ You can pass the `--wizard` flag to `agentstack init` to use an interactive proj
You can also pass a `--template=<template_name>` argument to `agentstack init` which will pre-populate your project with functionality
from a built-in template, or one found on the internet. A `template_name` can be one of three identifiers:

- A built-in AgentStack template (see the `templates/proj_templates` directory in the AgentStack repo for bundled templates).
- A built-in AgentStack template (see the `templates` directory in the AgentStack repo for bundled templates).
- A template file from the internet; pass the full https URL of the template.
- A local template file; pass an absolute or relative path.

Expand Down
Binary file added tests/fixtures/test_image.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 56 additions & 0 deletions tests/tools/test_tool_vision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from pathlib import Path
import unittest
from agentstack._tools import ToolConfig


TEST_IMAGE_PATH: Path = Path(__file__).parent.parent / 'fixtures/test_image.jpg'


class VisionToolTest(unittest.TestCase):
    """Tests for the `vision` tool: media-type detection, encoding, live calls."""

    @classmethod
    def setUpClass(cls):
        """Install the tool's dependencies once and skip if it cannot be imported."""
        import subprocess
        import sys

        tool = ToolConfig.from_tool_name('vision')
        for dependency in tool.dependencies:
            # Pass the requirement as a single argv entry: through a shell,
            # the ">=" in e.g. "anthropic>=0.45.2" is an output redirection,
            # which drops the version constraint and creates a stray file.
            # Failures are tolerated here; the import check below decides
            # whether the tests can run.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", dependency],
                check=False,
            )

        try:
            from agentstack._tools import vision  # noqa: F401
        except ImportError as e:
            raise unittest.SkipTest(str(e))

    def test_get_media_type(self):
        """Known extensions map to media types; unknown ones yield None."""
        from agentstack._tools.vision import _get_media_type

        self.assertEqual(_get_media_type("image.jpg"), "image/jpeg")
        self.assertEqual(_get_media_type("image.jpeg"), "image/jpeg")
        self.assertEqual(_get_media_type("http://google.com/image.png"), "image/png")
        self.assertEqual(_get_media_type("/foo/bar/image.gif"), "image/gif")
        self.assertEqual(_get_media_type("image.webp"), "image/webp")
        self.assertIsNone(_get_media_type("document.pdf"))

    def test_encode_image(self):
        """The encoded string decodes back to the exact bytes of the fixture."""
        import base64

        from agentstack._tools.vision import _encode_image

        with open(TEST_IMAGE_PATH, "rb") as image_file:
            encoded_image = _encode_image(image_file)
        self.assertIsInstance(encoded_image, str)
        self.assertEqual(base64.b64decode(encoded_image), TEST_IMAGE_PATH.read_bytes())

    def test_analyze_image_web_live(self):
        """Live call: analyze the fixture image fetched over HTTPS."""
        from agentstack._tools.vision import analyze_image

        if not os.environ.get('ANTHROPIC_API_KEY'):
            self.skipTest("ANTHROPIC_API_KEY not set")

        # Use a URL that ends in the file extension. The previous
        # ".../test_image.jpg?raw=true" form was rejected as an unsupported
        # type before any request was made, so the test passed without ever
        # exercising the download or the API.
        image_url = "https://raw.githubusercontent.com/AgentOps-AI/AgentStack/7c1bf897742cfb58f4942a2547be70a0a1bb767a/tests/fixtures/test_image.jpg"
        result = analyze_image(image_url)
        self.assertIsInstance(result, str)
        self.assertNotIn("Unsupported image type", result)

    def test_analyze_image_local_live(self):
        """Live call: analyze the fixture image from the local filesystem."""
        from agentstack._tools.vision import analyze_image

        if not os.environ.get('ANTHROPIC_API_KEY'):
            self.skipTest("ANTHROPIC_API_KEY not set")

        result = analyze_image(str(TEST_IMAGE_PATH))
        self.assertIsInstance(result, str)
        self.assertNotIn("Unsupported image type", result)