Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config.yaml.full
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ model:
api_base: https://ark.cn-beijing.volces.com/api/v3/
api_key:
video:
name: doubao-seedance-1-0-pro-250528
name: doubao-seedance-1-5-pro-251215
api_base: https://ark.cn-beijing.volces.com/api/v3/
api_key:
image:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ dependencies = [
"opentelemetry-instrumentation-logging>=0.56b0",
"wrapt==1.17.2", # For patching built-in functions
"openai<1.100", # For fix https://github.com/BerriAI/litellm/issues/13710
"volcengine-python-sdk==4.0.33", # For Volcengine API
"volcengine==1.0.193", # For Volcengine sign
"volcengine-python-sdk>=5.0.1", # For Volcengine API
"volcengine>=1.0.193", # For Volcengine sign
"agent-pilot-sdk==0.1.2", # Prompt optimization by Volcengine AgentPilot/PromptPilot toolkits
"fastmcp==2.12.3", # For running MCP
"trustedmcp==0.0.5", # For running TrustedMCP
Expand Down
2 changes: 1 addition & 1 deletion veadk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
DEFAULT_IMAGE_EDIT_MODEL_NAME = "doubao-seededit-3-0-i2i-250628"
DEFAULT_IMAGE_EDIT_MODEL_API_BASE = "https://ark.cn-beijing.volces.com/api/v3/"

DEFAULT_VIDEO_MODEL_NAME = "doubao-seedance-1-0-pro-250528"
DEFAULT_VIDEO_MODEL_NAME = "doubao-seedance-1-5-pro-251215"
DEFAULT_VIDEO_MODEL_API_BASE = "https://ark.cn-beijing.volces.com/api/v3/"

DEFAULT_IMAGE_GENERATE_MODEL_NAME = "doubao-seedream-4-5-251128"
Expand Down
17 changes: 13 additions & 4 deletions veadk/tools/builtin_tools/image_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def _build_input_parts(item: dict, task_type: str, image_field):


def handle_single_task_sync(
idx: int, item: dict, tool_context
idx: int,
item: dict,
timeout: int,
tool_context,
) -> tuple[list[dict], list[str]]:
logger.debug(f"handle_single_task_sync item {idx}: {item}")
success_list: list[dict] = []
Expand Down Expand Up @@ -139,6 +142,7 @@ def handle_single_task_sync(
"MODEL_AGENT_CLIENT_REQ_ID", f"veadk/{VERSION}"
),
},
timeout=timeout,
)
else:
response = client.images.generate(
Expand All @@ -152,6 +156,7 @@ def handle_single_task_sync(
"MODEL_AGENT_CLIENT_REQ_ID", f"veadk/{VERSION}"
),
},
timeout=timeout,
)

if not response.error:
Expand Down Expand Up @@ -228,14 +233,16 @@ def handle_single_task_sync(
return success_list, error_list


async def image_generate(tasks: list[dict], tool_context) -> Dict:
"""Generate images with Seedream 4.0.
async def image_generate(tasks: list[dict], tool_context, timeout: int = 600) -> Dict:
"""Generate images with Seedream 4.0 / 4.5

Commit batch image generation requests via tasks.

Args:
tasks (list[dict]):
A list of image-generation tasks. Each task is a dict.
timeout (int):
Timeout, in seconds, applied to each image generation request. Defaults to 600.
Per-task schema
---------------
Required:
Expand Down Expand Up @@ -336,7 +343,9 @@ async def image_generate(tasks: list[dict], tool_context) -> Dict:

def make_task(idx, item):
ctx = base_ctx.copy()
return lambda: ctx.run(handle_single_task_sync, idx, item, tool_context)
return lambda: ctx.run(
handle_single_task_sync, idx, item, timeout, tool_context
)

loop = asyncio.get_event_loop()
futures = [
Expand Down
73 changes: 66 additions & 7 deletions veadk/tools/builtin_tools/video_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

import json
import time
import asyncio
import traceback
from typing import Dict, cast

Expand Down Expand Up @@ -41,14 +41,27 @@
)


async def generate(prompt, first_frame_image=None, last_frame_image=None):
async def generate(
prompt, first_frame_image=None, last_frame_image=None, generate_audio=None
):
try:
if generate_audio is False:
generate_audio = None
model_name = getenv("MODEL_VIDEO_NAME", DEFAULT_VIDEO_MODEL_NAME)

if model_name.startswith("doubao-seedance-1-0") and generate_audio:
logger.warning(
"The `doubao-seedance-1-0` series models do not support enabling the audio field. "
"Please upgrade to the doubao-seedance-1-5 series of you want to generate video with audio."
)
generate_audio = None
if first_frame_image is None:
response = client.content_generation.tasks.create(
model=getenv("MODEL_VIDEO_NAME", DEFAULT_VIDEO_MODEL_NAME),
content=[
{"type": "text", "text": prompt},
],
generate_audio=generate_audio,
extra_headers={
"veadk-source": "veadk",
"veadk-version": VERSION,
Expand Down Expand Up @@ -112,7 +125,10 @@ async def generate(prompt, first_frame_image=None, last_frame_image=None):


async def video_generate(
params: list, tool_context: ToolContext, batch_size: int = 10
params: list,
tool_context: ToolContext,
batch_size: int = 10,
max_wait_seconds: int = 1200,
) -> Dict:
"""
Generate videos in **batch** from text prompts, optionally guided by a first/last frame,
Expand All @@ -126,6 +142,10 @@ async def video_generate(
A list of video generation requests. Each item supports the fields below.
batch_size (int):
The number of videos to generate in a batch. Defaults to 10.
max_wait_seconds (int):
Maximum time in seconds to wait for all video tasks in each batch.
Default is 20 minutes (1200 seconds). When the timeout is reached,
unfinished tasks will be marked as timeout errors.

Required per item:
- video_name (str):
Expand All @@ -148,6 +168,12 @@ async def video_generate(
URL or Base64 string (data URL) for the **last frame** (role = `last_frame`).
Use when you want the clip to end on a specific image.

- generate_audio (bool | None):
Whether the generated video should include sound.
If this field is omitted (None) or set to `False`, the video is generated without sound.
If it is set to `True`, the model is asked to generate sound as well.
To control the sound content in detail, describe it in the `prompt` field.
Note: the `doubao-seedance-1-0` series models do not support audio; with those models this field is ignored and a warning is logged.

Notes on first/last frame:
* When both frames are provided, **match width/height** to avoid cropping; if they differ,
the tail frame may be auto-cropped to fit.
Expand Down Expand Up @@ -222,6 +248,7 @@ async def video_generate(
"""
success_list = []
error_list = []
timeout_tasks = []
logger.debug(f"Using model: {getenv('MODEL_VIDEO_NAME', DEFAULT_VIDEO_MODEL_NAME)}")
logger.debug(f"video_generate params: {params}")

Expand All @@ -243,22 +270,32 @@ async def video_generate(
prompt = item["prompt"]
first_frame = item.get("first_frame", None)
last_frame = item.get("last_frame", None)
generate_audio = item.get("generate_audio", None)
try:
if not first_frame:
logger.debug(
f"video_generate task_{idx} text generation: prompt={prompt}"
)
response = await generate(prompt)
response = await generate(prompt, generate_audio=generate_audio)
elif not last_frame:
logger.debug(
f"video_generate task_{idx} first frame generation: prompt={prompt}, first_frame={first_frame}"
)
response = await generate(prompt, first_frame)
response = await generate(
prompt,
first_frame_image=first_frame,
generate_audio=generate_audio,
)
else:
logger.debug(
f"video_generate task_{idx} first and last frame generation: prompt={prompt}, first_frame={first_frame}, last_frame={last_frame}"
)
response = await generate(prompt, first_frame, last_frame)
response = await generate(
prompt,
first_frame_image=first_frame,
last_frame_image=last_frame,
generate_audio=generate_audio,
)
logger.debug(
f"batch_{start_idx // batch_size} video_generate task_{idx} response: {response}"
)
Expand All @@ -270,6 +307,10 @@ async def video_generate(

logger.debug("begin query video_generate task status...")

sleep_interval = 10
max_sleep_times = max_wait_seconds // sleep_interval
sleep_times = 0

while True:
task_list = list(task_dict.keys())
if len(task_list) == 0:
Expand Down Expand Up @@ -303,7 +344,23 @@ async def video_generate(
logger.debug(
f"{task_dict[task_id]} video_generate current status: {status}, Retrying after 10 seconds..."
)
time.sleep(10)
if sleep_times >= max_sleep_times:
logger.error(
f"video_generate polling timed out after {max_wait_seconds} seconds; remaining tasks: {task_dict}"
)
for task_id, video_name in task_dict.items():
timeout_tasks.append(
{
"task_id": task_id,
"video_name": video_name,
}
)
error_list.append(video_name)
task_dict.clear()
break

await asyncio.sleep(sleep_interval)
sleep_times += 1

add_span_attributes(
span,
Expand All @@ -324,6 +381,7 @@ async def video_generate(
"status": "error",
"success_list": success_list,
"error_list": error_list,
"timeout_tasks": timeout_tasks,
}
else:
logger.debug(
Expand All @@ -333,6 +391,7 @@ async def video_generate(
"status": "success",
"success_list": success_list,
"error_list": error_list,
"timeout_tasks": timeout_tasks,
}


Expand Down