Skip to content

Commit 62c682e

Browse files
docstring update
1 parent 425bd84 commit 62c682e

File tree

1 file changed

+46
-7
lines changed

1 file changed

+46
-7
lines changed

src/diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,27 @@ def retrieve_latents(
7676
7777
>>> model_id = "nvidia/Cosmos-Predict2.5-Base-2B"
7878
>>> pipe = Cosmos_2_5_PredictBase.from_pretrained(model_id, torch_dtype=torch.bfloat16)
79-
>>> pipe.to("cuda")
80-
81-
>>> prompt = "A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface. Suds form and bubble around the scrubber, creating a satisfying visual of cleanliness in progress. The sound of scrubbing fills the air, accompanied by the gentle clinking of the dish against the sink. As the scrubber continues its task, the dish transforms, gleaming under the bright kitchen lights, symbolizing the triumph of cleanliness over mess."
82-
>>> negative_prompt = "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."
79+
>>> pipe = pipe.to("cuda")
80+
81+
>>> # Common negative prompt reused across modes.
82+
>>> negative_prompt = (
83+
... "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, "
84+
... "over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, "
85+
... "underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky "
86+
... "movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, "
87+
... "fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. "
88+
... "Overall, the video is of poor quality."
89+
... )
8390
8491
>>> # Text2World: generate a 93-frame world video from text only.
92+
>>> prompt = (
93+
... "As the red light shifts to green, the red bus at the intersection begins to move forward, its headlights "
94+
... "cutting through the falling snow. The snowy tire tracks deepen as the vehicle inches ahead, casting fresh "
95+
... "lines onto the slushy road. Around it, streetlights glow warmer, illuminating the drifting flakes and wet "
96+
... "reflections on the asphalt. Other cars behind start to edge forward, their beams joining the scene. "
97+
... "The stillness of the urban street transitions into motion as the quiet snowfall is punctuated by the slow "
98+
... "advance of traffic through the frosty city corridor."
99+
... )
85100
>>> video = pipe(
86101
... image=None,
87102
... video=None,
@@ -93,8 +108,20 @@ def retrieve_latents(
93108
>>> export_to_video(video, "text2world.mp4", fps=16)
94109
95110
>>> # Image2World: condition on a single image and generate a 93-frame world video.
111+
>>> prompt = (
112+
... "A high-definition video captures the precision of robotic welding in an industrial setting. "
113+
... "The first frame showcases a robotic arm, equipped with a welding torch, positioned over a large metal structure. "
114+
... "The welding process is in full swing, with bright sparks and intense light illuminating the scene, creating a vivid "
115+
... "display of blue and white hues. A significant amount of smoke billows around the welding area, partially obscuring "
116+
... "the view but emphasizing the heat and activity. The background reveals parts of the workshop environment, including a "
117+
... "ventilation system and various pieces of machinery, indicating a busy and functional industrial workspace. As the video "
118+
... "progresses, the robotic arm maintains its steady position, continuing the welding process and moving to its left. "
119+
... "The welding torch consistently emits sparks and light, and the smoke continues to rise, diffusing slightly as it moves upward. "
120+
... "The metal surface beneath the torch shows ongoing signs of heating and melting. The scene retains its industrial ambiance, with "
121+
... "the welding sparks and smoke dominating the visual field, underscoring the ongoing nature of the welding operation."
122+
... )
96123
>>> image = load_image(
97-
... "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/yellow-scrubber.png"
124+
... "https://media.githubusercontent.com/media/nvidia-cosmos/cosmos-predict2.5/refs/heads/main/assets/base/robot_welding.jpg"
98125
... )
99126
>>> video = pipe(
100127
... image=image,
@@ -104,10 +131,22 @@ def retrieve_latents(
104131
... num_frames=93,
105132
... generator=torch.Generator().manual_seed(1),
106133
... ).frames[0]
107-
>>> export_to_video(video, "image2world.mp4", fps=16)
134+
>>> export_to_video(video, "image2world.mp4", fps=16)
108135
109136
>>> # Video2World: condition on an input clip and predict a 93-frame world video.
110-
>>> input_video = load_video("path/to/input.mp4")
137+
>>> prompt = (
138+
... "The video opens with an aerial view of a large-scale sand mining construction operation, showcasing extensive piles "
139+
... "of brown sand meticulously arranged in parallel rows. A central water channel, fed by a water pipe, flows through the "
140+
... "middle of these sand heaps, creating ripples and movement as it cascades down. The surrounding area features dense green "
141+
... "vegetation on the left, contrasting with the sandy terrain, while a body of water is visible in the background on the right. "
142+
... "As the video progresses, a piece of heavy machinery, likely a bulldozer, enters the frame from the right, moving slowly along "
143+
... "the edge of the sand piles. This machinery's presence indicates ongoing construction work in the operation. The final frame "
144+
... "captures the same scene, with the water continuing its flow and the bulldozer still in motion, maintaining the dynamic yet "
145+
... "steady pace of the construction activity."
146+
... )
147+
>>> input_video = load_video(
148+
... "https://github.com/nvidia-cosmos/cosmos-predict2.5/raw/refs/heads/main/assets/base/sand_mining.mp4"
149+
... )
111150
>>> video = pipe(
112151
... image=None,
113152
... video=input_video,

0 commit comments

Comments
 (0)