Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 123 additions & 81 deletions README.md

Large diffs are not rendered by default.

11 changes: 3 additions & 8 deletions internnav/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
from internnav.agent.base import Agent
from internnav.agent.cma_agent import CmaAgent
from internnav.agent.dialog_agent import DialogAgent
from internnav.agent.internvla_n1_agent import InternVLAN1Agent
from internnav.agent.rdp_agent import RdpAgent
from internnav.agent.seq2seq_agent import Seq2SeqAgent
from internnav.agent.internvla_n1_agent import InternVLAN1Agent

__all__ = [
'Agent',
'CmaAgent',
'RdpAgent',
'Seq2SeqAgent',
'InternVLAN1Agent'
]
__all__ = ['Agent', 'DialogAgent', 'CmaAgent', 'RdpAgent', 'Seq2SeqAgent', 'InternVLAN1Agent']
1 change: 1 addition & 0 deletions internnav/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def decorator(agent_class):
if agent_type in cls.agents:
raise ValueError(f"Agent {agent_type} already registered.")
cls.agents[agent_type] = agent_class
return agent_class

return decorator

Expand Down
472 changes: 472 additions & 0 deletions internnav/agent/dialog_agent.py

Large diffs are not rendered by default.

13 changes: 4 additions & 9 deletions internnav/agent/internvla_n1_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def s2_thread_func():
self.s2_thread.daemon = True
self.s2_thread.start()

def should_infer_s2(self, mode="sync"):
def should_infer_s2(self, mode="partial_async"):
"""Function: Enables the sys2 inference thread depending on the mode.
mode: just support 2 modes: "sync" and "partial_async".
"sync": Synchronous mode (navdp_version >= 0.0), Sys1 and Sys2 execute in a sequential inference chain.
Expand Down Expand Up @@ -298,8 +298,6 @@ def step(self, obs):
if self.sys1_infer_times > 0:
self.dual_forward_step += 1

# print('Output action:', output, self.dual_forward_step)

else:
self.look_down = False
# 2. If output is in latent form, execute latent S1
Expand Down Expand Up @@ -333,13 +331,9 @@ def step(self, obs):
.unsqueeze(-1)
.to(self.device)
) # [1, 2, 224, 224, 1]
self.s1_output = self.policy.s1_step_latent(
rgbs, depths, self.s2_output.output_latent, use_async=True
)
self.s1_output = self.policy.s1_step_latent(rgbs, depths, self.s2_output.output_latent)
else:
self.s1_output = self.policy.s1_step_latent(
rgb, depth * 10000.0, self.s2_output.output_latent, use_async=False
)
self.s1_output = self.policy.s1_step_latent(rgb, depth * 10000.0, self.s2_output.output_latent)

else:
assert False, f"S2 output should be either action or latent, but got neither! {self.s2_output}"
Expand Down Expand Up @@ -372,6 +366,7 @@ def step(self, obs):
if self.dual_forward_step > self.sys2_max_forward_step:
print("!!!!!!!!!!!!")
print("ERR: self.dual_forward_step ", self.dual_forward_step, " > ", self.sys2_max_forward_step)
print("Potential reason: sys1 infers empty trajectory list []")
print("!!!!!!!!!!!!")

print('Output discretized traj:', output['action'], self.dual_forward_step)
Expand Down
8 changes: 4 additions & 4 deletions internnav/agent/internvla_n1_agent_realworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ def step_s2(self, rgb, depth, pose, instruction, intrinsic, look_down=False):
**inputs,
max_new_tokens=128,
do_sample=False,
use_cache=True,
past_key_values=self.past_key_values,
# use_cache=True,
# past_key_values=self.past_key_values,
return_dict_in_generate=True,
raw_input_ids=copy.deepcopy(inputs.input_ids),
# raw_input_ids=copy.deepcopy(inputs.input_ids),
)
output_ids = outputs.sequences

Expand Down Expand Up @@ -253,5 +253,5 @@ def step_s2(self, rgb, depth, pose, instruction, intrinsic, look_down=False):
return action_seq, None, None

def step_s1(self, latent, rgb, depth):
all_trajs = self.model.generate_traj(latent, rgb, depth, use_async=True)
all_trajs = self.model.generate_traj(latent, rgb, depth)
return all_trajs
6 changes: 4 additions & 2 deletions internnav/configs/evaluator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@ class MetricCfg(BaseModel):

class TaskCfg(BaseModel):
task_name: Optional[str] = None
task_settings: Dict[str, Any]
scene: SceneCfg
task_settings: Dict[str, Any] = None
scene: SceneCfg = None
robot_name: Optional[str] = None
robot: Optional[RobotCfg] = None
robot_flash: Optional[bool] = None
flash_collision: Optional[bool] = None
robot_usd_path: Optional[str] = None
camera_resolution: Optional[List[int]] = None
metric: Optional[MetricCfg] = None
camera_prim_path: Optional[str] = None
one_step_stand_still: Optional[bool] = None


class EvalDatasetCfg(BaseModel):
Expand Down
3 changes: 3 additions & 0 deletions internnav/configs/evaluator/vln_default_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ def get_config(evaluator_cfg: EvalCfg):

# add the flash controller in, by flash flag.
if evaluator_cfg.task.robot_flash:
vln_move_by_flash_cfg.type = (
'VlnMoveByFlashCollisionController' if evaluator_cfg.task.flash_collision else 'VlnMoveByFlashController'
)
robot.controllers.append(ControllerCfg(controller_settings=vln_move_by_flash_cfg.model_dump()))

if evaluator_cfg.task.robot_flash or evaluator_cfg.eval_settings.get('vis_output', True):
Expand Down
Loading
Loading