From bddd90051889dfa37ff3798e9e54711c6fffcc04 Mon Sep 17 00:00:00 2001 From: wangyukai Date: Mon, 3 Nov 2025 15:15:57 +0000 Subject: [PATCH 1/2] add back visualize feature; refactor rdp utils to geometry utils --- internnav/agent/rdp_agent.py | 12 ++--- internnav/dataset/rdp_lerobot_dataset.py | 30 ++++-------- internnav/dataset/rdp_lmdb_dataset.py | 15 ++---- internnav/evaluator/utils/common.py | 2 +- .../{ => evaluator}/utils/visualize_util.py | 36 ++++++--------- internnav/evaluator/vln_multi_evaluator.py | 46 ++++++++++++++++--- .../rdp/utils.py => utils/geometry_utils.py} | 1 - mp3d_result.json | 1 + 8 files changed, 77 insertions(+), 66 deletions(-) rename internnav/{ => evaluator}/utils/visualize_util.py (90%) rename internnav/{model/basemodel/rdp/utils.py => utils/geometry_utils.py} (99%) create mode 100644 mp3d_result.json diff --git a/internnav/agent/rdp_agent.py b/internnav/agent/rdp_agent.py index 1bd1e72..0214110 100644 --- a/internnav/agent/rdp_agent.py +++ b/internnav/agent/rdp_agent.py @@ -9,7 +9,12 @@ from internnav.configs.agent import AgentCfg from internnav.configs.model.base_encoders import ModelCfg from internnav.model import get_config, get_policy -from internnav.model.basemodel.rdp.utils import ( +from internnav.model.utils.feature_extract import ( + extract_image_features, + extract_instruction_tokens, +) +from internnav.utils.common_log_util import common_logger as log +from internnav.utils.geometry_utils import ( FixedLengthStack, compute_actions, get_delta, @@ -18,11 +23,6 @@ quat_to_euler_angles, to_local_coords_batch, ) -from internnav.model.utils.feature_extract import ( - extract_image_features, - extract_instruction_tokens, -) -from internnav.utils.common_log_util import common_logger as log @Agent.register('rdp') diff --git a/internnav/dataset/rdp_lerobot_dataset.py b/internnav/dataset/rdp_lerobot_dataset.py index 964e3a7..d6afbc8 100644 --- a/internnav/dataset/rdp_lerobot_dataset.py +++ b/internnav/dataset/rdp_lerobot_dataset.py @@ -1,11 +1,9 @@ +import copy import random from collections import defaultdict -import lmdb -import msgpack_numpy import numpy as np import torch -import copy from PIL import Image from torchvision.transforms import ( CenterCrop, @@ -26,8 +24,8 @@ from internnav.dataset.base import BaseDataset, ObservationsDict, _block_shuffle from internnav.evaluator.utils.common import norm_depth from internnav.model.basemodel.LongCLIP.model import longclip -from internnav.model.basemodel.rdp.utils import get_delta, normalize_data, to_local_coords from internnav.model.utils.feature_extract import extract_instruction_tokens +from internnav.utils.geometry_utils import get_delta, normalize_data, to_local_coords from internnav.utils.lerobot_as_lmdb import LerobotAsLmdb @@ -174,14 +172,12 @@ def _load_next(self): # noqa: C901 data['camera_info'][self.camera_name]['rgb'] = data['camera_info'][self.camera_name]['rgb'][ :-drop_last_frame_nums ] - data['camera_info'][self.camera_name]['depth'] = data['camera_info'][self.camera_name][ - 'depth' - ][:-drop_last_frame_nums] - data['robot_info']['yaw'] = data['robot_info']['yaw'][:-drop_last_frame_nums] - data['robot_info']['position'] = data['robot_info']['position'][:-drop_last_frame_nums] - data['robot_info']['orientation'] = data['robot_info']['orientation'][ + data['camera_info'][self.camera_name]['depth'] = data['camera_info'][self.camera_name]['depth'][ :-drop_last_frame_nums ] + data['robot_info']['yaw'] = data['robot_info']['yaw'][:-drop_last_frame_nums] + data['robot_info']['position'] = 
data['robot_info']['position'][:-drop_last_frame_nums] + data['robot_info']['orientation'] = data['robot_info']['orientation'][:-drop_last_frame_nums] data['progress'] = data['progress'][:-drop_last_frame_nums] data['step'] = data['step'][:-drop_last_frame_nums] @@ -192,7 +188,7 @@ def _load_next(self): # noqa: C901 if yaw > np.pi: yaw -= 2 * np.pi yaws[yaw_i] = yaw - + episodes_in_json = data_to_load['episodes_in_json'] instructions = [ @@ -221,7 +217,6 @@ def _load_next(self): # noqa: C901 new_preload, self.bert_tokenizer, is_clip_long=self.is_clip_long ) - # process the instruction # copy the instruction to each step if self.need_extract_instr_features: @@ -447,12 +442,7 @@ def _pad_helper(t, max_len, fill_val=0, return_masks=False): observations_batch = ObservationsDict(observations_batch) # Expand B to match the flattened batch size B_expanded = B.repeat(observations_batch['prev_actions'].shape[0]).view(-1, 1) - - return ( - observations_batch, - observations_batch['prev_actions'], - not_done_masks_batch.view(-1, 1), - B_expanded - ) - + + return (observations_batch, observations_batch['prev_actions'], not_done_masks_batch.view(-1, 1), B_expanded) + return _rdp_collate_fn diff --git a/internnav/dataset/rdp_lmdb_dataset.py b/internnav/dataset/rdp_lmdb_dataset.py index 3050e49..d65befd 100644 --- a/internnav/dataset/rdp_lmdb_dataset.py +++ b/internnav/dataset/rdp_lmdb_dataset.py @@ -1,3 +1,4 @@ +import copy import random from collections import defaultdict @@ -5,7 +6,6 @@ import msgpack_numpy import numpy as np import torch -import copy from PIL import Image from torchvision.transforms import ( CenterCrop, @@ -26,8 +26,8 @@ from internnav.dataset.base import BaseDataset, ObservationsDict, _block_shuffle from internnav.evaluator.utils.common import norm_depth from internnav.model.basemodel.LongCLIP.model import longclip -from internnav.model.basemodel.rdp.utils import get_delta, normalize_data, to_local_coords from internnav.model.utils.feature_extract import extract_instruction_tokens +from internnav.utils.geometry_utils import get_delta, normalize_data, to_local_coords def _convert_image_to_rgb(image): @@ -466,12 +466,7 @@ def _pad_helper(t, max_len, fill_val=0, return_masks=False): observations_batch = ObservationsDict(observations_batch) # Expand B to match the flattened batch size B_expanded = B.repeat(observations_batch['prev_actions'].shape[0]).view(-1, 1) - - return ( - observations_batch, - observations_batch['prev_actions'], - not_done_masks_batch.view(-1, 1), - B_expanded - ) - + + return (observations_batch, observations_batch['prev_actions'], not_done_masks_batch.view(-1, 1), B_expanded) + return _rdp_collate_fn diff --git a/internnav/evaluator/utils/common.py b/internnav/evaluator/utils/common.py index 004801b..f855014 100644 --- a/internnav/evaluator/utils/common.py +++ b/internnav/evaluator/utils/common.py @@ -10,6 +10,7 @@ from scipy.ndimage import binary_dilation from internnav.utils.common_log_util import common_logger as log +from internnav.utils.geometry_utils import quat_to_euler_angles def create_robot_mask(topdown_global_map_camera, mask_size=20): @@ -343,7 +344,6 @@ def draw_trajectory(array, obs_lst, reference_path): import matplotlib.pyplot as plt import numpy as np from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - from omni.isaac.core.utils.rotations import quat_to_euler_angles from internnav.evaluator.utils.path_plan import world_to_pixel diff --git a/internnav/utils/visualize_util.py b/internnav/evaluator/utils/visualize_util.py 
similarity index 90% rename from internnav/utils/visualize_util.py rename to internnav/evaluator/utils/visualize_util.py index 73438d3..494d419 100644 --- a/internnav/utils/visualize_util.py +++ b/internnav/evaluator/utils/visualize_util.py @@ -1,23 +1,18 @@ +import logging import os import time -import logging from dataclasses import dataclass -from typing import Callable, Dict, Optional, Union, Any, List - -import numpy as np +from typing import Dict, List, Optional from internnav import PROJECT_ROOT_PATH -from .common_log_util import get_task_name -from internnav.evaluator.utils.common import obs_to_image, images_to_video +from internnav.evaluator.utils.common import images_to_video, obs_to_image try: - from PIL import Image _PIL_AVAILABLE = True except Exception: _PIL_AVAILABLE = False try: - import imageio.v2 as imageio _IMAGEIO_AVAILABLE = True except Exception: _IMAGEIO_AVAILABLE = False @@ -49,6 +44,7 @@ class VisualizeUtil: save_frame_fn(image, out_path) and save_video_fn(frames_dir, out_path, fps) Otherwise, built-ins (PIL + imageio) are used. """ + def __init__( self, dataset_name: str, @@ -56,8 +52,8 @@ def __init__( img_ext: str = "png", video_ext: str = "mp4", root_subdir: str = "video", - save_frame_fn = obs_to_image, - save_video_fn = images_to_video, + save_frame_fn=obs_to_image, + save_video_fn=images_to_video, ): self.dataset_name = dataset_name self.fps = fps @@ -73,14 +69,16 @@ def __init__( file_handler.setLevel(logging.INFO) file_handler.setFormatter(logging.Formatter("[%(asctime)s][%(levelname)s] %(message)s")) # Avoid adding duplicate handlers in repeated inits - if not any(isinstance(h, logging.FileHandler) and h.baseFilename == file_handler.baseFilename - for h in viz_logger.handlers): + if not any( + isinstance(h, logging.FileHandler) and h.baseFilename == file_handler.baseFilename + for h in viz_logger.handlers + ): viz_logger.addHandler(file_handler) self.base_dir = base_dir # Pluggable savers - self._save_frame_fn = save_frame_fn + self._save_frame_fn = save_frame_fn self._save_video_fn = save_video_fn # Metrics @@ -103,7 +101,7 @@ def trace_start(self, trajectory_id: str, reference_path): fps=self.fps, start_time=time.time(), saved_frames=[], - reference_path=reference_path + reference_path=reference_path, ) viz_logger.info(f"[start] trajectory_id={trajectory_id}") @@ -123,7 +121,7 @@ def save_observation( if step_index is None: step_index = ti.frame_count - + ti.frame_count += 1 if ti.saved_frames is not None: ti.saved_frames.append(obs) @@ -133,7 +131,6 @@ def save_observation( out_path = os.path.join(ti.frames_dir, fname) self._save_frame_fn(ti.saved_frames, action, out_path, ti.reference_path) - def trace_end(self, trajectory_id: str, result: Optional[str] = None, assemble_video: bool = True): """ Mark trajectory finished and (optionally) assemble video. 
@@ -153,7 +150,7 @@ def trace_end(self, trajectory_id: str, result: Optional[str] = None, assemble_v if assemble_video: self._save_video_fn(ti.frames_dir, ti.video_path, ti.fps) viz_logger.info(f"[video] saved {ti.video_path}") - + self._del_traj(trajectory_id) def report(self): @@ -179,15 +176,12 @@ def report(self): f"[duration:{duration}s] [frames:{total_frames}] [avg_fps:{fps}] results:{result_map}" ) - def _require_traj(self, trajectory_id: str) -> TrajectoryVizInfo: if trajectory_id not in self.trajectories: raise KeyError(f"trajectory_id not started: {trajectory_id}") return self.trajectories[trajectory_id] - + def _del_traj(self, trajectory_id: str) -> None: if trajectory_id not in self.trajectories: raise KeyError(f"trajectory_id not started: {trajectory_id}") del self.trajectories[trajectory_id] - - \ No newline at end of file diff --git a/internnav/evaluator/vln_multi_evaluator.py b/internnav/evaluator/vln_multi_evaluator.py index a825952..e573c7a 100644 --- a/internnav/evaluator/vln_multi_evaluator.py +++ b/internnav/evaluator/vln_multi_evaluator.py @@ -13,6 +13,7 @@ from internnav.evaluator.utils.data_collector import DataCollector from internnav.evaluator.utils.dataset import ResultLogger, split_data from internnav.evaluator.utils.eval import generate_episode +from internnav.evaluator.utils.visualize_util import VisualizeUtil from internnav.projects.dataloader.resumable import ResumablePathKeyDataloader from internnav.utils import common_log_util, progress_log_multi_util from internnav.utils.common_log_util import common_logger as log @@ -44,7 +45,7 @@ def __init__(self, config: EvalCfg): # generate episode episodes = generate_episode(self.dataloader, config) if len(episodes) == 0: - log.info("No more episodes to evaluate") + log.info("No more episodes to evaluate. 
Episodes are saved in data/sample_episodes/") sys.exit(0) config.task.task_settings.update({'episodes': episodes}) self.env_num = config.task.task_settings['env_num'] @@ -72,6 +73,9 @@ def __init__(self, config: EvalCfg): set_seed_model(0) self.data_collector = DataCollector(self.dataloader.lmdb_path) self.robot_flash = config.task.robot_flash + self.save_to_json = config.eval_settings['save_to_json'] + self.vis_output = config.eval_settings['vis_output'] + self.visualize_util = VisualizeUtil(self.task_name, fps=6) @property def ignore_obs_attr(self): @@ -202,9 +206,7 @@ def terminate_ops(self, obs_ls, reset_infos, terminated_ls): if terminated and self.runner_status[env_id] != runner_status_code.TERMINATED: obs = obs_ls[env_id] reset_info = reset_infos[env_id] - if not __debug__: - pass - log.info(json.dumps(obs['metrics'])) + log.info(f"{self.now_path_key(reset_info)}: {json.dumps(obs['metrics'], indent=4)}") self.data_collector.save_eval_result( key=self.now_path_key(reset_info), result=obs['metrics'][list(obs['metrics'].keys())[0]][0]['fail_reason'], @@ -216,19 +218,29 @@ def terminate_ops(self, obs_ls, reset_infos, terminated_ls): step_count=obs['metrics'][list(obs['metrics'].keys())[0]][0]['steps'], result=obs['metrics'][list(obs['metrics'].keys())[0]][0]['fail_reason'], ) + # visualize + if self.vis_output: + self.visualize_util.trace_end( + trajectory_id=self.now_path_key(reset_info), + result=obs['metrics'][list(obs['metrics'].keys())[0]][0]['fail_reason'], + ) + # json format result + if self.save_to_json: + self.result_logger.write_now_result_json() self.result_logger.write_now_result() self.runner_status[env_id] = runner_status_code.NOT_RESET log.info(f'env{env_id}: states switch to NOT_RESET.') - reset_env_ids = np.where(self.runner_status == runner_status_code.NOT_RESET)[ # need this status to reset - 0 - ].tolist() + # need this status to reset + reset_env_ids = np.where(self.runner_status == runner_status_code.NOT_RESET)[0].tolist() if len(reset_env_ids) > 0: log.info(f'env{reset_env_ids}: start new episode!') obs, new_reset_infos = self.env.reset(reset_env_ids) self.runner_status[reset_env_ids] = runner_status_code.WARM_UP log.info(f'env{reset_env_ids}: states switch to WARM UP.') + # modify original reset_info reset_infos = np.array(reset_infos) + # If there is only one reset and no new_deset_infos, return an empty array reset_infos[reset_env_ids] = new_reset_infos if len(new_reset_infos) > 0 else None self.runner_status[ np.vectorize(lambda x: x)(reset_infos) == None # noqa: E711 @@ -242,9 +254,15 @@ def terminate_ops(self, obs_ls, reset_infos, terminated_ls): for reset_info in new_reset_infos: if reset_info is None: continue + # start new trace log progress_log_multi_util.trace_start( trajectory_id=self.now_path_key(reset_info), ) + # start new visualize log + if self.vis_output: + self.visualize_util.trace_start( + trajectory_id=self.now_path_key(reset_info), reference_path=reset_info.data['reference_path'] + ) return False, reset_infos def eval(self): @@ -257,6 +275,10 @@ def eval(self): progress_log_multi_util.trace_start( trajectory_id=self.now_path_key(info), ) + if self.vis_output: + self.visualize_util.trace_start( + trajectory_id=self.now_path_key(info), reference_path=info.data['reference_path'] + ) log.info('start new episode!') obs = self.warm_up() @@ -277,6 +299,16 @@ def eval(self): env_term, reset_info = self.terminate_ops(obs, reset_info, terminated) if env_term: break + + # save step obs + if self.vis_output: + for ob, info, act in zip(obs, reset_info, 
action): + if info is None or 'rgb' not in ob or ob['fail_reason']: + continue + self.visualize_util.save_observation( + trajectory_id=self.now_path_key(info), obs=ob, action=act[self.robot_name] + ) + self.env.close() progress_log_multi_util.report() diff --git a/internnav/model/basemodel/rdp/utils.py b/internnav/utils/geometry_utils.py similarity index 99% rename from internnav/model/basemodel/rdp/utils.py rename to internnav/utils/geometry_utils.py index 7396f04..4ed3f56 100644 --- a/internnav/model/basemodel/rdp/utils.py +++ b/internnav/utils/geometry_utils.py @@ -1,6 +1,5 @@ import base64 import math -import os import pickle import numpy as np diff --git a/mp3d_result.json b/mp3d_result.json new file mode 100644 index 0000000..c478719 --- /dev/null +++ b/mp3d_result.json @@ -0,0 +1 @@ +{"val_unseen": {"TL": 11.4593, "NE": 6.9403, "FR": 0.3318, "StR": 0.0661, "OS": 0.2843, "SR": 0.1797, "SPL": 0.1361, "Count": 1347}, "val_seen": {"TL": 11.8113, "NE": 7.0597, "FR": 0.3809, "StR": 0.0653, "OS": 0.3005, "SR": 0.2155, "SPL": 0.1539, "Count": 659}} From 535846a7d5738d9e0b864bcc7cb57c632d02859c Mon Sep 17 00:00:00 2001 From: wangyukai Date: Tue, 4 Nov 2025 03:37:56 +0000 Subject: [PATCH 2/2] remove json file --- mp3d_result.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 mp3d_result.json diff --git a/mp3d_result.json b/mp3d_result.json deleted file mode 100644 index c478719..0000000 --- a/mp3d_result.json +++ /dev/null @@ -1 +0,0 @@ -{"val_unseen": {"TL": 11.4593, "NE": 6.9403, "FR": 0.3318, "StR": 0.0661, "OS": 0.2843, "SR": 0.1797, "SPL": 0.1361, "Count": 1347}, "val_seen": {"TL": 11.8113, "NE": 7.0597, "FR": 0.3809, "StR": 0.0653, "OS": 0.3005, "SR": 0.2155, "SPL": 0.1539, "Count": 659}}
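
For context on how the restored visualization hooks fit together, below is a minimal usage sketch of the VisualizeUtil lifecycle, assuming only the class and the evaluator calls visible in the diff above (trace_start, then save_observation per step, then trace_end and report). The dataset name, trajectory key, reference path, and the empty episode_steps list are illustrative placeholders, not values taken from the repository.

# Minimal sketch of the VisualizeUtil lifecycle restored by PATCH 1/2.
# All concrete values below are placeholders; the real evaluator pulls them
# from the dataloader and simulator observations.
from internnav.evaluator.utils.visualize_util import VisualizeUtil

viz = VisualizeUtil("mp3d_val_unseen", fps=6)  # fps=6 mirrors the evaluator's constructor call

trajectory_id = "scene_0001/path_0001"               # hypothetical path key
reference_path = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]  # hypothetical ground-truth waypoints

viz.trace_start(trajectory_id=trajectory_id, reference_path=reference_path)

episode_steps = []  # filled by the evaluator loop in practice; left empty so the sketch runs standalone
for obs, action in episode_steps:
    # same guard the evaluator applies before saving a frame
    if 'rgb' not in obs or obs.get('fail_reason'):
        continue
    viz.save_observation(trajectory_id=trajectory_id, obs=obs, action=action)

# Frames are written under the util's "video" subdirectory (the root_subdir default) and,
# with assemble_video left True, stitched into a video when the trajectory ends.
viz.trace_end(trajectory_id=trajectory_id, result="success")
viz.report()  # logs duration, frame count, average fps and a per-result count map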