diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 9899632b89c..f79991e88b7 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -166,6 +166,7 @@ Guidelines for modifications:
 * David Leon
 * Song Yi
 * Weihua Zhang
+* Junnosuke Kamohara
 
 ## Acknowledgements
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index fe988508ef9..6076a8f0c3b 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -19,6 +19,7 @@
 parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
 parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
 parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
+parser.add_argument("--video_speed", type=float, default=1.0, help="Playback speed multiplier for the recorded video.")
 parser.add_argument(
     "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
 )
@@ -117,6 +118,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+    env.metadata["render_fps"] = int((1 / env.unwrapped.step_dt) * args_cli.video_speed)  # type: ignore
 
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv):
diff --git a/source/isaaclab_assets/isaaclab_assets/robots/unitree.py b/source/isaaclab_assets/isaaclab_assets/robots/unitree.py
index 4e670b22756..f2674a76def 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/unitree.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/unitree.py
@@ -16,14 +16,19 @@
 * :obj:`G1_MINIMAL_CFG`: G1 humanoid robot with minimal collision bodies
 * :obj:`G1_29DOF_CFG`: G1 humanoid robot configured for locomanipulation tasks
 * :obj:`G1_INSPIRE_FTP_CFG`: G1 29DOF humanoid robot with Inspire 5-finger hand
+* :obj:`UNITREE_G1_29DOF_CFG`: Unitree G1 humanoid robot with 29 DOF for locomotion tasks
+* :obj:`UNITREE_G1_29DOF_DELAY_CFG`: Unitree G1 humanoid robot with 29 DOF and delayed PD actuators for locomotion tasks
+* :obj:`UNITREE_G1_23DOF_CFG`: Unitree G1 humanoid robot with 23 DOF for manipulation tasks
+* :obj:`UNITREE_G1_23DOF_DELAY_CFG`: Unitree G1 humanoid robot with 23 DOF and delayed PD actuators for manipulation tasks
 
 Reference: https://github.com/unitreerobotics/unitree_ros
 """
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import ActuatorNetMLPCfg, DCMotorCfg, ImplicitActuatorCfg
+from isaaclab.actuators import ActuatorNetMLPCfg, DCMotorCfg, DelayedPDActuatorCfg, ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
 from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR
+from isaaclab_assets import ISAACLAB_ASSETS_DATA_DIR
 
 ##
 # Configuration - Actuators.
@@ -609,3 +614,416 @@
     damping=0.2,
     armature=0.001,
 )
+
+
+"""
+G1 configuration aligned with the actual hardware XML.
+The G1_CFG above uses a USD file that does not match the hardware configuration.
+Gain parameters are adapted from LeggedLab https://github.com/Hellod035/LeggedLab +""" + +UNITREE_G1_29DOF_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAACLAB_ASSETS_DATA_DIR}/unitree/g1_29dof_rev_1_0/g1_29dof_rev_1_0.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + retain_accelerations=False, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=1000.0, + max_depenetration_velocity=1.0, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=1 + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 0.80), + joint_pos={ + ".*_hip_pitch_joint": -0.20, + ".*_knee_joint": 0.42, + ".*_ankle_pitch_joint": -0.23, + ".*_elbow_joint": 0.87, + "left_shoulder_roll_joint": 0.18, + "left_shoulder_pitch_joint": 0.35, + "right_shoulder_roll_joint": -0.18, + "right_shoulder_pitch_joint": 0.35, + }, + joint_vel={".*": 0.0}, + ), + soft_joint_pos_limit_factor=0.90, + actuators={ + "legs": ImplicitActuatorCfg( + joint_names_expr=[ + ".*_hip_yaw_joint", + ".*_hip_roll_joint", + ".*_hip_pitch_joint", + ".*_knee_joint", + ".*waist.*", + ], + effort_limit_sim={ + ".*_hip_yaw_joint": 88.0, + ".*_hip_roll_joint": 139.0, + ".*_hip_pitch_joint": 88.0, + ".*_knee_joint": 139.0, + ".*waist_yaw_joint": 88.0, + ".*waist_roll_joint": 35.0, + ".*waist_pitch_joint": 35.0, + }, + velocity_limit_sim={ + ".*_hip_yaw_joint": 32.0, + ".*_hip_roll_joint": 20.0, + ".*_hip_pitch_joint": 32.0, + ".*_knee_joint": 20.0, + ".*waist_yaw_joint": 32.0, + ".*waist_roll_joint": 30.0, + ".*waist_pitch_joint": 30.0, + }, + stiffness={ + ".*_hip_yaw_joint": 150.0, + ".*_hip_roll_joint": 150.0, + ".*_hip_pitch_joint": 200.0, + ".*_knee_joint": 200.0, + ".*waist.*": 200.0, + }, + damping={ + ".*_hip_yaw_joint": 5.0, + ".*_hip_roll_joint": 5.0, + ".*_hip_pitch_joint": 5.0, + ".*_knee_joint": 5.0, + ".*waist.*": 5.0, + }, + armature=0.01, + ), + "feet": ImplicitActuatorCfg( + joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"], + effort_limit_sim={ + ".*_ankle_pitch_joint": 35.0, + ".*_ankle_roll_joint": 35.0, + }, + velocity_limit_sim={ + ".*_ankle_pitch_joint": 30.0, + ".*_ankle_roll_joint": 30.0, + }, + stiffness=20.0, + damping=2.0, + armature=0.01, + ), + "shoulders": ImplicitActuatorCfg( + joint_names_expr=[ + ".*_shoulder_pitch_joint", + ".*_shoulder_roll_joint", + ], + effort_limit_sim={ + ".*_shoulder_pitch_joint": 25.0, + ".*_shoulder_roll_joint": 25.0, + }, + velocity_limit_sim={ + ".*_shoulder_pitch_joint": 37.0, + ".*_shoulder_roll_joint": 37.0, + }, + stiffness=100.0, + damping=2.0, + armature=0.01, + ), + "arms": ImplicitActuatorCfg( + joint_names_expr=[ + ".*_shoulder_yaw_joint", + ".*_elbow_joint", + ], + effort_limit_sim={ + ".*_shoulder_yaw_joint": 25.0, + ".*_elbow_joint": 25.0, + }, + velocity_limit_sim={ + ".*_shoulder_yaw_joint": 37.0, + ".*_elbow_joint": 37.0, + }, + stiffness=50.0, + damping=2.0, + armature=0.01, + ), + "wrist": ImplicitActuatorCfg( + joint_names_expr=[ + ".*_wrist_.*", + ], + effort_limit_sim={ + ".*_wrist_yaw_joint": 5.0, + ".*_wrist_roll_joint": 25.0, + ".*_wrist_pitch_joint": 5.0, + }, + velocity_limit_sim={ + ".*_wrist_yaw_joint": 22.0, + ".*_wrist_roll_joint": 37.0, + ".*_wrist_pitch_joint": 22.0, + }, + stiffness=40.0, + damping=2.0, + armature=0.01, + ), + }, +) + +UNITREE_G1_29DOF_DELAY_CFG = UNITREE_G1_29DOF_CFG.copy() 
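+# `copy()` performs a deep copy of the config, so the overrides below apply only to
+# UNITREE_G1_29DOF_DELAY_CFG: the "legs" and "feet" actuator groups are swapped for
+# DelayedPDActuatorCfg, while the remaining groups keep their implicit PD actuators.
+# Usage sketch (mirrors the commented alternative in scene_cfg.py below):
+#   robot = UNITREE_G1_29DOF_DELAY_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")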
+UNITREE_G1_29DOF_DELAY_CFG.actuators["legs"] = DelayedPDActuatorCfg(
+    joint_names_expr=[
+        ".*_hip_yaw_joint",
+        ".*_hip_roll_joint",
+        ".*_hip_pitch_joint",
+        ".*_knee_joint",
+        ".*waist.*",
+    ],
+    effort_limit_sim={
+        ".*_hip_yaw_joint": 88.0,
+        ".*_hip_roll_joint": 139.0,
+        ".*_hip_pitch_joint": 88.0,
+        ".*_knee_joint": 139.0,
+        ".*waist_yaw_joint": 88.0,
+        ".*waist_roll_joint": 35.0,
+        ".*waist_pitch_joint": 35.0,
+    },
+    velocity_limit_sim={
+        ".*_hip_yaw_joint": 32.0,
+        ".*_hip_roll_joint": 20.0,
+        ".*_hip_pitch_joint": 32.0,
+        ".*_knee_joint": 20.0,
+        ".*waist_yaw_joint": 32.0,
+        ".*waist_roll_joint": 30.0,
+        ".*waist_pitch_joint": 30.0,
+    },
+    stiffness={
+        ".*_hip_yaw_joint": 150.0,
+        ".*_hip_roll_joint": 150.0,
+        ".*_hip_pitch_joint": 200.0,
+        ".*_knee_joint": 200.0,
+        ".*waist.*": 200.0,
+    },
+    damping={
+        ".*_hip_yaw_joint": 5.0,
+        ".*_hip_roll_joint": 5.0,
+        ".*_hip_pitch_joint": 5.0,
+        ".*_knee_joint": 5.0,
+        ".*waist.*": 5.0,
+    },
+    armature=0.01,
+    min_delay=0,
+    max_delay=4,  # physics time steps (at the 5 ms physics dt used here: max 4 * 5 ms = 20 ms)
+)
+
+UNITREE_G1_29DOF_DELAY_CFG.actuators["feet"] = DelayedPDActuatorCfg(
+    joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"],
+    effort_limit_sim={
+        ".*_ankle_pitch_joint": 35.0,
+        ".*_ankle_roll_joint": 35.0,
+    },
+    velocity_limit_sim={
+        ".*_ankle_pitch_joint": 30.0,
+        ".*_ankle_roll_joint": 30.0,
+    },
+    stiffness=20.0,
+    damping=2.0,
+    armature=0.01,
+    min_delay=0,
+    max_delay=4,  # physics time steps (at the 5 ms physics dt used here: max 4 * 5 ms = 20 ms)
+)
+
+"""
+G1 23DOF model.
+"""
+
+UNITREE_G1_23DOF_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path=f"{ISAACLAB_ASSETS_DATA_DIR}/unitree/g1_23dof_rev_1_0/g1_23dof_rev_1_0.usd",
+        activate_contact_sensors=True,
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=False,
+            retain_accelerations=False,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=1000.0,
+            max_depenetration_velocity=1.0,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=1
+        ),
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        pos=(0.0, 0.0, 0.80),
+        joint_pos={
+            ".*_hip_pitch_joint": -0.1,
+            ".*_knee_joint": 0.3,
+            ".*_ankle_pitch_joint": -0.2,
+            ".*_elbow_joint": 0.97,
+            "left_shoulder_roll_joint": 0.25,
+            "left_shoulder_pitch_joint": 0.3,
+            "right_shoulder_roll_joint": -0.25,
+            "right_shoulder_pitch_joint": 0.3,
+        },
+        joint_vel={".*": 0.0},
+    ),
+    soft_joint_pos_limit_factor=0.90,
+    actuators={
+        "legs": ImplicitActuatorCfg(
+            joint_names_expr=[
+                ".*_hip_yaw_joint",
+                ".*_hip_roll_joint",
+                ".*_hip_pitch_joint",
+                ".*_knee_joint",
+                ".*waist.*",
+            ],
+            effort_limit_sim={
+                ".*_hip_yaw_joint": 88.0,
+                ".*_hip_roll_joint": 139.0,
+                ".*_hip_pitch_joint": 88.0,
+                ".*_knee_joint": 139.0,
+                ".*waist_yaw_joint": 88.0,
+            },
+            velocity_limit_sim={
+                ".*_hip_yaw_joint": 32.0,
+                ".*_hip_roll_joint": 20.0,
+                ".*_hip_pitch_joint": 32.0,
+                ".*_knee_joint": 20.0,
+                ".*waist_yaw_joint": 32.0,
+            },
+            stiffness={
+                ".*_hip_yaw_joint": 150.0,
+                ".*_hip_roll_joint": 150.0,
+                ".*_hip_pitch_joint": 200.0,
+                ".*_knee_joint": 200.0,
+                ".*waist.*": 200.0,
+            },
+            damping={
+                ".*_hip_yaw_joint": 5.0,
+                ".*_hip_roll_joint": 5.0,
+                ".*_hip_pitch_joint": 5.0,
+                ".*_knee_joint": 5.0,
+                ".*waist.*": 5.0,
+            },
+            armature=0.01,
+        ),
+        "feet": ImplicitActuatorCfg(
+            joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"],
+            effort_limit_sim={
+                ".*_ankle_pitch_joint": 35.0,
+                ".*_ankle_roll_joint": 35.0,
+            },
+            velocity_limit_sim={
+                ".*_ankle_pitch_joint": 30.0,
+                ".*_ankle_roll_joint": 30.0,
+            },
+            stiffness=20.0,
+            damping=2.0,
+            armature=0.01,
+        ),
+        "shoulders": ImplicitActuatorCfg(
+            joint_names_expr=[
+                ".*_shoulder_pitch_joint",
+                ".*_shoulder_roll_joint",
+            ],
+            effort_limit_sim={
+                ".*_shoulder_pitch_joint": 25.0,
+                ".*_shoulder_roll_joint": 25.0,
+            },
+            velocity_limit_sim={
+                ".*_shoulder_pitch_joint": 37.0,
+                ".*_shoulder_roll_joint": 37.0,
+            },
+            stiffness=100.0,
+            damping=2.0,
+            armature=0.01,
+        ),
+        "arms": ImplicitActuatorCfg(
+            joint_names_expr=[
+                ".*_shoulder_yaw_joint",
+                ".*_elbow_joint",
+            ],
+            effort_limit_sim={
+                ".*_shoulder_yaw_joint": 25.0,
+                ".*_elbow_joint": 25.0,
+            },
+            velocity_limit_sim={
+                ".*_shoulder_yaw_joint": 37.0,
+                ".*_elbow_joint": 37.0,
+            },
+            stiffness=50.0,
+            damping=2.0,
+            armature=0.01,
+        ),
+        "wrist": ImplicitActuatorCfg(
+            joint_names_expr=[
+                ".*_wrist_.*",
+            ],
+            effort_limit_sim={
+                ".*_wrist_roll_joint": 25.0,
+            },
+            velocity_limit_sim={
+                ".*_wrist_roll_joint": 37.0,
+            },
+            stiffness=40.0,
+            damping=2.0,
+            armature=0.01,
+        ),
+    },
+)
+
+UNITREE_G1_23DOF_DELAY_CFG = UNITREE_G1_23DOF_CFG.copy()
+UNITREE_G1_23DOF_DELAY_CFG.actuators["legs"] = DelayedPDActuatorCfg(
+    joint_names_expr=[
+        ".*_hip_yaw_joint",
+        ".*_hip_roll_joint",
+        ".*_hip_pitch_joint",
+        ".*_knee_joint",
+        ".*waist.*",
+    ],
+    effort_limit_sim={
+        ".*_hip_yaw_joint": 88.0,
+        ".*_hip_roll_joint": 139.0,
+        ".*_hip_pitch_joint": 88.0,
+        ".*_knee_joint": 139.0,
+        ".*waist_yaw_joint": 88.0,
+    },
+    velocity_limit_sim={
+        ".*_hip_yaw_joint": 32.0,
+        ".*_hip_roll_joint": 20.0,
+        ".*_hip_pitch_joint": 32.0,
+        ".*_knee_joint": 20.0,
+        ".*waist_yaw_joint": 32.0,
+    },
+    stiffness={
+        ".*_hip_yaw_joint": 150.0,
+        ".*_hip_roll_joint": 150.0,
+        ".*_hip_pitch_joint": 200.0,
+        ".*_knee_joint": 200.0,
+        ".*waist.*": 200.0,
+    },
+    damping={
+        ".*_hip_yaw_joint": 5.0,
+        ".*_hip_roll_joint": 5.0,
+        ".*_hip_pitch_joint": 5.0,
+        ".*_knee_joint": 5.0,
+        ".*waist.*": 5.0,
+    },
+    armature=0.01,
+    min_delay=0,
+    max_delay=4,  # physics time steps (at the 5 ms physics dt used here: max 4 * 5 ms = 20 ms)
+)
+
+UNITREE_G1_23DOF_DELAY_CFG.actuators["feet"] = DelayedPDActuatorCfg(
+    joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"],
+    effort_limit_sim={
+        ".*_ankle_pitch_joint": 35.0,
+        ".*_ankle_roll_joint": 35.0,
+    },
+    velocity_limit_sim={
+        ".*_ankle_pitch_joint": 30.0,
+        ".*_ankle_roll_joint": 30.0,
+    },
+    stiffness=20.0,
+    damping=2.0,
+    armature=0.01,
+    min_delay=0,
+    max_delay=4,  # physics time steps (at the 5 ms physics dt used here: max 4 * 5 ms = 20 ms)
+)
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/__init__.py
new file mode 100644
index 00000000000..fc9ef848161
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/__init__.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+
+from . import agents
+
+##
+# Register Gym environments.
+## + +gym.register( + id="Isaac-Velocity-Rough-G1-29DOF-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:G1RoughEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + }, +) + + +gym.register( + id="Isaac-Velocity-Rough-G1-29DOF-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:G1RoughEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + }, +) + + +gym.register( + id="Isaac-Velocity-Flat-G1-29DOF-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:G1FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + + +gym.register( + id="Isaac-Velocity-Flat-G1-29DOF-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:G1FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..2d932cbc7fa --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,56 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class G1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 3000
+    save_interval = 500
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_obs_normalization=False,
+        critic_obs_normalization=False,
+        # actor_obs_normalization=True,
+        # critic_obs_normalization=True,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        # entropy_coef=0.008,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+
+    logger = "wandb"
+    wandb_project = "g1_29dof_rough"
+    experiment_name = "g1_29dof_rough"
+    run_name = "g1_29dof_rough"
+
+
+@configclass
+class G1FlatPPORunnerCfg(G1RoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+
+        self.max_iterations = 30_000
+        self.wandb_project = "g1_29dof_flat"
+        self.experiment_name = "g1_29dof_flat"
+        self.run_name = "g1_29dof_flat"
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_flat_ppo_cfg.yaml
new file mode 100644
index 00000000000..3aa08627382
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_flat_ppo_cfg.yaml
@@ -0,0 +1,85 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: STATES + layers: [256, 128, 128] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: STATES + layers: [256, 128, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 24 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.0e-03 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: null + state_preprocessor_kwargs: null + value_preprocessor: null + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.008 + value_loss_scale: 1.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "g1_flat" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 36000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_rough_ppo_cfg.yaml new file mode 100644 index 00000000000..3d9390bf722 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/agents/skrl_rough_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: STATES + layers: [512, 256, 128] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: STATES + layers: [512, 256, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 24 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.0e-03 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: null + state_preprocessor_kwargs: null + value_preprocessor: null + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.008 + value_loss_scale: 1.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "g1_rough" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 72000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/__init__.py new file mode 100644 index 00000000000..78a11e12dfa --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/__init__.py @@ -0,0 +1,8 @@ +from .action_cfg import G1ActionsCfg +from .observation_cfg import G1ObservationsCfg +from .reward_cfg import G1RewardsCfg +from .scene_cfg import G1SceneCfg +from .termination_cfg import G1TerminationsCfg +from .curriculum_cfg import G1CurriculumCfg +from .event_cfg import G1EventCfg +from .commands_cfg import G1CommandsCfg \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/action_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/action_cfg.py new file mode 100644 index 00000000000..54c6eb52a13 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/action_cfg.py @@ -0,0 +1,57 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +import isaaclab.envs.mdp as mdp + +@configclass +class G1ActionsCfg: + """ + Action specifications for the MDP. 
+ Joint order is aligned with hardware motor order. + """ + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[ + "left_hip_pitch_joint", + "left_hip_roll_joint", + "left_hip_yaw_joint", + "left_knee_joint", + "left_ankle_pitch_joint", + "left_ankle_roll_joint", + + "right_hip_pitch_joint", + "right_hip_roll_joint", + "right_hip_yaw_joint", + "right_knee_joint", + "right_ankle_pitch_joint", + "right_ankle_roll_joint", + + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + ], + scale=0.25, + use_default_offset=True, + preserve_order=True, + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/commands_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/commands_cfg.py new file mode 100644 index 00000000000..005cff21fdc --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/commands_cfg.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from isaaclab.utils import configclass +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp + +@configclass +class G1CommandsCfg: + """Command specifications for the MDP.""" + + base_velocity = vel_mdp.UniformLevelVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.2, + rel_heading_envs=1.0, + heading_command=True, + heading_control_stiffness=0.5, + debug_vis=True, + ranges=vel_mdp.UniformLevelVelocityCommandCfg.Ranges( + lin_vel_x=(-1.0, 1.0), lin_vel_y=(-1.0, 1.0), ang_vel_z=(-1.0, 1.0), heading=(-math.pi, math.pi) + ), + ) + + locomotion_gait = vel_mdp.PhaseCommandCfg( + resampling_time_range=(1e10, 1e10), # resample on command resample + gait_period=(1.0, 1.0), # for now, fix gait period + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/curriculum_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/curriculum_cfg.py new file mode 100644 index 00000000000..8b17ded4d2f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/curriculum_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from dataclasses import MISSING +from isaaclab.utils import configclass +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import SceneEntityCfg + +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp + +@configclass +class G1CurriculumCfg: + """Curriculum terms for the MDP.""" + + terrain_levels = CurrTerm(func=vel_mdp.terrain_levels_vel) + command_vel = CurrTerm( + func=vel_mdp.commands_vel, + params={ + "command_name": "base_velocity", + "velocity_stages": [ + {"step": 0, "lin_vel_x": (-1.0, 1.0), "ang_vel_z": (-0.5, 0.5)}, + {"step": 10000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-0.7, 0.7)}, + {"step": 15000 * 24, "lin_vel_x": (-2.0, 3.0), "ang_vel_z": (-1.0, 1.0)}, + ], + }, + ) + track_lin_vel = CurrTerm( + func=vel_mdp.modify_reward_std, + params={"term_name": "track_lin_vel_xy", "std": 0.25, "num_steps": 10000 * 24} + ) + + track_ang_vel = CurrTerm( + func=vel_mdp.modify_reward_std, + params={"term_name": "track_ang_vel_z", "std": 0.25, "num_steps": 10000 * 24} + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/event_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/event_cfg.py new file mode 100644 index 00000000000..6d4b3ef6e07 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/event_cfg.py @@ -0,0 +1,114 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +import isaaclab.envs.mdp as mdp + + +@configclass +class G1EventCfg: + """Configuration for events.""" + + # startup + physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.6, 1.0), + "dynamic_friction_range": (0.4, 0.6), + "restitution_range": (0.0, 0.0), + "num_buckets": 64, + }, + ) + + add_base_mass = EventTerm( + func=mdp.randomize_rigid_body_mass, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="torso_link"), + "mass_distribution_params": (-5.0, 5.0), + "operation": "add", + }, + ) + + # base_com = EventTerm( + # func=mdp.randomize_rigid_body_com, + # mode="startup", + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="torso_link"), + # "com_range": {"x": (-0.05, 0.05), "y": (-0.05, 0.05), "z": (-0.01, 0.01)}, + # }, + # ) + + """ + reset + """ + # base_external_force_torque = EventTerm( + # func=mdp.apply_external_force_torque, + # mode="reset", + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="torso_link"), + # "force_range": (0.0, 0.0), + # "torque_range": (-0.0, 0.0), + # }, + # ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-0.5, 0.5), + "y": (-0.5, 0.5), + "z": (-0.5, 0.5), + "roll": (-0.5, 0.5), + "pitch": (-0.5, 0.5), + "yaw": (-0.5, 0.5), + }, + }, + ) + + reset_robot_joints = 
EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + scale_actuator_gains = EventTerm( + func=mdp.randomize_actuator_gains, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=".*" + ), + "operation": "scale", + "distribution": "uniform", + "stiffness_distribution_params": (0.75, 1.25), + "damping_distribution_params": (0.75, 1.25), + }, + ) + + """ + interval + """ + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}}, + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/observation_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/observation_cfg.py new file mode 100644 index 00000000000..5fb25afefd1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/observation_cfg.py @@ -0,0 +1,268 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab.envs.mdp as mdp +# import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.config.g1_29dof.mdp as g1_mdp + +""" +leggedlab +""" + +@configclass +class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, + noise=Unoise(n_min=-0.2, n_max=0.2), + scale=0.25, + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + noise=Unoise(n_min=-0.01, n_max=0.01), + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "left_hip_pitch_joint", + "left_hip_roll_joint", + "left_hip_yaw_joint", + "left_knee_joint", + "left_ankle_pitch_joint", + "left_ankle_roll_joint", + + "right_hip_pitch_joint", + "right_hip_roll_joint", + "right_hip_yaw_joint", + "right_knee_joint", + "right_ankle_pitch_joint", + "right_ankle_roll_joint", + + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + ], + preserve_order=True, + ), + }, + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + noise=Unoise(n_min=-1.5, n_max=1.5), + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "left_hip_pitch_joint", + "left_hip_roll_joint", + "left_hip_yaw_joint", + "left_knee_joint", + "left_ankle_pitch_joint", + 
"left_ankle_roll_joint", + + "right_hip_pitch_joint", + "right_hip_roll_joint", + "right_hip_yaw_joint", + "right_knee_joint", + "right_ankle_pitch_joint", + "right_ankle_roll_joint", + + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + ], + preserve_order=True, + ), + }, + scale=0.05, + ) + actions = ObsTerm(func=mdp.last_action) + height_scan = ObsTerm( + func=mdp.height_scan, + params={"sensor_cfg": SceneEntityCfg("height_scanner")}, + noise=Unoise(n_min=-0.1, n_max=0.1), + clip=(-1.0, 1.0), + ) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + # self.history_length = 5 # unitree_rl_lab uses 5 + self.history_length = 10 # legged_lab uses 10 + +@configclass +class CriticCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, + scale=0.25, + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "left_hip_pitch_joint", + "left_hip_roll_joint", + "left_hip_yaw_joint", + "left_knee_joint", + "left_ankle_pitch_joint", + "left_ankle_roll_joint", + + "right_hip_pitch_joint", + "right_hip_roll_joint", + "right_hip_yaw_joint", + "right_knee_joint", + "right_ankle_pitch_joint", + "right_ankle_roll_joint", + + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + ], + preserve_order=True, + ), + }, + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "left_hip_pitch_joint", + "left_hip_roll_joint", + "left_hip_yaw_joint", + "left_knee_joint", + "left_ankle_pitch_joint", + "left_ankle_roll_joint", + + "right_hip_pitch_joint", + "right_hip_roll_joint", + "right_hip_yaw_joint", + "right_knee_joint", + "right_ankle_pitch_joint", + "right_ankle_roll_joint", + + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + ], + preserve_order=True, + ), + }, + scale=0.05, + ) + actions = ObsTerm(func=mdp.last_action) + height_scan = ObsTerm( + func=mdp.height_scan, + params={"sensor_cfg": SceneEntityCfg("height_scanner")}, + clip=(-1.0, 1.0), + ) + + # privileged 
observations + base_lin_vel = ObsTerm(func=mdp.base_lin_vel) + feet_contact = ObsTerm( + func=g1_mdp.foot_contact, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link")}, + ) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + # self.history_length = 5 # unitree_rl_lab uses 5 + self.history_length = 10 # legged_lab uses 10 + +@configclass +class G1ObservationsCfg: + """Observation specifications for the MDP.""" + + # observation groups + policy: PolicyCfg = PolicyCfg() + critic: CriticCfg = CriticCfg() \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/reward_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/reward_cfg.py new file mode 100644 index 00000000000..db7bf879e50 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/reward_cfg.py @@ -0,0 +1,291 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass + +import isaaclab.envs.mdp as mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp +import isaaclab_tasks.manager_based.locomotion.velocity.config.g1_29dof.mdp as g1_mdp + + +""" +Legacy Walking policy +""" +@configclass +class G1RewardsCfg: + """Reward terms for the MDP.""" + + """ + task rewards + """ + track_lin_vel_xy = RewTerm( + func=vel_mdp.track_lin_vel_xy_yaw_frame_exp, + weight=2.0, + params={"command_name": "base_velocity", "std": math.sqrt(0.5)} + ) + track_ang_vel_z = RewTerm( + func=vel_mdp.track_ang_vel_z_world_exp, + weight=2.0, + params={"command_name": "base_velocity", "std": math.sqrt(0.5)} + ) + + """ + style rewards + """ + termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) + action_rate_l2_lower_body = RewTerm( + func=g1_mdp.action_rate_l2, + weight=-0.01, + params={"joint_idx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]} # mjc order + ) + action_rate_l2_upper_body = RewTerm( + func=g1_mdp.action_rate_l2, + weight=-0.05, + params={"joint_idx": [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]} # mjc order + ) + + # -- base penalties + base_height = RewTerm(func=mdp.base_height_l2, weight=-10.0, params={"target_height": 0.75}) + flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=-2.0) + # body_orientation_l2 = RewTerm( + # func=g1_mdp.body_orientation_l2, + # params={"asset_cfg": SceneEntityCfg("robot", body_names=".*torso.*")}, + # weight=-4.0 + # ) + lin_vel_z_l2 = RewTerm(func=mdp.lin_vel_z_l2, weight=-1.0) + ang_vel_xy_l2 = RewTerm(func=mdp.ang_vel_xy_l2, weight=-0.05) + + # -- contact penalties + undesired_contacts = RewTerm( + func=mdp.undesired_contacts, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="(?!.*ankle.*).*"), "threshold": 1.0}, + ) + + """ + joint regularization. 
+ """ + energy = RewTerm(func=g1_mdp.energy, weight=-1e-3) + dof_vel_l2 = RewTerm(func=mdp.joint_vel_l2, weight=-1e-4) + dof_acc_l2 = RewTerm(func=mdp.joint_acc_l2, weight=-2.5e-7) + + # penalize joint limits + dof_pos_limits = RewTerm( + func=mdp.joint_pos_limits, + weight=-2.0, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + joint_deviation = RewTerm( + func=g1_mdp.variable_posture, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*"), + "command_name": "base_velocity", + "weight_standing": { + ".*": 0.2, + }, + "weight_walking": { + # leg + ".*hip_pitch.*": 0.02, + ".*hip_roll.*": 0.15, + ".*hip_yaw.*": 0.15, + ".*knee.*": 0.02, + ".*ankle_pitch.*": 0.02, + ".*ankle_roll.*": 0.02, + # waist + ".*waist_yaw.*": 0.15, + ".*waist_roll.*": 0.1, + ".*waist_pitch.*": 0.1, + # arms + ".*shoulder_pitch.*": 0.15, + ".*elbow.*": 0.1, + ".*shoulder_roll.*": 0.15, + ".*shoulder_yaw.*": 0.15, + ".*wrist.*": 0.15, + }, + "weight_running": { + # leg + ".*hip_pitch.*": 0.02, + ".*hip_roll.*": 0.15, + ".*hip_yaw.*": 0.15, + ".*knee.*": 0.02, + ".*ankle_pitch.*": 0.02, + ".*ankle_roll.*": 0.02, + # waist + ".*waist_yaw.*": 0.15, + ".*waist_roll.*": 0.1, + ".*waist_pitch.*": 0.1, + # arms + ".*shoulder_pitch.*": 0.15, + ".*elbow.*": 0.1, + ".*shoulder_roll.*": 0.15, + ".*shoulder_yaw.*": 0.15, + ".*wrist.*": 0.15, + }, + "walking_threshold": 0.05, + "running_threshold": 1.5, + } + ) + + """ + foot orientation + """ + + feet_yaw_diff = RewTerm( + func=g1_mdp.reward_feet_yaw_diff, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + } + ) + + feet_yaw_mean = RewTerm( + func=g1_mdp.reward_feet_yaw_mean, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + } + ) + + feet_roll = RewTerm( + func=g1_mdp.reward_feet_roll, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + }, + ) + + feet_roll_diff = RewTerm( + func=g1_mdp.reward_feet_roll_diff, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + }, + ) + + feet_pitch = RewTerm( + func=g1_mdp.reward_feet_pitch, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + }, + ) + + feet_pitch_diff = RewTerm( + func=g1_mdp.reward_feet_pitch_diff, + weight=-1.0, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + body_names=[".*ankle_roll.*"], + preserve_order=True, + ), + }, + ) + + """ + gait + """ + + feet_air_time = RewTerm( + func=vel_mdp.feet_air_time_positive_biped, + weight=0.15, + params={ + "command_name": "base_velocity", + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_roll.*"), + "threshold": 0.5, + }, + ) + + # for faster gait learning + feet_swing = RewTerm( + func=g1_mdp.reward_feet_swing, + weight=2.0, + params={ + "swing_period": 0.3, + "sensor_cfg": SceneEntityCfg( + "contact_forces", body_names=".*ankle_roll.*" + ), + "cmd_threshold": 0.05, # or 0.1 + "command_name": "base_velocity", + "gait_command_name": "locomotion_gait", + }, + ) + + """ + stance foot + """ + + feet_slide = RewTerm( + func=vel_mdp.feet_slide, + weight=-0.25, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"), + "asset_cfg": SceneEntityCfg("robot", body_names=".*_ankle_roll_link"), + }, + ) + + 
foot_distance = RewTerm( + func = g1_mdp.reward_foot_distance, + weight=-2.0, + params={ + "ref_dist": 0.2, + "asset_cfg": SceneEntityCfg( + "robot", + body_names=".*_ankle_roll_link", + preserve_order=True, + ), + }, + ) + + feet_force = RewTerm( + func=g1_mdp.feet_force, + weight=-3e-3, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_roll.*"), + "threshold": 500, + "max_reward": 400, + }, + ) + + """ + swing foot + """ + + foot_clearance = RewTerm( + func=g1_mdp.foot_clearance_reward, + weight=4.0, + params={ + "target_height": 0.12, + "std": 0.05, + "tanh_mult": 2.0, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_ankle_roll_link"), + "standing_position_foot_z": 0.03539, + }, + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/scene_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/scene_cfg.py new file mode 100644 index 00000000000..7effa61ceba --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/scene_cfg.py @@ -0,0 +1,79 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensorCfg, RayCasterCfg, patterns +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR + +## +# Pre-defined configs +## +from isaaclab_assets import ( + UNITREE_G1_29DOF_CFG, + UNITREE_G1_29DOF_DELAY_CFG, + ) +from isaaclab.terrains.config.rough import ROUGH_TERRAINS_CFG # isort: skip + + + +@configclass +class G1SceneCfg(InteractiveSceneCfg): + """Configuration for the terrain scene with a legged robot.""" + + # ground terrain + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="generator", + terrain_generator=ROUGH_TERRAINS_CFG, + max_init_terrain_level=5, + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + ), + visual_material=sim_utils.MdlFileCfg( + mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl", + project_uvw=True, + texture_scale=(0.25, 0.25), + ), + debug_vis=False, + ) + # robots + robot: ArticulationCfg = UNITREE_G1_29DOF_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + # robot: ArticulationCfg = UNITREE_G1_29DOF_DELAY_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # sensors + height_scanner = RayCasterCfg( + prim_path="{ENV_REGEX_NS}/Robot/torso_link", + offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 20.0)), + ray_alignment="yaw", + pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=[1.6, 1.0]), + debug_vis=False, + mesh_prim_paths=["/World/ground"], + ) + # multi-body contact reporting + contact_forces = ContactSensorCfg( + prim_path="{ENV_REGEX_NS}/Robot/.*", + history_length=3, + track_air_time=True, + ) + + # lights + sky_light = AssetBaseCfg( + prim_path="/World/skyLight", + spawn=sim_utils.DomeLightCfg( + intensity=750.0, + 
texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr", + ), + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/termination_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/termination_cfg.py new file mode 100644 index 00000000000..5ee7ac370bb --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/env_cfg/termination_cfg.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.utils import configclass + +import isaaclab.envs.mdp as mdp +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp + +@configclass +class G1TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + base_too_low = DoneTerm( + func=vel_mdp.root_height_below_minimum_adaptive, + params={ + "minimum_height": 0.2, + "asset_cfg": SceneEntityCfg("robot", body_names=".*_ankle_roll_link"), + }, + ) + bad_orientation = DoneTerm(func=mdp.bad_orientation, params={"limit_angle": 0.8}) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/flat_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/flat_env_cfg.py new file mode 100644 index 00000000000..f8ceafd8ad3 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/flat_env_cfg.py @@ -0,0 +1,118 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +import math +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass +from isaaclab.envs.common import ViewerCfg + +from .rough_env_cfg import G1RoughEnvCfg +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as vel_mdp + + +@configclass +class G1FlatEnvCfg(G1RoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # physics dt + self.sim.dt = 0.005 # 200Hz + self.decimation = 4 # 50Hz + self.sim.render_interval = self.decimation + + # change terrain to flat + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + + # curriculum settings + self.curriculum.terrain_levels = None + + # no height scan + self.scene.height_scanner = None + self.observations.policy.height_scan = None + self.observations.critic.height_scan = None + + # Randomization + self.events.reset_base.params = { + "pose_range": + {"x": (-0.5, 0.5), + "y": (-0.5, 0.5), + "yaw": (-math.pi, math.pi), + }, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + + +class G1FlatEnvCfg_PLAY(G1FlatEnvCfg): + def __post_init__(self) -> None: + # post init of parent + super().__post_init__() + + # change physics speed + # self.sim.dt = 1/500 # 500Hz + # self.decimation = 10 # 50Hz + # self.sim.render_interval = self.decimation + self.episode_length_s = 20.0 + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + + # disable curriculum + self.curriculum.terrain_levels = None + self.curriculum.command_vel = None + + # disable randomization for play + self.observations.policy.enable_corruption = False + + # remove events + self.events.add_base_mass = None + self.events.push_robot = None + self.events.physics_material = None + self.events.scale_actuator_gains = None + + # Commands + self.commands.base_velocity.ranges.lin_vel_x = (0.0, 3.0) + self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + self.commands.base_velocity.heading_command = False + self.commands.base_velocity.rel_standing_envs = 0.02 + self.commands.base_velocity.resampling_time_range = (self.episode_length_s, self.episode_length_s) + + # Randomization + self.events.reset_base.params = { + "pose_range": { + "x": (-0.5, 0.5), + "y": (-0.5, 0.5), + "yaw": (-math.pi, math.pi), + }, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + + # track robot motion + # self.sim.render.enable_dlssg = True + # self.sim.render.dlss_mode = "performance" + self.viewer = ViewerCfg( + eye=(-0.0, -3.5, 0.0), + lookat=(0.0, -0.0, 0.0), + resolution=(1920, 1080), + origin_type="asset_root", + asset_name="robot" + ) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/__init__.py new file mode 100644 index 00000000000..4cd8fa696f1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/__init__.py @@ -0,0 +1,2 @@ +from .observations.observations import * +from .reward.reward import * \ No newline at end of file diff --git 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/observations/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/observations/observations.py new file mode 100644 index 00000000000..c80f4ae1f65 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/observations/observations.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Common functions that can be used to create observation terms. + +The functions can be passed to the :class:`isaaclab.managers.ObservationTermCfg` object to enable +the observation introduced by the function. +""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +import isaaclab.utils.math as math_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers.manager_base import ManagerTermBase +from isaaclab.managers.manager_term_cfg import ObservationTermCfg +from isaaclab.sensors import Camera, Imu, RayCaster, RayCasterCamera, TiledCamera, ContactSensor + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv, ManagerBasedRLEnv + +from isaaclab.envs.utils.io_descriptors import ( + generic_io_descriptor, + record_body_names, + record_dtype, + record_joint_names, + record_joint_pos_offsets, + record_joint_vel_offsets, + record_shape, +) + +""" +body kinematics +""" + +@generic_io_descriptor(observation_type="BodyState", on_inspect=[record_shape, record_dtype, record_body_names]) +def foot_pos_w( + env: ManagerBasedEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> torch.Tensor: + """The flattened body poses of the asset w.r.t the env.scene.origin. + + Note: Only the bodies configured in :attr:`asset_cfg.body_ids` will have their poses returned. + + Returns: + The position of bodies in articulation [num_env, 3 * num_bodies]. + Output is stacked horizontally per body. + """ + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + + # access the body poses in world frame + pose = asset.data.body_pose_w[:, asset_cfg.body_ids, :7] + pose[..., :3] = pose[..., :3] - env.scene.env_origins.unsqueeze(1) + + pos = pose[..., :3] # (num_envs, num_bodies, 3) + quat = pose[..., 3:7] # (num_envs, num_bodies, 4) + rot = math_utils.matrix_from_quat(quat) # (num_envs, num_bodies, 3, 3) + + local_pos = torch.tensor([0.0, 0.0, -0.039], device=pos.device).reshape(1, 1, 3) # (1, 1, 3) + pos_foot = pos + (rot @ local_pos.unsqueeze(-1)).squeeze(-1) # (num_envs, num_bodies, 3) + + return pos_foot.reshape(env.num_envs, -1) + + +""" +gait +""" + +def clock( + env: ManagerBasedRLEnv, + cmd_threshold: float = 0.05, + command_name:str = "base_velocity", + gait_command_name:str = "locomotion_gait", + ) -> torch.Tensor: + """ + Clock time using sin and cos from the phase of the simulation. + When using this reward, gait generator command must exist in command cfg. 
+
+    Returns:
+        Gait clock with shape (num_envs, 2).
+    """
+    cmd_norm = torch.norm(env.command_manager.get_command(command_name), dim=1)
+    gait_generator = env.command_manager.get_term(gait_command_name)
+
+    # multiply out-of-place: do not mutate the phase tensor stored in the command term
+    phase = gait_generator.phase * (cmd_norm > cmd_threshold).float()
+    return torch.cat(
+        [
+            torch.sin(2 * torch.pi * phase).unsqueeze(1),
+            torch.cos(2 * torch.pi * phase).unsqueeze(1),
+        ],
+        dim=1,
+    ).to(env.device)
+
+"""
+foot state
+"""
+
+def foot_height(
+    env: ManagerBasedEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """
+    Get the height of the foot links relative to the environment origin.
+
+    Returns:
+        Foot heights with shape (num_envs, num_foot_links).
+    """
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+
+    # access the body poses in world frame
+    # clone to avoid mutating the underlying data buffer when body_ids is a slice
+    pose = asset.data.body_pose_w[:, asset_cfg.body_ids, :7].clone()
+    pose[..., :3] = pose[..., :3] - env.scene.env_origins.unsqueeze(1)
+    return pose[..., 2].reshape(env.num_envs, -1)
+
+def foot_air_time(
+    env: ManagerBasedRLEnv,
+    sensor_cfg: SceneEntityCfg = SceneEntityCfg("contact_forces"),
+) -> torch.Tensor:
+    """
+    Get the air time of the foot links.
+
+    Returns:
+        Foot air times with shape (num_envs, num_foot_links).
+    """
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
+    return air_time
+
+def foot_contact(
+    env: ManagerBasedRLEnv,
+    sensor_cfg: SceneEntityCfg = SceneEntityCfg("contact_forces"),
+) -> torch.Tensor:
+    """
+    Get the contact state of the foot links (contact force norm above 1 N).
+
+    Returns:
+        Foot contact states with shape (num_envs, num_foot_links).
+    """
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    contact_forces = contact_sensor.data.net_forces_w[:, sensor_cfg.body_ids, :]  # (num_envs, num_body_ids, 3)
+    contact = (torch.norm(contact_forces, dim=-1) > 1.0).float()
+    return contact
+
+def foot_contact_forces(
+    env: ManagerBasedRLEnv,
+    sensor_cfg: SceneEntityCfg = SceneEntityCfg("contact_forces"),
+) -> torch.Tensor:
+    """
+    Get the signed, log-compressed contact forces of the foot links: sign(F) * log(1 + |F|).
+
+    Returns:
+        Foot contact forces with shape (num_envs, num_foot_links * 3).
+    """
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    contact_forces = contact_sensor.data.net_forces_w[:, sensor_cfg.body_ids, :]  # (num_envs, num_body_ids, 3)
+    forces_flat = contact_forces.reshape(env.num_envs, -1)
+    # log-compress to keep large impact forces in a reasonable numeric range
+    return torch.sign(forces_flat) * torch.log1p(torch.abs(forces_flat))
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/reward/reward.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/reward/reward.py
new file mode 100644
index 00000000000..dd56a51fd64
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/mdp/reward/reward.py
@@ -0,0 +1,484 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Common functions that can be used to define rewards for the learning environment.
+
+The functions can be passed to the :class:`isaaclab.managers.RewardTermCfg` object to
+specify the reward function and its parameters.
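+
+For example, a reward term built from one of these functions might be configured as follows
+(the term name, weight, and body-name pattern are illustrative)::
+
+    from isaaclab.managers import RewardTermCfg as RewTerm
+    from isaaclab.managers import SceneEntityCfg
+
+    feet_swing = RewTerm(
+        func=reward_feet_swing,
+        weight=2.0,
+        params={
+            "swing_period": 0.3,
+            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"),
+            "command_name": "base_velocity",
+            "gait_command_name": "locomotion_gait",
+        },
+    )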
+""" + +from __future__ import annotations + +import os +import torch +from typing import TYPE_CHECKING, Tuple + +from isaaclab.envs import mdp +import isaaclab.utils.math as math_utils +from isaaclab.utils.string import resolve_matching_names_values +from isaaclab.assets import Articulation, RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.sensors import ContactSensor +from isaaclab.utils.math import quat_apply_inverse, yaw_quat, euler_xyz_from_quat +from isaaclab.managers.manager_base import ManagerTermBase +from isaaclab.managers.manager_term_cfg import RewardTermCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +""" +base rewards +""" + +def body_orientation_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor: + """ + Reward based on the L2 norm of the body orientation deviation from upright. + + Returns: + sum of L2 norm of the body orientation deviation from upright for each environment. + """ + asset: Articulation = env.scene[asset_cfg.name] + body_orientation = math_utils.quat_apply_inverse( + asset.data.body_quat_w[:, asset_cfg.body_ids[0], :], asset.data.GRAVITY_VEC_W + ) + return torch.sum(torch.square(body_orientation[:, :2]), dim=1) + +""" +foot rewards +""" + +def _feet_rpy( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + )->Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute roll, pitch, yaw angles of feet. + + Returns: + roll, pitch, yaw angles of feet in radians. + """ + # Get the robot entity + entity = env.scene[asset_cfg.name] + + # Get the body IDs to use + feet_quat = entity.data.body_quat_w[:, asset_cfg.body_ids, :] + original_shape = feet_quat.shape + roll, pitch, yaw = euler_xyz_from_quat(feet_quat.reshape(-1, 4)) + + roll = (roll + torch.pi) % (2*torch.pi) - torch.pi + pitch = (pitch + torch.pi) % (2*torch.pi) - torch.pi + + return roll.reshape(original_shape[0], -1), \ + pitch.reshape(original_shape[0], -1), \ + yaw.reshape(original_shape[0], -1) + +def _base_rpy( + env, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + base_index: list[int] = [0], + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute roll, pitch, yaw angles of base. + + Returns: + Roll, pitch, yaw angles of base in radians. + """ + # Get the robot entity + entity = env.scene[asset_cfg.name] + + # Get the body IDs to use + body_quat = entity.data.body_quat_w[:, base_index, :] + original_shape = body_quat.shape + roll, pitch, yaw = euler_xyz_from_quat(body_quat.reshape(-1, 4)) + + return roll.reshape(original_shape[0]), \ + pitch.reshape(original_shape[0]), \ + yaw.reshape(original_shape[0]) + +def reward_feet_roll( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ) -> torch.Tensor: + """ + Reward based on the roll angles of the feet. + + Returns: + sum of squared roll angles of the feet for each environment. + """ + asset = env.scene[asset_cfg.name] + + # Calculate roll angles from quaternions for the feet + feet_roll, _, _ = _feet_rpy( + env, + asset_cfg=asset_cfg, + ) + + return torch.sum(torch.square(feet_roll), dim=-1) + +def reward_feet_roll_diff( + env: ManagerBasedRLEnv, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ) -> torch.Tensor: + """Reward minimizing the difference between feet roll angles. + + This function rewards the agent for having similar roll angles for all feet, + which encourages a more stable and coordinated gait. 
+
+    Returns:
+        The absolute difference between the roll angles of the feet for each environment.
+    """
+    # Calculate roll angles from quaternions for the feet
+    feet_roll, _, _ = _feet_rpy(
+        env,
+        asset_cfg=asset_cfg,
+    )
+    roll_rel_diff = torch.abs((feet_roll[:, 1] - feet_roll[:, 0] + torch.pi) % (2 * torch.pi) - torch.pi)
+    return roll_rel_diff
+
+def reward_feet_pitch(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """
+    Reward based on the pitch angles of the feet.
+
+    Returns:
+        Sum of squared pitch angles of the feet for each environment.
+    """
+    # Calculate pitch angles from quaternions for the feet
+    _, feet_pitch, _ = _feet_rpy(
+        env,
+        asset_cfg=asset_cfg,
+    )
+    return torch.sum(torch.square(feet_pitch), dim=-1)
+
+def reward_feet_pitch_diff(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """Reward minimizing the difference between feet pitch angles.
+
+    This function rewards the agent for having similar pitch angles for all feet,
+    which encourages a more stable and coordinated gait.
+
+    Returns:
+        The absolute difference between the pitch angles of the feet for each environment.
+    """
+    # Calculate pitch angles from quaternions for the feet
+    _, feet_pitch, _ = _feet_rpy(
+        env,
+        asset_cfg=asset_cfg,
+    )
+    pitch_rel_diff = torch.abs((feet_pitch[:, 1] - feet_pitch[:, 0] + torch.pi) % (2 * torch.pi) - torch.pi)
+    return pitch_rel_diff
+
+def reward_feet_yaw_diff(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """Reward minimizing the difference between feet yaw angles.
+
+    This function rewards the agent for having similar yaw angles for all feet,
+    which encourages a more stable and coordinated gait.
+
+    Returns:
+        The absolute difference between the yaw angles of the feet for each environment.
+    """
+    # Calculate yaw angles from quaternions for the feet
+    _, _, feet_yaw = _feet_rpy(
+        env,
+        asset_cfg=asset_cfg,
+    )
+    yaw_rel_diff = torch.abs((feet_yaw[:, 1] - feet_yaw[:, 0] + torch.pi) % (2 * torch.pi) - torch.pi)
+    return yaw_rel_diff
+
+def reward_feet_yaw_mean(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+) -> torch.Tensor:
+    """
+    Reward minimizing the difference between the mean feet yaw and the base yaw.
+
+    This function rewards the agent for having the mean yaw angle of all feet
+    close to the base yaw angle, which encourages better alignment between the feet and the body.
+
+    Returns:
+        The absolute difference between the mean yaw angle of the feet and the base yaw angle for each environment.
+    """
+    # Calculate yaw angles from quaternions for the feet
+    _, _, feet_yaw = _feet_rpy(
+        env,
+        asset_cfg=asset_cfg,
+    )
+
+    _, _, base_yaw = _base_rpy(
+        env, asset_cfg=asset_cfg, base_index=[0]
+    )
+    # shift the mean by pi when the two yaw angles straddle the +/-pi boundary
+    mean_yaw = feet_yaw.mean(dim=-1) + torch.pi * (torch.abs(feet_yaw[:, 1] - feet_yaw[:, 0]) > torch.pi)
+
+    yaw_diff = torch.abs((base_yaw - mean_yaw + torch.pi) % (2 * torch.pi) - torch.pi)
+
+    return yaw_diff
+
+
+"""
+joint regularization
+"""
+
+def energy(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
+    """
+    Reward based on the energy consumption of the robot.
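+    Mechanical power is approximated per joint as |tau * qdot|; the returned value is the L2
+    norm of the per-joint powers.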
+
+    Returns:
+        The energy consumption for each environment.
+    """
+    asset: Articulation = env.scene[asset_cfg.name]
+    # L2 norm over joints of the per-joint mechanical power |tau * qdot|
+    reward = torch.norm(torch.abs(asset.data.applied_torque * asset.data.joint_vel), dim=-1)
+    return reward
+
+class variable_posture(ManagerTermBase):
+    """
+    Compute a weighted L1 penalty that regularizes the robot's whole-body posture for each gait.
+    This reward is adapted from mjlab's Gaussian-kernel posture reward, but uses a weighted
+    absolute error instead.
+
+    The class takes in three different weight dictionaries for standing, walking, and running gaits.
+    Example configuration:
+        "weight_standing": {
+            "joint0": 0.2,
+            "joint1": 0.1,
+            ...
+        },
+        "weight_walking": {
+            "joint0": 0.02,
+            "joint1": 0.15,
+            ...
+        },
+        "weight_running": {
+            "joint0": 0.02,
+            "joint1": 0.15,
+            ...
+        },
+
+    Returns:
+        The weighted L1 posture error for each environment.
+    """
+    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
+        super().__init__(cfg, env)
+
+        asset = env.scene[cfg.params["asset_cfg"].name]
+        self.default_joint_pos = asset.data.default_joint_pos
+
+        _, joint_names = asset.find_joints(cfg.params["asset_cfg"].joint_names)
+
+        _, _, weight_standing = resolve_matching_names_values(
+            data=cfg.params["weight_standing"],
+            list_of_strings=joint_names,
+        )
+        self.weight_standing = torch.tensor(
+            weight_standing, device=env.device, dtype=torch.float32
+        )
+
+        _, _, weight_walking = resolve_matching_names_values(
+            data=cfg.params["weight_walking"],
+            list_of_strings=joint_names,
+        )
+        self.weight_walking = torch.tensor(weight_walking, device=env.device, dtype=torch.float32)
+
+        _, _, weight_running = resolve_matching_names_values(
+            data=cfg.params["weight_running"],
+            list_of_strings=joint_names,
+        )
+        self.weight_running = torch.tensor(weight_running, device=env.device, dtype=torch.float32)
+
+    def __call__(
+        self,
+        env: ManagerBasedRLEnv,
+        asset_cfg: SceneEntityCfg,
+        command_name: str,
+        weight_standing: dict,
+        weight_walking: dict,
+        weight_running: dict,
+        walking_threshold: float = 0.5,
+        running_threshold: float = 1.5,
+    ) -> torch.Tensor:
+        asset = env.scene[asset_cfg.name]
+        command = env.command_manager.get_command(command_name)
+
+        # classify the commanded gait by total commanded speed
+        linear_speed = torch.norm(command[:, :2], dim=-1)
+        angular_speed = torch.abs(command[:, 2])
+        total_speed = linear_speed + angular_speed
+
+        standing_mask = (total_speed < walking_threshold).float()
+        walking_mask = (
+            (total_speed >= walking_threshold) & (total_speed < running_threshold)
+        ).float()
+        running_mask = (total_speed >= running_threshold).float()
+
+        weight = (
+            self.weight_standing * standing_mask.unsqueeze(1)
+            + self.weight_walking * walking_mask.unsqueeze(1)
+            + self.weight_running * running_mask.unsqueeze(1)
+        )
+
+        current_joint_pos = asset.data.joint_pos[:, asset_cfg.joint_ids]
+        desired_joint_pos = self.default_joint_pos[:, asset_cfg.joint_ids]
+        error = torch.abs(current_joint_pos - desired_joint_pos)
+        return (weight * error).sum(dim=1)
+
+"""
+gait
+"""
+
+def reward_feet_swing(
+    env: ManagerBasedRLEnv,
+    swing_period: float,
+    sensor_cfg: SceneEntityCfg,
+    cmd_threshold: float = 0.05,
+    command_name: str | None = None,
+    gait_command_name: str | None = None,
+) -> torch.Tensor:
+    """
+    Reward based on the similarity between the predefined swing phase and the measured contact state of the feet.
+    The swing period is defined as the portion of the gait cycle where the foot is off the ground.
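+    The left-foot swing window is centered at phase 0.25 and the right-foot window at phase 0.75,
+    each with total width ``swing_period``.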
+
+    Example swing periods, where the locomotion phase runs from 0 to 1
+    (SS stands for single support and DS for double support):
+    swing_period=0.2 -> |0.0-0.15 DS| |0.15-0.35 SS| |0.35-0.65 DS| |0.65-0.85 SS| |0.85-1.0 DS|
+    swing_period=0.3 -> |0.0-0.1 DS| |0.1-0.4 SS| |0.4-0.6 DS| |0.6-0.9 SS| |0.9-1.0 DS|
+    swing_period=0.4 -> |0.0-0.05 DS| |0.05-0.45 SS| |0.45-0.55 DS| |0.55-0.95 SS| |0.95-1.0 DS|
+
+    Returns:
+        The swing phase reward for each environment.
+    """
+    gait_generator = env.command_manager.get_term(gait_command_name)
+    freq = 1 / gait_generator.phase_dt
+    phase = gait_generator.phase
+
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    contacts = (
+        contact_sensor.data.net_forces_w[:, sensor_cfg.body_ids, :]  # type: ignore
+        .norm(dim=-1)
+        > 1.0
+    )
+
+    # swing windows centered at phase 0.25 (left) and 0.75 (right);
+    # assumes sensor_cfg.body_ids resolves to [left foot, right foot]
+    left_swing = (torch.abs(phase - 0.25) < 0.5 * swing_period) & (freq > 1.0e-8)
+    right_swing = (torch.abs(phase - 0.75) < 0.5 * swing_period) & (freq > 1.0e-8)
+    reward = (left_swing & ~contacts[:, 0]).float() + (right_swing & ~contacts[:, 1]).float()
+
+    # gate by command magnitude: no swing reward when the command is near zero
+    if command_name is not None:
+        cmd_norm = torch.norm(env.command_manager.get_command(command_name), dim=1)
+        reward *= cmd_norm > cmd_threshold
+
+    return reward
+
+def fly(env: ManagerBasedRLEnv, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
+    """
+    Reward a flight phase: true when none of the monitored feet are in contact with the ground.
+
+    Returns:
+        Mask indicating whether no foot is in contact for each environment.
+    """
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    net_contact_forces = contact_sensor.data.net_forces_w_history
+    is_contact = torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] > threshold
+    return torch.sum(is_contact, dim=-1) < 0.5
+
+"""
+contact foot penalties
+"""
+
+def reward_foot_distance(
+    env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, ref_dist: float
+) -> torch.Tensor:
+    """
+    Calculates the penalty based on the distance between the feet.
+    Feet closer together than ``ref_dist`` are penalized; the penalty is clipped at 0.1 m.
+
+    Returns:
+        The penalty based on the distance between the feet for each environment.
+    """
+    asset: RigidObject = env.scene[asset_cfg.name]
+    foot_pos = asset.data.body_pos_w[:, asset_cfg.body_ids, :3]
+    foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1)
+
+    reward = torch.clip(ref_dist - foot_dist, min=0.0, max=0.1)
+
+    return reward
+
+def feet_force(
+    env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, threshold: float = 500, max_reward: float = 400
+) -> torch.Tensor:
+    """
+    Penalize the vertical contact forces on the feet to reduce impact forces.
+
+    Returns:
+        The penalty based on the vertical contact forces on the feet for each environment.
+    """
+    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    # norm of the vertical (z) force components over the monitored feet
+    reward = contact_sensor.data.net_forces_w[:, sensor_cfg.body_ids, 2].norm(dim=-1)
+    # penalize only the excess above the threshold, capped at max_reward
+    reward = (reward - threshold).clamp(min=0.0, max=max_reward)
+    return reward
+
+"""
+swing foot penalties
+"""
+
+def foot_clearance_reward(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg,
+    target_height: float,
+    std: float,
+    tanh_mult: float,
+    standing_position_foot_z: float = 0.039,
+) -> torch.Tensor:
+    """
+    Reward the swinging feet for clearing a specified height off the ground, weighted by foot velocity.
+
+    target_height is the desired max foot clearance.
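+    The per-foot error is the squared deviation of the world-frame foot height from
+    ``target_height + standing_position_foot_z``, weighted by tanh of the planar foot speed, so
+    clearance is only enforced while a foot is actually swinging.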
+    standing_position_foot_z is the foot link (ankle roll for the G1) height when the robot is standing still on flat ground.
+
+    Returns:
+        The foot clearance reward for each environment.
+    """
+    asset: RigidObject = env.scene[asset_cfg.name]
+    # world-frame foot height; assumes flat ground at z = 0
+    foot_z_target_error = torch.square(asset.data.body_pos_w[:, asset_cfg.body_ids, 2] - (target_height + standing_position_foot_z))
+    foot_velocity_tanh = torch.tanh(tanh_mult * torch.norm(asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2))
+    reward = foot_z_target_error * foot_velocity_tanh
+    return torch.exp(-torch.sum(reward, dim=1) / std)
+
+"""
+action rate
+"""
+
+def action_rate_l2(env: ManagerBasedRLEnv, joint_idx: list[int]) -> torch.Tensor:
+    """
+    Penalize the rate of change of the actions using an L2 squared kernel.
+    Unlike IsaacLab's built-in action rate penalty, this lets you penalize only specific joint indices.
+
+    Returns:
+        The L2 squared action rate penalty for each environment.
+    """
+    return torch.sum(
+        torch.square(env.action_manager.action[:, joint_idx] - env.action_manager.prev_action[:, joint_idx]),
+        dim=1)
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/rough_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/rough_env_cfg.py
new file mode 100644
index 00000000000..7f5e97ccde0
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1_29dof/rough_env_cfg.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+
+from .env_cfg import (
+    G1ActionsCfg,
+    G1ObservationsCfg,
+    G1RewardsCfg,
+    G1SceneCfg,
+    G1TerminationsCfg,
+    G1CurriculumCfg,
+    G1EventCfg,
+    G1CommandsCfg,
+)
+
+
+@configclass
+class G1RoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    rewards: G1RewardsCfg = G1RewardsCfg()
+    actions: G1ActionsCfg = G1ActionsCfg()
+    observations: G1ObservationsCfg = G1ObservationsCfg()
+    scene: G1SceneCfg = G1SceneCfg(num_envs=4096, env_spacing=2.5)
+    terminations: G1TerminationsCfg = G1TerminationsCfg()
+    curriculum: G1CurriculumCfg = G1CurriculumCfg()
+    events: G1EventCfg = G1EventCfg()
+    commands: G1CommandsCfg = G1CommandsCfg()
+
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        self.sim.physx.gpu_max_rigid_patch_count = 10 * 2**15
+
+        # Randomization
+        self.events.reset_base.params = {
+            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
+            "velocity_range": {
+                "x": (0.0, 0.0),
+                "y": (0.0, 0.0),
+                "z": (0.0, 0.0),
+                "roll": (0.0, 0.0),
+                "pitch": (0.0, 0.0),
+                "yaw": (0.0, 0.0),
+            },
+        }
+
+
+@configclass
+class G1RoughEnvCfg_PLAY(G1RoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        self.episode_length_s = 40.0
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+
+        self.commands.base_velocity.ranges.lin_vel_x = (1.0, 1.0)
+        self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0)
+        self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0)
+        self.commands.base_velocity.ranges.heading = (0.0, 0.0)
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.events.push_robot = None
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/__init__.py
index a8a1af6d926..b690cc7536c 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/__init__.py
@@ -10,3 +10,5 @@
 from .curriculums import *  # noqa: F401, F403
 from .rewards import *  # noqa: F401, F403
 from .terminations import *  # noqa: F401, F403
+from .commands.velocity_command import *  # noqa: F401, F403
+from .commands.phase_command import *  # noqa: F401, F403
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/phase_command.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/phase_command.py
new file mode 100644
index 00000000000..adbd61a29ee
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/phase_command.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+import torch
+
+from isaaclab.utils import configclass
+from isaaclab.managers import CommandTermCfg, CommandTerm
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedEnv
+
+
+class PhaseCommand(CommandTerm):
+    """
+    A phase command that cycles over the interval [0, 1).
+    The locomotion gait period is resampled per environment from the range ``gait_period`` (seconds).
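+
+    For example, with an environment step of 0.02 s and a sampled gait period of 0.8 s, the phase
+    advances by 0.02 / 0.8 = 0.025 per step and completes one cycle every 40 steps.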
+
+    """
+
+    def __init__(self, cfg: PhaseCommandCfg, env: ManagerBasedEnv):
+        super().__init__(cfg, env)
+        self.phase = torch.zeros(self.num_envs, device=self.device)
+        self.step_dt = env.step_dt
+        self.phase_dt = torch.empty(self.num_envs, device=self.device).uniform_(
+            cfg.gait_period[0],
+            cfg.gait_period[1]
+        )
+        self.phase_increment_per_step = self.step_dt / self.phase_dt
+
+    def _update_command(self) -> None:
+        self.phase += self.phase_increment_per_step  # advance by one step's worth of phase
+        self.phase = torch.fmod(self.phase, 1.0)  # wrap to [0, 1)
+
+    def _resample_command(self, env_ids: torch.Tensor) -> None:
+        # resample the gait period and reset the phase
+        self.phase[env_ids] = 0.0  # reset phase to 0
+        self.phase_dt[env_ids] = torch.empty(len(env_ids), device=self.device).uniform_(
+            self.cfg.gait_period[0],
+            self.cfg.gait_period[1]
+        )
+        self.phase_increment_per_step[env_ids] = self.step_dt / self.phase_dt[env_ids]
+
+    def _update_metrics(self):
+        pass  # no metrics
+
+    @property
+    def command(self) -> torch.Tensor:
+        return self.phase
+
+@configclass
+class PhaseCommandCfg(CommandTermCfg):
+    class_type: type = PhaseCommand
+    gait_period: tuple[float, float] = (0.4, 1.0)
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/velocity_command.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/velocity_command.py
new file mode 100644
index 00000000000..cbf26c32263
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/commands/velocity_command.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+import torch
+
+import isaaclab.utils.math as math_utils
+from isaaclab.markers import VisualizationMarkersCfg, VisualizationMarkers
+from isaaclab.markers.config import BLUE_ARROW_X_MARKER_CFG, GREEN_ARROW_X_MARKER_CFG
+from isaaclab.envs.mdp import UniformVelocityCommandCfg, UniformVelocityCommand
+from isaaclab.utils import configclass
+
+if TYPE_CHECKING:
+    from isaaclab.envs import ManagerBasedEnv
+
+
+class UniformLevelVelocityCommand(UniformVelocityCommand):
+    """
+    This class inherits from `UniformVelocityCommand` to
+    - apply a curriculum to the lin/ang velocity sampling ranges (see the ``commands_vel`` curriculum)
+    - provide debug visualization for both the linear and angular velocity commands
+    """
+    def __init__(self, cfg: UniformVelocityCommandCfg, env: ManagerBasedEnv):
+        super().__init__(cfg, env)
+
+    def _set_debug_vis_impl(self, debug_vis: bool):
+        # set visibility of markers
+        # note: the parent class only handles the callbacks,
not their visibility + if debug_vis: + # create markers if necessary for the first time + if not hasattr(self, "goal_linvel_visualizer"): + # -- goal + self.goal_linvel_visualizer = VisualizationMarkers(self.cfg.goal_linvel_visualizer_cfg) + self.goal_angvel_visualizer = VisualizationMarkers(self.cfg.goal_angvel_visualizer_cfg) + # -- current + self.curr_linvel_visualizer = VisualizationMarkers(self.cfg.current_linvel_visualizer_cfg) + self.curr_angvel_visualizer = VisualizationMarkers(self.cfg.current_angvel_visualizer_cfg) + # set their visibility to true + self.goal_linvel_visualizer.set_visibility(True) + self.goal_angvel_visualizer.set_visibility(True) + self.curr_linvel_visualizer.set_visibility(True) + self.curr_angvel_visualizer.set_visibility(True) + else: + if hasattr(self, "goal_linvel_visualizer"): + self.goal_linvel_visualizer.set_visibility(False) + self.goal_angvel_visualizer.set_visibility(False) + self.curr_linvel_visualizer.set_visibility(False) + self.curr_angvel_visualizer.set_visibility(False) + + def _debug_vis_callback(self, event): + # check if robot is initialized + # note: this is needed in-case the robot is de-initialized. we can't access the data + if not self.robot.is_initialized: + return + # get marker location + # -- base state + base_pos_w = self.robot.data.root_pos_w.clone() + base_pos_w[:, 2] += 0.5 + + # -- resolve linear velocity arrows (in xy plane) + linvel_des_scale, linvel_des_quat = self._resolve_xy_velocity_to_arrow(self.command[:, :2]) + linvel_curr_scale, linvel_curr_quat = self._resolve_xy_velocity_to_arrow(self.robot.data.root_lin_vel_b[:, :2]) + + # -- resolve angular velocity arrows (around z axis) + angvel_des_scale, angvel_des_quat = self._resolve_z_angvel_to_arrow(self.command[:, 2]) + angvel_curr_scale, angvel_curr_quat = self._resolve_z_angvel_to_arrow(self.robot.data.root_ang_vel_b[:, 2]) + + # display markers + self.goal_linvel_visualizer.visualize(base_pos_w, linvel_des_quat, linvel_des_scale) + self.curr_linvel_visualizer.visualize(base_pos_w, linvel_curr_quat, linvel_curr_scale) + + # offset angular velocity arrows slightly higher + angvel_pos_w = base_pos_w.clone() + angvel_pos_w[:, 2] += 0.5 + self.goal_angvel_visualizer.visualize(angvel_pos_w, angvel_des_quat, angvel_des_scale) + self.curr_angvel_visualizer.visualize(angvel_pos_w, angvel_curr_quat, angvel_curr_scale) + + """ + Internal helpers. 
+ """ + + def _resolve_xy_velocity_to_arrow(self, xy_velocity: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Converts the XY base velocity command to arrow direction rotation.""" + # obtain default scale of the marker + default_scale = self.goal_linvel_visualizer.cfg.markers["arrow"].scale + # arrow-scale + arrow_scale = torch.tensor(default_scale, device=self.device).repeat(xy_velocity.shape[0], 1) + arrow_scale[:, 0] *= torch.linalg.norm(xy_velocity, dim=1) * 3.0 + # arrow-direction + heading_angle = torch.atan2(xy_velocity[:, 1], xy_velocity[:, 0]) + zeros = torch.zeros_like(heading_angle) + arrow_quat = math_utils.quat_from_euler_xyz(zeros, zeros, heading_angle) + # convert everything back from base to world frame + base_quat_w = self.robot.data.root_quat_w + arrow_quat = math_utils.quat_mul(base_quat_w, arrow_quat) + + return arrow_scale, arrow_quat + + def _resolve_z_angvel_to_arrow(self, z_angvel: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Converts the Z angular velocity to arrow pointing in +/- z direction.""" + # obtain default scale of the marker + default_scale = self.goal_angvel_visualizer.cfg.markers["arrow"].scale + # arrow-scale based on angular velocity magnitude + arrow_scale = torch.tensor(default_scale, device=self.device).repeat(z_angvel.shape[0], 1) + arrow_scale[:, 0] *= torch.abs(z_angvel) * 2.0 + # arrow direction: point up (+z) for positive angvel (ccw), down (-z) for negative angvel (cw) + # rotate around y-axis: +90 degrees for +z, -90 degrees for -z + pitch_angle = torch.where(z_angvel < 0, torch.full_like(z_angvel, torch.pi / 2), torch.full_like(z_angvel, -torch.pi / 2)) + zeros = torch.zeros_like(pitch_angle) + arrow_quat = math_utils.quat_from_euler_xyz(zeros, pitch_angle, zeros) + # convert from base to world frame + base_quat_w = self.robot.data.root_quat_w + arrow_quat = math_utils.quat_mul(base_quat_w, arrow_quat) + + return arrow_scale, arrow_quat + + +@configclass +class UniformLevelVelocityCommandCfg(UniformVelocityCommandCfg): + class_type: type = UniformLevelVelocityCommand + + goal_linvel_visualizer_cfg: VisualizationMarkersCfg = GREEN_ARROW_X_MARKER_CFG.replace( + prim_path="/Visuals/Command/velocity_goal" + ) + """The configuration for the goal velocity visualization marker. Defaults to GREEN_ARROW_X_MARKER_CFG.""" + goal_angvel_visualizer_cfg: VisualizationMarkersCfg = GREEN_ARROW_X_MARKER_CFG.replace( + prim_path="/Visuals/Command/angvel_goal" + ) + """The configuration for the goal angvel visualization marker. Defaults to GREEN_ARROW_X_MARKER_CFG.""" + + current_linvel_visualizer_cfg: VisualizationMarkersCfg = BLUE_ARROW_X_MARKER_CFG.replace( + prim_path="/Visuals/Command/velocity_current" + ) + """The configuration for the current velocity visualization marker. Defaults to BLUE_ARROW_X_MARKER_CFG.""" + current_angvel_visualizer_cfg: VisualizationMarkersCfg = BLUE_ARROW_X_MARKER_CFG.replace( + prim_path="/Visuals/Command/angvel_current" + ) + """The configuration for the current velocity visualization marker. 
Defaults to BLUE_ARROW_X_MARKER_CFG."""
+
+    # Set the scale of the visualization markers to (0.5, 0.5, 0.5)
+    goal_linvel_visualizer_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
+    goal_angvel_visualizer_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
+    current_linvel_visualizer_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
+    current_angvel_visualizer_cfg.markers["arrow"].scale = (0.5, 0.5, 0.5)
\ No newline at end of file
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py
index 69b7e09b384..0066e4de973 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/curriculums.py
@@ -13,7 +13,7 @@
 import torch
 from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypedDict
 
 from isaaclab.assets import Articulation
 from isaaclab.managers import SceneEntityCfg
@@ -53,3 +53,62 @@ def terrain_levels_vel(
     terrain.update_env_origins(env_ids, move_up, move_down)
     # return the mean terrain level
     return torch.mean(terrain.terrain_levels.float())
+
+class VelocityStage(TypedDict, total=False):
+    """A curriculum stage: the ranges take effect once ``common_step_counter`` exceeds ``step``."""
+    step: int
+    lin_vel_x: tuple[float, float] | None
+    lin_vel_y: tuple[float, float] | None
+    ang_vel_z: tuple[float, float] | None
+
+def commands_vel(
+    env: ManagerBasedRLEnv,
+    env_ids: torch.Tensor,
+    command_name: str,
+    velocity_stages: list[VelocityStage],
+) -> dict[str, torch.Tensor]:
+    """
+    Curriculum that updates the command velocity ranges based on the global environment step count
+    (``common_step_counter``); e.g. ``5000 * 24`` is 5000 learning iterations at 24 steps per iteration.
+    Example:
+        "velocity_stages": [
+            {"step": 0, "lin_vel_x": (-1.0, 1.0), "ang_vel_z": (-0.5, 0.5)},
+            {"step": 5000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-0.7, 0.7)},
+            {"step": 10000 * 24, "lin_vel_x": (-2.0, 3.0)},
+        ],
+    """
+    del env_ids  # Unused.
+    command_term = env.command_manager.get_term(command_name)
+    assert command_term is not None
+    cfg = command_term.cfg
+    for stage in velocity_stages:
+        if env.common_step_counter > stage["step"]:
+            if "lin_vel_x" in stage and stage["lin_vel_x"] is not None:
+                cfg.ranges.lin_vel_x = stage["lin_vel_x"]
+            if "lin_vel_y" in stage and stage["lin_vel_y"] is not None:
+                cfg.ranges.lin_vel_y = stage["lin_vel_y"]
+            if "ang_vel_z" in stage and stage["ang_vel_z"] is not None:
+                cfg.ranges.ang_vel_z = stage["ang_vel_z"]
+    return {
+        "lin_vel_x_min": torch.tensor(cfg.ranges.lin_vel_x[0]),
+        "lin_vel_x_max": torch.tensor(cfg.ranges.lin_vel_x[1]),
+        "lin_vel_y_min": torch.tensor(cfg.ranges.lin_vel_y[0]),
+        "lin_vel_y_max": torch.tensor(cfg.ranges.lin_vel_y[1]),
+        "ang_vel_z_min": torch.tensor(cfg.ranges.ang_vel_z[0]),
+        "ang_vel_z_max": torch.tensor(cfg.ranges.ang_vel_z[1]),
+    }
+
+def modify_reward_std(env: ManagerBasedRLEnv, env_ids: Sequence[int], term_name: str, std: float, num_steps: int):
+    """Curriculum that modifies the std of an exponential-kernel reward term after a given number of steps.
+
+    Args:
+        env: The learning environment.
+        env_ids: Not used since all environments are affected.
+        term_name: The name of the reward term.
+        std: The new std of the exponential reward term.
+        num_steps: The number of steps after which the change should be applied.
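+
+    Example (illustrative; term and parameter names are assumptions)::
+
+        from isaaclab.managers import CurriculumTermCfg as CurrTerm
+
+        relax_tracking_std = CurrTerm(
+            func=mdp.modify_reward_std,
+            params={"term_name": "track_lin_vel_xy_exp", "std": 0.25, "num_steps": 5000 * 24},
+        )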
+ """ + if env.common_step_counter > num_steps: + # obtain term settings + term_cfg = env.reward_manager.get_term_cfg(term_name) + # update term settings + term_cfg.params["std"] = std + env.reward_manager.set_term_cfg(term_name, term_cfg) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py index 7a1fc12a1db..7a7b6c1f260 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py @@ -113,4 +113,4 @@ def stand_still_joint_deviation_l1( """Penalize offsets from the default joint positions when the command is very small.""" command = env.command_manager.get_command(command_name) # Penalize motion when command is nearly zero. - return mdp.joint_deviation_l1(env, asset_cfg) * (torch.norm(command[:, :2], dim=1) < command_threshold) + return mdp.joint_deviation_l1(env, asset_cfg) * (torch.norm(command[:, :2], dim=1) < command_threshold) \ No newline at end of file diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/terminations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/terminations.py index 833663df163..1a90ffce6a6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/terminations.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/terminations.py @@ -14,7 +14,7 @@ import torch from typing import TYPE_CHECKING -from isaaclab.assets import RigidObject +from isaaclab.assets import RigidObject, Articulation from isaaclab.managers import SceneEntityCfg if TYPE_CHECKING: @@ -51,3 +51,26 @@ def terrain_out_of_bounds( return torch.logical_or(x_out_of_bounds, y_out_of_bounds) else: raise ValueError("Received unsupported terrain type, must be either 'plane' or 'generator'.") + +def root_height_below_minimum_adaptive( + env: ManagerBasedRLEnv, + minimum_height: float, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> torch.Tensor: + """Terminate when the asset's root height is below the minimum height. + + Note: + This is currently only supported for flat terrains, i.e. the minimum height is in the world frame. + """ + # extract the used quantities (to enable type-hinting) + asset: Articulation = env.scene[asset_cfg.name] + + body_ids = asset_cfg.body_ids + if body_ids is None: + body_ids = slice(None) + + min_foot_height = ( + (asset.data.body_pos_w[:, body_ids, 2]).min(dim=1).values + ) + + return asset.data.root_pos_w[:, 2] - min_foot_height < minimum_height \ No newline at end of file