---
# Configuration for the `walk_ppo` experiment: PPO training of a walking
# policy for the `guguji` robot, driven through ROS 2 topics and a Gazebo
# world-control service.
#
# NOTE(review): the section nesting below was reconstructed from the key
# names; confirm against the config loader that `rewards` and `evaluation`
# are top-level sections (and not nested under `task` / `training`).

experiment:
  name: walk_ppo

robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  # Joints listed left leg first, then right leg.
  # NOTE(review): presumably this order defines the action/observation
  # vector layout -- confirm against the environment code.
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command

ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  # The world name embedded in this path (`default`) matches `sim.world_name`.
  world_control_service: /world/default/control

sim:
  world_name: default
  # For walking training, `service_step` is also recommended: it makes it
  # easier to keep simulation steps and rewards reliably aligned.
  step_mode: service_step
  # NOTE(review): presumably control_dt = service_step_iterations x physics
  # step size (0.05 = 50 x 0.001) -- confirm against the world file.
  control_dt: 0.05
  service_step_iterations: 50
  reset_settle_seconds: 1.2
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # During training, start Gazebo paused and let the training program
  # advance the simulation step by step.
  # Quoted because the command contains `:=` tokens.
  launch_hint: 'ros2 launch guguji_ros2 gazebo.launch.py gui:=false pause:=true'

task:
  target_forward_velocity: 0.25
  # Explicit null: no target base height is configured here.
  target_base_height: null
  max_roll_rad: 0.65
  max_pitch_rad: 0.65
  min_base_height: 0.12

rewards:
  alive_bonus: 1.0
  velocity_tracking_scale: 3.0
  velocity_tracking_sigma: 0.25
  upright_scale: 1.5
  height_scale: 0.8
  action_rate_penalty_scale: 0.04
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.15
  # Already negative; the other penalty entries are positive scales.
  fall_penalty: -20.0

training:
  algorithm: ppo
  total_timesteps: 500000
  max_episode_steps: 500
  seed: 42
  device: auto
  learning_rate: 0.0003
  n_steps: 1024
  batch_size: 256
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 50000
  output_root: guguji_rl/outputs

evaluation:
  episodes: 3
  deterministic: true