---
# Configuration for the `balance_ppo` experiment: robot description, ROS
# topics, simulator stepping, task termination limits, reward weights, PPO
# training hyperparameters and evaluation settings.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. Section membership is inferred from key grouping
# and the adjacent comments -- in particular, `action_scale` /
# `action_smoothing` are assumed to belong to `robot`, and `rewards` is assumed
# to be a top-level section. Confirm against the code that loads this file.

experiment:
  name: balance_ppo

robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command
  # First version: apply small-range residual control around a safe standing
  # pose, so that a random policy cannot knock the robot over in one step.
  nominal_joint_positions:
    left_hip_pitch_joint: 0.0
    left_knee_pitch_joint: 0.0
    left_ankle_pitch_joint: 0.0
    left_ankle_joint: 0.0
    right_hip_pitch_joint: 0.0
    right_knee_pitch_joint: 0.0
    right_ankle_pitch_joint: 0.0
    right_ankle_joint: 0.0
  action_scale: 0.12
  action_smoothing: 0.85

ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control

sim:
  world_name: default
  # service_step suits reinforcement-learning training better: every step is
  # more controllable and easier to reproduce.
  step_mode: service_step
  control_dt: 0.05
  # Advance only a small number of physics steps per action to limit the
  # damage done by early random actions.
  service_step_iterations: 15
  reset_settle_seconds: 1.0
  # After a reset, hold the nominal standing pose for a few steps so the robot
  # returns to a more stable initial state.
  reset_hold_steps: 8
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # The GUI launch hint is kept here so you can watch what the robot is doing
  # during training.
  launch_hint: 'ros2 launch guguji_ros2 gazebo.launch.py pause:=true'

task:
  target_forward_velocity: 0.0
  target_base_height: null
  # For the first round, relax the fall criteria so that slight swaying does
  # not end the episode immediately.
  max_roll_rad: 1.00
  max_pitch_rad: 1.00
  min_base_height: 0.24
  termination_grace_steps: 8

rewards:
  alive_bonus: 2.0
  velocity_tracking_scale: 1.5
  velocity_tracking_sigma: 0.30
  upright_scale: 2.0
  height_scale: 1.0
  action_rate_penalty_scale: 0.01
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.10
  fall_penalty: -8.0

training:
  algorithm: ppo
  total_timesteps: 200000
  max_episode_steps: 400
  seed: 42
  device: auto
  learning_rate: 0.0003
  # Training against a single Gazebo environment is slow, so the rollout is
  # kept short to get log feedback sooner.
  n_steps: 128
  batch_size: 64
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 20000
  output_root: guguji_rl/outputs

evaluation:
  episodes: 3
  deterministic: true