---
# walk_ppo.yaml — PPO walking-gait training configuration for the "guguji"
# biped (ROS 2 + Gazebo).
# NOTE(review): this file was recovered from a paste that stripped all
# indentation and injected list numbering; the nesting below (top-level
# experiment / robot / ros / sim / task / rewards / training / evaluation)
# is a reconstruction — confirm it against the config loader's schema.

experiment:
  name: walk_ppo

robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command
  # The first walking version still uses "nominal stance + residual actions",
  # so it can directly reuse the stable-posture skill already learned during
  # the transition phase.
  nominal_joint_positions:
    left_hip_pitch_joint: 0.0
    left_knee_pitch_joint: 0.0
    left_ankle_pitch_joint: 0.0
    left_ankle_joint: 0.0
    right_hip_pitch_joint: 0.0
    right_knee_pitch_joint: 0.0
    right_ankle_pitch_joint: 0.0
    right_ankle_joint: 0.0
  # Now that reset is fixed, widen the action space slightly so the policy
  # finds it easier to take real steps.
  action_scale: 0.26
  # Slightly less smoothing than the previous round, giving the alternating
  # left/right leg swing more room.
  action_smoothing: 0.62

ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control

sim:
  world_name: default
  # Walking training keeps using service_step, so one action step maps to
  # exactly one reward step.
  step_mode: service_step
  control_dt: 0.05
  # Still slightly higher than the previous walking version, to amplify the
  # actual displacement produced by stepping motions.
  service_step_iterations: 22
  reset_settle_seconds: 1.0
  reset_hold_steps: 6
  # Reset deletes and respawns the model; this is the initial pose used for
  # the respawn.
  spawn_x: 0.0
  spawn_y: 0.0
  spawn_z: 0.35
  spawn_roll: 0.0
  spawn_pitch: 0.0
  spawn_yaw: 0.0
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # To watch live, keep the GUI open and launch with pause:=true as below.
  launch_hint: "ros2 launch guguji_ros2 gazebo.launch.py pause:=true"

task:
  # Push the target velocity up a bit more, further shrinking the feasible
  # space of "shuffling in place".
  target_forward_velocity: 0.18
  target_base_height: null
  # Allow slightly larger body sway, leaving room for dynamic balance while
  # stepping.
  max_roll_rad: 0.90
  max_pitch_rad: 0.90
  min_base_height: 0.21
  termination_grace_steps: 12

rewards:
  # On top of the forward-progress reward, this version adds lightweight
  # gait priors:
  #   1. hip_alternation encourages the left/right hips to swing in antiphase
  #   2. knee_flexion encourages moderate knee bend to help lift the legs
  alive_bonus: 0.8
  velocity_tracking_scale: 3.8
  velocity_tracking_sigma: 0.10
  forward_progress_scale: 4.2
  hip_alternation_scale: 0.8
  hip_target_separation: 0.35
  hip_antiphase_sigma: 0.18
  knee_flexion_scale: 0.6
  knee_target: 0.22
  knee_flexion_sigma: 0.12
  upright_scale: 1.6
  height_scale: 0.9
  action_rate_penalty_scale: 0.008
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.10
  backward_velocity_penalty_scale: 2.0
  stall_penalty_scale: 3.0
  stall_velocity_threshold: 0.06
  fall_penalty: -15.0

training:
  algorithm: ppo
  # Run another 10000-step validation to see whether this more aggressive
  # forward reward yields clearly visible displacement.
  total_timesteps: 10000
  max_episode_steps: 500
  seed: 42
  device: auto
  # Continue training from the walking model produced right after the
  # reset fix.
  init_model_path: outputs/walk_ppo_20260411_180823/final_model.zip
  # Significantly lower the learning rate and lengthen the rollout, to avoid
  # the regression caused by the previous overly aggressive updates.
  learning_rate: 0.00008
  n_steps: 256
  batch_size: 128
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.15
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 5000
  output_root: guguji_rl/outputs

evaluation:
  episodes: 3
  deterministic: true