---
# NOTE(review): this file was recovered from a numbered copy-paste (an editor
# line gutter and a "filename + size" header were embedded in the text, and all
# indentation was lost). The nesting below is reconstructed from the key names;
# confirm the section boundaries — especially whether `action_scale` /
# `action_smoothing` belong under `robot` — against the original file.

experiment:
  name: walk_ppo

robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command
  # The first walking version still uses "nominal stance + residual action",
  # so the stable-posture skill learned in the transition stage carries over
  # directly.
  nominal_joint_positions:
    left_hip_pitch_joint: 0.0
    left_knee_pitch_joint: 0.0
    left_ankle_pitch_joint: 0.0
    left_ankle_joint: 0.0
    right_hip_pitch_joint: 0.0
    right_knee_pitch_joint: 0.0
    right_ankle_pitch_joint: 0.0
    right_ankle_joint: 0.0
  # This round switches to "reference gait + residual action": give the legs
  # an explicit alternating swing rhythm first, then let the policy learn to
  # keep it stable and turn the gait into forward progress.
  reference_gait:
    enabled: true
    period: 0.80
    # Shorter swing phase / longer stance phase — closer to the rhythm of a
    # real forward walk.
    stance_ratio: 0.62
    hip_pitch_amplitude: 0.28
    knee_pitch_amplitude: 0.36
    knee_pitch_bias: 0.08
    ankle_pitch_amplitude: 0.18
  # The reference gait already swings the legs actively, so the residual
  # action range can be tightened a little.
  action_scale: 0.18
  action_smoothing: 0.72

ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control

sim:
  world_name: default
  # Walking training keeps using service_step so one action step maps to
  # exactly one reward step.
  step_mode: service_step
  control_dt: 0.05
  # Still slightly above the previous walking run, to amplify the real
  # displacement produced by each stepping action.
  service_step_iterations: 22
  reset_settle_seconds: 1.0
  reset_hold_steps: 6
  # Reset deletes and respawns the model; this is the initial pose used at
  # respawn.
  spawn_x: 0.0
  spawn_y: 0.0
  spawn_z: 0.35
  spawn_roll: 0.0
  spawn_pitch: 0.0
  spawn_yaw: 0.0
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # To watch in real time, keep the GUI open (as now) and launch with
  # pause:=true.
  launch_hint: 'ros2 launch guguji_ros2 gazebo.launch.py pause:=true'

task:
  # With the reference gait in place, keep pushing the target velocity up,
  # forcing the policy to convert leg swings into forward displacement.
  target_forward_velocity: 0.22
  target_base_height: null
  # Allow slightly larger body sway, leaving room for dynamic balance while
  # stepping.
  max_roll_rad: 0.90
  max_pitch_rad: 0.90
  min_base_height: 0.21
  termination_grace_steps: 12

rewards:
  # The reference gait is responsible for "start stepping"; these rewards
  # focus on "once stepping, actually move forward".
  alive_bonus: 0.6
  velocity_tracking_scale: 4.2
  velocity_tracking_sigma: 0.09
  forward_progress_scale: 5.0
  hip_alternation_scale: 0.4
  hip_target_separation: 0.35
  hip_antiphase_sigma: 0.18
  knee_flexion_scale: 0.3
  knee_target: 0.22
  knee_flexion_sigma: 0.12
  upright_scale: 1.6
  height_scale: 0.9
  action_rate_penalty_scale: 0.006
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.10
  backward_velocity_penalty_scale: 2.5
  stall_penalty_scale: 4.0
  stall_velocity_threshold: 0.08
  fall_penalty: -15.0

training:
  algorithm: ppo
  # Another 10000-step validation run, to see whether this more aggressive
  # forward reward produces a visible displacement gain.
  total_timesteps: 10000
  max_episode_steps: 500
  seed: 42
  device: auto
  # The control structure changed a lot when the reference gait was added, so
  # we still initialize from the latest walking model — but with a much lower
  # learning rate, so the policy adapts to the new rhythm by fine-tuning.
  init_model_path: outputs/walk_ppo_20260412_181640/final_model.zip
  # Markedly lower learning rate plus longer rollouts, to avoid the regression
  # caused earlier by overly aggressive updates.
  learning_rate: 0.00008
  n_steps: 256
  batch_size: 128
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.15
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 5000
  output_root: guguji_rl/outputs

evaluation:
  episodes: 3
  deterministic: true