---
# walk_ppo.yaml -- configuration for the "walk_ppo" experiment: PPO training of
# a walking task for the guguji robot in a Gazebo simulation driven over ROS 2.
#
# NOTE(review): the section nesting below was reconstructed from a flattened
# listing of this file; all eight sections are assumed to be top-level keys.
# Confirm against the config loader before relying on it.

experiment:
  name: walk_ppo

# Robot description and the joints listed for this experiment.
robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  # Four joints per leg, left leg first.
  # NOTE(review): presumably the order fixes the action/observation layout --
  # confirm against the environment code.
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command

# ROS 2 topic and service names.
ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control

# Simulation stepping and timing.
sim:
  world_name: default
  # Walking training should also use service_step, which makes it easier to
  # keep simulation steps and rewards reliably aligned.
  step_mode: service_step
  # NOTE(review): units are not stated in this file; the *_seconds keys suggest
  # seconds, and control_dt / action_publish_delay presumably are too.
  control_dt: 0.05
  # NOTE(review): 50 iterations span exactly one control_dt only if the physics
  # step is 1 ms -- confirm against the world file.
  service_step_iterations: 50
  reset_settle_seconds: 1.2
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # For training, start Gazebo in paused mode and let the training program
  # advance the simulation step by step.
  launch_hint: 'ros2 launch guguji_ros2 gazebo.launch.py gui:=false pause:=true'

# Task targets and attitude/height limits.
task:
  target_forward_velocity: 0.25
  # Explicit null: no target base height is configured here.
  target_base_height: null
  max_roll_rad: 0.65
  max_pitch_rad: 0.65
  min_base_height: 0.12

# Reward shaping terms; fall_penalty is the only negative-valued entry.
rewards:
  alive_bonus: 1.0
  velocity_tracking_scale: 3.0
  velocity_tracking_sigma: 0.25
  upright_scale: 1.5
  height_scale: 0.8
  action_rate_penalty_scale: 0.04
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.15
  fall_penalty: -20.0

# PPO training run settings.
# NOTE(review): the hyperparameter key names match Stable-Baselines3 PPO
# arguments; presumably they are passed through -- confirm against the trainer.
training:
  algorithm: ppo
  total_timesteps: 500000
  max_episode_steps: 500
  seed: 42
  device: auto
  learning_rate: 0.0003
  n_steps: 1024
  batch_size: 256
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 50000
  output_root: guguji_rl/outputs

# Evaluation settings.
evaluation:
  episodes: 3
  deterministic: true