---
# PPO walking-gait training configuration for the "guguji" biped
# (ROS 2 + Gazebo, residual actions on top of a scripted reference gait).
#
# NOTE(review): this file arrived with its indentation flattened (every line
# prefixed with "- " and table residue around it); the nesting below was
# reconstructed from key names and comments — confirm section boundaries
# against the consuming loader's schema.

experiment:
  name: walk_ppo

robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command
  # The first walking version still uses "nominal stance + residual action",
  # so the stable-posture skills learned in the transition phase carry over
  # directly.
  nominal_joint_positions:
    left_hip_pitch_joint: 0.0
    left_knee_pitch_joint: 0.0
    left_ankle_pitch_joint: 0.0
    left_ankle_joint: 0.0
    right_hip_pitch_joint: 0.0
    right_knee_pitch_joint: 0.0
    right_ankle_pitch_joint: 0.0
    right_ankle_joint: 0.0
  # This round switches to "reference gait + residual action": first give the
  # legs a clear alternating swing rhythm, then let the policy learn to keep
  # balance and turn that gait into forward motion.
  reference_gait:
    enabled: true
    period: 0.80
    # Shorter swing phase, longer stance phase — closer to the cadence of a
    # real forward-walking gait.
    stance_ratio: 0.62
    hip_pitch_amplitude: 0.28
    knee_pitch_amplitude: 0.36
    knee_pitch_bias: 0.08
    ankle_pitch_amplitude: 0.18
  # The reference gait already swings the legs actively, so the residual
  # action magnitude can be tightened somewhat.
  action_scale: 0.18
  action_smoothing: 0.72

ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control

sim:
  world_name: default
  # Walking training keeps using service_step so that one action step maps to
  # exactly one reward step.
  step_mode: service_step
  control_dt: 0.05
  # Still slightly higher than the previous walking run, to amplify the actual
  # displacement produced by each stepping motion.
  service_step_iterations: 22
  reset_settle_seconds: 1.0
  reset_hold_steps: 6
  # A reset deletes and respawns the model; this is the initial pose used when
  # respawning.
  spawn_x: 0.0
  spawn_y: 0.0
  spawn_z: 0.35
  spawn_roll: 0.0
  spawn_pitch: 0.0
  spawn_yaw: 0.0
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # To watch in real time, keep the GUI open as below and use pause:=true.
  launch_hint: 'ros2 launch guguji_ros2 gazebo.launch.py pause:=true'

task:
  # With the reference gait in place, push the target velocity further up,
  # forcing the policy to convert leg swings into forward displacement.
  target_forward_velocity: 0.22
  target_base_height: null
  # Allow slightly larger body sway, leaving room for dynamic balance while
  # stepping.
  max_roll_rad: 0.90
  max_pitch_rad: 0.90
  min_base_height: 0.21
  termination_grace_steps: 12

rewards:
  # The reference gait handles "start stepping"; the rewards here focus on
  # "once stepping, move forward".
  alive_bonus: 0.6
  velocity_tracking_scale: 4.2
  velocity_tracking_sigma: 0.09
  forward_progress_scale: 5.0
  hip_alternation_scale: 0.4
  hip_target_separation: 0.35
  hip_antiphase_sigma: 0.18
  knee_flexion_scale: 0.3
  knee_target: 0.22
  knee_flexion_sigma: 0.12
  upright_scale: 1.6
  height_scale: 0.9
  action_rate_penalty_scale: 0.006
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.10
  backward_velocity_penalty_scale: 2.5
  stall_penalty_scale: 4.0
  stall_velocity_threshold: 0.08
  fall_penalty: -15.0

training:
  algorithm: ppo
  # Another 10000-step validation run, to check whether this more aggressive
  # forward reward produces visible displacement.
  total_timesteps: 10000
  max_episode_steps: 500
  seed: 42
  device: auto
  # The control structure changed substantially when the reference gait was
  # added, so still initialize from the latest walking model, but drive the
  # learning rate much lower so the policy adapts by fine-tuning to the new
  # rhythm.
  init_model_path: outputs/walk_ppo_20260412_181640/final_model.zip
  # Significantly lower learning rate plus a longer rollout, to avoid the
  # regression caused by the previous overly aggressive updates.
  learning_rate: 0.00008
  n_steps: 256
  batch_size: 128
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.15
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 5000
  output_root: guguji_rl/outputs

evaluation:
  episodes: 3
  deterministic: true