---
# Experiment identity.
experiment:
  name: walk_ppo
# Robot description: model, joint list, nominal stance, reference gait and
# action post-processing parameters.
# NOTE(review): indentation was reconstructed from a flattened copy.
# action_scale and action_smoothing are assumed to be direct children of
# `robot` (not of `reference_gait`) — confirm against the config loader.
robot:
  model_name: guguji
  urdf_path: guguji_ros2_ws/src/guguji_ros2/urdf/guguji.urdf
  joint_names:
    - left_hip_pitch_joint
    - left_knee_pitch_joint
    - left_ankle_pitch_joint
    - left_ankle_joint
    - right_hip_pitch_joint
    - right_knee_pitch_joint
    - right_ankle_pitch_joint
    - right_ankle_joint
  command_topic_prefix: /guguji/command
  # This round changes the nominal stance to a ready pose with slightly bent
  # knees and ankles brought back to neutral, so the legs can start stepping
  # more easily instead of standing straight and only swaying in place.
  nominal_joint_positions:
    left_hip_pitch_joint: 0.04
    left_knee_pitch_joint: 0.18
    left_ankle_pitch_joint: -0.10
    left_ankle_joint: 0.0
    right_hip_pitch_joint: 0.04
    right_knee_pitch_joint: 0.18
    right_ankle_pitch_joint: -0.10
    right_ankle_joint: 0.0
  # This round keeps the "reference gait + residual action" scheme, but
  # shrinks the residual action further so the reference gait drives the
  # stepping and PPO mainly does fine-tuning.
  reference_gait:
    enabled: true
    period: 0.72
    # A slightly longer stance phase keeps foot placement stable; a slightly
    # quicker swing phase keeps a clear stepping rhythm.
    stance_ratio: 0.60
    hip_pitch_amplitude: 0.34
    hip_pitch_bias: 0.04
    knee_pitch_amplitude: 0.46
    knee_pitch_bias: 0.12
    swing_knee_scale: 1.10
    ankle_pitch_amplitude: 0.22
    ankle_pitch_bias: -0.05
    push_off_ankle_scale: 0.22
  # Shrink the residual action further so that even if the old policy weights
  # carry a bias, the robot is not easily knocked over.
  action_scale: 0.08
  action_smoothing: 0.82
# ROS topic and service names used by the environment.
ros:
  joint_state_topic: /joint_states
  tf_topic: /tf
  clock_topic: /clock
  world_control_service: /world/default/control
# Simulation stepping, reset and spawn settings.
sim:
  world_name: default
  # Walking training keeps using service_step so that each action step maps
  # to exactly one reward step.
  step_mode: service_step
  control_dt: 0.05
  # This version slightly shortens how long one action is applied in Gazebo,
  # prioritizing stability.
  service_step_iterations: 22
  reset_settle_seconds: 1.0
  reset_hold_steps: 6
  # On reset the model is deleted and respawned; this is the initial pose
  # used for the respawn.
  spawn_x: 0.0
  spawn_y: 0.0
  spawn_z: 0.35
  spawn_roll: 0.0
  spawn_pitch: 0.0
  spawn_yaw: 0.0
  action_publish_delay: 0.01
  post_step_wait_seconds: 0.01
  # To watch the simulation live, keep the GUI open as done here and launch
  # with pause:=true.
  launch_hint: ros2 launch guguji_ros2 gazebo.launch.py pause:=true
# Task targets and termination thresholds.
task:
  # This keeps the final-stage target velocity; during actual training the
  # curriculum_stages in the training section go 0.18 -> 0.22 first and then
  # raise it to 0.26, so walking intensity is not pushed too high at the start.
  target_forward_velocity: 0.26
  target_base_height: null
  # Allow slightly larger body sway to leave room for dynamic balance while
  # stepping.
  max_roll_rad: 0.90
  max_pitch_rad: 0.90
  min_base_height: 0.21
  termination_grace_steps: 18
# Reward term weights and shaping parameters.
rewards:
  # This round the reward function leans more toward actual forward progress
  # while keeping the basic stability constraints.
  alive_bonus: 0.6
  velocity_tracking_scale: 4.8
  velocity_tracking_sigma: 0.10
  forward_progress_scale: 6.0
  hip_alternation_scale: 0.5
  hip_target_separation: 0.36
  hip_antiphase_sigma: 0.18
  knee_flexion_scale: 0.35
  knee_target: 0.28
  knee_flexion_sigma: 0.15
  upright_scale: 1.6
  height_scale: 0.9
  action_rate_penalty_scale: 0.004
  joint_limit_penalty_scale: 0.05
  lateral_velocity_penalty_scale: 0.08
  backward_velocity_penalty_scale: 2.8
  stall_penalty_scale: 4.6
  stall_velocity_threshold: 0.10
  fall_penalty: -15.0
# Training algorithm, curriculum and optimizer hyperparameters.
# NOTE(review): indentation was reconstructed from a flattened copy. The keys
# from learning_rate onward are assumed to belong to `training`, not to the
# last curriculum stage — confirm against train.py.
training:
  algorithm: ppo
  # total_timesteps here is the fallback for the no-curriculum mode; whenever
  # curriculum_stages is non-empty, train.py trains the stages in order using
  # each stage's own timesteps.
  total_timesteps: 10000
  max_episode_steps: 500
  seed: 42
  device: cuda
  # Continue training from the latest walking model so the basic leg-swing
  # ability already learned is not lost.
  init_model_path: outputs/walk_ppo_20260412_183147/final_model.zip
  # At this step of the curriculum the exploration noise should not be too
  # large, otherwise the stepping rhythm the old policy just learned would be
  # disrupted.
  initial_log_std: -2.2
  # Walking now uses a three-stage curriculum: first let the robot adapt to
  # steady low-speed walking at 0.18, then ramp up to 0.22 and 0.26.
  curriculum_stages:
    - name: walk_v018
      target_forward_velocity: 0.18
      total_timesteps: 3000
      initial_log_std: -2.3
    - name: walk_v022
      target_forward_velocity: 0.22
      total_timesteps: 3000
      initial_log_std: -2.25
    - name: walk_v026
      target_forward_velocity: 0.26
      total_timesteps: 4000
      initial_log_std: -2.2
  # Reduce the learning rate a bit more to avoid degrading stability in the
  # first few updates under the new gait logic.
  learning_rate: 0.00006
  n_steps: 256
  batch_size: 128
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.15
  ent_coef: 0.0
  vf_coef: 0.5
  policy_net_arch: [256, 256]
  checkpoint_freq: 5000
  output_root: guguji_rl/outputs
# Evaluation run settings.
evaluation:
  episodes: 3
  deterministic: true