---
# forest_navigating_uav/configs/training/sac_gazebo.yaml
# Demo-only Gazebo config (rollouts/visualization). Training is not supported in Gazebo.
experiment:
  name: sac_gazebo
  runs_dir: outputs/runs  # output root for run artifacts (relative path)
  seed: 42  # RNG seed for reproducibility
  total_timesteps: 100000  # kept for schema parity with trainable configs; see demo-only note above

env:
  backend: gazebo  # use the Gazebo simulator backend (vs. the fastsim backend referenced below)
  check_env: false  # skip the gym env-checker pass
  env_kwargs:
    params:
      # --- simulation / sensing ---
      dt: 0.1  # control step, seconds
      lidar_num_beams: 90
      lidar_range_max: 30.0  # presumably meters — matches typical LiDAR config; verify against env
      lidar_min_valid_range: 0.03  # returns closer than this are treated as invalid
      # --- velocity limits (presumably m/s and rad/s — confirm against env dynamics) ---
      v_max: 6.0
      wz_max: 2.5
      vz_max: 2.0
      r_safe: 0.24  # safety radius; note > drone_radius (0.18)
      episode_seconds: 600.0  # wall-clock episode cap before truncation
      goal_tolerance: 2.0  # distance within which the goal counts as reached
      world_radius: 20.0
      boundary_margin: 1.5
      drone_radius: 0.18
      collision_threshold: 0.18  # equals drone_radius — collision when an obstacle is at the hull
      default_z_target: 0.3
      z_error_scale: 5.0
      # --- reward shaping weights (semantics defined by the env, not here) ---
      reward_progress_scale: 5.0
      reward_speed_scale: 0.015
      reward_step_penalty: 0.02
      reward_proximity_scale: 0.45
      reward_shield_penalty: 0.035
      reward_collision_penalty: 28.0
      reward_success_bonus: 22.0
      reward_truncation_penalty: 8.0
      reward_yaw_rate_scale: 0.04
      reward_stall_penalty: 0.05
      reward_accel_clip_penalty: 0.08
      progress_stall_threshold: 0.02
      yaw_penalty_speed_gate: 0.25
      # --- safety shield ---
      # NOTE(review): shield_floor_z_min == shield_ceiling_z_max (0.3) gives a zero-width
      # altitude band — confirm this is intentional (altitude hold) and not a typo.
      shield_floor_z_min: 0.3
      shield_ceiling_z_max: 0.3
      shield_lookahead_margin: 3.0
      shield_yaw_damping: 0.4
      shield_front_arc_deg: 55.0
      shield_ttc_threshold_sec: 0.75  # time-to-collision trigger, seconds
      # --- procedural forest generation ---
      tree_radius_mean: 0.14
      tree_radius_std: 0.03
      tree_radius_min: 0.08
      tree_radius_max: 0.30
      worldgen_config_relpath: configs/worldgen/worldgen_run.yaml
      worldgen_seed_offset: 0
      worldgen_resample_every_n_episodes: 1  # regenerate the world every episode
      worldgen_verbose: false
      # --- ROS 2 / Gazebo plumbing ---
      odom_topic: /model/uav1/odometry
      scan_topic: /scan
      cmd_vel_topic: /model/uav1/cmd_vel
      spin_timeout_sec: 2.0  # max wait for a message spin before giving up
      settle_time_sec: 0.05  # pause after (re)spawn before stepping
      attitude_lock_enabled: true
      attitude_lock_kp: 8.0
      attitude_lock_max_rate: 20.0
      use_sim_reset_service: false  # when true, reset via the service named below
      reset_service_name: /reset_simulation
      # --- episode start/goal placement ---
      fixed_goal: [8.0, 0.0, 0.3]  # used when randomize_goal_on_reset is false — TODO confirm precedence
      randomize_goal_on_reset: true
      start_goal_clearance: 0.6
      min_start_goal_distance: 8.0
      spawn_max_attempts: 500  # rejection-sampling budget for valid spawn poses

      # dynamics control mode: mirror the fastsim hybrid settings for parity.
      action_mode: hybrid
      accel_v_max: 4.5    # m/s**2
      accel_wz_max: 3.2   # rad/s**2
      accel_vz_max: 1.8   # m/s**2
      decel_v_max: 10.5   # m/s**2  (braking allowed to exceed acceleration limits)
      decel_wz_max: 5.0   # rad/s**2
      decel_vz_max: 3.5   # m/s**2

training:
  n_envs: 1  # Gazebo backend is single-instance; keep at 1
  checkpoint_freq_step: 10000  # save a model checkpoint every N environment steps
  norm:
    # VecNormalize-style observation/reward normalization — presumably maps to
    # SB3 VecNormalize kwargs; confirm against the trainer wrapper.
    enabled: true
    norm_obs: true
    norm_reward: true
    clip_obs: 10.0
    clip_reward: 10.0

logging:
  eval_freq_step: 20000  # run an evaluation pass every N environment steps
  n_eval_episodes: 10  # episodes averaged per evaluation
  metrics_log_freq_step: 1000  # scalar-metrics logging cadence, in env steps

sac:
  # Hyperparameters forwarded to the SAC constructor (Stable-Baselines3 naming).
  policy: MlpPolicy
  learning_rate: 0.0003
  buffer_size: 1000000  # replay buffer capacity (transitions)
  learning_starts: 10000  # random-action warm-up steps before gradient updates
  batch_size: 256
  tau: 0.005  # Polyak averaging coefficient for the target networks
  gamma: 0.99  # discount factor
  train_freq: 1  # update every environment step
  gradient_steps: -1  # SB3 convention: as many gradient steps as env steps collected
  ent_coef: auto  # the string "auto" enables learned entropy temperature in SB3
  target_update_interval: 1
  verbose: 1
  policy_kwargs:
    net_arch: [256, 256]  # two 256-unit hidden layers for actor and critic

save_vecnorm: false