experiment:
  name: sac_fastsim_hybrid_stage1
  runs_dir: outputs/runs
  seed: 42
  # NOTE(review): 3_000_000 parses as int only under YAML 1.1 (PyYAML);
  # YAML 1.2 parsers read it as a string — plain digits are portable.
  total_timesteps: 3000000
# NOTE(review): indentation was missing (all keys flush-left, making the parent
# keys null) — nesting reconstructed from key order; verify against the loader's
# expected schema.
env:
  env_kwargs:
    params:
      dt: 0.1
      lidar_num_beams: 90
      lidar_range_max: 30.0
      v_max: 6.0
      wz_max: 2.5
      vz_max: 2.0
      r_safe: 0.22
      episode_seconds: 60.0
      goal_tolerance: 0.5
      world_radius: 20.0
      boundary_margin: 1.5
      collision_threshold: 0.18
      default_z_target: 2.0
      z_error_scale: 5.0
      reward_progress_scale: 5.0
      reward_speed_scale: 0.02
      reward_step_penalty: 0.02
      reward_proximity_scale: 0.25
      reward_shield_penalty: 0.02
      reward_collision_penalty: 15.0
      reward_success_bonus: 20.0
      reward_truncation_penalty: 8.0
      reward_yaw_rate_scale: 0.03
      reward_stall_penalty: 0.05
      reward_accel_clip_penalty: 0.03
      progress_stall_threshold: 0.02
      yaw_penalty_speed_gate: 0.25
      shield_yaw_damping: 0.35
      shield_lookahead_margin: 2.0
      shield_floor_z_min: 0.05
      worldgen_config_relpath: configs/worldgen/worldgen_run.yaml
      worldgen_seed_offset: 0
      tree_radius_mean: 0.14
      tree_radius_std: 0.03
      tree_radius_min: 0.08
      tree_radius_max: 0.30
      start_goal_clearance: 0.6
      min_start_goal_distance: 8.0
      spawn_max_attempts: 500
      action_mode: hybrid
      accel_v_max: 5.0
      accel_wz_max: 3.5
      accel_vz_max: 2.5
      decel_v_max: 10.0
      decel_wz_max: 5.0
      decel_vz_max: 4.0
# NOTE(review): nesting reconstructed — `norm` and `logging` placed under
# `training` based on key order; confirm against the config loader.
training:
  n_envs: 16
  save_checkpoints: false
  # Underscore-grouped ints are YAML 1.1-only; plain digits are portable.
  checkpoint_freq_step: 100000
  final_snapshot_freq_step: 200000
  save_replay_buffer: false
  norm:
    enabled: true
    norm_obs: true
    norm_reward: true
    clip_obs: 10.0
    clip_reward: 10.0
  logging:
    eval_freq_step: 50000
    n_eval_episodes: 5
sac:
  policy: MlpPolicy
  learning_rate: 0.0003
  # Underscore-grouped ints are YAML 1.1-only; plain digits are portable.
  buffer_size: 1000000
  learning_starts: 10000
  batch_size: 1024
  tau: 0.005
  gamma: 0.99
  train_freq: 4
  gradient_steps: 8
  ent_coef: auto
  target_update_interval: 1
  verbose: 1
  policy_kwargs:
    net_arch: [512, 512]
# NOTE(review): kept top-level per original position — confirm whether
# save_vecnorm belongs under `training` in the loader's schema.
save_vecnorm: true