---
# SAC training configuration for a LiDAR-equipped UAV navigation task
# (obstacle "forest" world, safety-shielded hybrid actions).
# NOTE(review): indentation was reconstructed from a flattened dump; the
# nesting below follows the obvious semantic grouping — confirm against
# the config loader's expected schema.
experiment:
  name: sac_gz_aware
  runs_dir: outputs/runs
  seed: 42
  # 10_000_000 rewritten as plain digits: underscore separators are
  # YAML 1.1 (PyYAML) only and parse as *strings* under YAML 1.2.
  total_timesteps: 10000000

env:
  env_kwargs:
    params:
      # --- simulation / vehicle limits ---
      dt: 0.1
      lidar_num_beams: 90
      lidar_range_max: 30.0
      v_max: 6.0
      wz_max: 2.5
      vz_max: 2.0
      r_safe: 0.24
      episode_seconds: 60.0
      goal_tolerance: 0.5
      world_radius: 20.0
      boundary_margin: 1.5
      drone_radius: 0.18
      collision_threshold: 0.18
      default_z_target: 2.0
      z_error_scale: 5.0
      # clearance semantics:
      # - collision happens when min_range < drone_radius
      # - soft proximity penalty starts inside r_safe (absolute obstacle distance)
      #   equivalent soft margin around the UAV is (r_safe - drone_radius)
      # --- reward shaping ---
      reward_progress_scale: 5.0
      reward_speed_scale: 0.015
      reward_step_penalty: 0.02
      reward_proximity_scale: 0.45
      reward_shield_penalty: 0.035
      reward_collision_penalty: 28.0
      reward_success_bonus: 22.0
      reward_truncation_penalty: 8.0
      reward_yaw_rate_scale: 0.04
      reward_stall_penalty: 0.05
      reward_accel_clip_penalty: 0.08
      progress_stall_threshold: 0.02
      yaw_penalty_speed_gate: 0.25
      # --- safety shield ---
      shield_yaw_damping: 0.4
      shield_lookahead_margin: 3.0
      shield_floor_z_min: 0.05
      # --- procedural world generation ---
      worldgen_config_relpath: configs/worldgen/worldgen_run.yaml
      worldgen_seed_offset: 0
      worldgen_resample_every_n_episodes: 1  # increase to 2-4 for faster training resets
      worldgen_verbose: false  # keep false during training to reduce I/O
      tree_radius_mean: 0.14
      tree_radius_std: 0.03
      tree_radius_min: 0.08
      tree_radius_max: 0.30
      start_goal_clearance: 0.6
      min_start_goal_distance: 8.0
      spawn_max_attempts: 500
      # --- action space ---
      action_mode: hybrid
      accel_v_max: 4.5
      accel_wz_max: 3.2
      accel_vz_max: 1.8
      decel_v_max: 10.5
      decel_wz_max: 5.0
      decel_vz_max: 3.5

training:
  n_envs: 16
  save_checkpoints: false
  checkpoint_freq_step: 100000
  final_snapshot_freq_step: 100000
  save_replay_buffer: false
  # VecNormalize-style observation/reward normalization.
  norm:
    enabled: true
    norm_obs: true
    norm_reward: true
    clip_obs: 10.0
    clip_reward: 10.0

logging:
  eval_freq_step: 50000
  n_eval_episodes: 10
  metrics_log_freq_step: 1000
  command_center_episode_window: 200

# Hyperparameters forwarded to the SAC constructor (Stable-Baselines3 style).
sac:
  policy: MlpPolicy
  learning_rate: 0.0002
  buffer_size: 1000000
  learning_starts: 10000
  batch_size: 1024
  tau: 0.005
  gamma: 0.99
  train_freq: 4
  gradient_steps: 8
  ent_coef: auto  # "auto" = learned entropy coefficient
  target_update_interval: 1
  verbose: 1
  policy_kwargs:
    net_arch: [512, 512]

# NOTE(review): appeared last in the flattened dump; placed at top level
# here (it is not a SAC constructor kwarg) — confirm expected location.
save_vecnorm: true