1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112 | # Demo-only Gazebo config (rollouts/visualization). Training is not supported in Gazebo.
# Run bookkeeping: output location, naming, and RNG seeding.
experiment:
  name: sac_gazebo
  runs_dir: outputs/runs
  seed: 42
  # Step budget — kept for schema parity; training is not supported in Gazebo
  # (see header comment at the top of this file).
  total_timesteps: 100000
# Environment selection plus every parameter forwarded to the env constructor.
env:
  backend: gazebo
  # Env-checker disabled — presumably because the Gazebo backend needs a live
  # simulator to step; confirm against the env factory.
  check_env: false
  env_kwargs:
    params:
      # --- Simulation stepping ---
      dt: 0.1  # control step size (presumably seconds — confirm)
      # --- Lidar sensor ---
      lidar_num_beams: 90
      lidar_range_max: 30.0
      lidar_min_valid_range: 0.03
      # --- Velocity limits: linear, yaw rate, vertical ---
      v_max: 6.0
      wz_max: 2.5
      vz_max: 2.0
      r_safe: 0.24
      # --- Episode length / world geometry ---
      episode_seconds: 600.0
      goal_tolerance: 2.0
      world_radius: 20.0
      boundary_margin: 1.5
      drone_radius: 0.18
      collision_threshold: 0.18  # equals drone_radius — intentional? confirm
      default_z_target: 0.3
      z_error_scale: 5.0
      # --- Reward shaping ---
      reward_progress_scale: 5.0
      reward_speed_scale: 0.015
      reward_step_penalty: 0.02
      reward_proximity_scale: 0.45
      reward_shield_penalty: 0.035
      reward_collision_penalty: 28.0
      reward_success_bonus: 22.0
      reward_truncation_penalty: 8.0
      reward_yaw_rate_scale: 0.04
      reward_stall_penalty: 0.05
      reward_accel_clip_penalty: 0.08
      progress_stall_threshold: 0.02
      yaw_penalty_speed_gate: 0.25
      # --- Safety shield ---
      # NOTE(review): floor z-min equals ceiling z-max (0.3), which leaves no
      # altitude band between them — confirm these are independent clamps and
      # not a min/max pair.
      shield_floor_z_min: 0.3
      shield_ceiling_z_max: 0.3
      shield_lookahead_margin: 3.0
      shield_yaw_damping: 0.4
      shield_front_arc_deg: 55.0
      shield_ttc_threshold_sec: 0.75
      # --- Tree obstacle sampling / procedural world generation ---
      tree_radius_mean: 0.14
      tree_radius_std: 0.03
      tree_radius_min: 0.08
      tree_radius_max: 0.30
      worldgen_config_relpath: configs/worldgen/worldgen_run.yaml
      worldgen_seed_offset: 0
      worldgen_resample_every_n_episodes: 1
      worldgen_verbose: false
      # --- ROS / Gazebo transport and reset behavior ---
      odom_topic: /model/uav1/odometry
      scan_topic: /scan
      cmd_vel_topic: /model/uav1/cmd_vel
      spin_timeout_sec: 2.0
      settle_time_sec: 0.05
      attitude_lock_enabled: true
      attitude_lock_kp: 8.0
      attitude_lock_max_rate: 20.0
      use_sim_reset_service: false
      reset_service_name: /reset_simulation
      # --- Start/goal placement on episode reset ---
      fixed_goal: [8.0, 0.0, 0.3]  # used when randomization is off, presumably
      randomize_goal_on_reset: true
      start_goal_clearance: 0.6
      min_start_goal_distance: 8.0
      spawn_max_attempts: 500
      # dynamics control mode: mirror the fastsim hybrid settings for parity.
      action_mode: hybrid
      accel_v_max: 4.5  # m/s**2
      accel_wz_max: 3.2  # rad/s**2
      accel_vz_max: 1.8  # m/s**2
      decel_v_max: 10.5  # m/s**2
      decel_wz_max: 5.0  # rad/s**2
      decel_vz_max: 3.5  # m/s**2
# Training-loop setup. Kept for schema parity with the fastsim configs even
# though this file is demo-only (Gazebo does not support training — see header).
training:
  n_envs: 1  # single env — Gazebo runs one simulator instance, presumably
  checkpoint_freq_step: 10000
  # Observation/reward normalization (VecNormalize-style settings).
  norm:
    enabled: true
    norm_obs: true
    norm_reward: true
    clip_obs: 10.0
    clip_reward: 10.0
# Evaluation and metrics-logging cadence (all frequencies in env steps).
logging:
  eval_freq_step: 20000
  n_eval_episodes: 10
  metrics_log_freq_step: 1000
# SAC hyperparameters (Stable-Baselines3-style names, presumably — confirm
# against the trainer that constructs the algorithm).
sac:
  policy: MlpPolicy
  learning_rate: 0.0003
  buffer_size: 1000000
  learning_starts: 10000  # steps collected before updates begin
  batch_size: 256
  tau: 0.005  # soft target-update coefficient
  gamma: 0.99  # discount factor
  train_freq: 1
  # NOTE(review): in SB3, -1 means "as many gradient steps as env steps
  # collected" — confirm the consumer follows that convention.
  gradient_steps: -1
  ent_coef: auto  # parses as the string "auto" (automatic entropy tuning)
  target_update_interval: 1
  verbose: 1
  policy_kwargs:
    net_arch: [256, 256]  # two hidden layers of 256 units
# Top-level flag (sibling of sac). Presumably controls persisting VecNormalize
# statistics with checkpoints — confirm against the trainer.
save_vecnorm: false
|