benson V5 has cooked

2024-04-20 14:45:47 +02:00
parent 2d017b7a59
commit 65607aebc4
322 changed files with 127875 additions and 143 deletions

@@ -0,0 +1,27 @@
behaviors:
  3DBall:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 64
      buffer_size: 200000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.5
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: true
      hidden_units: 64
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 200000
    time_horizon: 1000
    summary_freq: 12000
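
The files in this commit are Unity ML-Agents trainer configurations using the SAC trainer. As a quick illustration of how such a config can be inspected, the minimal Python sketch below (assuming PyYAML is installed; the inline string only mirrors a few fields of the 3DBall config above so the example stays self-contained) parses the hyperparameters and estimates how many SAC updates max_steps implies given steps_per_update.

import yaml  # assumes PyYAML is installed (pip install pyyaml)

# Inline excerpt of the 3DBall SAC config above, kept small for a self-contained example.
CONFIG_TEXT = """
behaviors:
  3DBall:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      batch_size: 64
      buffer_size: 200000
      steps_per_update: 10.0
    max_steps: 200000
"""

config = yaml.safe_load(CONFIG_TEXT)
for name, behavior in config["behaviors"].items():
    hp = behavior["hyperparameters"]
    # steps_per_update is the ratio of environment steps to policy updates in SAC,
    # so max_steps / steps_per_update approximates the total number of updates.
    updates = behavior["max_steps"] / hp["steps_per_update"]
    print(f"{name}: ~{updates:.0f} updates over {behavior['max_steps']} environment steps")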

@@ -0,0 +1,27 @@
behaviors:
  3DBallHard:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 256
      buffer_size: 500000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000

@@ -0,0 +1,27 @@
behaviors:
  Basic:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 64
      buffer_size: 50000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.01
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 20
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 10
    summary_freq: 2000

@@ -0,0 +1,27 @@
behaviors:
  Crawler:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 256
      buffer_size: 500000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 1000
    summary_freq: 30000

@@ -0,0 +1,28 @@
behaviors:
  FoodCollector:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 256
      buffer_size: 2048
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.05
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 60000
    threaded: false

@@ -0,0 +1,27 @@
behaviors:
  GridWorld:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 128
      buffer_size: 50000
      buffer_init_steps: 1000
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.5
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.9
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 5
    summary_freq: 20000

@@ -0,0 +1,30 @@
behaviors:
  Hallway:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 512
      buffer_size: 200000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.1
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory:
        sequence_length: 64
        memory_size: 128
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 4000000
    time_horizon: 64
    summary_freq: 10000

@@ -0,0 +1,27 @@
behaviors:
  PushBlock:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 128
      buffer_size: 50000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.05
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 100000

@@ -0,0 +1,34 @@
behaviors:
  Pyramids:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 128
      buffer_size: 2000000
      buffer_init_steps: 1000
      tau: 0.01
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.01
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 2.0
      gail:
        gamma: 0.99
        strength: 0.01
        learning_rate: 0.0003
        use_actions: true
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
    keep_checkpoints: 5
    max_steps: 3000000
    time_horizon: 128
    summary_freq: 30000
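
The Pyramids behavior above is the only one in this commit that combines the extrinsic reward with a GAIL reward signal driven by a recorded demonstration (demo_path). A minimal sketch, again with PyYAML and a placeholder config filename (actual file paths are not shown in this diff): it lists the configured reward signals and warns if the demonstration file is missing locally.

import os
import yaml

CONFIG_PATH = "Pyramids.yaml"  # placeholder name; this diff does not show the real file path

with open(CONFIG_PATH) as f:
    behavior = yaml.safe_load(f)["behaviors"]["Pyramids"]

# Print each configured reward signal with its discount factor and strength.
for signal, settings in behavior["reward_signals"].items():
    print(f"{signal}: gamma={settings['gamma']}, strength={settings['strength']}")

# GAIL needs the recorded demonstration referenced by demo_path to exist locally.
demo = behavior["reward_signals"]["gail"].get("demo_path")
if demo and not os.path.exists(demo):
    print(f"warning: GAIL demo not found at {demo}")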

@@ -0,0 +1,27 @@
behaviors:
  Walker:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 1024
      buffer_size: 2000000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 30.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 30.0
    network_settings:
      normalize: true
      hidden_units: 256
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 15000000
    time_horizon: 1000
    summary_freq: 30000

@@ -0,0 +1,53 @@
behaviors:
  BigWallJump:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 128
      buffer_size: 200000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 0.1
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 15000000
    time_horizon: 128
    summary_freq: 20000
  SmallWallJump:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 128
      buffer_size: 50000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 0.1
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 128
    summary_freq: 20000
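
This file defines two behaviors in a single config; BigWallJump and SmallWallJump share every setting except buffer_size and max_steps. The sketch below (PyYAML again, with a placeholder filename since real paths are not shown here) flattens both behavior blocks and prints only the keys on which they differ.

import yaml

CONFIG_PATH = "WallJump.yaml"  # placeholder name; this diff does not show the real file path

with open(CONFIG_PATH) as f:
    behaviors = yaml.safe_load(f)["behaviors"]

def flatten(node, prefix=""):
    # Turn nested dicts into dotted keys so the two behaviors can be compared key by key.
    flat = {}
    for key, value in node.items():
        name = f"{prefix}{key}"
        if isinstance(value, dict):
            flat.update(flatten(value, name + "."))
        else:
            flat[name] = value
    return flat

big = flatten(behaviors["BigWallJump"])
small = flatten(behaviors["SmallWallJump"])
for key in sorted(set(big) | set(small)):
    if big.get(key) != small.get(key):
        print(f"{key}: BigWallJump={big.get(key)}  SmallWallJump={small.get(key)}")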

@@ -0,0 +1,27 @@
behaviors:
  Worm:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 256
      buffer_size: 500000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 1000
    summary_freq: 30000