Benson V6.1 er trænet færdig

2024-04-20 18:48:43 +02:00
parent d5b24b17ea
commit ae447faf5e
24 changed files with 2294 additions and 5 deletions
--- a/MLAgents/config/BensonV6.1.yaml
+++ b/MLAgents/config/BensonV6.1.yaml
@@ -0,0 +1,65 @@
+behaviors:
+  BensonV6.1:
+    trainer_type: ppo
+
+    hyperparameters:
+      # Hyperparameters common to PPO and SAC
+      batch_size: 50
+      buffer_size: 10240
+      learning_rate: 3.0e-4
+      learning_rate_schedule: linear
+
+      # PPO-specific hyperparameters
+      # (these are the ones used here, since trainer_type above is ppo)
+      beta: 5.0e-2
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+
+    # Configuration of the neural network (common to PPO/SAC)
+    network_settings:
+      vis_encode_type: simple  # ML-Agents spells this key vis_encode_type
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+
+    # Trainer configurations common to all trainers
+    max_steps: 3000000  # plain integer: YAML 1.1 loaders read unsigned-exponent 3.0e6 as a string
+    time_horizon: 64
+    summary_freq: 10000
+    keep_checkpoints: 5
+    checkpoint_interval: 500000
+    threaded: false
+    init_path: null
+
+    # # behavior cloning (disabled: every line of this section is commented out)
+    # behavioral_cloning:
+    #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
+    #   strength: 0.5
+    #   # steps: 150000
+    #   # batch_size: 512
+    #   # num_epoch: 3
+    #   # samples_per_update: 0
+
+    reward_signals:
+      # environment reward (default); the only reward signal enabled in this file
+      extrinsic:
+        strength: 1.0
+        gamma: 0.99
+
+      # # curiosity module (disabled)
+      # curiosity:
+      #   strength: 0.02
+      #   gamma: 0.99
+      #   encoding_size: 256
+      #   learning_rate: 3.0e-4
+
+      # # GAIL (disabled)
+      # gail:
+      #   strength: 0.5
+      #   # gamma: 0.99
+      #   # encoding_size: 128
+      #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
+      #   # learning_rate: 3.0e-4
+      #   # use_actions: false
+      #   # use_vail: false