benson V5 har cooket

2024-04-20 14:45:47 +02:00
parent 2d017b7a59
commit 65607aebc4
322 changed files with 127875 additions and 143 deletions
--- a/MLAgents/config/BensonImitationV3.yaml
+++ b/MLAgents/config/BensonImitationV3.yaml
@@ -0,0 +1,65 @@
+behaviors:
+  BensonImitationV3:
+    trainer_type: ppo
+
+    hyperparameters:
+      # Optimizer and experience-buffer settings (keys shared by PPO and SAC)
+      learning_rate: 0.0003
+      learning_rate_schedule: linear
+      buffer_size: 3000
+      batch_size: 50
+
+      # PPO-only settings: passes over the buffer (num_epoch), GAE lambda
+      # (lambd), policy clipping range (epsilon), entropy strength (beta)
+      num_epoch: 3
+      lambd: 0.95
+      epsilon: 0.1
+      beta: 0.05
+
+    # Neural network configuration (section shared by PPO and SAC)
+    network_settings:
+      vis_encode_type: simple  # encoder applied to visual observations
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+
+    # Trainer configurations common to all trainers
+    max_steps: 1000000  # plain int: YAML 1.1 loaders read unsigned-exponent 1.0e6 as a string
+    time_horizon: 64
+    summary_freq: 10000
+    keep_checkpoints: 5
+    checkpoint_interval: 100000
+    threaded: true
+    init_path: null
+
+    # Behavioral cloning; NOTE(review): demo_path is an absolute, machine-specific Windows path
+    behavioral_cloning:
+      strength: 0.5
+      demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
+      # steps: 150000
+      # batch_size: 512
+      # num_epoch: 3
+      # samples_per_update: 0
+
+    reward_signals:
+      # Reward supplied by the environment itself
+      extrinsic:
+        gamma: 0.99
+        strength: 1.0
+
+      # Intrinsic curiosity reward
+      curiosity:
+        gamma: 0.99
+        strength: 0.02
+        learning_rate: 0.0003
+        encoding_size: 256
+
+      # GAIL reward; NOTE(review): demo_path is an absolute, machine-specific Windows path
+      gail:
+        demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
+        strength: 0.5
+        # gamma: 0.99
+        # encoding_size: 128
+        # learning_rate: 3.0e-4
+        # use_actions: false
+        # use_vail: false