---
# Unity ML-Agents trainer configuration for the "MuscleTrack2" behavior (PPO).
behaviors:
  MuscleTrack2:
    trainer_type: ppo

    hyperparameters:
      # Hyperparameters common to PPO and SAC
      batch_size: 50
      buffer_size: 10240
      learning_rate: 3.0e-4
      learning_rate_schedule: linear

      # PPO-specific hyperparameters
      beta: 5.0e-2
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3

    # Configuration of the neural network (common to PPO/SAC)
    network_settings:
      vis_encoder_type: simple
      normalize: false
      hidden_units: 128
      num_layers: 2

    # Trainer configurations common to all trainers
    # Was "2.0e7": PyYAML only recognizes floats with a signed exponent
    # (e.g. 2.0e+7), so "2.0e7" loads as a string. A plain int is unambiguous.
    max_steps: 20000000
    time_horizon: 64
    summary_freq: 10000
    keep_checkpoints: 5
    checkpoint_interval: 500000
    threaded: false
    init_path: null

    # # behavior cloning
    # behavioral_cloning:
    #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
    #   strength: 0.5
    #   # steps: 150000
    #   # batch_size: 512
    #   # num_epoch: 3
    #   # samples_per_update: 0

    reward_signals:
      # environment reward (default)
      extrinsic:
        strength: 1.0
        gamma: 0.99

      # NOTE(review): if re-enabled, self_play belongs at the trainer level
      # (sibling of reward_signals), not inside it — confirm against the
      # ML-Agents training-configuration docs.
      # self_play:
      #   window: 3
      #   play_against_latest_model_ratio: 0.5
      #   save_steps: 100000
      #   swap_steps: 200000
      #   team_change: 1000000

      # # curiosity module
      # curiosity:
      #   strength: 0.02
      #   gamma: 0.99
      #   encoding_size: 256
      #   learning_rate: 3.0e-4

      # # GAIL
      # gail:
      #   strength: 0.5
      #   # gamma: 0.99
      #   # encoding_size: 128
      #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
      #   # learning_rate: 3.0e-4
      #   # use_actions: false
      #   # use_vail: false