Self-learning test

This commit is contained in:
2024-04-22 20:09:47 +02:00
parent f6993a8a92
commit af1cf75b8d
23 changed files with 162800 additions and 273 deletions

View File

@@ -0,0 +1,72 @@
behaviors:
  BensonV7:
    trainer_type: ppo
    hyperparameters:
      # Hyperparameters common to PPO and SAC
      batch_size: 50
      buffer_size: 10240
      learning_rate: 3.0e-4
      learning_rate_schedule: linear
      # PPO-specific hyperparameters
      # Replaces the "PPO-specific hyperparameters" section above
      beta: 5.0e-2
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
    # Configuration of the neural network (common to PPO/SAC)
    network_settings:
      # NOTE(review): key fixed — ML-Agents expects `vis_encode_type`,
      # not `vis_encoder_type` (unknown keys are rejected/ignored).
      vis_encode_type: simple
      normalize: false
      hidden_units: 128
      num_layers: 2
    # Trainer configurations common to all trainers
    # Explicit exponent sign so YAML 1.1 loaders resolve this as a
    # float (2.0e7 without the sign is a plain string under 1.1 rules).
    max_steps: 2.0e+7
    time_horizon: 64
    summary_freq: 10000
    keep_checkpoints: 5
    checkpoint_interval: 500000
    threaded: false
    init_path: null
    # # behavior cloning
    # behavioral_cloning:
    #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
    #   strength: 0.5
    #   # steps: 150000
    #   # batch_size: 512
    #   # num_epoch: 3
    #   # samples_per_update: 0
    reward_signals:
      # environment reward (default)
      extrinsic:
        strength: 1.0
        gamma: 0.99
    self_play:
      window: 10
      play_against_latest_model_ratio: 0.5
      save_steps: 20000
      swap_steps: 10000
      team_change: 100000
    # # curiosity module
    # curiosity:
    #   strength: 0.02
    #   gamma: 0.99
    #   encoding_size: 256
    #   learning_rate: 3.0e-4
    # # GAIL
    # gail:
    #   strength: 0.5
    #   # gamma: 0.99
    #   # encoding_size: 128
    #   demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
    #   # learning_rate: 3.0e-4
    #   # use_actions: false
    #   # use_vail: false

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,82 @@
# ML-Agents run-settings dump (results/<run_id>/configuration.yaml).
# Nesting restored per the RunOptions schema; the scraped flat form
# had duplicate keys (invalid YAML) because indentation was stripped.
default_settings: null
behaviors:
  BensonV7:
    trainer_type: ppo
    hyperparameters:
      batch_size: 50
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.05
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      shared_critic: false
      learning_rate_schedule: linear
      beta_schedule: linear
      epsilon_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory: null
      goal_conditioning_type: hyper
      deterministic: false
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
        # per-signal value-network settings (mirrors the policy network)
        network_settings:
          normalize: false
          hidden_units: 128
          num_layers: 2
          vis_encode_type: simple
          memory: null
          goal_conditioning_type: hyper
          deterministic: false
    init_path: null
    keep_checkpoints: 5
    checkpoint_interval: 500000
    max_steps: 20000000
    time_horizon: 64
    summary_freq: 10000
    threaded: false
    self_play:
      save_steps: 20000
      team_change: 100000
      swap_steps: 10000
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0
    behavioral_cloning: null
env_settings:
  env_path: null
  env_args: null
  base_port: 5005
  num_envs: 1
  num_areas: 1
  seed: -1
  max_lifetime_restarts: 10
  restarts_rate_limit_n: 1
  restarts_rate_limit_period_s: 60
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  target_frame_rate: -1
  capture_frame_rate: 60
  no_graphics: false
environment_parameters: null
checkpoint_settings:
  run_id: BensonV7
  initialize_from: null
  load_model: false
  resume: false
  force: true
  train_model: false
  inference: false
  results_dir: results
torch_settings:
  device: null
debug: false

View File

@@ -0,0 +1,325 @@
{
"name": "root",
"gauges": {
"BensonV7.Policy.Entropy.mean": {
"value": 2.0729053020477295,
"min": 2.0728888511657715,
"max": 2.1842892169952393,
"count": 7
},
"BensonV7.Policy.Entropy.sum": {
"value": 84906.203125,
"min": 80925.578125,
"max": 89468.484375,
"count": 7
},
"BensonV7.Step.mean": {
"value": 69984.0,
"min": 9984.0,
"max": 69984.0,
"count": 7
},
"BensonV7.Step.sum": {
"value": 69984.0,
"min": 9984.0,
"max": 69984.0,
"count": 7
},
"BensonV7.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.10756219178438187,
"min": -1.984985589981079,
"max": -0.10756219178438187,
"count": 7
},
"BensonV7.Policy.ExtrinsicValueEstimate.sum": {
"value": -26.567861557006836,
"min": -488.30645751953125,
"max": -26.567861557006836,
"count": 7
},
"BensonV7.Environment.EpisodeLength.mean": {
"value": 81.16216216216216,
"min": 79.0,
"max": 81.38866396761134,
"count": 7
},
"BensonV7.Environment.EpisodeLength.sum": {
"value": 39039.0,
"min": 37920.0,
"max": 41080.0,
"count": 7
},
"BensonV7.Self-play.ELO.mean": {
"value": 1432.9340303735714,
"min": 1196.0598258059322,
"max": 1432.9340303735714,
"count": 7
},
"BensonV7.Self-play.ELO.sum": {
"value": 171952.08364482858,
"min": 143527.17909671186,
"max": 180385.31347156668,
"count": 7
},
"BensonV7.Environment.CumulativeReward.mean": {
"value": -0.09534425969420324,
"min": -0.11856273706287665,
"max": -0.035531304608593925,
"count": 7
},
"BensonV7.Environment.CumulativeReward.sum": {
"value": -11.536655422998592,
"min": -15.294593081111088,
"max": -4.263756553031271,
"count": 7
},
"BensonV7.Policy.ExtrinsicReward.mean": {
"value": -0.09534425969420324,
"min": -0.11856273706287665,
"max": -0.035531304608593925,
"count": 7
},
"BensonV7.Policy.ExtrinsicReward.sum": {
"value": -11.536655422998592,
"min": -15.294593081111088,
"max": -4.263756553031271,
"count": 7
},
"BensonV7.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7
},
"BensonV7.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7
},
"BensonV7.Losses.PolicyLoss.mean": {
"value": 0.11715025729667197,
"min": 0.11001400724555237,
"max": 0.11715025729667197,
"count": 6
},
"BensonV7.Losses.PolicyLoss.sum": {
"value": 0.11715025729667197,
"min": 0.11001400724555237,
"max": 0.11715025729667197,
"count": 6
},
"BensonV7.Losses.ValueLoss.mean": {
"value": 0.0027312553831553743,
"min": 0.0027312553831553743,
"max": 0.34692010765847486,
"count": 6
},
"BensonV7.Losses.ValueLoss.sum": {
"value": 0.0027312553831553743,
"min": 0.0027312553831553743,
"max": 0.34692010765847486,
"count": 6
},
"BensonV7.Policy.LearningRate.mean": {
"value": 0.000299064000312,
"min": 0.000299064000312,
"max": 0.00029984400005200003,
"count": 6
},
"BensonV7.Policy.LearningRate.sum": {
"value": 0.000299064000312,
"min": 0.000299064000312,
"max": 0.00029984400005200003,
"count": 6
},
"BensonV7.Policy.Epsilon.mean": {
"value": 0.19968800000000003,
"min": 0.19968800000000003,
"max": 0.19994800000000001,
"count": 6
},
"BensonV7.Policy.Epsilon.sum": {
"value": 0.19968800000000003,
"min": 0.19968800000000003,
"max": 0.19994800000000001,
"count": 6
},
"BensonV7.Policy.Beta.mean": {
"value": 0.0498440312,
"min": 0.0498440312,
"max": 0.049974005200000005,
"count": 6
},
"BensonV7.Policy.Beta.sum": {
"value": 0.0498440312,
"min": 0.0498440312,
"max": 0.049974005200000005,
"count": 6
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1713809062",
"python_version": "3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\noahk\\Documents\\Unity projects\\Racesm\\.venv\\Scripts\\mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force",
"mlagents_version": "0.30.0",
"mlagents_envs_version": "0.30.0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.2+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1713809367"
},
"total": 304.6736262,
"count": 1,
"self": 0.004877599999929316,
"children": {
"run_training.setup": {
"total": 0.0771373999999998,
"count": 1,
"self": 0.0771373999999998
},
"TrainerController.start_learning": {
"total": 304.59161120000005,
"count": 1,
"self": 0.16134309999955576,
"children": {
"TrainerController._reset_env": {
"total": 19.5166619,
"count": 1,
"self": 19.5166619
},
"TrainerController.advance": {
"total": 284.80744110000046,
"count": 8441,
"self": 0.15070639999748892,
"children": {
"env_step": {
"total": 228.853432300001,
"count": 8441,
"self": 133.1350660999974,
"children": {
"SubprocessEnvManager._take_step": {
"total": 95.61978480000201,
"count": 8441,
"self": 1.4240661000022925,
"children": {
"TorchPolicy.evaluate": {
"total": 94.19571869999972,
"count": 31524,
"self": 94.19571869999972
}
}
},
"workers": {
"total": 0.09858140000157434,
"count": 8440,
"self": 0.0,
"children": {
"worker_root": {
"total": 272.33989530000036,
"count": 8440,
"is_parallel": true,
"self": 169.65825009999963,
"children": {
"steps_from_proto": {
"total": 0.0013582999999997014,
"count": 4,
"is_parallel": true,
"self": 0.0005351000000004547,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0008231999999992468,
"count": 24,
"is_parallel": true,
"self": 0.0008231999999992468
}
}
},
"UnityEnvironment.step": {
"total": 102.68028690000074,
"count": 8440,
"is_parallel": true,
"self": 2.971126999999001,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3.075714900001067,
"count": 8440,
"is_parallel": true,
"self": 3.075714900001067
},
"communicator.exchange": {
"total": 86.89778070000133,
"count": 8440,
"is_parallel": true,
"self": 86.89778070000133
},
"steps_from_proto": {
"total": 9.735664299999346,
"count": 33760,
"is_parallel": true,
"self": 3.697254799999932,
"children": {
"_process_rank_one_or_two_observation": {
"total": 6.038409499999414,
"count": 202560,
"is_parallel": true,
"self": 6.038409499999414
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 55.803302400001996,
"count": 8440,
"self": 1.1739091000001807,
"children": {
"process_trajectory": {
"total": 8.147956200001808,
"count": 8440,
"self": 8.147956200001808
},
"_update_policy": {
"total": 46.48143710000001,
"count": 7,
"self": 9.447134400001843,
"children": {
"TorchPPOOptimizer.update": {
"total": 37.034302699998165,
"count": 4368,
"self": 37.034302699998165
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.000000093488779e-07,
"count": 1,
"self": 8.000000093488779e-07
},
"TrainerController._save_models": {
"total": 0.10616430000004584,
"count": 1,
"self": 0.007284000000026936,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0988803000000189,
"count": 1,
"self": 0.0988803000000189
}
}
}
}
}
}
}

View File

@@ -0,0 +1,30 @@
{
"BensonV7": {
"elo": 1489.2012227625175,
"checkpoints": [
{
"steps": 78400,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-78400.onnx",
"reward": -0.11241376396501437,
"creation_time": 1713809367.0670853,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-78400.pt"
]
}
],
"final_checkpoint": {
"steps": 78400,
"file_path": "results\\BensonV7\\BensonV7.onnx",
"reward": -0.11241376396501437,
"creation_time": 1713809367.0670853,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-78400.pt"
]
}
},
"metadata": {
"stats_format_version": "0.3.0",
"mlagents_version": "0.30.0",
"torch_version": "2.2.2+cu118"
}
}

View File

@@ -1,2 +1,2 @@
mlagents-learn config/BensonV6.1.yaml --run-id=BensonV6.1 --force
mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force
@REM mlagents-learn config/BensonV5.yaml --run-id=BensonV5 --resume --env="C:\Users\noahk\Desktop\BensonV5\Racesm_L.exe" --no-graphics --num-envs=1