Benson V7 be training

This commit is contained in:
2024-04-23 15:47:36 +02:00
parent 114a65b46f
commit 85a909e4fd
34 changed files with 3423 additions and 1331 deletions

View File

@@ -49,10 +49,10 @@ behaviors:
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 20000
swap_steps: 10000
team_change: 100000
play_against_latest_model_ratio: 0.666
save_steps: 40000
swap_steps: 20000
team_change: 200000
# # curiosity module
# curiosity:

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -42,11 +42,11 @@ behaviors:
summary_freq: 10000
threaded: false
self_play:
save_steps: 20000
team_change: 100000
swap_steps: 10000
save_steps: 40000
team_change: 200000
swap_steps: 20000
window: 10
play_against_latest_model_ratio: 0.5
play_against_latest_model_ratio: 0.666
initial_elo: 1200.0
behavioral_cloning: null
env_settings:
@@ -72,8 +72,8 @@ checkpoint_settings:
run_id: BensonV7
initialize_from: null
load_model: false
resume: false
force: true
resume: true
force: false
train_model: false
inference: false
results_dir: results

View File

@@ -2,269 +2,269 @@
"name": "root",
"gauges": {
"BensonV7.Policy.Entropy.mean": {
"value": 1.3791288137435913,
"min": 1.3791288137435913,
"max": 2.187758684158325,
"count": 24
"value": 1.724164605140686,
"min": 1.6979351043701172,
"max": 1.9132299423217773,
"count": 114
},
"BensonV7.Policy.Entropy.sum": {
"value": 13763.705078125,
"min": 13763.705078125,
"max": 22402.6484375,
"count": 24
"value": 51466.3125,
"min": 40307.1640625,
"max": 59228.46484375,
"count": 114
},
"BensonV7.Step.mean": {
"value": 239991.0,
"min": 9984.0,
"max": 239991.0,
"count": 24
"value": 1979956.0,
"min": 849958.0,
"max": 1979956.0,
"count": 114
},
"BensonV7.Step.sum": {
"value": 239991.0,
"min": 9984.0,
"max": 239991.0,
"count": 24
"value": 1979956.0,
"min": 849958.0,
"max": 1979956.0,
"count": 114
},
"BensonV7.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.4123384356498718,
"min": -0.17652811110019684,
"max": 0.4123384356498718,
"count": 24
"value": 4.02828311920166,
"min": 2.0664122104644775,
"max": 4.056896209716797,
"count": 114
},
"BensonV7.Policy.ExtrinsicValueEstimate.sum": {
"value": 75.45793151855469,
"min": -43.4259147644043,
"max": 75.45793151855469,
"count": 24
"value": 660.638427734375,
"min": 230.64227294921875,
"max": 681.55859375,
"count": 114
},
"BensonV7.Environment.EpisodeLength.mean": {
"value": 208.6595744680851,
"min": 79.0,
"max": 208.6595744680851,
"count": 24
"value": 568.3518518518518,
"min": 388.85714285714283,
"max": 749.3571428571429,
"count": 114
},
"BensonV7.Environment.EpisodeLength.sum": {
"value": 9807.0,
"min": 8690.0,
"max": 10688.0,
"count": 24
"value": 30691.0,
"min": 2722.0,
"max": 34215.0,
"count": 114
},
"BensonV7.Self-play.ELO.mean": {
"value": 115.6026867741647,
"min": 90.04764891469252,
"max": 1141.217976657289,
"count": 24
"value": 1384.3959480170795,
"min": 1380.287578470765,
"max": 1480.8259227184733,
"count": 114
},
"BensonV7.Self-play.ELO.sum": {
"value": 5433.326278385741,
"min": 5433.326278385741,
"max": 136946.1571988747,
"count": 24
"value": 24919.12706430743,
"min": 4442.47776815542,
"max": 30394.634561574698,
"count": 114
},
"BensonV7.Environment.CumulativeReward.mean": {
"value": 2.9085576262137773,
"min": -0.11774942415853053,
"max": 2.9085576262137773,
"count": 24
"value": 23.51508159438769,
"min": 4.317544090928277,
"max": 26.173201080504803,
"count": 114
},
"BensonV7.Environment.CumulativeReward.sum": {
"value": 133.79365080583375,
"min": -15.189675716450438,
"max": 133.79365080583375,
"count": 24
"value": 423.2714686989784,
"min": 20.118093952536583,
"max": 474.76355612277985,
"count": 114
},
"BensonV7.Policy.ExtrinsicReward.mean": {
"value": 2.9085576262137773,
"min": -0.11774942415853053,
"max": 2.9085576262137773,
"count": 24
"value": 23.51508159438769,
"min": 4.317544090928277,
"max": 26.173201080504803,
"count": 114
},
"BensonV7.Policy.ExtrinsicReward.sum": {
"value": 133.79365080583375,
"min": -15.189675716450438,
"max": 133.79365080583375,
"count": 24
"value": 423.2714686989784,
"min": 20.118093952536583,
"max": 474.76355612277985,
"count": 114
},
"BensonV7.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 24
"count": 114
},
"BensonV7.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 24
"count": 114
},
"BensonV7.Losses.PolicyLoss.mean": {
"value": 0.11305173472296136,
"min": 0.0902305781915467,
"max": 0.1175793781897086,
"count": 23
"value": 0.11129256490654903,
"min": 0.10404405176266453,
"max": 0.12017463006231105,
"count": 110
},
"BensonV7.Losses.PolicyLoss.sum": {
"value": 0.11305173472296136,
"min": 0.0902305781915467,
"max": 0.1175793781897086,
"count": 23
"value": 0.11129256490654903,
"min": 0.10404405176266453,
"max": 0.12017463006231105,
"count": 110
},
"BensonV7.Losses.ValueLoss.mean": {
"value": 0.02714227997847023,
"min": 2.293270521482928e-05,
"max": 0.05294226645212173,
"count": 23
"value": 0.3347890382678044,
"min": 0.10592283378165912,
"max": 0.45261242995417217,
"count": 110
},
"BensonV7.Losses.ValueLoss.sum": {
"value": 0.02714227997847023,
"min": 2.293270521482928e-05,
"max": 0.05294226645212173,
"count": 23
"value": 0.3347890382678044,
"min": 0.10592283378165912,
"max": 0.45261242995417217,
"count": 110
},
"BensonV7.Policy.LearningRate.mean": {
"value": 0.00029641668119443997,
"min": 0.00029641668119443997,
"max": 0.00029984400005200003,
"count": 23
"value": 0.00027037953987349,
"min": 0.00027037953987349,
"max": 0.00028719582426806,
"count": 110
},
"BensonV7.Policy.LearningRate.sum": {
"value": 0.00029641668119443997,
"min": 0.00029641668119443997,
"max": 0.00029984400005200003,
"count": 23
"value": 0.00027037953987349,
"min": 0.00027037953987349,
"max": 0.00028719582426806,
"count": 110
},
"BensonV7.Policy.Epsilon.mean": {
"value": 0.19880556000000005,
"min": 0.19880556000000005,
"max": 0.19994800000000001,
"count": 23
"value": 0.19012651000000003,
"min": 0.19012651000000003,
"max": 0.19573194,
"count": 110
},
"BensonV7.Policy.Epsilon.sum": {
"value": 0.19880556000000005,
"min": 0.19880556000000005,
"max": 0.19994800000000001,
"count": 23
"value": 0.19012651000000003,
"min": 0.19012651000000003,
"max": 0.19573194,
"count": 110
},
"BensonV7.Policy.Beta.mean": {
"value": 0.049402899444000004,
"min": 0.049402899444000004,
"max": 0.049974005200000005,
"count": 23
"value": 0.04506424234900002,
"min": 0.04506424234900002,
"max": 0.047866396806000015,
"count": 110
},
"BensonV7.Policy.Beta.sum": {
"value": 0.049402899444000004,
"min": 0.049402899444000004,
"max": 0.049974005200000005,
"count": 23
"value": 0.04506424234900002,
"min": 0.04506424234900002,
"max": 0.047866396806000015,
"count": 110
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1713854668",
"start_time_seconds": "1713876203",
"python_version": "3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\noahk\\Documents\\Unity projects\\Racesm\\.venv\\Scripts\\mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force",
"command_line_arguments": "C:\\Users\\noahk\\Documents\\Unity projects\\Racesm\\.venv\\Scripts\\mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --resume",
"mlagents_version": "0.30.0",
"mlagents_envs_version": "0.30.0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.2+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1713855170"
"end_time_seconds": "1713880030"
},
"total": 502.6965699,
"total": 3827.7623298,
"count": 1,
"self": 0.004939400000012029,
"self": 0.011808999999630032,
"children": {
"run_training.setup": {
"total": 0.07646299999999995,
"total": 0.09212290000000012,
"count": 1,
"self": 0.07646299999999995
"self": 0.09212290000000012
},
"TrainerController.start_learning": {
"total": 502.6151675,
"total": 3827.6583979,
"count": 1,
"self": 0.5009666999978322,
"self": 2.435547799978849,
"children": {
"TrainerController._reset_env": {
"total": 5.93716190000001,
"count": 3,
"self": 5.93716190000001
"total": 19.67768449999975,
"count": 7,
"self": 19.67768449999975
},
"TrainerController.advance": {
"total": 496.02484980000213,
"count": 25454,
"self": 0.4162786999982018,
"total": 3805.4429475000215,
"count": 119182,
"self": 2.0891288001512294,
"children": {
"env_step": {
"total": 306.42174130000774,
"count": 25454,
"self": 212.3873840000059,
"total": 2832.8957301999403,
"count": 119182,
"self": 1718.9134972999607,
"children": {
"SubprocessEnvManager._take_step": {
"total": 93.72146940000326,
"count": 25454,
"self": 1.258011400004591,
"total": 1112.513478499999,
"count": 119183,
"self": 16.29119710006944,
"children": {
"TorchPolicy.evaluate": {
"total": 92.46345799999867,
"count": 24958,
"self": 92.46345799999867
"total": 1096.2222813999297,
"count": 344490,
"self": 1096.2222813999297
}
}
},
"workers": {
"total": 0.3128878999985645,
"count": 25454,
"total": 1.4687543999806074,
"count": 119181,
"self": 0.0,
"children": {
"worker_root": {
"total": 493.2314424000014,
"count": 25454,
"total": 3753.157156000006,
"count": 119181,
"is_parallel": true,
"self": 306.6950511000011,
"self": 2309.568257799966,
"children": {
"steps_from_proto": {
"total": 0.0016319000000137862,
"count": 3,
"total": 0.010590099999753022,
"count": 24,
"is_parallel": true,
"self": 0.0005697000000228769,
"self": 0.0033023999993027076,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010621999999909093,
"count": 18,
"total": 0.0072877000004503145,
"count": 240,
"is_parallel": true,
"self": 0.0010621999999909093
"self": 0.0072877000004503145
}
}
},
"UnityEnvironment.step": {
"total": 186.53475940000033,
"count": 25454,
"total": 1443.5783081000404,
"count": 119181,
"is_parallel": true,
"self": 3.401311300002959,
"self": 49.66146120013195,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3.6382145000039445,
"count": 25454,
"total": 36.08897779992863,
"count": 119181,
"is_parallel": true,
"self": 3.6382145000039445
"self": 36.08897779992863
},
"communicator.exchange": {
"total": 169.24416609999997,
"count": 25454,
"total": 1207.7480174999691,
"count": 119181,
"is_parallel": true,
"self": 169.24416609999997
"self": 1207.7480174999691
},
"steps_from_proto": {
"total": 10.25106749999345,
"count": 25454,
"total": 150.0798516000106,
"count": 357543,
"is_parallel": true,
"self": 3.835975299991518,
"self": 46.51214239967524,
"children": {
"_process_rank_one_or_two_observation": {
"total": 6.415092200001932,
"count": 152724,
"total": 103.56770920033537,
"count": 3575430,
"is_parallel": true,
"self": 6.415092200001932
"self": 103.56770920033537
}
}
}
@@ -277,24 +277,31 @@
}
},
"trainer_advance": {
"total": 189.18682979999616,
"count": 25454,
"self": 1.4634836999935033,
"total": 970.45808849993,
"count": 119181,
"self": 17.939639899914255,
"children": {
"process_trajectory": {
"total": 26.51501530000261,
"count": 25454,
"self": 26.51501530000261
"total": 147.86677610001612,
"count": 119181,
"self": 147.7127672000159,
"children": {
"RLTrainer._checkpoint": {
"total": 0.15400890000023537,
"count": 2,
"self": 0.15400890000023537
}
}
},
"_update_policy": {
"total": 161.20833080000006,
"count": 24,
"self": 32.78496999999564,
"total": 804.6516724999997,
"count": 111,
"self": 185.2384082999847,
"children": {
"TorchPPOOptimizer.update": {
"total": 128.42336080000442,
"count": 14628,
"self": 128.42336080000442
"total": 619.413264200015,
"count": 68319,
"self": 619.413264200015
}
}
}
@@ -303,19 +310,19 @@
}
},
"trainer_threads": {
"total": 8.999999749903509e-07,
"total": 9.99999883788405e-07,
"count": 1,
"self": 8.999999749903509e-07
"self": 9.99999883788405e-07
},
"TrainerController._save_models": {
"total": 0.15218820000001188,
"total": 0.10221709999996165,
"count": 1,
"self": 0.007047300000010637,
"self": 0.01906329999974332,
"children": {
"RLTrainer._checkpoint": {
"total": 0.14514090000000124,
"total": 0.08315380000021833,
"count": 1,
"self": 0.14514090000000124
"self": 0.08315380000021833
}
}
}

View File

@@ -1,24 +1,60 @@
{
"BensonV7": {
"elo": 75.87741801617501,
"elo": 1373.1398271068667,
"checkpoints": [
{
"steps": 249156,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-249156.onnx",
"steps": 777333,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-777333.onnx",
"reward": null,
"creation_time": 1713855170.9470267,
"creation_time": 1713875715.718203,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-249156.pt"
"results\\BensonV7\\BensonV7\\BensonV7-777333.pt"
]
},
{
"steps": 843357,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-843357.onnx",
"reward": 19.23734075545023,
"creation_time": 1713876148.3531678,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-843357.pt"
]
},
{
"steps": 999954,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-999954.onnx",
"reward": 16.805242202430964,
"creation_time": 1713876779.4197166,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-999954.pt"
]
},
{
"steps": 1499941,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-1499941.onnx",
"reward": 15.373676013729225,
"creation_time": 1713878416.0976129,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-1499941.pt"
]
},
{
"steps": 1989716,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-1989716.onnx",
"reward": 24.683029329300755,
"creation_time": 1713880030.7978618,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-1989716.pt"
]
}
],
"final_checkpoint": {
"steps": 249156,
"steps": 1989716,
"file_path": "results\\BensonV7\\BensonV7.onnx",
"reward": null,
"creation_time": 1713855170.9470267,
"reward": 24.683029329300755,
"creation_time": 1713880030.7978618,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-249156.pt"
"results\\BensonV7\\BensonV7\\BensonV7-1989716.pt"
]
}
},

View File

@@ -1,2 +1,2 @@
mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force
mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --resume
@REM mlagents-learn config/BensonV5.yaml --run-id=BensonV5 --resume --env="C:\Users\noahk\Desktop\BensonV5\Racesm_L.exe" --no-graphics --num-envs=1