self learning test

This commit is contained in:
2024-04-22 20:09:47 +02:00
parent f6993a8a92
commit af1cf75b8d
23 changed files with 162800 additions and 273 deletions

View File

@@ -1 +1 @@
{"count":1,"self":809.2977152,"total":4632.3942763,"children":{"InitializeActuators":{"count":10,"self":0.0025077999999999997,"total":0.0025077999999999997,"children":null},"InitializeSensors":{"count":10,"self":0.0025223,"total":0.0025223,"children":null},"AgentSendState":{"count":1255617,"self":31.879206399999998,"total":70.695593899999992,"children":{"CollectObservations":{"count":2511240,"self":11.534297599999999,"total":11.5342975,"children":null},"WriteActionMask":{"count":2511240,"self":1.0299128,"total":1.0299128,"children":null},"RequestDecision":{"count":2511240,"self":4.8350608,"total":26.2521764,"children":{"AgentInfo.ToProto":{"count":2511240,"self":3.9296072,"total":21.417115499999998,"children":{"GenerateSensorData":{"count":2511240,"self":14.390806399999999,"total":17.4875084,"children":{"RayPerceptionSensor.Perceive":{"count":5022480,"self":3.0967018,"total":3.0967018,"children":null}}}}}}}}},"DecideAction":{"count":1255617,"self":3650.8291071999997,"total":3650.8292428,"children":null},"AgentAct":{"count":1255617,"self":101.3596032,"total":101.56567179999999,"children":{"AgentInfo.ToProto":{"count":13246,"self":0.042745899999999996,"total":0.20606549999999998,"children":{"GenerateSensorData":{"count":13246,"self":0.1278361,"total":0.16331959999999998,"children":{"RayPerceptionSensor.Perceive":{"count":26492,"self":0.0354835,"total":0.0354835,"children":null}}}}}}}},"gauges":{"BensonV6.1.CumulativeReward":{"count":13246,"max":26.7048168,"min":-0.06671302,"runningAverage":17.17633,"value":18.1915569,"weightedAverage":18.4008961}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1713626948","unity_version":"2022.3.11f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2022.3.11f1\\Editor\\Unity.exe -projectpath C:\\Users\\noahk\\Documents\\Unity projects\\Racesm -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-noahk -hubSessionId e08cc479-296a-466a-a392-68af5330fc09 -accessToken 
S1StEIprioU2rzDDo5vTCfsRe2zgz-zUchvR2e0iCUQ005f","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"AI training","end_time_seconds":"1713631580"}}
{"count":1,"self":57.8489728,"total":273.2728647,"children":{"InitializeActuators":{"count":40,"self":0.0015049,"total":0.0015049,"children":null},"InitializeSensors":{"count":40,"self":0.0025047999999999997,"total":0.0025047999999999997,"children":null},"AgentSendState":{"count":39417,"self":3.278764,"total":7.4229638,"children":{"CollectObservations":{"count":315360,"self":1.0711507,"total":1.0711507,"children":null},"WriteActionMask":{"count":315360,"self":0.11854429999999999,"total":0.11854429999999999,"children":null},"RequestDecision":{"count":315360,"self":0.5394548,"total":2.9545049,"children":{"AgentInfo.ToProto":{"count":315360,"self":0.3807658,"total":2.4150500999999998,"children":{"GenerateSensorData":{"count":315360,"self":1.6535278,"total":2.0342843,"children":{"RayPerceptionSensor.Perceive":{"count":630720,"self":0.3807565,"total":0.3807565,"children":null}}}}}}}}},"DecideAction":{"count":39417,"self":201.36573439999998,"total":201.3657307,"children":null},"AgentAct":{"count":39417,"self":6.5899427999999993,"total":6.6301796999999993,"children":{"AgentInfo.ToProto":{"count":3862,"self":0.0095677,"total":0.040236999999999995,"children":{"GenerateSensorData":{"count":3862,"self":0.0241485,"total":0.0306693,"children":{"RayPerceptionSensor.Perceive":{"count":7724,"self":0.0065207999999999993,"total":0.0065207999999999993,"children":null}}}}}}}},"gauges":{"BensonV7.CumulativeReward":{"count":3862,"max":10.1914558,"min":-0.5617464,"runningAverage":-0.0396344438,"value":0.0600896031,"weightedAverage":0.125912115}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1713809080","unity_version":"2022.3.11f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2022.3.11f1\\Editor\\Unity.exe -projectpath C:\\Users\\noahk\\Documents\\Unity projects\\Racesm -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-noahk -hubSessionId 837b02b4-3460-4d4b-98bd-f0a6ffa15664 -accessToken 
DWB9fkNvzhznkjWNmlbZaGbkZ-bcd_svxKhJE2hkqZI005f","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"AI training","end_time_seconds":"1713809353"}}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 742a3192b43bde644ad6d9a3cc58e51d
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,326 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.UIElements;
using System.Linq;
using Unity.Mathematics;
using Unity.VisualScripting;
using System.Reflection;
using System;
// ML-Agents driving agent (V7). Drives a wheeled car through an ordered list of
// checkpoints: observations encode distance/angle/direction to the next
// uncollected checkpoint plus the car's local-frame velocity; rewards shape the
// agent toward the checkpoint while a small per-step penalty discourages idling.
public class AgentControllerV7 : Agent
{
    public float motorTorque = 300;
    public float brakeTorque = 500;
    public float maxSpeed = 400;
    public float steeringRange = 9;            // steering angle at standstill
    public float steeringRangeAtMaxSpeed = 7;  // gentler steering near top speed
    public float autoBrake = 100;              // passive braking applied when throttle is neutral

    WheelControl[] wheels;     // all WheelControl components found on children
    Rigidbody rigidBody;
    public List<GameObject> checkpoints;  // ordered lap checkpoints; each has a Checkpoint component
    Vector3 startPosition;     // spawn pose captured in Start(), restored each episode
    Quaternion startRotation;
    int currentStep = 0;
    float totalReward = 0;          // shaping reward accumulated since the last checkpoint
    float totalMentalPain = 0;      // cumulative per-step existence penalty (bookkeeping only)
    int stepsSinceCheckpoint = 0;   // episode ends if this exceeds maxStepsPerCheckpoint
    int checkpointsReached = 0;
    public int maxStepsPerCheckpoint = 300;
    public int distanceBetweenCheckpoints = 5;  // normalization scale for the distance reward
    public bool ignoreMentalPain = true;        // when true, the existence penalty also drains totalReward

    // Start is called before the first frame update
    void Start()
    {
        rigidBody = GetComponent<Rigidbody>();
        // Find all child GameObjects that have the WheelControl script attached
        wheels = GetComponentsInChildren<WheelControl>();
        startPosition = transform.localPosition;
        startRotation = transform.localRotation;
    }

    // Resets car pose, physics state, per-episode counters and all checkpoints.
    public override void OnEpisodeBegin()
    {
        stepsSinceCheckpoint = 0;
        checkpointsReached = 0;
        totalReward = 0;
        totalMentalPain = 0;
        // reset wheels
        foreach (var wheel in wheels)
        {
            wheel.WheelCollider.brakeTorque = 0;
            wheel.WheelCollider.motorTorque = 0;
            wheel.WheelCollider.steerAngle = 0;
        }
        // reset car
        transform.localPosition = startPosition;
        transform.localRotation = startRotation;
        rigidBody.velocity = Vector3.zero;
        rigidBody.angularVelocity = Vector3.zero;
        // reset checkpoints
        foreach (GameObject checkpoint in checkpoints)
        {
            checkpoint.GetComponent<Checkpoint>().isCollected = false;
        }
    }

    // Returns the transform of the first checkpoint (in list order) that has not
    // been collected yet; falls back to checkpoints[0] when all are collected.
    Transform GetNextCheckpoint()
    {
        foreach (GameObject checkpoint in checkpoints)
        {
            if (!checkpoint.GetComponent<Checkpoint>().isCollected)
            {
                return checkpoint.transform;
            }
        }
        return checkpoints[0].transform;
    }

    // 6 observations: distance and signed angle to the next checkpoint, a unit
    // vector to it in the car's local frame (x, z), and the car's planar
    // velocity in the car's local frame (x, z).
    public override void CollectObservations(VectorSensor sensor)
    {
        Transform currentCheckpoint = GetNextCheckpoint();
        // distance to next checkpoint
        sensor.AddObservation(distanceToCheckpoint(currentCheckpoint));
        // relative angle to checkpoint
        sensor.AddObservation(angleToCheckpoint(currentCheckpoint));
        // relative vector pointing to checkpoint (planar, rotated into car frame)
        Vector3 position = transform.localPosition;
        Vector3 checkpointPosition = currentCheckpoint.localPosition;
        Vector3 toCheckpoint = new Vector3(
            checkpointPosition.x - position.x,
            0,
            checkpointPosition.z - position.z
        );
        float carAngle = transform.localEulerAngles.y;
        toCheckpoint = Quaternion.Euler(0, -carAngle, 0) * toCheckpoint.normalized;
        sensor.AddObservation(toCheckpoint.x);
        sensor.AddObservation(toCheckpoint.z);
        // planar velocity rotated into the car's local frame
        Vector3 velocity = new Vector3(
            rigidBody.velocity.x,
            0,
            rigidBody.velocity.z
        );
        Vector3 relativeVelocity = Quaternion.Euler(0, -carAngle, 0) * velocity;
        sensor.AddObservation(relativeVelocity.x);
        sensor.AddObservation(relativeVelocity.z);
    }

    // Applies the two discrete actions as throttle/steer, then hands out rewards.
    // Action mapping: branch 0 = throttle {0:-1, 1:+1, 2:0}, branch 1 = steer {0:-1, 1:+1, 2:0}.
    public override void OnActionReceived(ActionBuffers actions)
    {
        float vInput = 0;
        float hInput = 0;
        if (actions.DiscreteActions[0] == 0)
            vInput = -1f;
        if (actions.DiscreteActions[0] == 1)
            vInput = 1f;
        if (actions.DiscreteActions[1] == 0)
            hInput = -1f;
        if (actions.DiscreteActions[1] == 1)
            hInput = 1f;

        // give benson mental pain for existing (punishment for maximizing first checkpoint by standing still)
        // AddReward(-0.002f);
        AddReward(-0.0018f); // less pain because of V4
        totalMentalPain -= 0.0018f;
        if (ignoreMentalPain)
            totalReward -= 0.0018f;

        float forwardSpeed = Vector3.Dot(transform.forward, rigidBody.velocity);
        // Calculate how close the car is to top speed
        // as a number from zero to one
        float speedFactor = Mathf.InverseLerp(0, maxSpeed / 4, forwardSpeed);
        // Use that to calculate how much torque is available
        // (zero torque at top speed)
        float currentMotorTorque = Mathf.Lerp(motorTorque, 0, speedFactor);
        // …and to calculate how much to steer
        // (the car steers more gently at top speed)
        float currentSteerRange = Mathf.Lerp(steeringRange, steeringRangeAtMaxSpeed, speedFactor);
        // Check whether the input is in the same direction as the car's velocity.
        // NOTE: Mathf.Sign(0) == 1, so isAccelerating may be true with zero input;
        // the isStopping branch below still forces braking in that case.
        bool isAccelerating = Mathf.Sign(vInput) == Mathf.Sign(forwardSpeed);
        bool isStopping = vInput == 0; // range
        bool isBraking = (vInput < 0 && forwardSpeed > 0) || (vInput > 0 && forwardSpeed < 0);
        if (vInput > 0 && forwardSpeed < 0)
        {
            isAccelerating = false;
        }

        foreach (var wheel in wheels)
        {
            // Apply steering to Wheel colliders that have "Steerable" enabled
            if (wheel.steerable)
            {
                wheel.WheelCollider.steerAngle = hInput * currentSteerRange;
            }
            if (isBraking)
            {
                wheel.WheelCollider.brakeTorque = Mathf.Abs(vInput) * brakeTorque;
                //wheel.WheelCollider.motorTorque = 0;
            }
            if (isAccelerating)
            {
                // Apply torque to Wheel colliders that have "Motorized" enabled
                if (wheel.motorized)
                {
                    wheel.WheelCollider.motorTorque = vInput * currentMotorTorque;
                }
                wheel.WheelCollider.brakeTorque = 0;
            }
            if (isStopping)
            {
                // Neutral throttle: apply the passive auto-brake
                // (vInput is 0 here, so the first term is always 0).
                wheel.WheelCollider.brakeTorque = Mathf.Abs(vInput) * brakeTorque + autoBrake;
                if (forwardSpeed < 0)
                {
                    // Brake much harder when rolling backwards
                    wheel.WheelCollider.brakeTorque = (Mathf.Abs(vInput) * brakeTorque + autoBrake) * 5;
                }
            }
        }

        // rewards: shaping toward the next uncollected checkpoint
        Transform currentCheckpoint = GetNextCheckpoint();
        float checkpointDistance = distanceToCheckpoint(currentCheckpoint);
        // proximity reward in [0, 1/500], maximal when touching the checkpoint
        float reward = (1 - Mathf.InverseLerp(0, distanceBetweenCheckpoints, checkpointDistance)) / 500;
        totalReward += reward;
        AddReward(reward);
        // heading reward in [0, 1/2000], maximal when pointing straight at it
        float checkpointAngle = angleToCheckpoint(currentCheckpoint);
        if (checkpointAngle > 0)
            reward = (1 - Mathf.InverseLerp(0, 60, checkpointAngle)) / 2000;
        else
            reward = Mathf.InverseLerp(-60, 0, checkpointAngle) / 2000;
        AddReward(reward);

        if (checkpointDistance < 0.1f)
        {
            currentCheckpoint.GetComponent<Checkpoint>().isCollected = true;
            stepsSinceCheckpoint = 0;
            checkpointsReached += 1;
            // If last checkpoint
            if (currentCheckpoint == checkpoints[checkpoints.Count - 1].transform)
            {
                AddReward(10f);
                EndEpisode();
            }
            // Checkpoint bonus, minus the shaping reward accumulated since the
            // previous checkpoint (keeps loitering near a checkpoint unprofitable).
            // NOTE(review): these run even after EndEpisode() above — confirm intended.
            AddReward(1f);
            AddReward(-totalReward);
            totalReward = 0;
        }

        currentStep += 1;
        stepsSinceCheckpoint += 1;
        // time out the episode if no checkpoint progress is made
        if (stepsSinceCheckpoint >= maxStepsPerCheckpoint)
        {
            stepsSinceCheckpoint = 0;
            EndEpisode();
        }
        // print(GetCumulativeReward());
    }

    // Keyboard fallback for manual control: 2 means "no input" on each branch.
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActionsOut = actionsOut.DiscreteActions;
        discreteActionsOut[0] = 2;
        discreteActionsOut[1] = 2;
        if (Input.GetAxis("Vertical") < -0.5)
            discreteActionsOut[0] = 0;
        if (Input.GetAxis("Vertical") > 0.5)
            discreteActionsOut[0] = 1;
        if (Input.GetAxis("Horizontal") < -0.5)
            discreteActionsOut[1] = 0;
        if (Input.GetAxis("Horizontal") > 0.5)
            discreteActionsOut[1] = 1;
    }

    // finds distance from agent to closest point on the checkpoint line
    float distanceToCheckpoint(Transform checkpoint)
    {
        var closestPoint = checkpoint.GetComponent<Collider>().ClosestPointOnBounds(transform.position);
        var distanceToCheckpoint = Vector3.Distance(transform.position, closestPoint);
        return distanceToCheckpoint;
    }

    // Signed yaw angle (degrees) from the car's forward vector to the checkpoint.
    float angleToCheckpoint(Transform checkpoint)
    {
        Vector3 checkpointDirection = checkpoint.localPosition - transform.localPosition;
        float angle = Vector3.SignedAngle(transform.forward, checkpointDirection, Vector3.up);
        return angle;
    }

    // Penalize bumping into other players; collisions with NPCs are ignored.
    private void OnCollisionEnter(Collision other)
    {
        // if (other.gameObject.CompareTag("NPC"))
        // {
        //     AddReward(0.1f);
        // }
        if (other.gameObject.CompareTag("Player"))
        {
            AddReward(-0.1f);
        }
    }
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 2eeab89a9b8a5f04291ce55ceb722904
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,72 @@
behaviors:
BensonV7:
trainer_type: ppo
hyperparameters:
# Hyperparameters common to PPO and SAC
batch_size: 50
buffer_size: 10240
learning_rate: 3.0e-4
learning_rate_schedule: linear
# PPO-specific hyperparameters
# Replaces the "PPO-specific hyperparameters" section above
beta: 5.0e-2
epsilon: 0.2
lambd: 0.95
num_epoch: 3
# Configuration of the neural network (common to PPO/SAC)
network_settings:
vis_encode_type: simple
normalize: false
hidden_units: 128
num_layers: 2
# Trainer configurations common to all trainers
max_steps: 2.0e7
time_horizon: 64
summary_freq: 10000
keep_checkpoints: 5
checkpoint_interval: 500000
threaded: false
init_path: null
# # behavior cloning
# behavioral_cloning:
# demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
# strength: 0.5
# # steps: 150000
# # batch_size: 512
# # num_epoch: 3
# # samples_per_update: 0
reward_signals:
# environment reward (default)
extrinsic:
strength: 1.0
gamma: 0.99
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 20000
swap_steps: 10000
team_change: 100000
# # curiosity module
# curiosity:
# strength: 0.02
# gamma: 0.99
# encoding_size: 256
# learning_rate: 3.0e-4
# # GAIL
# gail:
# strength: 0.5
# # gamma: 0.99
# # encoding_size: 128
# demo_path: 'c:\Users\noahk\Documents\Unity projects\Racesm\Assets\Demonstrations\BensonV3M.demo'
# # learning_rate: 3.0e-4
# # use_actions: false
# # use_vail: false

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,82 @@
default_settings: null
behaviors:
BensonV7:
trainer_type: ppo
hyperparameters:
batch_size: 50
buffer_size: 10240
learning_rate: 0.0003
beta: 0.05
epsilon: 0.2
lambd: 0.95
num_epoch: 3
shared_critic: false
learning_rate_schedule: linear
beta_schedule: linear
epsilon_schedule: linear
network_settings:
normalize: false
hidden_units: 128
num_layers: 2
vis_encode_type: simple
memory: null
goal_conditioning_type: hyper
deterministic: false
reward_signals:
extrinsic:
gamma: 0.99
strength: 1.0
network_settings:
normalize: false
hidden_units: 128
num_layers: 2
vis_encode_type: simple
memory: null
goal_conditioning_type: hyper
deterministic: false
init_path: null
keep_checkpoints: 5
checkpoint_interval: 500000
max_steps: 20000000
time_horizon: 64
summary_freq: 10000
threaded: false
self_play:
save_steps: 20000
team_change: 100000
swap_steps: 10000
window: 10
play_against_latest_model_ratio: 0.5
initial_elo: 1200.0
behavioral_cloning: null
env_settings:
env_path: null
env_args: null
base_port: 5005
num_envs: 1
num_areas: 1
seed: -1
max_lifetime_restarts: 10
restarts_rate_limit_n: 1
restarts_rate_limit_period_s: 60
engine_settings:
width: 84
height: 84
quality_level: 5
time_scale: 20
target_frame_rate: -1
capture_frame_rate: 60
no_graphics: false
environment_parameters: null
checkpoint_settings:
run_id: BensonV7
initialize_from: null
load_model: false
resume: false
force: true
train_model: false
inference: false
results_dir: results
torch_settings:
device: null
debug: false

View File

@@ -0,0 +1,325 @@
{
"name": "root",
"gauges": {
"BensonV7.Policy.Entropy.mean": {
"value": 2.0729053020477295,
"min": 2.0728888511657715,
"max": 2.1842892169952393,
"count": 7
},
"BensonV7.Policy.Entropy.sum": {
"value": 84906.203125,
"min": 80925.578125,
"max": 89468.484375,
"count": 7
},
"BensonV7.Step.mean": {
"value": 69984.0,
"min": 9984.0,
"max": 69984.0,
"count": 7
},
"BensonV7.Step.sum": {
"value": 69984.0,
"min": 9984.0,
"max": 69984.0,
"count": 7
},
"BensonV7.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.10756219178438187,
"min": -1.984985589981079,
"max": -0.10756219178438187,
"count": 7
},
"BensonV7.Policy.ExtrinsicValueEstimate.sum": {
"value": -26.567861557006836,
"min": -488.30645751953125,
"max": -26.567861557006836,
"count": 7
},
"BensonV7.Environment.EpisodeLength.mean": {
"value": 81.16216216216216,
"min": 79.0,
"max": 81.38866396761134,
"count": 7
},
"BensonV7.Environment.EpisodeLength.sum": {
"value": 39039.0,
"min": 37920.0,
"max": 41080.0,
"count": 7
},
"BensonV7.Self-play.ELO.mean": {
"value": 1432.9340303735714,
"min": 1196.0598258059322,
"max": 1432.9340303735714,
"count": 7
},
"BensonV7.Self-play.ELO.sum": {
"value": 171952.08364482858,
"min": 143527.17909671186,
"max": 180385.31347156668,
"count": 7
},
"BensonV7.Environment.CumulativeReward.mean": {
"value": -0.09534425969420324,
"min": -0.11856273706287665,
"max": -0.035531304608593925,
"count": 7
},
"BensonV7.Environment.CumulativeReward.sum": {
"value": -11.536655422998592,
"min": -15.294593081111088,
"max": -4.263756553031271,
"count": 7
},
"BensonV7.Policy.ExtrinsicReward.mean": {
"value": -0.09534425969420324,
"min": -0.11856273706287665,
"max": -0.035531304608593925,
"count": 7
},
"BensonV7.Policy.ExtrinsicReward.sum": {
"value": -11.536655422998592,
"min": -15.294593081111088,
"max": -4.263756553031271,
"count": 7
},
"BensonV7.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7
},
"BensonV7.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7
},
"BensonV7.Losses.PolicyLoss.mean": {
"value": 0.11715025729667197,
"min": 0.11001400724555237,
"max": 0.11715025729667197,
"count": 6
},
"BensonV7.Losses.PolicyLoss.sum": {
"value": 0.11715025729667197,
"min": 0.11001400724555237,
"max": 0.11715025729667197,
"count": 6
},
"BensonV7.Losses.ValueLoss.mean": {
"value": 0.0027312553831553743,
"min": 0.0027312553831553743,
"max": 0.34692010765847486,
"count": 6
},
"BensonV7.Losses.ValueLoss.sum": {
"value": 0.0027312553831553743,
"min": 0.0027312553831553743,
"max": 0.34692010765847486,
"count": 6
},
"BensonV7.Policy.LearningRate.mean": {
"value": 0.000299064000312,
"min": 0.000299064000312,
"max": 0.00029984400005200003,
"count": 6
},
"BensonV7.Policy.LearningRate.sum": {
"value": 0.000299064000312,
"min": 0.000299064000312,
"max": 0.00029984400005200003,
"count": 6
},
"BensonV7.Policy.Epsilon.mean": {
"value": 0.19968800000000003,
"min": 0.19968800000000003,
"max": 0.19994800000000001,
"count": 6
},
"BensonV7.Policy.Epsilon.sum": {
"value": 0.19968800000000003,
"min": 0.19968800000000003,
"max": 0.19994800000000001,
"count": 6
},
"BensonV7.Policy.Beta.mean": {
"value": 0.0498440312,
"min": 0.0498440312,
"max": 0.049974005200000005,
"count": 6
},
"BensonV7.Policy.Beta.sum": {
"value": 0.0498440312,
"min": 0.0498440312,
"max": 0.049974005200000005,
"count": 6
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1713809062",
"python_version": "3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\noahk\\Documents\\Unity projects\\Racesm\\.venv\\Scripts\\mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force",
"mlagents_version": "0.30.0",
"mlagents_envs_version": "0.30.0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.2+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1713809367"
},
"total": 304.6736262,
"count": 1,
"self": 0.004877599999929316,
"children": {
"run_training.setup": {
"total": 0.0771373999999998,
"count": 1,
"self": 0.0771373999999998
},
"TrainerController.start_learning": {
"total": 304.59161120000005,
"count": 1,
"self": 0.16134309999955576,
"children": {
"TrainerController._reset_env": {
"total": 19.5166619,
"count": 1,
"self": 19.5166619
},
"TrainerController.advance": {
"total": 284.80744110000046,
"count": 8441,
"self": 0.15070639999748892,
"children": {
"env_step": {
"total": 228.853432300001,
"count": 8441,
"self": 133.1350660999974,
"children": {
"SubprocessEnvManager._take_step": {
"total": 95.61978480000201,
"count": 8441,
"self": 1.4240661000022925,
"children": {
"TorchPolicy.evaluate": {
"total": 94.19571869999972,
"count": 31524,
"self": 94.19571869999972
}
}
},
"workers": {
"total": 0.09858140000157434,
"count": 8440,
"self": 0.0,
"children": {
"worker_root": {
"total": 272.33989530000036,
"count": 8440,
"is_parallel": true,
"self": 169.65825009999963,
"children": {
"steps_from_proto": {
"total": 0.0013582999999997014,
"count": 4,
"is_parallel": true,
"self": 0.0005351000000004547,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0008231999999992468,
"count": 24,
"is_parallel": true,
"self": 0.0008231999999992468
}
}
},
"UnityEnvironment.step": {
"total": 102.68028690000074,
"count": 8440,
"is_parallel": true,
"self": 2.971126999999001,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3.075714900001067,
"count": 8440,
"is_parallel": true,
"self": 3.075714900001067
},
"communicator.exchange": {
"total": 86.89778070000133,
"count": 8440,
"is_parallel": true,
"self": 86.89778070000133
},
"steps_from_proto": {
"total": 9.735664299999346,
"count": 33760,
"is_parallel": true,
"self": 3.697254799999932,
"children": {
"_process_rank_one_or_two_observation": {
"total": 6.038409499999414,
"count": 202560,
"is_parallel": true,
"self": 6.038409499999414
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 55.803302400001996,
"count": 8440,
"self": 1.1739091000001807,
"children": {
"process_trajectory": {
"total": 8.147956200001808,
"count": 8440,
"self": 8.147956200001808
},
"_update_policy": {
"total": 46.48143710000001,
"count": 7,
"self": 9.447134400001843,
"children": {
"TorchPPOOptimizer.update": {
"total": 37.034302699998165,
"count": 4368,
"self": 37.034302699998165
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.000000093488779e-07,
"count": 1,
"self": 8.000000093488779e-07
},
"TrainerController._save_models": {
"total": 0.10616430000004584,
"count": 1,
"self": 0.007284000000026936,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0988803000000189,
"count": 1,
"self": 0.0988803000000189
}
}
}
}
}
}
}

View File

@@ -0,0 +1,30 @@
{
"BensonV7": {
"elo": 1489.2012227625175,
"checkpoints": [
{
"steps": 78400,
"file_path": "results\\BensonV7\\BensonV7\\BensonV7-78400.onnx",
"reward": -0.11241376396501437,
"creation_time": 1713809367.0670853,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-78400.pt"
]
}
],
"final_checkpoint": {
"steps": 78400,
"file_path": "results\\BensonV7\\BensonV7.onnx",
"reward": -0.11241376396501437,
"creation_time": 1713809367.0670853,
"auxillary_file_paths": [
"results\\BensonV7\\BensonV7\\BensonV7-78400.pt"
]
}
},
"metadata": {
"stats_format_version": "0.3.0",
"mlagents_version": "0.30.0",
"torch_version": "2.2.2+cu118"
}
}

View File

@@ -1,2 +1,2 @@
mlagents-learn config/BensonV6.1.yaml --run-id=BensonV6.1 --force
mlagents-learn config/BensonV7.yaml --run-id=BensonV7 --force
@REM mlagents-learn config/BensonV5.yaml --run-id=BensonV5 --resume --env="C:\Users\noahk\Desktop\BensonV5\Racesm_L.exe" --no-graphics --num-envs=1

View File

@@ -5,6 +5,7 @@ TagManager:
serializedVersion: 2
tags:
- Wall
- NPC
layers:
- Default
- TransparentFX