NeurIPS 2022: CityLearn Challenge
Going below 1.0 score with stablebaseline3
Applying PPO to citylearn
This notebook will get you started with Stable-Baselines3 (and PPO) to reach an (almost) good score on the CityLearn environment.
To summarize, the idea of the notebook is to use the PPO implementation of Stable-Baselines3 to train an optimized policy.
Things to note:
1. We use a modified copy of the official Stable-Baselines3 repository to make it compatible with gym 0.24 (the version used by CityLearn). It is installed from my GitHub repo.
2. The gym observations are built from the building observations, and the actions are the 5 electrical storage actions, one for each of the 5 buildings.
Importing the CityLearn lib and the (modified) Stable-Baselines3 lib
!pip install git+https://github.com/Forbu/CityLearn-1.3.5.git
!pip install git+https://github.com/Forbu/stable-baselines3.git
!git clone http://gitlab.aicrowd.com/adrien_forbu/citylearn-2022-starter-kit.git
import os
# Make the challenge checkout the working directory, so relative paths used
# later in the notebook are resolved against it.
# NOTE(review): the clone command earlier in the notebook targets a repository
# named "citylearn-2022-starter-kit" -- confirm that this directory actually
# exists after cloning.
path = "/content/neurips-2022-citylearn-challenge"
os.chdir(path)
import session¶
# import couple of libs some will be useful
import gym
import numpy as np
from collections import deque
import random
import re
import os
import sys
import time
import json
import itertools
# import stable_baselines3
from stable_baselines3 import PPO, A2C, DDPG, TD3
from stable_baselines3.common.utils import set_random_seed
from citylearn.citylearn import CityLearnEnv
import functools
Warning: Gym version v0.24.0 has a number of critical issues with `gym.make` such that the `reset` and `step` functions are called before returning the environment. It is recommend to downgrading to v0.23.1 or upgrading to v0.25.1
Main tools¶
Here we define the gym environment in a way that stable baseline 3 lib will be able to understand.
class Constants:
    """Notebook-wide configuration values."""

    # Number of episodes; not referenced by the code visible in this notebook.
    episodes = 3
    # Absolute path of the CityLearn schema file passed to ``CityLearnEnv``.
    schema_path = '/content/neurips-2022-citylearn-challenge/data/citylearn_challenge_2022_phase_1/schema.json'
def action_space_to_dict(aspace):
    """Describe a box space as a plain dictionary.

    Only intended for box spaces: ``aspace`` must expose ``high``, ``low``,
    ``shape`` and ``dtype`` attributes.

    Args:
        aspace: The space to describe.

    Returns:
        A dict with the keys ``"high"``, ``"low"``, ``"shape"`` and
        ``"dtype"``. The bounds and shape are passed through unchanged; the
        dtype is converted to its string representation.
    """
    return {
        "high": aspace.high,
        "low": aspace.low,
        "shape": aspace.shape,
        "dtype": str(aspace.dtype),
    }
def env_reset(env):
    """Reset ``env`` and bundle the first observation with its metadata.

    Args:
        env: An environment exposing ``reset()``, ``action_space`` and
            ``observation_space`` (both iterables of box spaces) and
            ``get_building_information()`` (returning a mapping).

    Returns:
        A dict with the keys ``"action_space"`` and ``"observation_space"``
        (lists of dicts produced by ``action_space_to_dict``),
        ``"building_info"`` (the values of the building-information mapping,
        as a list) and ``"observation"`` (whatever ``env.reset()`` returned).
    """
    # Reset first: the remaining reads describe the freshly reset environment.
    observations = env.reset()
    building_info = list(env.get_building_information().values())

    return {
        "action_space": [action_space_to_dict(space) for space in env.action_space],
        "observation_space": [action_space_to_dict(space) for space in env.observation_space],
        "building_info": building_info,
        "observation": observations,
    }
import gym
# Instantiate the CityLearn environment described by the challenge schema.
env = CityLearnEnv(schema=Constants.schema_path)

#### IMPORTANT
# Select which entries of each building's observation vector are kept.
# They are split into observations shared by every building (index_commun,
# read once from the first building) and observations specific to each
# building (index_particular, read from every building).
# NOTE(review): the meaning of each index depends on the schema's observation
# ordering -- confirm against the schema before changing these lists.
index_commun = [0, 2, 19, 4, 8, 24]
index_particular = [20, 21, 22, 23]

# Divisors applied element-wise to the selected observations; each list must
# stay aligned with the index list of the same suffix.
normalization_value_commun = [12, 24, 2, 100, 100, 1]
normalization_value_particular = [5, 5, 5, 5]

# Length of the flattened observation: the shared entries once, plus the
# building-specific entries for every building. The building count is taken
# from the environment (one action space per building) instead of being
# hard-coded to 5, so it always matches what the wrapper concatenates.
len_tot_index = len(index_commun) + len(index_particular) * len(env.action_space)
## env wrapper for stable baselines
class EnvCityGym(gym.Env):
    """Single-agent gym wrapper around a multi-building environment.

    The wrapped environment exposes one action space and one observation
    vector per building. This wrapper flattens them into a single action
    vector (one value per building) and a single observation vector so that
    Stable-Baselines3 can train one policy on it.

    Relies on the module-level ``index_commun``, ``index_particular``,
    ``normalization_value_commun``, ``normalization_value_particular`` and
    ``len_tot_index`` definitions.
    """

    def __init__(self, env):
        """Wrap ``env`` and declare the flattened action/observation spaces.

        Args:
            env: The multi-building environment to wrap; ``env.action_space``
                must have one entry per building.
        """
        super().__init__()
        self.env = env

        # One action space per building in the wrapped environment.
        self.num_buildings = len(env.action_space)

        # One action per building, each in [-1, 1]. The bounds are built as
        # float32 directly so that Box does not have to down-cast them.
        self.action_space = gym.spaces.Box(
            low=np.full(self.num_buildings, -1.0, dtype=np.float32),
            high=np.full(self.num_buildings, 1.0, dtype=np.float32),
            dtype=np.float32,
        )

        # Flattened, normalised observation vector.
        # NOTE(review): the normalised values are not clipped, so they may
        # fall outside the declared [0, 1] bounds -- confirm this is acceptable.
        self.observation_space = gym.spaces.Box(
            low=np.zeros(len_tot_index, dtype=np.float32),
            high=np.ones(len_tot_index, dtype=np.float32),
            dtype=np.float32,
        )

    def reset(self):
        """Reset the wrapped environment and return the flattened observation."""
        # A single reset is enough; resetting twice only discards the first result.
        obs = self.env.reset()
        return self.get_observation(obs)

    def get_observation(self, obs):
        """Flatten the per-building observations into one list.

        The result is the shared observations (``index_commun``, taken from
        the first building) followed by the building-specific observations
        (``index_particular``) of every building in order, each value divided
        by its normalisation constant.

        Args:
            obs: A sequence with one indexable observation vector per building.

        Returns:
            A flat list of normalised observation values.
        """
        # Shared observations are identical across buildings: read them once.
        observation_commun = [
            obs[0][i] / n for i, n in zip(index_commun, normalization_value_commun)
        ]

        # Building-specific observations, concatenated building after building.
        observation_particular = list(
            itertools.chain.from_iterable(
                [o[i] / n for i, n in zip(index_particular, normalization_value_particular)]
                for o in obs
            )
        )

        return observation_commun + observation_particular

    def step(self, action):
        """Apply one action per building and advance the wrapped environment.

        Args:
            action: An iterable with one action value per building.

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``observation``
            is the flattened observation and ``reward`` is the sum of the
            rewards returned by the wrapped environment.
        """
        # The wrapped environment expects one single-element action list per building.
        action = [[act] for act in action]

        obs, reward, done, info = self.env.step(action)

        observation = self.get_observation(obs)

        return observation, sum(reward), done, info

    def render(self, mode='human'):
        """Delegate rendering to the wrapped environment."""
        return self.env.render(mode)
Train and test function¶
The function to train and test the sb3 PPO algorithm
# function to test the policy trained with the PPO algorithm
def test_ppo():
    """Run the saved PPO policy on a fresh environment and report its score.

    Loads the model saved as ``"ppo_citylearn"``, rolls it out
    deterministically for at most 8000 steps (stopping early if the episode
    ends), then prints and returns half the sum of the values returned by the
    wrapped environment's ``evaluate()``.

    Returns:
        The final score computed from ``evaluate()``.
    """
    # Build the same single-agent wrapper that was used for training.
    env = EnvCityGym(CityLearnEnv(schema=Constants.schema_path))

    # we load the model
    model = PPO.load("ppo_citylearn")

    # we reset the environment
    obs = env.reset()

    nb_iter = 8000

    for i in range(nb_iter):
        # The wrapper is a single-agent env: one prediction yields the whole
        # action vector (one value per building).
        action, _states = model.predict(np.asarray(obs, dtype=np.float32), deterministic=True)

        # we do a step in the environment
        obs, reward, done, info = env.step(action)

        # sometimes check the actions and rewards
        if i % 100 == 0:
            print("actions : ", action)
            print("rewards : ", reward)

        # Do not step past the end of the episode.
        if done:
            break

    # The wrapper stores the underlying environment as ``env.env``.
    final_result = sum(env.env.evaluate()) / 2

    print("final result : ", final_result)
    return final_result
# function to train the policy with PPO algorithm
def train_ppo(total_timesteps=10000000):
    """Train (or resume training of) a PPO policy and save it.

    If a model saved as ``"ppo_citylearn"`` exists it is loaded and attached
    to a fresh environment; otherwise a new ``MlpPolicy`` PPO model is
    created. The model is then trained and saved back to ``"ppo_citylearn"``.

    Args:
        total_timesteps: Number of environment steps to train for.

    Returns:
        The trained PPO model.
    """
    # first we initialize the environment (single-agent gym wrapper)
    env = EnvCityGym(CityLearnEnv(schema=Constants.schema_path))
    env.reset()

    # Resume from a previous checkpoint when there is one. The environment is
    # passed to ``load`` so the restored model has something to train on.
    try:
        model = PPO.load("ppo_citylearn", env=env)
    except FileNotFoundError:
        # No checkpoint yet: start from scratch. Any other error is a real
        # problem and is deliberately left to propagate.
        model = PPO('MlpPolicy', env, verbose=2, gamma=0.99)

    # Train the agent
    model.learn(total_timesteps=total_timesteps)

    model.save("ppo_citylearn")

    return model
# Launch training and keep the returned model; train_ppo() also saves it
# under "ppo_citylearn".
model = train_ppo()
/usr/local/lib/python3.7/dist-packages/gym/spaces/box.py:112: UserWarning: WARN: Box bound precision lowered by casting to float32
logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
Le flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.
| loss | 21.7 |
| n_updates | 46550 |
| policy_gradient_loss | -0.0056 |
| std | 0.0858 |
| value_loss | 42.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4657 |
| time_elapsed | 27781 |
| total_timesteps | 9537536 |
| train/ | |
| approx_kl | 0.046927072 |
| clip_fraction | 0.421 |
| clip_range | 0.2 |
| entropy_loss | 5.25 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 28.9 |
| n_updates | 46560 |
| policy_gradient_loss | -0.0115 |
| std | 0.0848 |
| value_loss | 56.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4658 |
| time_elapsed | 27787 |
| total_timesteps | 9539584 |
| train/ | |
| approx_kl | 0.06135473 |
| clip_fraction | 0.456 |
| clip_range | 0.2 |
| entropy_loss | 5.25 |
| explained_variance | 0.953 |
| learning_rate | 0.0003 |
| loss | 15.4 |
| n_updates | 46570 |
| policy_gradient_loss | -0.0101 |
| std | 0.0853 |
| value_loss | 30.4 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4659 |
| time_elapsed | 27793 |
| total_timesteps | 9541632 |
| train/ | |
| approx_kl | 0.04368954 |
| clip_fraction | 0.338 |
| clip_range | 0.2 |
| entropy_loss | 5.25 |
| explained_variance | 0.84 |
| learning_rate | 0.0003 |
| loss | 38.7 |
| n_updates | 46580 |
| policy_gradient_loss | -0.0117 |
| std | 0.0853 |
| value_loss | 129 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4660 |
| time_elapsed | 27799 |
| total_timesteps | 9543680 |
| train/ | |
| approx_kl | 0.03953073 |
| clip_fraction | 0.37 |
| clip_range | 0.2 |
| entropy_loss | 5.24 |
| explained_variance | 0.923 |
| learning_rate | 0.0003 |
| loss | 20.9 |
| n_updates | 46590 |
| policy_gradient_loss | -0.0102 |
| std | 0.0854 |
| value_loss | 47.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4661 |
| time_elapsed | 27805 |
| total_timesteps | 9545728 |
| train/ | |
| approx_kl | 0.04136477 |
| clip_fraction | 0.373 |
| clip_range | 0.2 |
| entropy_loss | 5.24 |
| explained_variance | 0.933 |
| learning_rate | 0.0003 |
| loss | 35.2 |
| n_updates | 46600 |
| policy_gradient_loss | -0.0106 |
| std | 0.0853 |
| value_loss | 59.8 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4662 |
| time_elapsed | 27811 |
| total_timesteps | 9547776 |
| train/ | |
| approx_kl | 0.06604685 |
| clip_fraction | 0.5 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.84 |
| learning_rate | 0.0003 |
| loss | 10.3 |
| n_updates | 46610 |
| policy_gradient_loss | 0.00379 |
| std | 0.0864 |
| value_loss | 26.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4663 |
| time_elapsed | 27816 |
| total_timesteps | 9549824 |
| train/ | |
| approx_kl | 0.030814908 |
| clip_fraction | 0.322 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.907 |
| learning_rate | 0.0003 |
| loss | 29.5 |
| n_updates | 46620 |
| policy_gradient_loss | -0.0183 |
| std | 0.0863 |
| value_loss | 138 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4664 |
| time_elapsed | 27822 |
| total_timesteps | 9551872 |
| train/ | |
| approx_kl | 0.04921186 |
| clip_fraction | 0.397 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.949 |
| learning_rate | 0.0003 |
| loss | 18.8 |
| n_updates | 46630 |
| policy_gradient_loss | -0.00683 |
| std | 0.0863 |
| value_loss | 46 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4665 |
| time_elapsed | 27828 |
| total_timesteps | 9553920 |
| train/ | |
| approx_kl | 0.041459356 |
| clip_fraction | 0.348 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.938 |
| learning_rate | 0.0003 |
| loss | 38.3 |
| n_updates | 46640 |
| policy_gradient_loss | -0.0183 |
| std | 0.0862 |
| value_loss | 62.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4666 |
| time_elapsed | 27834 |
| total_timesteps | 9555968 |
| train/ | |
| approx_kl | 0.064091995 |
| clip_fraction | 0.456 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 16.5 |
| n_updates | 46650 |
| policy_gradient_loss | -0.0061 |
| std | 0.0859 |
| value_loss | 31.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4667 |
| time_elapsed | 27840 |
| total_timesteps | 9558016 |
| train/ | |
| approx_kl | 0.03993137 |
| clip_fraction | 0.361 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 26.2 |
| n_updates | 46660 |
| policy_gradient_loss | -0.00923 |
| std | 0.0861 |
| value_loss | 55.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4668 |
| time_elapsed | 27846 |
| total_timesteps | 9560064 |
| train/ | |
| approx_kl | 0.038641527 |
| clip_fraction | 0.38 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.904 |
| learning_rate | 0.0003 |
| loss | 58.8 |
| n_updates | 46670 |
| policy_gradient_loss | -0.00955 |
| std | 0.0859 |
| value_loss | 101 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4669 |
| time_elapsed | 27851 |
| total_timesteps | 9562112 |
| train/ | |
| approx_kl | 0.04263217 |
| clip_fraction | 0.374 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.97 |
| learning_rate | 0.0003 |
| loss | 23 |
| n_updates | 46680 |
| policy_gradient_loss | -0.0053 |
| std | 0.0857 |
| value_loss | 43.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4670 |
| time_elapsed | 27857 |
| total_timesteps | 9564160 |
| train/ | |
| approx_kl | 0.049593113 |
| clip_fraction | 0.394 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 23 |
| n_updates | 46690 |
| policy_gradient_loss | -0.0128 |
| std | 0.086 |
| value_loss | 50.4 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4671 |
| time_elapsed | 27863 |
| total_timesteps | 9566208 |
| train/ | |
| approx_kl | 0.0519082 |
| clip_fraction | 0.384 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 23.7 |
| n_updates | 46700 |
| policy_gradient_loss | -0.00782 |
| std | 0.0861 |
| value_loss | 39 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4672 |
| time_elapsed | 27869 |
| total_timesteps | 9568256 |
| train/ | |
| approx_kl | 0.036675796 |
| clip_fraction | 0.332 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.849 |
| learning_rate | 0.0003 |
| loss | 38.8 |
| n_updates | 46710 |
| policy_gradient_loss | -0.0146 |
| std | 0.0863 |
| value_loss | 130 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4673 |
| time_elapsed | 27875 |
| total_timesteps | 9570304 |
| train/ | |
| approx_kl | 0.048215315 |
| clip_fraction | 0.404 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.961 |
| learning_rate | 0.0003 |
| loss | 17.3 |
| n_updates | 46720 |
| policy_gradient_loss | -0.0113 |
| std | 0.0867 |
| value_loss | 41.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4674 |
| time_elapsed | 27880 |
| total_timesteps | 9572352 |
| train/ | |
| approx_kl | 0.048168875 |
| clip_fraction | 0.424 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 21.7 |
| n_updates | 46730 |
| policy_gradient_loss | -0.0141 |
| std | 0.0864 |
| value_loss | 61.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4675 |
| time_elapsed | 27886 |
| total_timesteps | 9574400 |
| train/ | |
| approx_kl | 0.073954344 |
| clip_fraction | 0.519 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.871 |
| learning_rate | 0.0003 |
| loss | 12.3 |
| n_updates | 46740 |
| policy_gradient_loss | 0.000644 |
| std | 0.0866 |
| value_loss | 25.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4676 |
| time_elapsed | 27892 |
| total_timesteps | 9576448 |
| train/ | |
| approx_kl | 0.037302386 |
| clip_fraction | 0.313 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.804 |
| learning_rate | 0.0003 |
| loss | 179 |
| n_updates | 46750 |
| policy_gradient_loss | -0.0123 |
| std | 0.0868 |
| value_loss | 137 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4677 |
| time_elapsed | 27898 |
| total_timesteps | 9578496 |
| train/ | |
| approx_kl | 0.036337383 |
| clip_fraction | 0.374 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.902 |
| learning_rate | 0.0003 |
| loss | 35.3 |
| n_updates | 46760 |
| policy_gradient_loss | -0.00997 |
| std | 0.0865 |
| value_loss | 44.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4678 |
| time_elapsed | 27904 |
| total_timesteps | 9580544 |
| train/ | |
| approx_kl | 0.039574925 |
| clip_fraction | 0.371 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.914 |
| learning_rate | 0.0003 |
| loss | 22.7 |
| n_updates | 46770 |
| policy_gradient_loss | -0.0168 |
| std | 0.0864 |
| value_loss | 60.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4679 |
| time_elapsed | 27910 |
| total_timesteps | 9582592 |
| train/ | |
| approx_kl | 0.07369236 |
| clip_fraction | 0.489 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.93 |
| learning_rate | 0.0003 |
| loss | 13.3 |
| n_updates | 46780 |
| policy_gradient_loss | -0.00955 |
| std | 0.0865 |
| value_loss | 30.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4680 |
| time_elapsed | 27916 |
| total_timesteps | 9584640 |
| train/ | |
| approx_kl | 0.036709297 |
| clip_fraction | 0.311 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.927 |
| learning_rate | 0.0003 |
| loss | 47.1 |
| n_updates | 46790 |
| policy_gradient_loss | -0.0147 |
| std | 0.086 |
| value_loss | 128 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4681 |
| time_elapsed | 27921 |
| total_timesteps | 9586688 |
| train/ | |
| approx_kl | 0.06075281 |
| clip_fraction | 0.428 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 20.1 |
| n_updates | 46800 |
| policy_gradient_loss | -0.0129 |
| std | 0.0856 |
| value_loss | 48.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4682 |
| time_elapsed | 27927 |
| total_timesteps | 9588736 |
| train/ | |
| approx_kl | 0.046840575 |
| clip_fraction | 0.367 |
| clip_range | 0.2 |
| entropy_loss | 5.25 |
| explained_variance | 0.958 |
| learning_rate | 0.0003 |
| loss | 24 |
| n_updates | 46810 |
| policy_gradient_loss | -0.0124 |
| std | 0.0848 |
| value_loss | 53.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4683 |
| time_elapsed | 27933 |
| total_timesteps | 9590784 |
| train/ | |
| approx_kl | 0.063902296 |
| clip_fraction | 0.453 |
| clip_range | 0.2 |
| entropy_loss | 5.25 |
| explained_variance | 0.956 |
| learning_rate | 0.0003 |
| loss | 14.4 |
| n_updates | 46820 |
| policy_gradient_loss | -0.00771 |
| std | 0.0853 |
| value_loss | 39.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4684 |
| time_elapsed | 27939 |
| total_timesteps | 9592832 |
| train/ | |
| approx_kl | 0.051683538 |
| clip_fraction | 0.405 |
| clip_range | 0.2 |
| entropy_loss | 5.24 |
| explained_variance | 0.972 |
| learning_rate | 0.0003 |
| loss | 20.4 |
| n_updates | 46830 |
| policy_gradient_loss | -0.0117 |
| std | 0.0853 |
| value_loss | 47.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4685 |
| time_elapsed | 27945 |
| total_timesteps | 9594880 |
| train/ | |
| approx_kl | 0.045429714 |
| clip_fraction | 0.364 |
| clip_range | 0.2 |
| entropy_loss | 5.24 |
| explained_variance | 0.881 |
| learning_rate | 0.0003 |
| loss | 123 |
| n_updates | 46840 |
| policy_gradient_loss | -0.016 |
| std | 0.0854 |
| value_loss | 120 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4686 |
| time_elapsed | 27951 |
| total_timesteps | 9596928 |
| train/ | |
| approx_kl | 0.04849296 |
| clip_fraction | 0.403 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.972 |
| learning_rate | 0.0003 |
| loss | 20.1 |
| n_updates | 46850 |
| policy_gradient_loss | -0.00828 |
| std | 0.0859 |
| value_loss | 40.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4687 |
| time_elapsed | 27957 |
| total_timesteps | 9598976 |
| train/ | |
| approx_kl | 0.04957109 |
| clip_fraction | 0.392 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.961 |
| learning_rate | 0.0003 |
| loss | 21.8 |
| n_updates | 46860 |
| policy_gradient_loss | -0.0109 |
| std | 0.0861 |
| value_loss | 58.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4688 |
| time_elapsed | 27963 |
| total_timesteps | 9601024 |
| train/ | |
| approx_kl | 0.059594743 |
| clip_fraction | 0.468 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 14.8 |
| n_updates | 46870 |
| policy_gradient_loss | -0.0123 |
| std | 0.0867 |
| value_loss | 33.9 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4689 |
| time_elapsed | 27969 |
| total_timesteps | 9603072 |
| train/ | |
| approx_kl | 0.03500364 |
| clip_fraction | 0.346 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.84 |
| learning_rate | 0.0003 |
| loss | 34.6 |
| n_updates | 46880 |
| policy_gradient_loss | -0.00729 |
| std | 0.0864 |
| value_loss | 133 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4690 |
| time_elapsed | 27975 |
| total_timesteps | 9605120 |
| train/ | |
| approx_kl | 0.057583317 |
| clip_fraction | 0.412 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.94 |
| learning_rate | 0.0003 |
| loss | 18.9 |
| n_updates | 46890 |
| policy_gradient_loss | -0.0115 |
| std | 0.0861 |
| value_loss | 42.6 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4691 |
| time_elapsed | 27981 |
| total_timesteps | 9607168 |
| train/ | |
| approx_kl | 0.0450269 |
| clip_fraction | 0.39 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.943 |
| learning_rate | 0.0003 |
| loss | 57.2 |
| n_updates | 46900 |
| policy_gradient_loss | -0.0156 |
| std | 0.0862 |
| value_loss | 61 |
---------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4692 |
| time_elapsed | 27987 |
| total_timesteps | 9609216 |
| train/ | |
| approx_kl | 0.06969172 |
| clip_fraction | 0.5 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.838 |
| learning_rate | 0.0003 |
| loss | 12.9 |
| n_updates | 46910 |
| policy_gradient_loss | -0.00709 |
| std | 0.0854 |
| value_loss | 24.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4693 |
| time_elapsed | 27993 |
| total_timesteps | 9611264 |
| train/ | |
| approx_kl | 0.029024754 |
| clip_fraction | 0.318 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.864 |
| learning_rate | 0.0003 |
| loss | 177 |
| n_updates | 46920 |
| policy_gradient_loss | -0.0146 |
| std | 0.0857 |
| value_loss | 139 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4694 |
| time_elapsed | 27998 |
| total_timesteps | 9613312 |
| train/ | |
| approx_kl | 0.049878303 |
| clip_fraction | 0.398 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.926 |
| learning_rate | 0.0003 |
| loss | 15.4 |
| n_updates | 46930 |
| policy_gradient_loss | -0.00886 |
| std | 0.0856 |
| value_loss | 44.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4695 |
| time_elapsed | 28004 |
| total_timesteps | 9615360 |
| train/ | |
| approx_kl | 0.042785686 |
| clip_fraction | 0.348 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.927 |
| learning_rate | 0.0003 |
| loss | 17.6 |
| n_updates | 46940 |
| policy_gradient_loss | -0.0131 |
| std | 0.0855 |
| value_loss | 62.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4696 |
| time_elapsed | 28010 |
| total_timesteps | 9617408 |
| train/ | |
| approx_kl | 0.054771632 |
| clip_fraction | 0.438 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.956 |
| learning_rate | 0.0003 |
| loss | 21.6 |
| n_updates | 46950 |
| policy_gradient_loss | -0.0111 |
| std | 0.0858 |
| value_loss | 31.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4697 |
| time_elapsed | 28016 |
| total_timesteps | 9619456 |
| train/ | |
| approx_kl | 0.03219974 |
| clip_fraction | 0.333 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.935 |
| learning_rate | 0.0003 |
| loss | 40.2 |
| n_updates | 46960 |
| policy_gradient_loss | -0.0134 |
| std | 0.0857 |
| value_loss | 128 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4698 |
| time_elapsed | 28022 |
| total_timesteps | 9621504 |
| train/ | |
| approx_kl | 0.05829243 |
| clip_fraction | 0.423 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 25.8 |
| n_updates | 46970 |
| policy_gradient_loss | -0.00988 |
| std | 0.0856 |
| value_loss | 51.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4699 |
| time_elapsed | 28028 |
| total_timesteps | 9623552 |
| train/ | |
| approx_kl | 0.03800214 |
| clip_fraction | 0.392 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 23.5 |
| n_updates | 46980 |
| policy_gradient_loss | -0.0101 |
| std | 0.086 |
| value_loss | 45.5 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4700 |
| time_elapsed | 28034 |
| total_timesteps | 9625600 |
| train/ | |
| approx_kl | 0.05904118 |
| clip_fraction | 0.414 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 32.8 |
| n_updates | 46990 |
| policy_gradient_loss | -0.0137 |
| std | 0.0864 |
| value_loss | 49.2 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4701 |
| time_elapsed | 28039 |
| total_timesteps | 9627648 |
| train/ | |
| approx_kl | 0.06618355 |
| clip_fraction | 0.42 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.972 |
| learning_rate | 0.0003 |
| loss | 16.1 |
| n_updates | 47000 |
| policy_gradient_loss | -0.0106 |
| std | 0.0868 |
| value_loss | 42.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4702 |
| time_elapsed | 28045 |
| total_timesteps | 9629696 |
| train/ | |
| approx_kl | 0.039013103 |
| clip_fraction | 0.368 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.851 |
| learning_rate | 0.0003 |
| loss | 66.9 |
| n_updates | 47010 |
| policy_gradient_loss | -0.013 |
| std | 0.0866 |
| value_loss | 127 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4703 |
| time_elapsed | 28051 |
| total_timesteps | 9631744 |
| train/ | |
| approx_kl | 0.04956356 |
| clip_fraction | 0.438 |
| clip_range | 0.2 |
| entropy_loss | 5.14 |
| explained_variance | 0.968 |
| learning_rate | 0.0003 |
| loss | 15.8 |
| n_updates | 47020 |
| policy_gradient_loss | -0.00707 |
| std | 0.0873 |
| value_loss | 41.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4704 |
| time_elapsed | 28057 |
| total_timesteps | 9633792 |
| train/ | |
| approx_kl | 0.045165204 |
| clip_fraction | 0.409 |
| clip_range | 0.2 |
| entropy_loss | 5.14 |
| explained_variance | 0.96 |
| learning_rate | 0.0003 |
| loss | 25.3 |
| n_updates | 47030 |
| policy_gradient_loss | -0.0124 |
| std | 0.087 |
| value_loss | 59.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4705 |
| time_elapsed | 28063 |
| total_timesteps | 9635840 |
| train/ | |
| approx_kl | 0.051127344 |
| clip_fraction | 0.453 |
| clip_range | 0.2 |
| entropy_loss | 5.13 |
| explained_variance | 0.934 |
| learning_rate | 0.0003 |
| loss | 12.5 |
| n_updates | 47040 |
| policy_gradient_loss | 0.00134 |
| std | 0.0876 |
| value_loss | 27.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4706 |
| time_elapsed | 28069 |
| total_timesteps | 9637888 |
| train/ | |
| approx_kl | 0.04012461 |
| clip_fraction | 0.353 |
| clip_range | 0.2 |
| entropy_loss | 5.12 |
| explained_variance | 0.826 |
| learning_rate | 0.0003 |
| loss | 62.2 |
| n_updates | 47050 |
| policy_gradient_loss | -0.0184 |
| std | 0.0873 |
| value_loss | 134 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4707 |
| time_elapsed | 28075 |
| total_timesteps | 9639936 |
| train/ | |
| approx_kl | 0.036436632 |
| clip_fraction | 0.38 |
| clip_range | 0.2 |
| entropy_loss | 5.14 |
| explained_variance | 0.91 |
| learning_rate | 0.0003 |
| loss | 17.8 |
| n_updates | 47060 |
| policy_gradient_loss | -0.00555 |
| std | 0.0871 |
| value_loss | 46.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4708 |
| time_elapsed | 28081 |
| total_timesteps | 9641984 |
| train/ | |
| approx_kl | 0.04498616 |
| clip_fraction | 0.386 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.918 |
| learning_rate | 0.0003 |
| loss | 22.4 |
| n_updates | 47070 |
| policy_gradient_loss | -0.0159 |
| std | 0.0868 |
| value_loss | 61.3 |
----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4709 |
| time_elapsed | 28087 |
| total_timesteps | 9644032 |
| train/ | |
| approx_kl | 0.0658004 |
| clip_fraction | 0.48 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.866 |
| learning_rate | 0.0003 |
| loss | 14.7 |
| n_updates | 47080 |
| policy_gradient_loss | 0.0025 |
| std | 0.0871 |
| value_loss | 28.6 |
---------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4710 |
| time_elapsed | 28093 |
| total_timesteps | 9646080 |
| train/ | |
| approx_kl | 0.02917299 |
| clip_fraction | 0.292 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.922 |
| learning_rate | 0.0003 |
| loss | 32 |
| n_updates | 47090 |
| policy_gradient_loss | -0.016 |
| std | 0.0868 |
| value_loss | 131 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4711 |
| time_elapsed | 28099 |
| total_timesteps | 9648128 |
| train/ | |
| approx_kl | 0.044124447 |
| clip_fraction | 0.393 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.954 |
| learning_rate | 0.0003 |
| loss | 34.7 |
| n_updates | 47100 |
| policy_gradient_loss | -0.00864 |
| std | 0.0871 |
| value_loss | 46.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4712 |
| time_elapsed | 28105 |
| total_timesteps | 9650176 |
| train/ | |
| approx_kl | 0.037089244 |
| clip_fraction | 0.361 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.942 |
| learning_rate | 0.0003 |
| loss | 21.2 |
| n_updates | 47110 |
| policy_gradient_loss | -0.0187 |
| std | 0.0862 |
| value_loss | 59.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4713 |
| time_elapsed | 28110 |
| total_timesteps | 9652224 |
| train/ | |
| approx_kl | 0.06069903 |
| clip_fraction | 0.463 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.962 |
| learning_rate | 0.0003 |
| loss | 10.5 |
| n_updates | 47120 |
| policy_gradient_loss | -0.00641 |
| std | 0.0868 |
| value_loss | 32.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4714 |
| time_elapsed | 28116 |
| total_timesteps | 9654272 |
| train/ | |
| approx_kl | 0.05018707 |
| clip_fraction | 0.382 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.969 |
| learning_rate | 0.0003 |
| loss | 22.5 |
| n_updates | 47130 |
| policy_gradient_loss | -0.0169 |
| std | 0.0865 |
| value_loss | 51.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4715 |
| time_elapsed | 28122 |
| total_timesteps | 9656320 |
| train/ | |
| approx_kl | 0.040243655 |
| clip_fraction | 0.346 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.894 |
| learning_rate | 0.0003 |
| loss | 21.5 |
| n_updates | 47140 |
| policy_gradient_loss | -0.0144 |
| std | 0.0862 |
| value_loss | 112 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4716 |
| time_elapsed | 28129 |
| total_timesteps | 9658368 |
| train/ | |
| approx_kl | 0.051857587 |
| clip_fraction | 0.422 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.969 |
| learning_rate | 0.0003 |
| loss | 14.2 |
| n_updates | 47150 |
| policy_gradient_loss | -0.0124 |
| std | 0.0855 |
| value_loss | 42.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4717 |
| time_elapsed | 28135 |
| total_timesteps | 9660416 |
| train/ | |
| approx_kl | 0.06274843 |
| clip_fraction | 0.409 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 19.1 |
| n_updates | 47160 |
| policy_gradient_loss | -0.013 |
| std | 0.086 |
| value_loss | 52.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4718 |
| time_elapsed | 28141 |
| total_timesteps | 9662464 |
| train/ | |
| approx_kl | 0.051213443 |
| clip_fraction | 0.433 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 18.5 |
| n_updates | 47170 |
| policy_gradient_loss | -0.0138 |
| std | 0.0858 |
| value_loss | 36 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4719 |
| time_elapsed | 28146 |
| total_timesteps | 9664512 |
| train/ | |
| approx_kl | 0.044063643 |
| clip_fraction | 0.333 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.841 |
| learning_rate | 0.0003 |
| loss | 149 |
| n_updates | 47180 |
| policy_gradient_loss | -0.0158 |
| std | 0.0862 |
| value_loss | 131 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4720 |
| time_elapsed | 28152 |
| total_timesteps | 9666560 |
| train/ | |
| approx_kl | 0.06509362 |
| clip_fraction | 0.412 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.952 |
| learning_rate | 0.0003 |
| loss | 20 |
| n_updates | 47190 |
| policy_gradient_loss | -0.00729 |
| std | 0.0866 |
| value_loss | 41.1 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4721 |
| time_elapsed | 28158 |
| total_timesteps | 9668608 |
| train/ | |
| approx_kl | 0.04778389 |
| clip_fraction | 0.381 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.951 |
| learning_rate | 0.0003 |
| loss | 29.8 |
| n_updates | 47200 |
| policy_gradient_loss | -0.0114 |
| std | 0.0864 |
| value_loss | 64.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4722 |
| time_elapsed | 28164 |
| total_timesteps | 9670656 |
| train/ | |
| approx_kl | 0.070981964 |
| clip_fraction | 0.492 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.861 |
| learning_rate | 0.0003 |
| loss | 8.84 |
| n_updates | 47210 |
| policy_gradient_loss | 0.00134 |
| std | 0.0867 |
| value_loss | 24.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4723 |
| time_elapsed | 28170 |
| total_timesteps | 9672704 |
| train/ | |
| approx_kl | 0.03296309 |
| clip_fraction | 0.335 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.826 |
| learning_rate | 0.0003 |
| loss | 31.8 |
| n_updates | 47220 |
| policy_gradient_loss | -0.0186 |
| std | 0.0869 |
| value_loss | 133 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4724 |
| time_elapsed | 28176 |
| total_timesteps | 9674752 |
| train/ | |
| approx_kl | 0.05054172 |
| clip_fraction | 0.403 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.903 |
| learning_rate | 0.0003 |
| loss | 25.5 |
| n_updates | 47230 |
| policy_gradient_loss | -0.0112 |
| std | 0.0869 |
| value_loss | 44.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4725 |
| time_elapsed | 28182 |
| total_timesteps | 9676800 |
| train/ | |
| approx_kl | 0.038069587 |
| clip_fraction | 0.35 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.913 |
| learning_rate | 0.0003 |
| loss | 45.1 |
| n_updates | 47240 |
| policy_gradient_loss | -0.0165 |
| std | 0.0873 |
| value_loss | 65.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4726 |
| time_elapsed | 28188 |
| total_timesteps | 9678848 |
| train/ | |
| approx_kl | 0.056725744 |
| clip_fraction | 0.47 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.951 |
| learning_rate | 0.0003 |
| loss | 10.5 |
| n_updates | 47250 |
| policy_gradient_loss | -0.00918 |
| std | 0.0867 |
| value_loss | 30.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4727 |
| time_elapsed | 28193 |
| total_timesteps | 9680896 |
| train/ | |
| approx_kl | 0.036995433 |
| clip_fraction | 0.35 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.931 |
| learning_rate | 0.0003 |
| loss | 66 |
| n_updates | 47260 |
| policy_gradient_loss | -0.0127 |
| std | 0.0867 |
| value_loss | 128 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4728 |
| time_elapsed | 28199 |
| total_timesteps | 9682944 |
| train/ | |
| approx_kl | 0.054921806 |
| clip_fraction | 0.428 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 20.7 |
| n_updates | 47270 |
| policy_gradient_loss | -0.00868 |
| std | 0.0868 |
| value_loss | 51.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4729 |
| time_elapsed | 28205 |
| total_timesteps | 9684992 |
| train/ | |
| approx_kl | 0.048723914 |
| clip_fraction | 0.378 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.963 |
| learning_rate | 0.0003 |
| loss | 22.8 |
| n_updates | 47280 |
| policy_gradient_loss | -0.0088 |
| std | 0.0874 |
| value_loss | 48.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4730 |
| time_elapsed | 28211 |
| total_timesteps | 9687040 |
| train/ | |
| approx_kl | 0.05287794 |
| clip_fraction | 0.442 |
| clip_range | 0.2 |
| entropy_loss | 5.11 |
| explained_variance | 0.955 |
| learning_rate | 0.0003 |
| loss | 27.1 |
| n_updates | 47290 |
| policy_gradient_loss | -0.0134 |
| std | 0.0881 |
| value_loss | 42.7 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4731 |
| time_elapsed | 28217 |
| total_timesteps | 9689088 |
| train/ | |
| approx_kl | 0.04707245 |
| clip_fraction | 0.422 |
| clip_range | 0.2 |
| entropy_loss | 5.1 |
| explained_variance | 0.973 |
| learning_rate | 0.0003 |
| loss | 21.7 |
| n_updates | 47300 |
| policy_gradient_loss | -0.0122 |
| std | 0.088 |
| value_loss | 44.1 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4732 |
| time_elapsed | 28223 |
| total_timesteps | 9691136 |
| train/ | |
| approx_kl | 0.04915804 |
| clip_fraction | 0.367 |
| clip_range | 0.2 |
| entropy_loss | 5.11 |
| explained_variance | 0.873 |
| learning_rate | 0.0003 |
| loss | 53 |
| n_updates | 47310 |
| policy_gradient_loss | -0.0161 |
| std | 0.0876 |
| value_loss | 124 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4733 |
| time_elapsed | 28228 |
| total_timesteps | 9693184 |
| train/ | |
| approx_kl | 0.06437225 |
| clip_fraction | 0.442 |
| clip_range | 0.2 |
| entropy_loss | 5.12 |
| explained_variance | 0.97 |
| learning_rate | 0.0003 |
| loss | 26.7 |
| n_updates | 47320 |
| policy_gradient_loss | -0.0062 |
| std | 0.0877 |
| value_loss | 41.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4734 |
| time_elapsed | 28234 |
| total_timesteps | 9695232 |
| train/ | |
| approx_kl | 0.04900548 |
| clip_fraction | 0.383 |
| clip_range | 0.2 |
| entropy_loss | 5.12 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 33.4 |
| n_updates | 47330 |
| policy_gradient_loss | -0.00834 |
| std | 0.0877 |
| value_loss | 59.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4735 |
| time_elapsed | 28240 |
| total_timesteps | 9697280 |
| train/ | |
| approx_kl | 0.07465423 |
| clip_fraction | 0.489 |
| clip_range | 0.2 |
| entropy_loss | 5.14 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 16.6 |
| n_updates | 47340 |
| policy_gradient_loss | -0.0015 |
| std | 0.0867 |
| value_loss | 31.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4736 |
| time_elapsed | 28246 |
| total_timesteps | 9699328 |
| train/ | |
| approx_kl | 0.039271288 |
| clip_fraction | 0.339 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.844 |
| learning_rate | 0.0003 |
| loss | 32.6 |
| n_updates | 47350 |
| policy_gradient_loss | -0.0201 |
| std | 0.0866 |
| value_loss | 130 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4737 |
| time_elapsed | 28252 |
| total_timesteps | 9701376 |
| train/ | |
| approx_kl | 0.049667418 |
| clip_fraction | 0.401 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.926 |
| learning_rate | 0.0003 |
| loss | 15.8 |
| n_updates | 47360 |
| policy_gradient_loss | -0.012 |
| std | 0.0865 |
| value_loss | 48.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4738 |
| time_elapsed | 28258 |
| total_timesteps | 9703424 |
| train/ | |
| approx_kl | 0.049516972 |
| clip_fraction | 0.378 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.931 |
| learning_rate | 0.0003 |
| loss | 25.1 |
| n_updates | 47370 |
| policy_gradient_loss | -0.0121 |
| std | 0.0863 |
| value_loss | 63.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4739 |
| time_elapsed | 28264 |
| total_timesteps | 9705472 |
| train/ | |
| approx_kl | 0.09660293 |
| clip_fraction | 0.51 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.838 |
| learning_rate | 0.0003 |
| loss | 10.3 |
| n_updates | 47380 |
| policy_gradient_loss | 0.00744 |
| std | 0.0865 |
| value_loss | 25.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4740 |
| time_elapsed | 28270 |
| total_timesteps | 9707520 |
| train/ | |
| approx_kl | 0.029136397 |
| clip_fraction | 0.332 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.9 |
| learning_rate | 0.0003 |
| loss | 151 |
| n_updates | 47390 |
| policy_gradient_loss | -0.015 |
| std | 0.0863 |
| value_loss | 137 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4741 |
| time_elapsed | 28275 |
| total_timesteps | 9709568 |
| train/ | |
| approx_kl | 0.053510267 |
| clip_fraction | 0.385 |
| clip_range | 0.2 |
| entropy_loss | 5.23 |
| explained_variance | 0.942 |
| learning_rate | 0.0003 |
| loss | 19.2 |
| n_updates | 47400 |
| policy_gradient_loss | -0.00922 |
| std | 0.0855 |
| value_loss | 45.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4742 |
| time_elapsed | 28281 |
| total_timesteps | 9711616 |
| train/ | |
| approx_kl | 0.039325647 |
| clip_fraction | 0.378 |
| clip_range | 0.2 |
| entropy_loss | 5.24 |
| explained_variance | 0.932 |
| learning_rate | 0.0003 |
| loss | 26.7 |
| n_updates | 47410 |
| policy_gradient_loss | -0.00841 |
| std | 0.0857 |
| value_loss | 60.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4743 |
| time_elapsed | 28287 |
| total_timesteps | 9713664 |
| train/ | |
| approx_kl | 0.067215316 |
| clip_fraction | 0.467 |
| clip_range | 0.2 |
| entropy_loss | 5.22 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 13 |
| n_updates | 47420 |
| policy_gradient_loss | -0.00442 |
| std | 0.0861 |
| value_loss | 30.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4744 |
| time_elapsed | 28294 |
| total_timesteps | 9715712 |
| train/ | |
| approx_kl | 0.038473867 |
| clip_fraction | 0.368 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 37.3 |
| n_updates | 47430 |
| policy_gradient_loss | -0.0126 |
| std | 0.0861 |
| value_loss | 57.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4745 |
| time_elapsed | 28300 |
| total_timesteps | 9717760 |
| train/ | |
| approx_kl | 0.04886844 |
| clip_fraction | 0.369 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.904 |
| learning_rate | 0.0003 |
| loss | 34 |
| n_updates | 47440 |
| policy_gradient_loss | -0.012 |
| std | 0.0863 |
| value_loss | 103 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4746 |
| time_elapsed | 28305 |
| total_timesteps | 9719808 |
| train/ | |
| approx_kl | 0.04298178 |
| clip_fraction | 0.383 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.969 |
| learning_rate | 0.0003 |
| loss | 13.6 |
| n_updates | 47450 |
| policy_gradient_loss | -0.012 |
| std | 0.0862 |
| value_loss | 42.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4747 |
| time_elapsed | 28311 |
| total_timesteps | 9721856 |
| train/ | |
| approx_kl | 0.051239405 |
| clip_fraction | 0.416 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 32.2 |
| n_updates | 47460 |
| policy_gradient_loss | -0.00592 |
| std | 0.0862 |
| value_loss | 48.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4748 |
| time_elapsed | 28317 |
| total_timesteps | 9723904 |
| train/ | |
| approx_kl | 0.06327915 |
| clip_fraction | 0.426 |
| clip_range | 0.2 |
| entropy_loss | 5.19 |
| explained_variance | 0.972 |
| learning_rate | 0.0003 |
| loss | 11.9 |
| n_updates | 47470 |
| policy_gradient_loss | -0.0113 |
| std | 0.0864 |
| value_loss | 36.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4749 |
| time_elapsed | 28323 |
| total_timesteps | 9725952 |
| train/ | |
| approx_kl | 0.038197704 |
| clip_fraction | 0.365 |
| clip_range | 0.2 |
| entropy_loss | 5.2 |
| explained_variance | 0.834 |
| learning_rate | 0.0003 |
| loss | 46.1 |
| n_updates | 47480 |
| policy_gradient_loss | -0.0146 |
| std | 0.0863 |
| value_loss | 131 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4750 |
| time_elapsed | 28329 |
| total_timesteps | 9728000 |
| train/ | |
| approx_kl | 0.059342604 |
| clip_fraction | 0.434 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 19.4 |
| n_updates | 47490 |
| policy_gradient_loss | -0.00753 |
| std | 0.0867 |
| value_loss | 41.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4751 |
| time_elapsed | 28335 |
| total_timesteps | 9730048 |
| train/ | |
| approx_kl | 0.04412159 |
| clip_fraction | 0.395 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.956 |
| learning_rate | 0.0003 |
| loss | 27.2 |
| n_updates | 47500 |
| policy_gradient_loss | -0.00679 |
| std | 0.0858 |
| value_loss | 63.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4752 |
| time_elapsed | 28341 |
| total_timesteps | 9732096 |
| train/ | |
| approx_kl | 0.053957723 |
| clip_fraction | 0.508 |
| clip_range | 0.2 |
| entropy_loss | 5.21 |
| explained_variance | 0.896 |
| learning_rate | 0.0003 |
| loss | 13.2 |
| n_updates | 47510 |
| policy_gradient_loss | 0.00442 |
| std | 0.0864 |
| value_loss | 25.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4753 |
| time_elapsed | 28346 |
| total_timesteps | 9734144 |
| train/ | |
| approx_kl | 0.039711993 |
| clip_fraction | 0.315 |
| clip_range | 0.2 |
| entropy_loss | 5.18 |
| explained_variance | 0.808 |
| learning_rate | 0.0003 |
| loss | 27 |
| n_updates | 47520 |
| policy_gradient_loss | -0.0129 |
| std | 0.0868 |
| value_loss | 138 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4754 |
| time_elapsed | 28352 |
| total_timesteps | 9736192 |
| train/ | |
| approx_kl | 0.040188488 |
| clip_fraction | 0.378 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.9 |
| learning_rate | 0.0003 |
| loss | 24.6 |
| n_updates | 47530 |
| policy_gradient_loss | -0.0139 |
| std | 0.087 |
| value_loss | 47 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4755 |
| time_elapsed | 28358 |
| total_timesteps | 9738240 |
| train/ | |
| approx_kl | 0.04637432 |
| clip_fraction | 0.389 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.922 |
| learning_rate | 0.0003 |
| loss | 30.4 |
| n_updates | 47540 |
| policy_gradient_loss | -0.0152 |
| std | 0.0866 |
| value_loss | 60.9 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4756 |
| time_elapsed | 28364 |
| total_timesteps | 9740288 |
| train/ | |
| approx_kl | 0.05895415 |
| clip_fraction | 0.475 |
| clip_range | 0.2 |
| entropy_loss | 5.16 |
| explained_variance | 0.924 |
| learning_rate | 0.0003 |
| loss | 15.1 |
| n_updates | 47550 |
| policy_gradient_loss | -0.00312 |
| std | 0.087 |
| value_loss | 29.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4757 |
| time_elapsed | 28370 |
| total_timesteps | 9742336 |
| train/ | |
| approx_kl | 0.034737825 |
| clip_fraction | 0.324 |
| clip_range | 0.2 |
| entropy_loss | 5.15 |
| explained_variance | 0.927 |
| learning_rate | 0.0003 |
| loss | 82.6 |
| n_updates | 47560 |
| policy_gradient_loss | -0.013 |
| std | 0.0868 |
| value_loss | 128 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4758 |
| time_elapsed | 28376 |
| total_timesteps | 9744384 |
| train/ | |
| approx_kl | 0.049225267 |
| clip_fraction | 0.407 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 19.4 |
| n_updates | 47570 |
| policy_gradient_loss | -0.00981 |
| std | 0.0863 |
| value_loss | 47.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4759 |
| time_elapsed | 28381 |
| total_timesteps | 9746432 |
| train/ | |
| approx_kl | 0.039632488 |
| clip_fraction | 0.376 |
| clip_range | 0.2 |
| entropy_loss | 5.17 |
| explained_variance | 0.951 |
| learning_rate | 0.0003 |
| loss | 44.2 |
| n_updates | 47580 |
| policy_gradient_loss | -0.018 |
| std | 0.0867 |
| value_loss | 58.9 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4760 |
| time_elapsed | 28387 |
| total_timesteps | 9748480 |
| train/ | |
| approx_kl | 0.06635163 |
| clip_fraction | 0.458 |
| clip_range | 0.2 |
| entropy_loss | 5.14 |
| explained_variance | 0.963 |
| learning_rate | 0.0003 |
| loss | 20.7 |
| n_updates | 47590 |
| policy_gradient_loss | -0.00557 |
| std | 0.0872 |
| value_loss | 36 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4761 |
| time_elapsed | 28393 |
| total_timesteps | 9750528 |
| train/ | |
| approx_kl | 0.03987309 |
| clip_fraction | 0.371 |
| clip_range | 0.2 |
| entropy_loss | 5.12 |
| explained_variance | 0.974 |
| learning_rate | 0.0003 |
| loss | 36.1 |
| n_updates | 47600 |
| policy_gradient_loss | -0.0108 |
| std | 0.0874 |
| value_loss | 47.9 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4762 |
| time_elapsed | 28399 |
| total_timesteps | 9752576 |
| train/ | |
| approx_kl | 0.03786237 |
| clip_fraction | 0.338 |
| clip_range | 0.2 |
| entropy_loss | 5.11 |
| explained_variance | 0.884 |
| learning_rate | 0.0003 |
| loss | 40.1 |
| n_updates | 47610 |
| policy_gradient_loss | -0.0131 |
| std | 0.0876 |
| value_loss | 123 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4763 |
| time_elapsed | 28405 |
| total_timesteps | 9754624 |
| train/ | |
| approx_kl | 0.061969656 |
| clip_fraction | 0.433 |
| clip_range | 0.2 |
| entropy_loss | 5.09 |
| explained_variance | 0.971 |
| learning_rate | 0.0003 |
| loss | 21.5 |
| n_updates | 47620 |
| policy_gradient_loss | -0.0116 |
| std | 0.0879 |
| value_loss | 39.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4764 |
| time_elapsed | 28411 |
| total_timesteps | 9756672 |
| train/ | |
| approx_kl | 0.04299046 |
| clip_fraction | 0.367 |
| clip_range | 0.2 |
| entropy_loss | 5.09 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 30.4 |
| n_updates | 47630 |
| policy_gradient_loss | -0.00824 |
| std | 0.088 |
| value_loss | 53.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4765 |
| time_elapsed | 28417 |
| total_timesteps | 9758720 |
| train/ | |
| approx_kl | 0.050033122 |
| clip_fraction | 0.424 |
| clip_range | 0.2 |
| entropy_loss | 5.07 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 14.5 |
| n_updates | 47640 |
| policy_gradient_loss | -0.00968 |
| std | 0.0883 |
| value_loss | 33.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4766 |
| time_elapsed | 28423 |
| total_timesteps | 9760768 |
| train/ | |
| approx_kl | 0.038848694 |
| clip_fraction | 0.347 |
| clip_range | 0.2 |
| entropy_loss | 5.06 |
| explained_variance | 0.847 |
| learning_rate | 0.0003 |
| loss | 29.9 |
| n_updates | 47650 |
| policy_gradient_loss | -0.0141 |
| std | 0.0885 |
| value_loss | 129 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4767 |
| time_elapsed | 28428 |
| total_timesteps | 9762816 |
| train/ | |
| approx_kl | 0.06270054 |
| clip_fraction | 0.427 |
| clip_range | 0.2 |
| entropy_loss | 5.05 |
| explained_variance | 0.942 |
| learning_rate | 0.0003 |
| loss | 15.8 |
| n_updates | 47660 |
| policy_gradient_loss | -0.011 |
| std | 0.0886 |
| value_loss | 40.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4768 |
| time_elapsed | 28434 |
| total_timesteps | 9764864 |
| train/ | |
| approx_kl | 0.042152107 |
| clip_fraction | 0.379 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.949 |
| learning_rate | 0.0003 |
| loss | 29 |
| n_updates | 47670 |
| policy_gradient_loss | -0.0122 |
| std | 0.0886 |
| value_loss | 63 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4769 |
| time_elapsed | 28440 |
| total_timesteps | 9766912 |
| train/ | |
| approx_kl | 0.07643299 |
| clip_fraction | 0.505 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.842 |
| learning_rate | 0.0003 |
| loss | 15 |
| n_updates | 47680 |
| policy_gradient_loss | 0.000725 |
| std | 0.0892 |
| value_loss | 24.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4770 |
| time_elapsed | 28446 |
| total_timesteps | 9768960 |
| train/ | |
| approx_kl | 0.029563729 |
| clip_fraction | 0.315 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.864 |
| learning_rate | 0.0003 |
| loss | 106 |
| n_updates | 47690 |
| policy_gradient_loss | -0.0162 |
| std | 0.0888 |
| value_loss | 135 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4771 |
| time_elapsed | 28452 |
| total_timesteps | 9771008 |
| train/ | |
| approx_kl | 0.063232556 |
| clip_fraction | 0.402 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.93 |
| learning_rate | 0.0003 |
| loss | 21.5 |
| n_updates | 47700 |
| policy_gradient_loss | -0.00948 |
| std | 0.0893 |
| value_loss | 44.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4772 |
| time_elapsed | 28458 |
| total_timesteps | 9773056 |
| train/ | |
| approx_kl | 0.04384408 |
| clip_fraction | 0.35 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.925 |
| learning_rate | 0.0003 |
| loss | 22.5 |
| n_updates | 47710 |
| policy_gradient_loss | -0.0191 |
| std | 0.0893 |
| value_loss | 62.9 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4773 |
| time_elapsed | 28464 |
| total_timesteps | 9775104 |
| train/ | |
| approx_kl | 0.05652573 |
| clip_fraction | 0.46 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.955 |
| learning_rate | 0.0003 |
| loss | 14.1 |
| n_updates | 47720 |
| policy_gradient_loss | -0.00117 |
| std | 0.0891 |
| value_loss | 32.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4774 |
| time_elapsed | 28470 |
| total_timesteps | 9777152 |
| train/ | |
| approx_kl | 0.035616662 |
| clip_fraction | 0.339 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.937 |
| learning_rate | 0.0003 |
| loss | 38.2 |
| n_updates | 47730 |
| policy_gradient_loss | -0.0153 |
| std | 0.0892 |
| value_loss | 126 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4775 |
| time_elapsed | 28476 |
| total_timesteps | 9779200 |
| train/ | |
| approx_kl | 0.04479259 |
| clip_fraction | 0.4 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.958 |
| learning_rate | 0.0003 |
| loss | 26.3 |
| n_updates | 47740 |
| policy_gradient_loss | -0.00944 |
| std | 0.0889 |
| value_loss | 51.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4776 |
| time_elapsed | 28481 |
| total_timesteps | 9781248 |
| train/ | |
| approx_kl | 0.034188494 |
| clip_fraction | 0.376 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 14.9 |
| n_updates | 47750 |
| policy_gradient_loss | -0.0139 |
| std | 0.0893 |
| value_loss | 45.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4777 |
| time_elapsed | 28487 |
| total_timesteps | 9783296 |
| train/ | |
| approx_kl | 0.053666636 |
| clip_fraction | 0.415 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 26.1 |
| n_updates | 47760 |
| policy_gradient_loss | -0.00595 |
| std | 0.0894 |
| value_loss | 44.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4778 |
| time_elapsed | 28493 |
| total_timesteps | 9785344 |
| train/ | |
| approx_kl | 0.04730598 |
| clip_fraction | 0.397 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.975 |
| learning_rate | 0.0003 |
| loss | 28.1 |
| n_updates | 47770 |
| policy_gradient_loss | -0.0151 |
| std | 0.0896 |
| value_loss | 42 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4779 |
| time_elapsed | 28499 |
| total_timesteps | 9787392 |
| train/ | |
| approx_kl | 0.03912072 |
| clip_fraction | 0.343 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.868 |
| learning_rate | 0.0003 |
| loss | 93.6 |
| n_updates | 47780 |
| policy_gradient_loss | -0.0175 |
| std | 0.0894 |
| value_loss | 124 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4780 |
| time_elapsed | 28505 |
| total_timesteps | 9789440 |
| train/ | |
| approx_kl | 0.044075094 |
| clip_fraction | 0.401 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 22 |
| n_updates | 47790 |
| policy_gradient_loss | -0.00331 |
| std | 0.0893 |
| value_loss | 40.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4781 |
| time_elapsed | 28511 |
| total_timesteps | 9791488 |
| train/ | |
| approx_kl | 0.048613723 |
| clip_fraction | 0.395 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.962 |
| learning_rate | 0.0003 |
| loss | 20.6 |
| n_updates | 47800 |
| policy_gradient_loss | -0.0155 |
| std | 0.0895 |
| value_loss | 57.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4782 |
| time_elapsed | 28517 |
| total_timesteps | 9793536 |
| train/ | |
| approx_kl | 0.07278054 |
| clip_fraction | 0.487 |
| clip_range | 0.2 |
| entropy_loss | 4.98 |
| explained_variance | 0.94 |
| learning_rate | 0.0003 |
| loss | 9.92 |
| n_updates | 47810 |
| policy_gradient_loss | -0.00783 |
| std | 0.0896 |
| value_loss | 25.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4783 |
| time_elapsed | 28523 |
| total_timesteps | 9795584 |
| train/ | |
| approx_kl | 0.036002394 |
| clip_fraction | 0.347 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.839 |
| learning_rate | 0.0003 |
| loss | 87.4 |
| n_updates | 47820 |
| policy_gradient_loss | -0.0146 |
| std | 0.09 |
| value_loss | 133 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4784 |
| time_elapsed | 28528 |
| total_timesteps | 9797632 |
| train/ | |
| approx_kl | 0.051947672 |
| clip_fraction | 0.381 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.915 |
| learning_rate | 0.0003 |
| loss | 24.4 |
| n_updates | 47830 |
| policy_gradient_loss | -0.0109 |
| std | 0.0904 |
| value_loss | 46.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4785 |
| time_elapsed | 28534 |
| total_timesteps | 9799680 |
| train/ | |
| approx_kl | 0.042364106 |
| clip_fraction | 0.367 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.915 |
| learning_rate | 0.0003 |
| loss | 37.3 |
| n_updates | 47840 |
| policy_gradient_loss | -0.0144 |
| std | 0.0905 |
| value_loss | 63.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4786 |
| time_elapsed | 28540 |
| total_timesteps | 9801728 |
| train/ | |
| approx_kl | 0.07736538 |
| clip_fraction | 0.496 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.842 |
| learning_rate | 0.0003 |
| loss | 14.1 |
| n_updates | 47850 |
| policy_gradient_loss | 0.00331 |
| std | 0.0897 |
| value_loss | 27.2 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4787 |
| time_elapsed | 28546 |
| total_timesteps | 9803776 |
| train/ | |
| approx_kl | 0.04268372 |
| clip_fraction | 0.345 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.917 |
| learning_rate | 0.0003 |
| loss | 186 |
| n_updates | 47860 |
| policy_gradient_loss | -0.0145 |
| std | 0.0903 |
| value_loss | 137 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4788 |
| time_elapsed | 28552 |
| total_timesteps | 9805824 |
| train/ | |
| approx_kl | 0.048731558 |
| clip_fraction | 0.392 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.952 |
| learning_rate | 0.0003 |
| loss | 11.1 |
| n_updates | 47870 |
| policy_gradient_loss | -0.0136 |
| std | 0.09 |
| value_loss | 45.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4789 |
| time_elapsed | 28557 |
| total_timesteps | 9807872 |
| train/ | |
| approx_kl | 0.03275656 |
| clip_fraction | 0.333 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.938 |
| learning_rate | 0.0003 |
| loss | 27.6 |
| n_updates | 47880 |
| policy_gradient_loss | -0.0167 |
| std | 0.0902 |
| value_loss | 62.5 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4790 |
| time_elapsed | 28563 |
| total_timesteps | 9809920 |
| train/ | |
| approx_kl | 0.06802517 |
| clip_fraction | 0.457 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.96 |
| learning_rate | 0.0003 |
| loss | 13.7 |
| n_updates | 47890 |
| policy_gradient_loss | -0.0103 |
| std | 0.0897 |
| value_loss | 33.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4791 |
| time_elapsed | 28569 |
| total_timesteps | 9811968 |
| train/ | |
| approx_kl | 0.042068996 |
| clip_fraction | 0.36 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.969 |
| learning_rate | 0.0003 |
| loss | 18.9 |
| n_updates | 47900 |
| policy_gradient_loss | -0.0138 |
| std | 0.0896 |
| value_loss | 52.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4792 |
| time_elapsed | 28575 |
| total_timesteps | 9814016 |
| train/ | |
| approx_kl | 0.038424455 |
| clip_fraction | 0.355 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.894 |
| learning_rate | 0.0003 |
| loss | 22.3 |
| n_updates | 47910 |
| policy_gradient_loss | -0.0145 |
| std | 0.0891 |
| value_loss | 111 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4793 |
| time_elapsed | 28581 |
| total_timesteps | 9816064 |
| train/ | |
| approx_kl | 0.05419812 |
| clip_fraction | 0.43 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.971 |
| learning_rate | 0.0003 |
| loss | 16 |
| n_updates | 47920 |
| policy_gradient_loss | -0.0151 |
| std | 0.0892 |
| value_loss | 41.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4794 |
| time_elapsed | 28587 |
| total_timesteps | 9818112 |
| train/ | |
| approx_kl | 0.052352786 |
| clip_fraction | 0.402 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 26.3 |
| n_updates | 47930 |
| policy_gradient_loss | -0.00796 |
| std | 0.0896 |
| value_loss | 52.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4795 |
| time_elapsed | 28593 |
| total_timesteps | 9820160 |
| train/ | |
| approx_kl | 0.06317793 |
| clip_fraction | 0.428 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 20 |
| n_updates | 47940 |
| policy_gradient_loss | -0.00528 |
| std | 0.0902 |
| value_loss | 36.8 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4796 |
| time_elapsed | 28599 |
| total_timesteps | 9822208 |
| train/ | |
| approx_kl | 0.03735993 |
| clip_fraction | 0.352 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.837 |
| learning_rate | 0.0003 |
| loss | 38.3 |
| n_updates | 47950 |
| policy_gradient_loss | -0.0144 |
| std | 0.0902 |
| value_loss | 139 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4797 |
| time_elapsed | 28605 |
| total_timesteps | 9824256 |
| train/ | |
| approx_kl | 0.056248877 |
| clip_fraction | 0.415 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.952 |
| learning_rate | 0.0003 |
| loss | 21.2 |
| n_updates | 47960 |
| policy_gradient_loss | -0.00503 |
| std | 0.0905 |
| value_loss | 40.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4798 |
| time_elapsed | 28610 |
| total_timesteps | 9826304 |
| train/ | |
| approx_kl | 0.035433408 |
| clip_fraction | 0.372 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.954 |
| learning_rate | 0.0003 |
| loss | 17.4 |
| n_updates | 47970 |
| policy_gradient_loss | -0.0149 |
| std | 0.0902 |
| value_loss | 61.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4799 |
| time_elapsed | 28616 |
| total_timesteps | 9828352 |
| train/ | |
| approx_kl | 0.06489922 |
| clip_fraction | 0.478 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.864 |
| learning_rate | 0.0003 |
| loss | 16.1 |
| n_updates | 47980 |
| policy_gradient_loss | -0.00447 |
| std | 0.0905 |
| value_loss | 24.4 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4800 |
| time_elapsed | 28623 |
| total_timesteps | 9830400 |
| train/ | |
| approx_kl | 0.03431963 |
| clip_fraction | 0.316 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.811 |
| learning_rate | 0.0003 |
| loss | 43.1 |
| n_updates | 47990 |
| policy_gradient_loss | -0.021 |
| std | 0.0904 |
| value_loss | 139 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4801 |
| time_elapsed | 28629 |
| total_timesteps | 9832448 |
| train/ | |
| approx_kl | 0.053379193 |
| clip_fraction | 0.414 |
| clip_range | 0.2 |
| entropy_loss | 4.93 |
| explained_variance | 0.899 |
| learning_rate | 0.0003 |
| loss | 21.5 |
| n_updates | 48000 |
| policy_gradient_loss | -0.0151 |
| std | 0.0906 |
| value_loss | 41.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4802 |
| time_elapsed | 28634 |
| total_timesteps | 9834496 |
| train/ | |
| approx_kl | 0.04090803 |
| clip_fraction | 0.357 |
| clip_range | 0.2 |
| entropy_loss | 4.92 |
| explained_variance | 0.909 |
| learning_rate | 0.0003 |
| loss | 29.1 |
| n_updates | 48010 |
| policy_gradient_loss | -0.0204 |
| std | 0.0907 |
| value_loss | 64 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4803 |
| time_elapsed | 28640 |
| total_timesteps | 9836544 |
| train/ | |
| approx_kl | 0.06847512 |
| clip_fraction | 0.465 |
| clip_range | 0.2 |
| entropy_loss | 4.91 |
| explained_variance | 0.946 |
| learning_rate | 0.0003 |
| loss | 11.4 |
| n_updates | 48020 |
| policy_gradient_loss | -0.0025 |
| std | 0.0909 |
| value_loss | 29.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4804 |
| time_elapsed | 28646 |
| total_timesteps | 9838592 |
| train/ | |
| approx_kl | 0.030914476 |
| clip_fraction | 0.3 |
| clip_range | 0.2 |
| entropy_loss | 4.91 |
| explained_variance | 0.929 |
| learning_rate | 0.0003 |
| loss | 27.7 |
| n_updates | 48030 |
| policy_gradient_loss | -0.0151 |
| std | 0.0909 |
| value_loss | 131 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4805 |
| time_elapsed | 28652 |
| total_timesteps | 9840640 |
| train/ | |
| approx_kl | 0.052734185 |
| clip_fraction | 0.413 |
| clip_range | 0.2 |
| entropy_loss | 4.91 |
| explained_variance | 0.954 |
| learning_rate | 0.0003 |
| loss | 33.1 |
| n_updates | 48040 |
| policy_gradient_loss | -0.0161 |
| std | 0.091 |
| value_loss | 52.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4806 |
| time_elapsed | 28658 |
| total_timesteps | 9842688 |
| train/ | |
| approx_kl | 0.04086415 |
| clip_fraction | 0.391 |
| clip_range | 0.2 |
| entropy_loss | 4.93 |
| explained_variance | 0.962 |
| learning_rate | 0.0003 |
| loss | 25.2 |
| n_updates | 48050 |
| policy_gradient_loss | -0.0164 |
| std | 0.0903 |
| value_loss | 49.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4807 |
| time_elapsed | 28664 |
| total_timesteps | 9844736 |
| train/ | |
| approx_kl | 0.055822946 |
| clip_fraction | 0.424 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.95 |
| learning_rate | 0.0003 |
| loss | 18.3 |
| n_updates | 48060 |
| policy_gradient_loss | -0.00671 |
| std | 0.0904 |
| value_loss | 43.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4808 |
| time_elapsed | 28670 |
| total_timesteps | 9846784 |
| train/ | |
| approx_kl | 0.056520157 |
| clip_fraction | 0.386 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.974 |
| learning_rate | 0.0003 |
| loss | 16.8 |
| n_updates | 48070 |
| policy_gradient_loss | -0.0143 |
| std | 0.0905 |
| value_loss | 46.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4809 |
| time_elapsed | 28675 |
| total_timesteps | 9848832 |
| train/ | |
| approx_kl | 0.032730833 |
| clip_fraction | 0.347 |
| clip_range | 0.2 |
| entropy_loss | 4.93 |
| explained_variance | 0.869 |
| learning_rate | 0.0003 |
| loss | 24.3 |
| n_updates | 48080 |
| policy_gradient_loss | -0.0164 |
| std | 0.0907 |
| value_loss | 128 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4810 |
| time_elapsed | 28681 |
| total_timesteps | 9850880 |
| train/ | |
| approx_kl | 0.068026505 |
| clip_fraction | 0.443 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 14.7 |
| n_updates | 48090 |
| policy_gradient_loss | -0.0125 |
| std | 0.0904 |
| value_loss | 40.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4811 |
| time_elapsed | 28687 |
| total_timesteps | 9852928 |
| train/ | |
| approx_kl | 0.036956333 |
| clip_fraction | 0.372 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.962 |
| learning_rate | 0.0003 |
| loss | 25.2 |
| n_updates | 48100 |
| policy_gradient_loss | -0.0078 |
| std | 0.0898 |
| value_loss | 54.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4812 |
| time_elapsed | 28693 |
| total_timesteps | 9854976 |
| train/ | |
| approx_kl | 0.056863323 |
| clip_fraction | 0.46 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 9.54 |
| n_updates | 48110 |
| policy_gradient_loss | -0.00938 |
| std | 0.0902 |
| value_loss | 32.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4813 |
| time_elapsed | 28699 |
| total_timesteps | 9857024 |
| train/ | |
| approx_kl | 0.035922214 |
| clip_fraction | 0.341 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.846 |
| learning_rate | 0.0003 |
| loss | 41.9 |
| n_updates | 48120 |
| policy_gradient_loss | -0.0143 |
| std | 0.0901 |
| value_loss | 135 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4814 |
| time_elapsed | 28705 |
| total_timesteps | 9859072 |
| train/ | |
| approx_kl | 0.041102387 |
| clip_fraction | 0.374 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.929 |
| learning_rate | 0.0003 |
| loss | 22.6 |
| n_updates | 48130 |
| policy_gradient_loss | -0.0107 |
| std | 0.0903 |
| value_loss | 46.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4815 |
| time_elapsed | 28711 |
| total_timesteps | 9861120 |
| train/ | |
| approx_kl | 0.046587057 |
| clip_fraction | 0.381 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.938 |
| learning_rate | 0.0003 |
| loss | 31.8 |
| n_updates | 48140 |
| policy_gradient_loss | -0.00889 |
| std | 0.0899 |
| value_loss | 64.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4816 |
| time_elapsed | 28717 |
| total_timesteps | 9863168 |
| train/ | |
| approx_kl | 0.083585136 |
| clip_fraction | 0.51 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.843 |
| learning_rate | 0.0003 |
| loss | 11.8 |
| n_updates | 48150 |
| policy_gradient_loss | 0.0061 |
| std | 0.0896 |
| value_loss | 24.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4817 |
| time_elapsed | 28723 |
| total_timesteps | 9865216 |
| train/ | |
| approx_kl | 0.035579707 |
| clip_fraction | 0.344 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.888 |
| learning_rate | 0.0003 |
| loss | 119 |
| n_updates | 48160 |
| policy_gradient_loss | -0.0146 |
| std | 0.0896 |
| value_loss | 135 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4818 |
| time_elapsed | 28728 |
| total_timesteps | 9867264 |
| train/ | |
| approx_kl | 0.048704617 |
| clip_fraction | 0.369 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.935 |
| learning_rate | 0.0003 |
| loss | 24.6 |
| n_updates | 48170 |
| policy_gradient_loss | -0.012 |
| std | 0.0889 |
| value_loss | 46.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4819 |
| time_elapsed | 28734 |
| total_timesteps | 9869312 |
| train/ | |
| approx_kl | 0.04302324 |
| clip_fraction | 0.344 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.93 |
| learning_rate | 0.0003 |
| loss | 32.7 |
| n_updates | 48180 |
| policy_gradient_loss | -0.0168 |
| std | 0.0893 |
| value_loss | 61.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4820 |
| time_elapsed | 28740 |
| total_timesteps | 9871360 |
| train/ | |
| approx_kl | 0.056765098 |
| clip_fraction | 0.461 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.956 |
| learning_rate | 0.0003 |
| loss | 9.16 |
| n_updates | 48190 |
| policy_gradient_loss | -0.00533 |
| std | 0.0896 |
| value_loss | 31.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4821 |
| time_elapsed | 28746 |
| total_timesteps | 9873408 |
| train/ | |
| approx_kl | 0.040459245 |
| clip_fraction | 0.344 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 30.5 |
| n_updates | 48200 |
| policy_gradient_loss | -0.0172 |
| std | 0.0897 |
| value_loss | 58.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4822 |
| time_elapsed | 28752 |
| total_timesteps | 9875456 |
| train/ | |
| approx_kl | 0.07570796 |
| clip_fraction | 0.361 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.904 |
| learning_rate | 0.0003 |
| loss | 26.4 |
| n_updates | 48210 |
| policy_gradient_loss | -0.0137 |
| std | 0.0903 |
| value_loss | 101 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4823 |
| time_elapsed | 28758 |
| total_timesteps | 9877504 |
| train/ | |
| approx_kl | 0.042558752 |
| clip_fraction | 0.391 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 15.9 |
| n_updates | 48220 |
| policy_gradient_loss | -0.0102 |
| std | 0.0901 |
| value_loss | 43.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4824 |
| time_elapsed | 28764 |
| total_timesteps | 9879552 |
| train/ | |
| approx_kl | 0.053665735 |
| clip_fraction | 0.415 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 21.4 |
| n_updates | 48230 |
| policy_gradient_loss | -0.00836 |
| std | 0.0903 |
| value_loss | 50.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4825 |
| time_elapsed | 28770 |
| total_timesteps | 9881600 |
| train/ | |
| approx_kl | 0.073934086 |
| clip_fraction | 0.44 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.971 |
| learning_rate | 0.0003 |
| loss | 22 |
| n_updates | 48240 |
| policy_gradient_loss | -0.00707 |
| std | 0.0904 |
| value_loss | 38.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4826 |
| time_elapsed | 28775 |
| total_timesteps | 9883648 |
| train/ | |
| approx_kl | 0.03861378 |
| clip_fraction | 0.345 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.839 |
| learning_rate | 0.0003 |
| loss | 35.2 |
| n_updates | 48250 |
| policy_gradient_loss | -0.0192 |
| std | 0.0901 |
| value_loss | 126 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4827 |
| time_elapsed | 28781 |
| total_timesteps | 9885696 |
| train/ | |
| approx_kl | 0.04737499 |
| clip_fraction | 0.402 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.966 |
| learning_rate | 0.0003 |
| loss | 14.1 |
| n_updates | 48260 |
| policy_gradient_loss | -0.00714 |
| std | 0.0901 |
| value_loss | 40 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4828 |
| time_elapsed | 28787 |
| total_timesteps | 9887744 |
| train/ | |
| approx_kl | 0.040164713 |
| clip_fraction | 0.372 |
| clip_range | 0.2 |
| entropy_loss | 4.96 |
| explained_variance | 0.96 |
| learning_rate | 0.0003 |
| loss | 32.2 |
| n_updates | 48270 |
| policy_gradient_loss | -0.0151 |
| std | 0.0901 |
| value_loss | 59.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4829 |
| time_elapsed | 28793 |
| total_timesteps | 9889792 |
| train/ | |
| approx_kl | 0.06335577 |
| clip_fraction | 0.49 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.908 |
| learning_rate | 0.0003 |
| loss | 12.3 |
| n_updates | 48280 |
| policy_gradient_loss | -0.00162 |
| std | 0.0904 |
| value_loss | 25.5 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4830 |
| time_elapsed | 28799 |
| total_timesteps | 9891840 |
| train/ | |
| approx_kl | 0.03352327 |
| clip_fraction | 0.326 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.813 |
| learning_rate | 0.0003 |
| loss | 31 |
| n_updates | 48290 |
| policy_gradient_loss | -0.0144 |
| std | 0.0906 |
| value_loss | 136 |
----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4831 |
| time_elapsed | 28805 |
| total_timesteps | 9893888 |
| train/ | |
| approx_kl | 0.0537534 |
| clip_fraction | 0.387 |
| clip_range | 0.2 |
| entropy_loss | 4.92 |
| explained_variance | 0.904 |
| learning_rate | 0.0003 |
| loss | 21.7 |
| n_updates | 48300 |
| policy_gradient_loss | -0.0174 |
| std | 0.0911 |
| value_loss | 47.4 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4832 |
| time_elapsed | 28811 |
| total_timesteps | 9895936 |
| train/ | |
| approx_kl | 0.035882916 |
| clip_fraction | 0.358 |
| clip_range | 0.2 |
| entropy_loss | 4.91 |
| explained_variance | 0.922 |
| learning_rate | 0.0003 |
| loss | 47.1 |
| n_updates | 48310 |
| policy_gradient_loss | -0.016 |
| std | 0.091 |
| value_loss | 61.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4833 |
| time_elapsed | 28817 |
| total_timesteps | 9897984 |
| train/ | |
| approx_kl | 0.07257991 |
| clip_fraction | 0.477 |
| clip_range | 0.2 |
| entropy_loss | 4.91 |
| explained_variance | 0.92 |
| learning_rate | 0.0003 |
| loss | 21.6 |
| n_updates | 48320 |
| policy_gradient_loss | 0.00051 |
| std | 0.0911 |
| value_loss | 27.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4834 |
| time_elapsed | 28823 |
| total_timesteps | 9900032 |
| train/ | |
| approx_kl | 0.036860988 |
| clip_fraction | 0.306 |
| clip_range | 0.2 |
| entropy_loss | 4.92 |
| explained_variance | 0.926 |
| learning_rate | 0.0003 |
| loss | 100 |
| n_updates | 48330 |
| policy_gradient_loss | -0.0141 |
| std | 0.0908 |
| value_loss | 129 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4835 |
| time_elapsed | 28829 |
| total_timesteps | 9902080 |
| train/ | |
| approx_kl | 0.051048182 |
| clip_fraction | 0.42 |
| clip_range | 0.2 |
| entropy_loss | 4.92 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 27.2 |
| n_updates | 48340 |
| policy_gradient_loss | -0.0109 |
| std | 0.0908 |
| value_loss | 45.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4836 |
| time_elapsed | 28834 |
| total_timesteps | 9904128 |
| train/ | |
| approx_kl | 0.04033347 |
| clip_fraction | 0.361 |
| clip_range | 0.2 |
| entropy_loss | 4.93 |
| explained_variance | 0.948 |
| learning_rate | 0.0003 |
| loss | 24.8 |
| n_updates | 48350 |
| policy_gradient_loss | -0.0149 |
| std | 0.0905 |
| value_loss | 60.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4837 |
| time_elapsed | 28840 |
| total_timesteps | 9906176 |
| train/ | |
| approx_kl | 0.060554408 |
| clip_fraction | 0.459 |
| clip_range | 0.2 |
| entropy_loss | 4.94 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 21.2 |
| n_updates | 48360 |
| policy_gradient_loss | -0.00906 |
| std | 0.0906 |
| value_loss | 35.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4838 |
| time_elapsed | 28846 |
| total_timesteps | 9908224 |
| train/ | |
| approx_kl | 0.04581771 |
| clip_fraction | 0.376 |
| clip_range | 0.2 |
| entropy_loss | 4.95 |
| explained_variance | 0.972 |
| learning_rate | 0.0003 |
| loss | 19.7 |
| n_updates | 48370 |
| policy_gradient_loss | -0.0137 |
| std | 0.0902 |
| value_loss | 47.9 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4839 |
| time_elapsed | 28852 |
| total_timesteps | 9910272 |
| train/ | |
| approx_kl | 0.04122579 |
| clip_fraction | 0.347 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.886 |
| learning_rate | 0.0003 |
| loss | 138 |
| n_updates | 48380 |
| policy_gradient_loss | -0.0108 |
| std | 0.0902 |
| value_loss | 116 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4840 |
| time_elapsed | 28858 |
| total_timesteps | 9912320 |
| train/ | |
| approx_kl | 0.056925587 |
| clip_fraction | 0.415 |
| clip_range | 0.2 |
| entropy_loss | 4.97 |
| explained_variance | 0.968 |
| learning_rate | 0.0003 |
| loss | 17.2 |
| n_updates | 48390 |
| policy_gradient_loss | -0.00925 |
| std | 0.0899 |
| value_loss | 40.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4841 |
| time_elapsed | 28864 |
| total_timesteps | 9914368 |
| train/ | |
| approx_kl | 0.050084546 |
| clip_fraction | 0.405 |
| clip_range | 0.2 |
| entropy_loss | 4.98 |
| explained_variance | 0.964 |
| learning_rate | 0.0003 |
| loss | 29.5 |
| n_updates | 48400 |
| policy_gradient_loss | -0.0145 |
| std | 0.0896 |
| value_loss | 55.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4842 |
| time_elapsed | 28870 |
| total_timesteps | 9916416 |
| train/ | |
| approx_kl | 0.050317235 |
| clip_fraction | 0.426 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 22.2 |
| n_updates | 48410 |
| policy_gradient_loss | -0.0132 |
| std | 0.0895 |
| value_loss | 34.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4843 |
| time_elapsed | 28875 |
| total_timesteps | 9918464 |
| train/ | |
| approx_kl | 0.033886503 |
| clip_fraction | 0.349 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.832 |
| learning_rate | 0.0003 |
| loss | 94.6 |
| n_updates | 48420 |
| policy_gradient_loss | -0.0129 |
| std | 0.0895 |
| value_loss | 133 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4844 |
| time_elapsed | 28881 |
| total_timesteps | 9920512 |
| train/ | |
| approx_kl | 0.053775787 |
| clip_fraction | 0.467 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.938 |
| learning_rate | 0.0003 |
| loss | 21.2 |
| n_updates | 48430 |
| policy_gradient_loss | -0.00838 |
| std | 0.0894 |
| value_loss | 40.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4845 |
| time_elapsed | 28887 |
| total_timesteps | 9922560 |
| train/ | |
| approx_kl | 0.039454255 |
| clip_fraction | 0.392 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.951 |
| learning_rate | 0.0003 |
| loss | 45.6 |
| n_updates | 48440 |
| policy_gradient_loss | -0.0145 |
| std | 0.0895 |
| value_loss | 62.9 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4846 |
| time_elapsed | 28893 |
| total_timesteps | 9924608 |
| train/ | |
| approx_kl | 0.07386839 |
| clip_fraction | 0.513 |
| clip_range | 0.2 |
| entropy_loss | 4.98 |
| explained_variance | 0.829 |
| learning_rate | 0.0003 |
| loss | 12.2 |
| n_updates | 48450 |
| policy_gradient_loss | -0.00621 |
| std | 0.09 |
| value_loss | 26.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4847 |
| time_elapsed | 28899 |
| total_timesteps | 9926656 |
| train/ | |
| approx_kl | 0.030346688 |
| clip_fraction | 0.318 |
| clip_range | 0.2 |
| entropy_loss | 4.98 |
| explained_variance | 0.849 |
| learning_rate | 0.0003 |
| loss | 116 |
| n_updates | 48460 |
| policy_gradient_loss | -0.0161 |
| std | 0.0899 |
| value_loss | 135 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4848 |
| time_elapsed | 28905 |
| total_timesteps | 9928704 |
| train/ | |
| approx_kl | 0.044165038 |
| clip_fraction | 0.394 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.92 |
| learning_rate | 0.0003 |
| loss | 29.3 |
| n_updates | 48470 |
| policy_gradient_loss | -0.0155 |
| std | 0.0896 |
| value_loss | 44.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4849 |
| time_elapsed | 28910 |
| total_timesteps | 9930752 |
| train/ | |
| approx_kl | 0.036444224 |
| clip_fraction | 0.337 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.92 |
| learning_rate | 0.0003 |
| loss | 45.2 |
| n_updates | 48480 |
| policy_gradient_loss | -0.0194 |
| std | 0.0897 |
| value_loss | 63.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4850 |
| time_elapsed | 28916 |
| total_timesteps | 9932800 |
| train/ | |
| approx_kl | 0.05902419 |
| clip_fraction | 0.46 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.953 |
| learning_rate | 0.0003 |
| loss | 16.5 |
| n_updates | 48490 |
| policy_gradient_loss | -0.00782 |
| std | 0.0894 |
| value_loss | 31.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4851 |
| time_elapsed | 28922 |
| total_timesteps | 9934848 |
| train/ | |
| approx_kl | 0.039963588 |
| clip_fraction | 0.343 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.934 |
| learning_rate | 0.0003 |
| loss | 31.4 |
| n_updates | 48500 |
| policy_gradient_loss | -0.0193 |
| std | 0.0895 |
| value_loss | 131 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4852 |
| time_elapsed | 28928 |
| total_timesteps | 9936896 |
| train/ | |
| approx_kl | 0.06086333 |
| clip_fraction | 0.43 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.955 |
| learning_rate | 0.0003 |
| loss | 42.1 |
| n_updates | 48510 |
| policy_gradient_loss | -0.0135 |
| std | 0.0896 |
| value_loss | 51.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4853 |
| time_elapsed | 28934 |
| total_timesteps | 9938944 |
| train/ | |
| approx_kl | 0.054594032 |
| clip_fraction | 0.408 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.963 |
| learning_rate | 0.0003 |
| loss | 27.6 |
| n_updates | 48520 |
| policy_gradient_loss | -0.0149 |
| std | 0.0897 |
| value_loss | 46.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4854 |
| time_elapsed | 28940 |
| total_timesteps | 9940992 |
| train/ | |
| approx_kl | 0.055270806 |
| clip_fraction | 0.415 |
| clip_range | 0.2 |
| entropy_loss | 5 |
| explained_variance | 0.96 |
| learning_rate | 0.0003 |
| loss | 20.8 |
| n_updates | 48530 |
| policy_gradient_loss | -0.0112 |
| std | 0.0896 |
| value_loss | 45.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4855 |
| time_elapsed | 28946 |
| total_timesteps | 9943040 |
| train/ | |
| approx_kl | 0.050826233 |
| clip_fraction | 0.396 |
| clip_range | 0.2 |
| entropy_loss | 4.99 |
| explained_variance | 0.974 |
| learning_rate | 0.0003 |
| loss | 25.1 |
| n_updates | 48540 |
| policy_gradient_loss | -0.0199 |
| std | 0.0897 |
| value_loss | 43 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4856 |
| time_elapsed | 28952 |
| total_timesteps | 9945088 |
| train/ | |
| approx_kl | 0.0394902 |
| clip_fraction | 0.364 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.861 |
| learning_rate | 0.0003 |
| loss | 85.5 |
| n_updates | 48550 |
| policy_gradient_loss | -0.0174 |
| std | 0.089 |
| value_loss | 125 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4857 |
| time_elapsed | 28958 |
| total_timesteps | 9947136 |
| train/ | |
| approx_kl | 0.053031266 |
| clip_fraction | 0.414 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 19.1 |
| n_updates | 48560 |
| policy_gradient_loss | -0.00698 |
| std | 0.0887 |
| value_loss | 41.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4858 |
| time_elapsed | 28963 |
| total_timesteps | 9949184 |
| train/ | |
| approx_kl | 0.044173434 |
| clip_fraction | 0.384 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.96 |
| learning_rate | 0.0003 |
| loss | 30.7 |
| n_updates | 48570 |
| policy_gradient_loss | -0.0178 |
| std | 0.0887 |
| value_loss | 58.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4859 |
| time_elapsed | 28970 |
| total_timesteps | 9951232 |
| train/ | |
| approx_kl | 0.06983917 |
| clip_fraction | 0.482 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.947 |
| learning_rate | 0.0003 |
| loss | 15.3 |
| n_updates | 48580 |
| policy_gradient_loss | -0.00757 |
| std | 0.0889 |
| value_loss | 27.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4860 |
| time_elapsed | 28975 |
| total_timesteps | 9953280 |
| train/ | |
| approx_kl | 0.041938394 |
| clip_fraction | 0.341 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.832 |
| learning_rate | 0.0003 |
| loss | 33.3 |
| n_updates | 48590 |
| policy_gradient_loss | -0.0135 |
| std | 0.0886 |
| value_loss | 135 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4861 |
| time_elapsed | 28981 |
| total_timesteps | 9955328 |
| train/ | |
| approx_kl | 0.04981736 |
| clip_fraction | 0.393 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.918 |
| learning_rate | 0.0003 |
| loss | 24.6 |
| n_updates | 48600 |
| policy_gradient_loss | -0.0151 |
| std | 0.0889 |
| value_loss | 44.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4862 |
| time_elapsed | 28987 |
| total_timesteps | 9957376 |
| train/ | |
| approx_kl | 0.046022236 |
| clip_fraction | 0.376 |
| clip_range | 0.2 |
| entropy_loss | 5.03 |
| explained_variance | 0.926 |
| learning_rate | 0.0003 |
| loss | 26 |
| n_updates | 48610 |
| policy_gradient_loss | -0.0162 |
| std | 0.0889 |
| value_loss | 63.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4863 |
| time_elapsed | 28993 |
| total_timesteps | 9959424 |
| train/ | |
| approx_kl | 0.080863416 |
| clip_fraction | 0.491 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.845 |
| learning_rate | 0.0003 |
| loss | 14.8 |
| n_updates | 48620 |
| policy_gradient_loss | -0.00317 |
| std | 0.0891 |
| value_loss | 25.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4864 |
| time_elapsed | 28999 |
| total_timesteps | 9961472 |
| train/ | |
| approx_kl | 0.02899794 |
| clip_fraction | 0.319 |
| clip_range | 0.2 |
| entropy_loss | 5.03 |
| explained_variance | 0.909 |
| learning_rate | 0.0003 |
| loss | 94.2 |
| n_updates | 48630 |
| policy_gradient_loss | -0.0176 |
| std | 0.0887 |
| value_loss | 137 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4865 |
| time_elapsed | 29005 |
| total_timesteps | 9963520 |
| train/ | |
| approx_kl | 0.048905633 |
| clip_fraction | 0.392 |
| clip_range | 0.2 |
| entropy_loss | 5.03 |
| explained_variance | 0.948 |
| learning_rate | 0.0003 |
| loss | 26 |
| n_updates | 48640 |
| policy_gradient_loss | -0.0155 |
| std | 0.0889 |
| value_loss | 44.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4866 |
| time_elapsed | 29011 |
| total_timesteps | 9965568 |
| train/ | |
| approx_kl | 0.040824823 |
| clip_fraction | 0.354 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.938 |
| learning_rate | 0.0003 |
| loss | 29.7 |
| n_updates | 48650 |
| policy_gradient_loss | -0.0168 |
| std | 0.089 |
| value_loss | 61 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4867 |
| time_elapsed | 29017 |
| total_timesteps | 9967616 |
| train/ | |
| approx_kl | 0.058495294 |
| clip_fraction | 0.448 |
| clip_range | 0.2 |
| entropy_loss | 5.01 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 14.9 |
| n_updates | 48660 |
| policy_gradient_loss | -0.00898 |
| std | 0.0892 |
| value_loss | 32 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4868 |
| time_elapsed | 29023 |
| total_timesteps | 9969664 |
| train/ | |
| approx_kl | 0.04482922 |
| clip_fraction | 0.384 |
| clip_range | 0.2 |
| entropy_loss | 5.03 |
| explained_variance | 0.967 |
| learning_rate | 0.0003 |
| loss | 31.6 |
| n_updates | 48670 |
| policy_gradient_loss | -0.0165 |
| std | 0.0886 |
| value_loss | 52.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4869 |
| time_elapsed | 29029 |
| total_timesteps | 9971712 |
| train/ | |
| approx_kl | 0.040869277 |
| clip_fraction | 0.371 |
| clip_range | 0.2 |
| entropy_loss | 5.06 |
| explained_variance | 0.892 |
| learning_rate | 0.0003 |
| loss | 96.3 |
| n_updates | 48680 |
| policy_gradient_loss | -0.0186 |
| std | 0.0882 |
| value_loss | 107 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4870 |
| time_elapsed | 29035 |
| total_timesteps | 9973760 |
| train/ | |
| approx_kl | 0.049336053 |
| clip_fraction | 0.426 |
| clip_range | 0.2 |
| entropy_loss | 5.07 |
| explained_variance | 0.968 |
| learning_rate | 0.0003 |
| loss | 22.5 |
| n_updates | 48690 |
| policy_gradient_loss | -0.0103 |
| std | 0.0881 |
| value_loss | 40.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4871 |
| time_elapsed | 29040 |
| total_timesteps | 9975808 |
| train/ | |
| approx_kl | 0.05824302 |
| clip_fraction | 0.439 |
| clip_range | 0.2 |
| entropy_loss | 5.09 |
| explained_variance | 0.965 |
| learning_rate | 0.0003 |
| loss | 19 |
| n_updates | 48700 |
| policy_gradient_loss | -0.0055 |
| std | 0.0877 |
| value_loss | 49.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4872 |
| time_elapsed | 29046 |
| total_timesteps | 9977856 |
| train/ | |
| approx_kl | 0.059091553 |
| clip_fraction | 0.408 |
| clip_range | 0.2 |
| entropy_loss | 5.11 |
| explained_variance | 0.969 |
| learning_rate | 0.0003 |
| loss | 16.7 |
| n_updates | 48710 |
| policy_gradient_loss | -0.012 |
| std | 0.0874 |
| value_loss | 37.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4873 |
| time_elapsed | 29052 |
| total_timesteps | 9979904 |
| train/ | |
| approx_kl | 0.037843898 |
| clip_fraction | 0.359 |
| clip_range | 0.2 |
| entropy_loss | 5.1 |
| explained_variance | 0.837 |
| learning_rate | 0.0003 |
| loss | 47.1 |
| n_updates | 48720 |
| policy_gradient_loss | -0.0134 |
| std | 0.0876 |
| value_loss | 131 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4874 |
| time_elapsed | 29058 |
| total_timesteps | 9981952 |
| train/ | |
| approx_kl | 0.055119976 |
| clip_fraction | 0.419 |
| clip_range | 0.2 |
| entropy_loss | 5.11 |
| explained_variance | 0.957 |
| learning_rate | 0.0003 |
| loss | 16.8 |
| n_updates | 48730 |
| policy_gradient_loss | -0.00726 |
| std | 0.0875 |
| value_loss | 38.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4875 |
| time_elapsed | 29064 |
| total_timesteps | 9984000 |
| train/ | |
| approx_kl | 0.04577005 |
| clip_fraction | 0.377 |
| clip_range | 0.2 |
| entropy_loss | 5.09 |
| explained_variance | 0.958 |
| learning_rate | 0.0003 |
| loss | 28.9 |
| n_updates | 48740 |
| policy_gradient_loss | -0.0153 |
| std | 0.0878 |
| value_loss | 59.7 |
----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4876 |
| time_elapsed | 29070 |
| total_timesteps | 9986048 |
| train/ | |
| approx_kl | 0.0850863 |
| clip_fraction | 0.533 |
| clip_range | 0.2 |
| entropy_loss | 5.07 |
| explained_variance | 0.87 |
| learning_rate | 0.0003 |
| loss | 9.65 |
| n_updates | 48750 |
| policy_gradient_loss | -0.00416 |
| std | 0.0883 |
| value_loss | 25 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4877 |
| time_elapsed | 29076 |
| total_timesteps | 9988096 |
| train/ | |
| approx_kl | 0.029078625 |
| clip_fraction | 0.283 |
| clip_range | 0.2 |
| entropy_loss | 5.05 |
| explained_variance | 0.8 |
| learning_rate | 0.0003 |
| loss | 45.5 |
| n_updates | 48760 |
| policy_gradient_loss | -0.0124 |
| std | 0.0885 |
| value_loss | 140 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4878 |
| time_elapsed | 29082 |
| total_timesteps | 9990144 |
| train/ | |
| approx_kl | 0.055476185 |
| clip_fraction | 0.395 |
| clip_range | 0.2 |
| entropy_loss | 5.04 |
| explained_variance | 0.894 |
| learning_rate | 0.0003 |
| loss | 20.7 |
| n_updates | 48770 |
| policy_gradient_loss | -0.0137 |
| std | 0.0887 |
| value_loss | 41.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4879 |
| time_elapsed | 29088 |
| total_timesteps | 9992192 |
| train/ | |
| approx_kl | 0.048617296 |
| clip_fraction | 0.345 |
| clip_range | 0.2 |
| entropy_loss | 5.05 |
| explained_variance | 0.919 |
| learning_rate | 0.0003 |
| loss | 28.2 |
| n_updates | 48780 |
| policy_gradient_loss | -0.0173 |
| std | 0.0883 |
| value_loss | 61.9 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4880 |
| time_elapsed | 29094 |
| total_timesteps | 9994240 |
| train/ | |
| approx_kl | 0.07143147 |
| clip_fraction | 0.473 |
| clip_range | 0.2 |
| entropy_loss | 5.05 |
| explained_variance | 0.935 |
| learning_rate | 0.0003 |
| loss | 19.1 |
| n_updates | 48790 |
| policy_gradient_loss | -0.000467 |
| std | 0.0886 |
| value_loss | 29.9 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4881 |
| time_elapsed | 29100 |
| total_timesteps | 9996288 |
| train/ | |
| approx_kl | 0.03532288 |
| clip_fraction | 0.318 |
| clip_range | 0.2 |
| entropy_loss | 5.03 |
| explained_variance | 0.929 |
| learning_rate | 0.0003 |
| loss | 26.7 |
| n_updates | 48800 |
| policy_gradient_loss | -0.0167 |
| std | 0.0888 |
| value_loss | 128 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4882 |
| time_elapsed | 29106 |
| total_timesteps | 9998336 |
| train/ | |
| approx_kl | 0.05176502 |
| clip_fraction | 0.381 |
| clip_range | 0.2 |
| entropy_loss | 5.02 |
| explained_variance | 0.955 |
| learning_rate | 0.0003 |
| loss | 22.7 |
| n_updates | 48810 |
| policy_gradient_loss | -0.0159 |
| std | 0.0888 |
| value_loss | 49.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 8.76e+03 |
| ep_rew_mean | -1.34e+04 |
| time/ | |
| fps | 343 |
| iterations | 4883 |
| time_elapsed | 29112 |
| total_timesteps | 10000384 |
| train/ | |
| approx_kl | 0.043819282 |
| clip_fraction | 0.379 |
| clip_range | 0.2 |
| entropy_loss | 5.05 |
| explained_variance | 0.959 |
| learning_rate | 0.0003 |
| loss | 20 |
| n_updates | 48820 |
| policy_gradient_loss | -0.0115 |
| std | 0.0881 |
| value_loss | 46.3 |
-----------------------------------------
<stable_baselines3.ppo.ppo.PPO at 0x7f49d0ebb690>
# Simple run through the env with our trained PPO policy; every 1000 steps we
# print the actions and the reward to get a sense of what the agent is doing.
env = CityLearnEnv(schema=Constants.schema_path)
env = EnvCityGym(env)  # wrap the CityLearn env so it exposes the interface used for training
obs = env.reset()

# Reload the policy from the "ppo_citylearn" checkpoint on disk.
model = PPO.load("ppo_citylearn")

# Number of environment steps to roll out.
# NOTE(review): the training log reports ep_len_mean ~8.76e+03, so this looks
# like slightly less than one full episode -- confirm; `dones` is never checked.
nb_iter = 8750
reward_tot = 0  # running sum of the per-step reward returned by env.step

for i in range(nb_iter):
    # predict() returns a tuple; element 0 is the action.
    # NOTE(review): predict() is called with its defaults, which in
    # Stable-Baselines3 presumably samples from the policy instead of taking
    # the deterministic action -- confirm whether deterministic=True is wanted.
    action = model.predict(obs)[0]
    obs, rewards, dones, info = env.step(action)
    reward_tot += rewards

    # Periodic trace of what the policy is doing.
    if i % 1000 == 0:
        print("actions : ", action)
        print("rewards : ", rewards)

# Final score: half the sum of the values returned by the underlying env's
# evaluate() (presumably the mean of two cost metrics -- verify), then the
# accumulated reward over the rollout.
print(sum(env.env.evaluate())/2)
print(reward_tot)
actions : [-1. -0.51221657 -1. 0.3934157 -0.29064393] rewards : -2.7105634623043136 actions : [-1. 0.08792342 -0.7883315 0.09763932 -0.71059644] rewards : -2.359457404634485 actions : [-1. -1. -0.5391931 0.89732116 -0.04991096] rewards : 0.0 actions : [-1. -0.33264172 -1. 0.2705636 0.0255101 ] rewards : -0.8749080808257886 actions : [-0.18878677 -0.04086002 0.13534778 0.3798426 -0.13745424] rewards : -1.475681695967634 actions : [-0.21046181 -1. 0.23394877 0.57906485 -0.1837316 ] rewards : -1.271070223988344 actions : [-0.898123 -0.5645208 -1. 0.56289023 -0.34396997] rewards : -0.9884980741265077 actions : [ 0.64262384 0.42495456 1. 0.70089674 -0.4528601 ] rewards : -1.230325556764777 actions : [-0.1211988 0.19727333 1. 0.4166654 -0.13164607] rewards : -2.258931668645342 0.9541584900404141 -13526.836035385733
GUYS, WE ARE AT 0.95! BELOW 1.0!
Content
Comments
You must login before you can post a comment.
len_tot_index = len(index_commun) + len(index_particular) * 5
Doesn’t this make the number of buildings hard-coded? Will the trained model be able to handle a different number of buildings?
totally !
I cannot find the repository used in git clone http://gitlab.aicrowd.com/adrien_forbu/citylearn-2022-starter-kit.git. Where can I find it?
I now tried git clone http://gitlab.aicrowd.com/adrien_forbu/neurips-2022-citylearn-challenge.git