This module contains useful interaction loops for different types of RL agents and will be updated over time.

polgrad_interaction_loop[source]

polgrad_interaction_loop(env:Env, agent:Module, buffer:PGBuffer, num_interactions:int=4000, horizon:int=1000)

Interaction loop for actor-critic policy gradient agent.

This loop does not handle converting between PyTorch Tensors and NumPy arrays, so either your env should first be wrapped in ToTorchWrapper or your agent should accept and return NumPy arrays.
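
If you write your own wrapper instead, a minimal sketch of the kind of conversion involved might look like the following (the class name and details are illustrative assumptions; the real ToTorchWrapper in rl_bolts.env_wrappers may differ):

import gym
import torch

class ToTorchSketch(gym.Wrapper):
    """Illustrative wrapper: hands torch.Tensors to the agent and NumPy-friendly values to the env."""

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        return torch.as_tensor(obs, dtype=torch.float32)

    def step(self, action):
        # Convert the agent's torch.Tensor action back to a NumPy value before stepping the env.
        if isinstance(action, torch.Tensor):
            action = action.detach().cpu().numpy()
        obs, reward, done, info = self.env.step(action)
        return torch.as_tensor(obs, dtype=torch.float32), reward, done, info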

Args:

  • env (gym.Env): Environment to run in.
  • agent (nn.Module): Agent to run within the environment; generates actions, values, and logprobs at each step.
  • buffer (rl_bolts.buffers.PGBuffer-like): Buffer object with same API and function signatures as the PGBuffer.
  • num_interactions (int): How many interactions to collect in the environment.
  • horizon (int): Maximum allowed episode length.

Returns:

  • buffer (rl_bolts.buffers.PGBuffer-like): Buffer filled with interactions.
  • infos (dict): Dictionary of reward and episode length statistics.
  • env_infos (list of dicts): List of all info dicts from the environment.
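
For intuition, here is a simplified sketch of what a collection loop with this signature typically does. The agent.step, buffer.store, and buffer.finish_path calls are assumptions about the agent and buffer APIs, so the actual polgrad_interaction_loop may differ in its details:

import numpy as np

def sketch_polgrad_loop(env, agent, buffer, num_interactions=4000, horizon=1000):
    """Simplified, illustrative version of an actor-critic collection loop."""
    env_infos, ep_returns, ep_lengths = [], [], []
    obs, ep_ret, ep_len = env.reset(), 0.0, 0
    for t in range(num_interactions):
        act, val, logp = agent.step(obs)            # assumed agent API
        next_obs, rew, done, info = env.step(act)
        buffer.store(obs, act, rew, val, logp)      # assumed buffer API
        env_infos.append(info)
        obs, ep_ret, ep_len = next_obs, ep_ret + float(rew), ep_len + 1
        timeout, last_step = ep_len == horizon, t == num_interactions - 1
        if done or timeout or last_step:
            # Bootstrap with the critic's value estimate if the episode was cut off early.
            last_val = 0.0 if done else agent.step(obs)[1]
            buffer.finish_path(last_val)            # assumed buffer API
            ep_returns.append(ep_ret)
            ep_lengths.append(ep_len)
            obs, ep_ret, ep_len = env.reset(), 0.0, 0
    infos = {
        "MeanEpReturn": np.mean(ep_returns), "StdEpReturn": np.std(ep_returns),
        "MaxEpReturn": np.max(ep_returns), "MinEpReturn": np.min(ep_returns),
        "MeanEpLength": np.mean(ep_lengths), "StdEpLength": np.std(ep_lengths),
    }
    return buffer, infos, env_infos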

Here we demonstrate hypothetical usage of the interaction loop.

import gym
from rl_bolts import buffers, env_wrappers, neuralnets  # assuming these modules live in the rl_bolts package

env = gym.make("CartPole-v1")  # make the environment
env = env_wrappers.ToTorchWrapper(env)  # wrap it for conversion to/from torch.Tensors
agent = neuralnets.ActorCritic(  # make the actor-critic agent
    env.observation_space.shape[0],
    env.action_space,
)
buf = buffers.PGBuffer(env.observation_space.shape, env.action_space.shape, 4000)  # create an empty buffer sized to num_interactions
full_buf, infos, env_infos = polgrad_interaction_loop(env, agent, buf)  # run the loop, filling the buffer
for k, v in infos.items():  # print the loop statistics
    print(f"{k}: {v}")
MeanEpReturn: 25.477707006369428
StdEpReturn: 14.071059873100223
MaxEpReturn: 100.0
MinEpReturn: 9.0
MeanEpLength: 25.477707006369428
StdEpLength: 14.071059873100223
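
Note that because CartPole-v1 gives a reward of +1 for every step an episode survives, each episode's return equals its length, which is why the return and length statistics above are identical.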