There is no standard as far as I know. What I usually do is this
(it's Welford's online algorithm, from https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm):
import torch

class Normalizer:
    def __init__(self, num_inputs):
        self.n = torch.zeros(num_inputs)
        self.mean = torch.zeros(num_inputs)
        self.mean_diff = torch.zeros(num_inputs)
        self.var = torch.zeros(num_inputs)

    def observe(self, x):
        # Welford's online update of the running mean and variance
        self.n += 1.
        last_mean = self.mean.clone()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        # clamp so the std can never get close to zero
        self.var = torch.clamp(self.mean_diff / self.n, min=1e-2)

    def normalize(self, inputs):
        obs_std = torch.sqrt(self.var)
        return (inputs - self.mean) / obs_std
Then each time I get a new state, I just do:
normalizer.observe(new_state)
new_state = normalizer.normalize(new_state)
'''
new_state must be a plain tensor;
if it's a Variable, use new_state.data
'''
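For completeness, here is a minimal end-to-end sketch. The loop and the torch.randn states are just stand-ins for whatever your environment produces; it only assumes states arrive as 1-D float tensors of length num_inputs:

normalizer = Normalizer(num_inputs=4)

for step in range(1000):
    new_state = torch.randn(4)                    # stand-in for an environment state
    normalizer.observe(new_state)                 # update the running statistics
    new_state = normalizer.normalize(new_state)   # roughly zero mean, unit variance

Note that the running statistics start out noisy, so the earliest states are normalized poorly; they converge as more states are observed, which is usually acceptable in practice.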