Source code for track.utils.stat

import math
import json

from multiprocessing.sharedctypes import Value
from ctypes import Structure, c_double, c_int


class StatStreamStruct(Structure):
    """C-layout record holding the raw accumulator state of a ``StatStream``.

    The field order matters: ``StatStream.__init__`` fills the structure
    positionally through ``Value(StatStreamStruct, ...)``.
    """

    _fields_ = [
        # Weighted running sum of (observation - first_obs).
        ('sum', c_double),
        # Weighted running sum of (observation - first_obs) squared.
        ('sum_sqr', c_double),
        # Reference observation subtracted from every later one.
        ('first_obs', c_double),
        # Smallest / largest raw observation recorded.
        ('min', c_double),
        ('max', c_double),
        # Accumulated weight of every update() call, warm-up included.
        ('current_count', c_int),
        # Latest observation (raw during warm-up, offset by first_obs after).
        ('current_obs', c_double),
        # Number of leading observations to discard.
        ('drop_obs', c_int),
    ]
class StatStream(object):
    """Streaming average / standard deviation accumulator.

    The state is kept in a ``multiprocessing.sharedctypes.Value`` wrapping a
    ``StatStreamStruct`` so the object can be shared.

    Only the sum of the observations and the sum of the squared observations
    are stored; the average and the standard deviation are computed at the
    user's request. The first ``drop_first_obs`` observations are discarded
    (they are usually slower than the rest).

    To keep the computation stable, the first retained observation ``x0`` is
    stored and subtracted from every other observation. The idea is that if
    ``x ~ N(mu, sigma)`` then ``x - x0`` and the sum of ``x - x0`` should be
    close(r) to 0, allowing for greater precision; without that trick ``var``
    was getting negative on some iterations.

    NOTE(review): ``update`` performs separate reads and writes on the shared
    value; concurrent writers presumably need external locking -- confirm.
    """

    # Names of the fields that make up the serialisable state, in the order
    # used by ``state_dict``.
    _FIELDS = (
        'sum', 'sum_sqr', 'first_obs', 'min', 'max',
        'current_count', 'current_obs', 'drop_obs',
    )

    def __init__(self, drop_first_obs=10):
        """Create an empty stream that ignores the first *drop_first_obs* updates."""
        self.struct = Value(
            StatStreamStruct,
            0,              # sum
            0,              # sum_sqr
            0,              # first_obs
            float('+inf'),  # min
            float('-inf'),  # max
            0,              # current_count
            0,              # current_obs
            drop_first_obs) # drop_obs

    @classmethod
    def from_dict(cls, data):
        """Rebuild a stream from a mapping produced by ``state_dict``.

        Raises ``KeyError`` if one of the state fields is missing from *data*.
        """
        stream = cls(0)
        for name in cls._FIELDS:
            setattr(stream.struct, name, data[name])
        return stream

    def state_dict(self):
        """Return the raw accumulator state as a plain ``dict``."""
        return {name: getattr(self.struct, name) for name in self._FIELDS}

    @property
    def sum(self):
        return self.struct.sum

    @property
    def sum_sqr(self):
        return self.struct.sum_sqr

    @property
    def current_count(self):
        return self.struct.current_count

    @property
    def current_obs(self):
        return self.struct.current_obs

    @property
    def max(self):
        return self.struct.max

    @property
    def min(self):
        return self.struct.min

    @property
    def drop_obs(self):
        return self.struct.drop_obs

    @property
    def first_obs(self):
        return self.struct.first_obs

    @property
    def total(self):
        """Sum of the retained observations (offset added back)."""
        return self.struct.sum + self.first_obs * self.count

    def __iadd__(self, other):
        """``stream += x`` records *x* with a weight of 1."""
        self.update(other, 1)
        return self

    def update(self, val, weight=1):
        """Record the observation *val*, counted *weight* times.

        While the accumulated count has not exceeded ``drop_obs`` the
        observation is only remembered as the latest value; it contributes
        to none of the statistics (sum, sum of squares, min, max).
        """
        self.struct.current_count += weight

        # Warm-up phase. The comparison is inclusive so that exactly
        # `drop_obs` observations are skipped; a strict `<` let the last
        # warm-up value leak into min/max while being absent from the average.
        if self.current_count <= self.drop_obs:
            self.struct.current_obs = val
            return

        # First retained observation becomes the reference offset.
        if self.count == 1:
            self.struct.first_obs = val

        self.struct.current_obs = val - self.first_obs
        offset = self.current_obs

        self.struct.sum += float(offset) * float(weight)
        self.struct.sum_sqr += float(offset * offset) * float(weight)
        self.struct.min = min(self.min, val)
        self.struct.max = max(self.max, val)

    @property
    def val(self) -> float:
        """Latest observation, with the reference offset added back."""
        return self.current_obs + self.first_obs

    @property
    def count(self) -> int:
        """Number of retained observations, never less than 1."""
        # if count is 0 then self.sum is 0 so everything should work out
        return max(self.current_count - self.drop_obs, 1)

    @property
    def avg(self) -> float:
        """Mean of the retained observations."""
        return self.sum / float(self.count) + self.first_obs

    @property
    def var(self) -> float:
        """Population variance of the retained observations (never negative)."""
        n = float(self.count)
        shifted_mean = self.sum / n
        raw = self.sum_sqr / n - shifted_mean * shifted_mean
        # Rounding can push a near-zero variance slightly below zero, which
        # would make `sd` raise ValueError in math.sqrt.
        return max(raw, 0.0)

    @property
    def sd(self) -> float:
        """Population standard deviation of the retained observations."""
        return math.sqrt(self.var)

    def to_array(self, transform=None):
        """Return ``[avg, sd, min, max, count]``.

        When *transform* is given it is applied to avg, min and max, and the
        standard deviation slot holds the string ``'NA'``.
        """
        if transform is not None:
            return [transform(self.avg), 'NA', transform(self.min), transform(self.max), self.count]

        return [self.avg, self.sd, self.min, self.max, self.count]

    def to_dict(self):
        """Return the summary statistics as a ``dict``."""
        data = {
            'avg': self.avg,
            'min': self.min,
            'max': self.max,
            'sd': self.sd,
            'count': self.count,
            'unit': 's'
        }
        return data

    def to_json(self):
        """Return the summary statistics serialised with ``json.dumps``."""
        return json.dumps(self.to_dict())