summaryrefslogtreecommitdiff
path: root/regression.py
blob: b6ac7dba0d4e39d3237f151030daab56f63df7ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import numpy as np

# data = [(0,0), (1,1), (2, 4), (3, 9), (4, 16) ] ; t = 3
def velvar(data, t):
    n = len(data) # number of entries
    X = np.array([(datum[0]**2, datum[0], 1) for datum in data])
    Xt = np.transpose(X) # X^T
    Y = [datum[1] for datum in data]
    M = np.matmul( np.linalg.inv(np.matmul(Xt, X)), Xt ) # solution = My
    est = np.matmul(M, Y);
    P = np.matmul(X, M) # \hat y = Py
    R = np.subtract(np.identity(n), P) # residual = Ry
    bias = sum([sum([entry**2 for entry in row]) for row in R])
    Ry = np.matmul(R, Y);
    S2 = np.dot(Ry, Ry); # S^2 estimator
    variance = S2/bias
    v = 2*M[0]*t + M[1] # V = v\cdot y
    velocity = 2*est[0]*t+est[1]
    variancevel = np.dot(v,v)*variance
    return (np.abs(velocity), variancevel)

def combine_estimates(velvars):
    avg = np.average(list(a[0] for a in velvars))
    var = (np.sum(a[1] + a[0]**2 for a in velvars) - avg**2)/len(velvars)
    return((avg, var))

if __name__ == '__main__':
    tot = (velvar([(0,0), (1,1), (2, 4), (3, 9), (4, 17)], 3),
           velvar([(0, 0), (1,2), (2, 8), (3, 18), (4, 32)], 3))
    n = 5
    k = 2
    print(sum([datum[1]**2+n*datum[0]**2 for datum in tot])
        - n*k*sum([datum[0]/k for datum in tot])**2)