Commit 469572fa authored by Fredrik Bagge Carlson's avatar Fredrik Bagge Carlson

added support for using stochastic gradient descent, does not work well yet....

added support for using stochastic gradient descent, does not work well yet. Should implement storage of linear parameters to reduce computational cost
parent cbb84401
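
For reference, a minimal self-contained sketch of the mini-batch stochastic gradient descent scheme this commit introduces, written as a hypothetical toy least-squares example (none of the names below belong to the package):

# Hypothetical illustration only: fit θ so that X*θ ≈ y with mini-batch SGD.
function sgd_sketch(X, y; lambda = 0.1, epochs = 10, batch_size = 10)
    N = size(X, 1)
    θ = zeros(size(X, 2))
    for t = 1:epochs
        step = lambda / (1 + t)                 # decaying step size, as in the commit
        for i = 1:batch_size:N
            idx  = i:min(i + batch_size - 1, N) # indices of the current mini-batch
            r    = X[idx, :] * θ - y[idx]       # residuals on the mini-batch
            grad = X[idx, :]' * r / length(idx) # mini-batch gradient of the squared error
            θ   -= step * grad                  # gradient step
        end
    end
    return θ
end

X = randn(100, 3)
y = X * [1.0, -2.0, 0.5] + 0.01 * randn(100)
θ_est = sgd_sketch(X, y)   # should end up close to [1.0, -2.0, 0.5]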
......@@ -113,7 +113,7 @@ Base.show(fit::FitStatistics) = println("Fit RMS:$(fit.RMS), FIT:$(fit.FIT), AIC
include("armax.jl")
include("kalman.jl")
include("PCA.jl")
include("kernelPCA.jl")
# include("kernelPCA.jl")
include("toeplitz.jl")
end
......@@ -6,7 +6,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
......@@ -20,7 +20,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
......@@ -44,7 +44,7 @@ function pf_bootstrap(y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
......@@ -58,7 +58,7 @@ function pf_bootstrap(y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
......@@ -83,7 +83,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
g(y[1],xT, wT)
wnn[:,t] = wT
wT -= logsumexp(wT)
......@@ -98,7 +98,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
......@@ -177,7 +177,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
T = length(y)
wT = slice(w,:,1)
xT = slice(xp,:,1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
......@@ -191,7 +191,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
fill!(wT,log(1/N))
fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
......
function stochastic_gradient_descent(f, g, x, N, n_state; lambda = 0.1, maxIter = 10, tolG = 1e-7, tolX = 1e-10, show_trace=true, timeout = 1000, batch_size=10)
x0 = copy(x) # keep the initial point; it is reported in the results object at the end
converged = false
x_converged = false
g_converged = false
need_jacobian = true
iterCt = 0
f_calls = 0
g_calls = 0
fcur = f(x)
f_calls += 1
residual = rms(fcur)
lambdai = lambda
t0 = time()
# Maintain a trace of the system.
tr = Optim.OptimizationTrace()
if show_trace
d = Dict("lambda" => lambda)
os = Optim.OptimizationState(1, residual, NaN)
push!(tr, os)
println("Iter:0, f:$(round(os.value,5)), ||g||:$(0))")
end
for t = 1:maxIter
iterCt = t
for i = 1:batch_size:N # one epoch over the data in mini-batches
grad = g(x,i:min(i+batch_size,N))
g_calls += 1
x -= lambdai*grad
x = saturatePrecision(x,n_state)
end
grad = g(x,1:N)
lambdai = lambda / (1+t) # decay the step size after each epoch
fcur = f(x)
f_calls += 1
residual = rms(fcur)
if show_trace && (t < 5 || t % 5 == 0)
gradnorm = norm(grad)
os = Optim.OptimizationState(t, residual, gradnorm)
push!(tr, os)
println("Iter:$t, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5))")
end
if time()-t0 > timeout
display("stochastic_gradient_descent: timeout $(timeout)s reached ($(time()-t0)s)")
break
end
end
Optim.MultivariateOptimizationResults("stochastic_gradient_descent", x0, x, residual, iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
end
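
A hypothetical invocation of the function above, mirroring the commented-out call that appears later in this diff (f, grad, X0, n_points and n_state are assumed to be set up as in trainRBF_ARX):

res = stochastic_gradient_descent(f, grad, X0, n_points, n_state,
                                  lambda     = 0.001,
                                  maxIter    = 5,
                                  tolG       = 1e-7,
                                  tolX       = 1e-10,
                                  show_trace = true,
                                  timeout    = 1000,
                                  batch_size = 50)
X0 = deepcopy(res.minimum) # res.minimum is the final iterate (old Optim.jl field name)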
......@@ -60,8 +60,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
else
Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
end
@show Znl
@show size(state)
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
@ddshow sum(!isfinite(Ψ))
getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points); debug("Got linear regressor V")
......@@ -73,7 +71,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
error = y - prediction
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
# 5 Hz takes 5:09 min, 10 Hz takes 7:10 min, and 40 Hz 16
# ============= Main loop ================================================
debug("Calculating initial error")
......@@ -84,6 +81,11 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
w = fitlinear(V,y)
return outputnet ? jacobian_outputnet(znl,Ψ, w, V, state) : jacobian_no_outputnet(znl,Ψ, w, V, state)
end
function grad(z,range)
znl = RbfNonlinearParameters(z,n_state,n_centers)
w = fitlinear(V,y)
return outputnet ? gradient_outputnet(znl,Ψ, V, state,A,y,w, range) : gradient_no_outputnet(znl,Ψ, V, state,A,y,w, range)
end
f(z) = predictionerror(z)
X0 = deepcopy(Znl.x)
for i = 1:liniters
......@@ -95,6 +97,14 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
show_trace=true,
timeout = timeout,
n_state = n_state)
# @time res = stochastic_gradient_descent(f, grad, X0, n_points, n_state,
# lambda = 0.001,
# maxIter = 5,
# tolG = 1e-7,
# tolX = 1e-10,
# show_trace=true,
# timeout = 1000,
# batch_size=50)
X0 = deepcopy(res.minimum)
DEBUG && assert(X0 == res.minimum)
DEBUG && @show ff1 = rms(f(X0))
......@@ -189,17 +199,17 @@ function trainRBF(y, state, nc; liniters=3,nonliniters=50, normalized=false, ini
# Znl.x += 0.1*randn(size(Znl.x))
@ddshow Znl
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
# matshow(deepcopy(Ψ));axis("tight"); colorbar()
# plotparams(Znl)
# matshow(deepcopy(Ψ));axis("tight"); colorbar()
# plotparams(Znl)
@ddshow sum(!isfinite(Ψ))
w = fitlinear(Ψ,y); debug("fitlinear")
# newplot(w,"o"); title("Linear parameters")
# newplot(w,"o"); title("Linear parameters")
@ddshow sum(!isfinite(Zl))
prediction = Ψ*w
error = y - prediction
# newplot([y prediction error]);title("inbetween")
# newplot([y prediction error]);title("inbetween")
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
......@@ -375,8 +385,8 @@ end
function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool)
Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool, range = 1:1)
Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range)
if DEBUG && sum(!isfinite(Ψ)) > 0
@show sum(!isfinite(Ψ))
end
......@@ -407,11 +417,11 @@ function getΨnormalized(Ψ, Znl, state, n_points, n_state)
return Ψ
end
function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
# RBF(x, Z,n_state) = exp(-(x-Z[1:n_state]).^2)[1]
function getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range = 1:1)
range = (range == 1:1) ? (1:size(state,1)) : range
RBF(x, Z,n_state) = exp(-(sum(((x-Z[1:n_state]).^2).*Z[n_state+1:end])))
for (j,Zi) in enumerate(Znl)
for i = 1:n_points
for i = range
statei = state[i,:]'
# statei = slice(state,i,:)
Ψ[i,j] = RBF(statei, Zi, n_state)
......@@ -427,15 +437,16 @@ function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
return Ψ
end
function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points)
outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points, range = 1:1)
outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range)
end
function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
range = (range == 1:1) ? (1:size(state,1)) : range
ii = 1
for i = 1:na
for k = 1:n_centers+1
for l = 1:n_points
for l = range
V[l,ii] = Ψ[l,k]*A[l,i]
end
ii = ii+1
......@@ -447,12 +458,13 @@ function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_poi
return V
end
function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
range = (range == 1:1) ? (1:size(state,1)) : range
ii = 1
for i = 1:na
for k = 1:n_centers+1
for j = 1:n_state
for l = 1:n_points
for l = range
V[l,ii] = Ψ[l,k]*A[l,i]*state[l,j]
end
ii = ii+1
......@@ -490,13 +502,18 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
J = Array(Float64,(n_points,length(Znl.x)))
n_batch = 100
points = randperm(n_points)[1:n_batch]
J = Array(Float64,(n_batch,length(Znl.x)))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = 1:n_points
for (l,li) = enumerate(points)
Ψw = 1.0
for i = 1:na
for j = 1:n_state
......@@ -506,8 +523,8 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[l,i1+p] = 2*Ψw*x_μ*γ[p]
J[l,i1+n_state+p] = (-Ψw)*x_μ^2
J[li,i1+p] = 2*Ψw*x_μ*γ[p]
J[li,i1+n_state+p] = (-Ψw)*x_μ^2
end
end
ii += 2n_state
......@@ -515,7 +532,7 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
return J
end
function jacobian_no_outputnet(Znl, Ψ, w,v, state)
function jacobian_no_outputnet(Znl, Ψ, w,V, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
......@@ -542,6 +559,76 @@ function jacobian_no_outputnet(Znl, Ψ, w,v, state)
return J
end
function gradient_outputnet(Znl, Ψ, V, state, A,y,w, range)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
na = size(A,2)
# znl = RbfNonlinearParameters(z,n_state,n_centers)
# psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
# getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
# w = fitlinear(V,y)
J = zeros(length(Znl.x))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = range
Ψw = 1.0
for i = 1:na
for j = 1:n_state
ind = j + n_state*(k-1) + n_state*(n_centers+1)*(i-1)
Ψw += V[l,ind]*w[ind]
end
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[i1+p] += 2*Ψw*x_μ*γ[p]
J[i1+n_state+p] += (-Ψw)*x_μ^2
end
end
ii += 2n_state
end
J /= length(range)
return J
end
function gradient_no_outputnet(Znl, Ψ, V, state, A,y,w, range)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
na = size(A,2)
# psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
# getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
# w = fitlinear(V,y)
J = zeros(length(Znl.x))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = range
Ψw = 1.0
for i = 1:na
ind = k + (n_centers+1)*(i-1)
Ψw += V[l,ind]*w[ind]
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[i1+p] += 2*Ψw*x_μ*γ[p]
J[i1+n_state+p] += (-Ψw)*x_μ^2
end
end
ii += 2n_state
end
J /= length(range)
return J
end
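
The terms accumulated above appear to follow the partial derivatives of the Gaussian basis function used in getΨnonnormalized, Ψ(x) = exp(-Σ_p γ_p (x_p - μ_p)^2); in LaTeX:

\frac{\partial \Psi}{\partial \mu_p} = 2\,\gamma_p\,(x_p-\mu_p)\,\Psi, \qquad \frac{\partial \Psi}{\partial \gamma_p} = -(x_p-\mu_p)^2\,\Psi

which matches J[i1+p] += 2*Ψw*x_μ*γ[p] and J[i1+n_state+p] += (-Ψw)*x_μ^2 in the loops above, with the factor Ψw collecting the Ψ and linear-weight terms.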
function jacobian(Znl, Ψ, w, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
......