Commit 469572fa authored by Fredrik Bagge Carlson

added support for using stochastic gradient descent, does not work well yet. Should implement storage of linear parameters to reduce computational cost
parent cbb84401
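
The update performed by the new stochastic_gradient_descent routine added below is the standard mini-batch step with a step size that decays once per pass over the data:

x <- x - lambda_t * g(x, B),    lambda_t = lambda / (1 + t)

where B is the index range i:min(i+batch_size,N) handed to the gradient callback g, and t counts passes through the data.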
@@ -113,7 +113,7 @@ Base.show(fit::FitStatistics) = println("Fit RMS:$(fit.RMS), FIT:$(fit.FIT), AIC
include("armax.jl")
include("kalman.jl")
include("PCA.jl")
-include("kernelPCA.jl")
+# include("kernelPCA.jl")
include("toeplitz.jl")
end
@@ -6,7 +6,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
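
The recurring change from fill!(wT, log(1/N)) to fill!(wT, -log(N)), repeated in the other particle-filter variants below, initialises the particle log-weights uniformly. The two forms are mathematically identical, log(1/N) = -log(N), so the new form simply avoids the intermediate division by N.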
@@ -20,7 +20,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
@@ -44,7 +44,7 @@ function pf_bootstrap(y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
@@ -58,7 +58,7 @@ function pf_bootstrap(y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
@@ -83,7 +83,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
xp[:,1] = 2*σw*randn(N)
wT = slice(w,:,1)
xT = slice(xp,:,1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
g(y[1],xT, wT)
wnn[:,t] = wT
wT -= logsumexp(wT)
@@ -98,7 +98,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
@@ -177,7 +177,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
T = length(y)
wT = slice(w,:,1)
xT = slice(xp,:,1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
g(y[1],xT, wT)
wT -= logsumexp(wT)
j = Array(Int64,N)
@@ -191,7 +191,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
if t%2 == 0
resample_systematic!(j,wT1,N)
f(xT,xT1[j],t-1)
-fill!(wT,log(1/N))
+fill!(wT,-log(N))
else # Resample not needed
f(xT,xT1,t-1)
copy!(wT,wT1)
...
function stochastic_gradient_descent(f, g, x, N, n_state; lambda = 0.1, maxIter = 10, tolG = 1e-7, tolX = 1e-10, show_trace=true, timeout = 1000, batch_size=10)
    converged = false
    x_converged = false
    g_converged = false
    need_jacobian = true
    iterCt = 0
    f_calls = 0
    g_calls = 0
    x0 = copy(x)                    # keep the starting point for the results object below
    fcur = f(x)
    f_calls += 1
    residual = rms(fcur)
    lambdai = lambda
    t0 = time()
    # Maintain a trace of the optimization
    tr = Optim.OptimizationTrace()
    if show_trace
        d = Dict("lambda" => lambda)
        os = Optim.OptimizationState(1, residual, NaN)
        push!(tr, os)
        println("Iter:0, f:$(round(os.value,5)), ||g||:$(0)")
    end
    for t = 1:maxIter
        iterCt = t
        # One pass over the data in mini-batches of batch_size indices
        for i = 1:batch_size:N
            grad = g(x,i:min(i+batch_size,N))
            g_calls += 1
            x -= lambdai*grad
            x = saturatePrecision(x,n_state)
        end
        grad = g(x,1:N)
        lambdai = lambda / (1+t)    # decaying step size
        fcur = f(x)
        f_calls += 1
        residual = rms(fcur)
        if show_trace && (t < 5 || t % 5 == 0)
            gradnorm = norm(grad)
            os = Optim.OptimizationState(t, residual, gradnorm)
            push!(tr, os)
            println("Iter:$t, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5))")
        end
        if time()-t0 > timeout
            display("stochastic_gradient_descent: timeout $(timeout)s reached ($(time()-t0)s)")
            break
        end
    end
    Optim.MultivariateOptimizationResults("stochastic_gradient_descent", x0, x, residual, iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
end
@@ -60,8 +60,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
else
Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
end
-@show Znl
-@show size(state)
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
@ddshow sum(!isfinite(Ψ))
getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points); debug("Got linear regressor V")
@@ -73,7 +71,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
error = y - prediction
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
-# 5 Hz takes 5:09 min, 10 Hz 7:10 min, and 40 Hz 16
# ============= Main loop ================================================
debug("Calculating initial error")
@@ -84,6 +81,11 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
w = fitlinear(V,y)
return outputnet ? jacobian_outputnet(znl,Ψ, w, V, state) : jacobian_no_outputnet(znl,Ψ, w, V, state)
end
+function grad(z,range)
+znl = RbfNonlinearParameters(z,n_state,n_centers)
+w = fitlinear(V,y)
+return outputnet ? gradient_outputnet(znl,Ψ, V, state,A,y,w, range) : gradient_no_outputnet(znl,Ψ, V, state,A,y,w, range)
+end
f(z) = predictionerror(z)
X0 = deepcopy(Znl.x)
for i = 1:liniters
@@ -95,6 +97,14 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
show_trace=true,
timeout = timeout,
n_state = n_state)
+# @time res = stochastic_gradient_descent(f, grad, X0, n_points, n_state,
+# lambda = 0.001,
+# maxIter = 5,
+# tolG = 1e-7,
+# tolX = 1e-10,
+# show_trace=true,
+# timeout = 1000,
+# batch_size=50)
X0 = deepcopy(res.minimum)
DEBUG && assert(X0 == res.minimum)
DEBUG && @show ff1 = rms(f(X0))
@@ -189,17 +199,17 @@ function trainRBF(y, state, nc; liniters=3,nonliniters=50, normalized=false, ini
# Znl.x += 0.1*randn(size(Znl.x))
@ddshow Znl
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
# matshow(deepcopy(Ψ));axis("tight"); colorbar()
# plotparams(Znl)
@ddshow sum(!isfinite(Ψ))
w = fitlinear(Ψ,y); debug("fitlinear")
# newplot(w,"o"); title("Linear parameters")
@ddshow sum(!isfinite(Zl))
prediction = Ψ*w
error = y - prediction
# newplot([y prediction error]);title("inbetween")
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
@@ -375,8 +385,8 @@ end
-function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool)
-Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
+function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool, range = 1:1)
+Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range)
if DEBUG && sum(!isfinite(Ψ)) > 0
@show sum(!isfinite(Ψ))
end
@@ -407,11 +417,11 @@ function getΨnormalized(Ψ, Znl, state, n_points, n_state)
return Ψ
end
-function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
-# RBF(x, Z,n_state) = exp(-(x-Z[1:n_state]).^2)[1]
+function getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range = 1:1)
+range = (range == 1:1) ? (1:size(state,1)) : range
RBF(x, Z,n_state) = exp(-(sum(((x-Z[1:n_state]).^2).*Z[n_state+1:end])))
for (j,Zi) in enumerate(Znl)
-for i = 1:n_points
+for i = range
statei = state[i,:]'
# statei = slice(state,i,:)
Ψ[i,j] = RBF(statei, Zi, n_state)
@@ -427,15 +437,16 @@ function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
return Ψ
end
-function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points)
-outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points, range = 1:1)
+outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range)
end
-function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
+range = range == 1:1 ? 1:size(state,1) : range
ii = 1
for i = 1:na
for k = 1:n_centers+1
-for l = 1:n_points
+for l = range
V[l,ii] = Ψ[l,k]*A[l,i]
end
ii = ii+1
@@ -447,12 +458,13 @@ function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_poi
return V
end
-function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
+range = (range == 1:1) ? (1:size(state,1)) : range
ii = 1
for i = 1:na
for k = 1:n_centers+1
for j = 1:n_state
-for l = 1:n_points
+for l = range
V[l,ii] = Ψ[l,k]*A[l,i]*state[l,j]
end
ii = ii+1
@@ -490,13 +502,18 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
-J = Array(Float64,(n_points,length(Znl.x)))
+n_batch = 100
+points = randperm(n_points)[1:n_batch]
+J = Array(Float64,(n_batch,length(Znl.x)))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
-for l = 1:n_points
+for (l,li) = enumerate(points)
Ψw = 1.0
for i = 1:na
for j = 1:n_state
@@ -506,8 +523,8 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
-J[l,i1+p] = 2*Ψw*x_μ*γ[p]
-J[l,i1+n_state+p] = (-Ψw)*x_μ^2
+J[li,i1+p] = 2*Ψw*x_μ*γ[p]
+J[li,i1+n_state+p] = (-Ψw)*x_μ^2
end
end
ii += 2n_state
@@ -515,7 +532,7 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
return J
end
-function jacobian_no_outputnet(Znl, Ψ, w,v, state)
+function jacobian_no_outputnet(Znl, Ψ, w,V, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
@@ -542,6 +559,76 @@ function jacobian_no_outputnet(Znl, Ψ, w,v, state)
return J
end
function gradient_outputnet(Znl, Ψ, V, state, A,y,w, range)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
na = size(A,2)
# znl = RbfNonlinearParameters(z,n_state,n_centers)
# psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
# getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
# w = fitlinear(V,y)
J = zeros(length(Znl.x))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = range
Ψw = 1.0
for i = 1:na
for j = 1:n_state
ind = j + n_state*(k-1) + n_state*(n_centers+1)*(i-1)
Ψw += V[l,ind]*w[ind]
end
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[i1+p] += 2*Ψw*x_μ*γ[p]
J[i1+n_state+p] += (-Ψw)*x_μ^2
end
end
ii += 2n_state
end
J /= length(range)
return J
end
function gradient_no_outputnet(Znl, Ψ, V, state, A,y,w, range)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
na = size(A,2)
# psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
# getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
# w = fitlinear(V,y)
J = zeros(length(Znl.x))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = range
Ψw = 1.0
for i = 1:na
ind = k + (n_centers+1)*(i-1)
Ψw += V[l,ind]*w[ind]
end
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[i1+p] += 2*Ψw*x_μ*γ[p]
J[i1+n_state+p] += (-Ψw)*x_μ^2
end
end
ii += 2n_state
end
J /= length(range)
return J
end
function jacobian(Znl, Ψ, w, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
...