diff --git a/src/SystemIdentification.jl b/src/SystemIdentification.jl
index 444d1ec8e831481a61408354ffbd7abfb17eedc4..c74e36845f446876f970e3eecb1521df221cb440 100644
--- a/src/SystemIdentification.jl
+++ b/src/SystemIdentification.jl
@@ -113,7 +113,7 @@ Base.show(fit::FitStatistics) = println("Fit RMS:$(fit.RMS), FIT:$(fit.FIT), AIC
 include("armax.jl")
 include("kalman.jl")
 include("PCA.jl")
-include("kernelPCA.jl")
+# include("kernelPCA.jl")
 include("toeplitz.jl")
 
 end
diff --git a/src/particle_filters/pf_bootstrap.jl b/src/particle_filters/pf_bootstrap.jl
index 5cdb02c7ba10e807127ac851417c6963723c81c1..0cb4a6126ee2fff7f3ae82a67e2da5d9fe70b128 100644
--- a/src/particle_filters/pf_bootstrap.jl
+++ b/src/particle_filters/pf_bootstrap.jl
@@ -6,7 +6,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
     xp[:,1] = 2*σw*randn(N)
     wT = slice(w,:,1)
     xT = slice(xp,:,1)
-    fill!(wT,log(1/N))
+    fill!(wT,-log(N))
     g(y[1],xT, wT)
     wT -= logsumexp(wT)
     j = Array(Int64,N)
@@ -20,7 +20,7 @@ function pf_bootstrap!(xp,w, y, N, g_density, f)
         if t%2 == 0
             resample_systematic!(j,wT1,N)
             f(xT,xT1[j],t-1)
-            fill!(wT,log(1/N))
+            fill!(wT,-log(N))
         else # Resample not needed
             f(xT,xT1,t-1)
             copy!(wT,wT1)
@@ -44,7 +44,7 @@ function pf_bootstrap(y, N, g_density, f)
     xp[:,1] = 2*σw*randn(N)
     wT = slice(w,:,1)
     xT = slice(xp,:,1)
-    fill!(wT,log(1/N))
+    fill!(wT,-log(N))
     g(y[1],xT, wT)
     wT -= logsumexp(wT)
     j = Array(Int64,N)
@@ -58,7 +58,7 @@ function pf_bootstrap(y, N, g_density, f)
         if t%2 == 0
             resample_systematic!(j,wT1,N)
             f(xT,xT1[j],t-1)
-            fill!(wT,log(1/N))
+            fill!(wT,-log(N))
         else # Resample not needed
             f(xT,xT1,t-1)
             copy!(wT,wT1)
@@ -83,7 +83,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
     xp[:,1] = 2*σw*randn(N)
     wT = slice(w,:,1)
     xT = slice(xp,:,1)
-    fill!(wT,log(1/N))
+    fill!(wT,-log(N))
     g(y[1],xT, wT)
     wnn[:,t] = wT
     wT -= logsumexp(wT)
@@ -98,7 +98,7 @@ function pf_bootstrap_nn(xp, w, wnn, y, N, g_density, f)
         if t%2 == 0
             resample_systematic!(j,wT1,N)
             f(xT,xT1[j],t-1)
-            fill!(wT,log(1/N))
+            fill!(wT,-log(N))
         else # Resample not needed
             f(xT,xT1,t-1)
             copy!(wT,wT1)
@@ -177,7 +177,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
     T = length(y)
     wT = slice(w,:,1)
     xT = slice(xp,:,1)
-    fill!(wT,log(1/N))
+    fill!(wT,-log(N))
     g(y[1],xT, wT)
     wT -= logsumexp(wT)
     j = Array(Int64,N)
@@ -191,7 +191,7 @@ function pf_CSMC!(xp,w, y, N, g_density, f, xc )
         if t%2 == 0
             resample_systematic!(j,wT1,N)
             f(xT,xT1[j],t-1)
-            fill!(wT,log(1/N))
+            fill!(wT,-log(N))
         else # Resample not needed
             f(xT,xT1,t-1)
             copy!(wT,wT1)
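The eight hunks above all make the same one-line change: the uniform initial log-weight is written `-log(N)` instead of the equivalent `log(1/N)`, which avoids forming the intermediate `1/N`. A minimal sketch of the log-weight convention these filters use, in the same pre-0.5 Julia dialect as the package (`logsumexp` here is a stand-in for the package's own implementation):

```julia
# Uniform initial log-weights, then a measurement update and renormalization,
# mirroring the fill!/g/logsumexp sequence in pf_bootstrap! above.
logsumexp(w) = (m = maximum(w); m + log(sum(exp(w - m))))

N = 4
w = fill(-log(N), N)   # == log(1/N) for every particle
w += randn(N)          # stand-in for the log-likelihoods added by g(y, x, w)
w -= logsumexp(w)      # normalize so that sum(exp(w)) == 1
```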
diff --git a/src/rbfmodels/stochastic_gradient_descent.jl b/src/rbfmodels/stochastic_gradient_descent.jl
new file mode 100644
index 0000000000000000000000000000000000000000..deff37f8ada2aefb0e06e5d1927189138caef17e
--- /dev/null
+++ b/src/rbfmodels/stochastic_gradient_descent.jl
@@ -0,0 +1,53 @@
+function stochastic_gradient_descent(f, g, x, N, n_state; lambda = 0.1, maxIter = 10, tolG = 1e-7, tolX = 1e-10, show_trace=true, timeout = 1000, batch_size=10)
+
+    x0          = copy(x) # Keep the initial point for the results object
+    converged   = false
+    x_converged = false
+    g_converged = false
+    iterCt      = 0
+
+    f_calls = 0
+    g_calls = 0
+
+    fcur = f(x)
+    f_calls += 1
+    residual = rms(fcur)
+    lambdai = lambda
+    t0 = time()
+
+    # Maintain a trace of the optimization
+    tr = Optim.OptimizationTrace()
+    if show_trace
+        os = Optim.OptimizationState(0, residual, NaN)
+        push!(tr, os)
+        println("Iter:0, f:$(round(os.value,5)), ||g||:$(0)")
+    end
+
+    for t = 1:maxIter
+        iterCt = t
+        for i = 1:batch_size:N # One pass over the data in mini-batches
+            grad = g(x,i:min(i+batch_size-1,N))
+            g_calls += 1
+            x -= lambdai*grad
+            x = saturatePrecision(x,n_state)
+        end
+        grad = g(x,1:N)
+        lambdai = lambda / (1+t) # Decaying step size
+        fcur = f(x)
+        f_calls += 1
+        residual = rms(fcur)
+
+        if show_trace && (t < 5 || t % 5 == 0)
+            gradnorm = norm(grad)
+            os = Optim.OptimizationState(t, residual, gradnorm)
+            push!(tr, os)
+            println("Iter:$t, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5))")
+        end
+
+        if time()-t0 > timeout
+            display("stochastic_gradient_descent: timeout $(timeout)s reached ($(time()-t0)s)")
+            break
+        end
+    end
+    Optim.MultivariateOptimizationResults("stochastic_gradient_descent", x0, x, residual, iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
+end
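`stochastic_gradient_descent` expects `f(x)` to return the current residual vector and `g(x, range)` to return a gradient estimate over the given index range, mirroring the commented-out call site in `trainRBF_ARX` further down. A hypothetical toy least-squares problem wired to that interface; `rms` and `saturatePrecision` are stubbed here only to make the sketch self-contained (the package provides the real ones), and the package's `Optim` dependency is assumed to be loaded:

```julia
rms(e) = sqrt(sum(e.^2)/length(e))   # stub; the package defines its own
saturatePrecision(x, n_state) = x    # stub; the real one clamps the precision entries

X = randn(100, 3)
xtrue = [1.0, -2.0, 0.5]
y = X*xtrue + 0.01*randn(100)

f(x) = y - X*x                                           # residual vector
g(x, r) = vec(-2*X[r,:]'*(y[r] - X[r,:]*x))/length(r)    # mini-batch gradient

res = stochastic_gradient_descent(f, g, zeros(3), 100, 3,
                                  lambda = 0.01, maxIter = 20, batch_size = 10)
```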
diff --git a/src/rbfmodels/trainRBF_ARX.jl b/src/rbfmodels/trainRBF_ARX.jl
index 9021c1d3be3c3454f8bee359337bc98621b34d56..02f7a193e29e3bf9eaa44d5b2428cf86c2804ea7 100644
--- a/src/rbfmodels/trainRBF_ARX.jl
+++ b/src/rbfmodels/trainRBF_ARX.jl
@@ -60,8 +60,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     else
         Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
     end
-    @show Znl
-    @show size(state)
     getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
     @ddshow sum(!isfinite(Ψ))
     getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points); debug("Got linear regressor V")
@@ -73,7 +71,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     error = y - prediction
     errors = zeros(liniters+1)
     Lb,Ub = getbounds(Znl, state, n_state, n_centers)
-# 5hz går på 5:09min, 10hz på 7:10min och 40hz 16
 
     # ============= Main loop ================================================
     debug("Calculating initial error")
@@ -84,6 +81,11 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         w = fitlinear(V,y)
         return outputnet ? jacobian_outputnet(znl,Ψ, w, V, state) : jacobian_no_outputnet(znl,Ψ, w, V, state)
     end
+    function grad(z,range)
+        znl = RbfNonlinearParameters(z,n_state,n_centers)
+        w = fitlinear(V,y)
+        return outputnet ? gradient_outputnet(znl,Ψ, V, state,A,y,w, range) : gradient_no_outputnet(znl,Ψ, V, state,A,y,w, range)
+    end
     f(z) = predictionerror(z)
     X0 = deepcopy(Znl.x)
     for i = 1:liniters
@@ -95,6 +97,14 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
                             show_trace=true,
                             timeout = timeout,
                             n_state = n_state)
+        # @time res = stochastic_gradient_descent(f, grad, X0, n_points, n_state,
+        #                     lambda = 0.001,
+        #                     maxIter = 5,
+        #                     tolG = 1e-7,
+        #                     tolX = 1e-10,
+        #                     show_trace=true,
+        #                     timeout = 1000,
+        #                     batch_size=50)
         X0 = deepcopy(res.minimum)
         DEBUG && assert(X0 == res.minimum)
         DEBUG && @show ff1 = rms(f(X0))
@@ -189,17 +199,17 @@ function trainRBF(y, state, nc; liniters=3,nonliniters=50, normalized=false, ini
     # Znl.x += 0.1*randn(size(Znl.x))
     @ddshow Znl
     getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
-#     matshow(deepcopy(Ψ));axis("tight"); colorbar()
-#     plotparams(Znl)
+    # matshow(deepcopy(Ψ));axis("tight"); colorbar()
+    # plotparams(Znl)
     @ddshow sum(!isfinite(Ψ))
 
     w = fitlinear(Ψ,y); debug("fitlinear")
-#     newplot(w,"o"); title("Linear parameters")
+    # newplot(w,"o"); title("Linear parameters")
     @ddshow sum(!isfinite(Zl))
     prediction = Ψ*w
     error = y - prediction
-#     newplot([y prediction error]);title("inbetween")
+    # newplot([y prediction error]);title("inbetween")
     errors = zeros(liniters+1)
     Lb,Ub = getbounds(Znl, state, n_state, n_centers)
 
@@ -375,8 +385,8 @@ end
 
 
 
-function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool)
-    Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
+function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool, range = 1:1)
+    Ψ = normalized ? getΨnormalized(Ψ, Znl, state, n_points, n_state) : getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range)
     if DEBUG && sum(!isfinite(Ψ)) > 0
         @show sum(!isfinite(Ψ))
     end
@@ -407,11 +417,11 @@ function getΨnormalized(Ψ, Znl, state, n_points, n_state)
     return Ψ
 end
 
-function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
-    # RBF(x, Z,n_state) = exp(-(x-Z[1:n_state]).^2)[1]
+function getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range = 1:1)
+    range = (range == 1:1) ? (1:size(state,1)) : range
     RBF(x, Z,n_state) = exp(-(sum(((x-Z[1:n_state]).^2).*Z[n_state+1:end])))
     for (j,Zi) in enumerate(Znl)
-        for i = 1:n_points
+        for i = range
             statei = state[i,:]'
             # statei = slice(state,i,:)
             Ψ[i,j] = RBF(statei, Zi, n_state)
@@ -427,15 +437,16 @@ function getΨnonnormalized(Ψ, Znl, state, n_points, n_state, range = 1:1)
     return Ψ
 end
 
-function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points)
-    outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points, range = 1:1)
+    outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range)
 end
 
-function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
+    range = (range == 1:1) ? (1:size(state,1)) : range
     ii = 1
     for i = 1:na
         for k = 1:n_centers+1
-            for l = 1:n_points
+            for l = range
                 V[l,ii] = Ψ[l,k]*A[l,i]
             end
             ii = ii+1
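A small idiomatic note on the `range = 1:1` sentinel introduced above: it cannot distinguish "use everything" from a genuine request for row 1 only. Julia default arguments may refer to parameters declared before them, so the full range can itself be the default. A hypothetical simplified variant (`fill_regressor!` is not a function in this package; the inner kernel is the one from `getLinearRegressor_no_outputnet`):

```julia
# Default range derived from V itself; no sentinel, no ternary rebinding.
function fill_regressor!(V, Ψ, A, na, n_centers, range = 1:size(V,1))
    ii = 1
    for i = 1:na, k = 1:n_centers+1
        for l = range
            V[l,ii] = Ψ[l,k]*A[l,i]
        end
        ii += 1
    end
    return V
end
```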
@@ -447,12 +458,13 @@ function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_poi
     return V
 end
 
-function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points, range = 1:1)
+    range = (range == 1:1) ? (1:size(state,1)) : range
     ii = 1
     for i = 1:na
         for k = 1:n_centers+1
             for j = 1:n_state
-                for l = 1:n_points
+                for l = range
                     V[l,ii] = Ψ[l,k]*A[l,i]*state[l,j]
                 end
                 ii = ii+1
@@ -490,13 +502,18 @@ function jacobian_outputnet(Znl, Ψ, w, V, state)
     n_points = size(Ψ,1)
     n_state = Znl.n_state
     n_centers = Znl.n_centers
-    J = Array(Float64,(n_points,length(Znl.x)))
+
+    n_batch = min(100, n_points) # Guard against data sets smaller than the batch
+    points = randperm(n_points)[1:n_batch]
+
+
+    J = Array(Float64,(n_batch,length(Znl.x)))
     ii = 1
     for (k,Zi) in enumerate(Znl)
         μ = Zi[1:n_state] # slice?
         γ = Zi[n_state+1:end]
         i1 = ii-1
-        for l = 1:n_points
+        for (li,l) = enumerate(points) # li indexes the J row, l the data point
             Ψw = 1.0
             for i = 1:na
                 for j = 1:n_state
@@ -506,8 +523,8 @@ end
             end
             for p = 1:n_state
                 x_μ = state[l,p]-μ[p]
-                J[l,i1+p] = 2*Ψw*x_μ*γ[p]
-                J[l,i1+n_state+p] = (-Ψw)*x_μ^2
+                J[li,i1+p] = 2*Ψw*x_μ*γ[p]
+                J[li,i1+n_state+p] = (-Ψw)*x_μ^2
             end
         end
         ii += 2n_state
@@ -515,7 +532,7 @@
     return J
 end
 
-function jacobian_no_outputnet(Znl, Ψ, w,v, state)
+function jacobian_no_outputnet(Znl, Ψ, w,V, state)
     n_points = size(Ψ,1)
     n_state = Znl.n_state
     n_centers = Znl.n_centers
@@ -542,6 +559,76 @@
     return J
 end
 
+
+function gradient_outputnet(Znl, Ψ, V, state, A,y,w, range)
+    n_points = size(Ψ,1)
+    n_state = Znl.n_state
+    n_centers = Znl.n_centers
+    na = size(A,2)
+
+    # znl = RbfNonlinearParameters(z,n_state,n_centers)
+    # psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
+    # getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
+    # w = fitlinear(V,y)
+
+    J = zeros(length(Znl.x))
+    ii = 1
+    for (k,Zi) in enumerate(Znl)
+        μ = Zi[1:n_state] # slice?
+        γ = Zi[n_state+1:end]
+        i1 = ii-1
+        for l = range
+            Ψw = 1.0
+            for i = 1:na
+                for j = 1:n_state
+                    ind = j + n_state*(k-1) + n_state*(n_centers+1)*(i-1)
+                    Ψw += V[l,ind]*w[ind]
+                end
+            end
+            for p = 1:n_state
+                x_μ = state[l,p]-μ[p]
+                J[i1+p] += 2*Ψw*x_μ*γ[p]
+                J[i1+n_state+p] += (-Ψw)*x_μ^2
+            end
+        end
+        ii += 2n_state
+    end
+    J /= length(range)
+    return J
+end
+
+function gradient_no_outputnet(Znl, Ψ, V, state, A,y,w, range)
+    n_points = size(Ψ,1)
+    n_state = Znl.n_state
+    n_centers = Znl.n_centers
+    na = size(A,2)
+    # psi = getΨ(Ψ, Znl, state, n_points, n_state, false, range)
+    # getLinearRegressor(V,psi,A,state,true,na,n_state,n_centers,n_points, range)
+    # w = fitlinear(V,y)
+    J = zeros(length(Znl.x))
+    ii = 1
+    for (k,Zi) in enumerate(Znl)
+        μ = Zi[1:n_state] # slice?
+        γ = Zi[n_state+1:end]
+        i1 = ii-1
+        for l = range
+            Ψw = 1.0
+            for i = 1:na
+                ind = k + (n_centers+1)*(i-1)
+                Ψw += V[l,ind]*w[ind]
+            end
+            for p = 1:n_state
+                x_μ = state[l,p]-μ[p]
+                J[i1+p] += 2*Ψw*x_μ*γ[p]
+                J[i1+n_state+p] += (-Ψw)*x_μ^2
+            end
+        end
+        ii += 2n_state
+    end
+    J /= length(range)
+    return J
+end
+
 function jacobian(Znl, Ψ, w, state)
     n_points = size(Ψ,1)
     n_state = Znl.n_state
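Since the analytic mini-batch gradients above are new and their only caller is still commented out, a cheap way to gain confidence in them is a central finite-difference probe. A hypothetical check against the `f` and `grad` closures built inside `trainRBF_ARX`, in the same Julia dialect as the package; the assumed scalar loss is the mean squared residual with the same `1/length(range)` normalization the gradients apply, which is an assumption and not confirmed by the source:

```julia
# Compare grad(z, range) against a finite difference of the assumed loss.
# A result near zero supports the analytic expressions; a large value flags
# a mismatch in either the math or the normalization.
function check_gradient(f, grad, z, n_points; h = 1e-6)
    range = 1:n_points                         # full batch, comparable to f
    loss(z) = sum(abs2(f(z)))/(2*length(range))
    g_ana = grad(z, range)
    g_fd = zeros(length(z))
    for i = 1:length(z)
        e = zeros(length(z)); e[i] = h
        g_fd[i] = (loss(z + e) - loss(z - e))/(2h)
    end
    maximum(abs(g_ana - g_fd))
end
```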