diff --git a/src/SystemIdentification.jl b/src/SystemIdentification.jl
index bb109dc2a3e1668cf96bebb8deca18202ad29e2c..444d1ec8e831481a61408354ffbd7abfb17eedc4 100644
--- a/src/SystemIdentification.jl
+++ b/src/SystemIdentification.jl
@@ -13,7 +13,7 @@ FitResult, IdData,
 # Functions
 ar,arx,getARregressor,getARXregressor,find_na,
-toeplitz, kalman, kalman_smoother
+toeplitz, kalman, kalman_smoother, forward_kalman
 
 ## Fit Methods =================
 :LS
diff --git a/src/armax.jl b/src/armax.jl
index 8ce9e24e156550e615dbc37ad823acca67a3d065..255572a693da881339170a3f5e26a9e84ab7eccd 100644
--- a/src/armax.jl
+++ b/src/armax.jl
@@ -105,3 +105,15 @@ function find_na(y::Vector{Float64},n::Int)
     plotsub(error,"-o")
     show()
 end
+
+import PyPlot
+function PyPlot.plot(y,m::AR)
+    na = length(m.a)
+    y,A = getARregressor(y,na)
+    yh = A*m.a
+    error = y-yh
+    newplot(y,"k")
+    plot(yh,"b")
+    plot(error,"r")
+    title("Fit result, AR, n_a: $na, RMSE = $(rms(error)) Fit = $(fit(y,yh))")
+end
diff --git a/src/cuckooSearch.jl b/src/cuckooSearch.jl
index 23751b5adc08a385f9ac62a9e33a3838e8c841eb..858249d2cab7ce4796e24b9367fac41c5bc23d54 100644
--- a/src/cuckooSearch.jl
+++ b/src/cuckooSearch.jl
@@ -15,7 +15,7 @@ Based on implementation by
 }
 http://www.mathworks.com/matlabcentral/fileexchange/29809-cuckoo-search--cs--algorithm
 """
-function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e5, timeout = Inf)
+function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e3, timeout = Inf)
     X00 = deepcopy(X0)
     nd=size(X0,1);
     X0t = X0'
diff --git a/src/kalman.jl b/src/kalman.jl
index e2bca2f1126f3e07544dc4e290028362da1b712a..b78b88b395605d98ba3fb1e2a5b36088c98917fc 100644
--- a/src/kalman.jl
+++ b/src/kalman.jl
@@ -24,35 +24,37 @@ function forward_kalman(y,A,R1,R2, P0)
     xk = xkk
     Pk = Pkk
     i = 1
+    Hk = A[i,:]
     e = y[i]-Hk*xk[:,i]
     S = Hk*Pk[:,:,i]*Hk' + R2
-    K = (Pk[:,:,i]*H')/S
+    K = (Pk[:,:,i]*Hk')/S
     xkk[:,i] = xk[:,i] + K*e
     Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
     for i = 2:N
         Fk = 1
-        Hk = A[i,:]'
+        Hk = A[i,:]
         xk[:,i] = Fk*xkk[:,i-1]
         Pk[:,:,i] = Fk*Pkk[:,:,i-1]*Fk' + R1
         e = y[i]-Hk*xk[:,i]
         S = Hk*Pk[:,:,i]*Hk' + R2
-        K = (Pk[:,:,i]*H')/S
+        K = (Pk[:,:,i]*Hk')/S
         xkk[:,i] = xk[:,i] + K*e
         Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
     end
     return xkk,xk,Pkk,Pk
 end
 
-"""A kalman parameter smoother"""
+using Debug
+
+# """A kalman parameter smoother"""
 function kalman_smoother(y, A, R1, R2, P0)
     na = size(R1,1)
     N = length(y)
     xkk,xk,Pkk,Pk = forward_kalman(y,A,R1,R2, P0)
     xkn = zeros(xkk)
-    Pkn = zeros(P)
+    Pkn = zeros(Pkk)
     for i = N-1:-1:1
-        Ai = A[i,:]
         Fk = 1
         Hk = A[i,:]'
         C = Pkk[:,:,i]/Pk[:,:,i+1]
@@ -60,6 +62,5 @@ function kalman_smoother(y, A, R1, R2, P0)
         Pkn[:,:,i] = Pkk[:,:,i] + C*(Pkn[:,:,i+1] - Pk[:,:,i+1])*C'
     end
-    return xkn, Pkn
-
+    return xkn
 end
diff --git a/src/rbfmodels/levenberg_marquardt.jl b/src/rbfmodels/levenberg_marquardt.jl
new file mode 100644
index 0000000000000000000000000000000000000000..030d5cbe3415d11b6d9768ad4a6685f613d86ff9
--- /dev/null
+++ b/src/rbfmodels/levenberg_marquardt.jl
@@ -0,0 +1,133 @@
+sse(x) = (x'x)[1] # sum of squared errors (squared L2 norm) of x
+using Optim
+"""
+`levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf)`\n
+# finds argmin sum(f(x).^2) using the Levenberg-Marquardt algorithm
+#           x
+# The function f should take an input vector of length n and return an output vector of length m
+# The function g is the Jacobian of f, and should be an m x n matrix
+# x0 is an initial guess for the solution
+# fargs is a tuple of additional arguments to pass to f
+# available options:
+#   tolX - search tolerance in x
+#   tolG - search tolerance in gradient
+#   maxIter - maximum number of iterations
+#   lambda - (inverse of) initial trust region radius
+#   show_trace - print a status summary on each iteration if true
+# returns: x, J
+#   x - least squares solution for x
+#   J - estimate of the Jacobian of f at x
+"""
+function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf, n_state = 1)
+
+
+    # other constants
+    const MAX_LAMBDA = 1e16 # minimum trust region radius
+    const MIN_LAMBDA = 1e-16 # maximum trust region radius
+    const MIN_STEP_QUALITY = 1e-4
+    const MIN_DIAGONAL = 1e-6 # lower bound on values of diagonal matrix used to regularize the trust region step
+
+    converged = false
+    x_converged = false
+    g_converged = false
+    need_jacobian = true
+    iterCt = 0
+    x = x0
+    delta_x = deepcopy(x0)
+    f_calls = 0
+    g_calls = 0
+    t0 = time()
+
+    fcur = f(x)
+    f_calls += 1
+    residual = sse(fcur)
+
+    # Maintain a trace of the system.
+    tr = Optim.OptimizationTrace()
+    if show_trace
+        d = {"lambda" => lambda}
+        os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
+        push!(tr, os)
+        println("Iter:0, f:$(round(os.value,5)), ||g||:$(0), λ:$(round(lambda,5))")
+    end
+
+    while ( ~converged && iterCt < maxIter )
+        if need_jacobian
+            J = g(x)
+            g_calls += 1
+            need_jacobian = false
+        end
+        # we want to solve:
+        #    argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2
+        # Solving for the minimum gives:
+        #    (J'*J + lambda*DtD) * delta_x == -J^T * f(x), where DtD = diagm(sum(J.^2,1))
+        # Where we have used the equivalence: diagm(J'*J) = diagm(sum(J.^2, 1))
+        # It is additionally useful to bound the elements of DtD below to help
+        # prevent "parameter evaporation".
+        DtD = diagm(Float64[max(x, MIN_DIAGONAL) for x in sum(J.^2,1)])
+        delta_x = ( J'*J + sqrt(lambda)*DtD ) \ ( -J'*fcur )
+        # if the linear assumption is valid, our new residual should be:
+        predicted_residual = sse(J*delta_x + fcur)
+        # check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller
+        # than the current residual
+        if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual))
+            warn("""Problem solving for delta_x: predicted residual increase.
+                 $predicted_residual (predicted_residual) >
+                 $residual (residual)
+                 $(eps(predicted_residual)) (eps)""")
+        end
+        # try the step and compute its quality
+        trial_x = saturatePrecision(x + delta_x,n_state)
+        trial_f = f(trial_x)
+        f_calls += 1
+        trial_residual = sse(trial_f)
+        # step quality = residual change / predicted residual change
+        rho = (trial_residual - residual) / (predicted_residual - residual)
+
+        if rho > MIN_STEP_QUALITY
+            x = trial_x
+            fcur = trial_f
+            residual = trial_residual
+            # increase trust region radius
+            lambda = max(0.1*lambda, MIN_LAMBDA)
+            need_jacobian = true
+        else
+            # decrease trust region radius
+            lambda = min(10*lambda, MAX_LAMBDA)
+        end
+        iterCt += 1
+
+        # show state
+        if show_trace && (iterCt < 20 || iterCt % 10 == 0)
+            gradnorm = norm(J'*fcur, Inf)
+            d = {"lambda" => lambda}
+            os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
+            push!(tr, os)
+            println("Iter:$iterCt, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5)), λ:$(round(lambda,5))")
+        end
+
+        # check convergence criteria:
+        # 1. Small gradient: norm(J^T * fcur, Inf) < tolG
+        # 2. Small step size: norm(delta_x) < tolX
+        if norm(J' * fcur, Inf) < tolG
+            @show g_converged = true
+        elseif norm(delta_x) < tolX*(tolX + norm(x))
+            @show x_converged = true
+        end
+        converged = g_converged | x_converged
+        if time()-t0 > timeout
+            display("levenberg_marquardt: timeout $(timeout)s reached ($(time()-t0)s)")
+            break
+        end
+    end
+    Optim.MultivariateOptimizationResults("Levenberg-Marquardt", x0, x, rms(fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
+end
+
+
+function saturatePrecision(x,n_state)
+    for i = 1:2n_state:length(x)
+        range = i+n_state:i+2n_state-1
+        x[range] = abs(x[range])
+    end
+    return x
+end
diff --git a/src/rbfmodels/trainRBF_ARX.jl b/src/rbfmodels/trainRBF_ARX.jl
index 429c2e800ec021dece8b4ddb22ac36c67ea0164f..b2db6efa7c0e40cb7c1ebef4678e8a4c409acb03 100644
--- a/src/rbfmodels/trainRBF_ARX.jl
+++ b/src/rbfmodels/trainRBF_ARX.jl
@@ -28,31 +28,10 @@ The number of centers is equal to `nc` if Kmeans is used to get initial centers,
 `n_state` is equal to the state dimension, possibly reduced to `inputpca` if so desired.\n
 The number of nonlinear parameters is `n_centers × n_state`\n
 The number of linear parameters is `outputnet ? n_state × (n_centers+1) × (na)+1) : (na)×(n_centers+1)+1)`"""
-function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false)
+function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false, cuckooiter=100)
     n_points = length(y)
     na = isa(A,Matrix) ? size(A,2) : 1
 
-    function getcentersKmeans(state, nc)
-        iters = 21
-        errorvec = zeros(iters)
-        params = Array(Float64,(nc*2*n_state,iters))
-        methods = [:rand;:kmpp]
-        for iter = 1:iters
-            clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
-            for i = 1:nc
-                si = 1+(i-1)n_state*2
-                params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
-                C = cov(state[clusterresult.assignments .== i,:])
-                params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
-                @assert !any(diag(inv(C)) .< 0)
-            end
-            errorvec[iter] = rms(predictionerror(params[:,iter]))
-        end
-        println("Std in errors among initial centers: ", round(std(errorvec),3))
-        ind = indmin(errorvec)
-        return RbfNonlinearParameters(params[:,ind],n_state, nc)
-    end
-
     function saturatePrecision(x,n_state)
         for i = 1:2n_state:length(x)
@@ -62,16 +41,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         return x
     end
 
-    function plotcenters(Z)
-        X = zeros(Z.n_centers,2)
-        for (i,Zi) in enumerate(Z)
-            X[i,:] = Zi[1:2]'
-        end
-        newplot(X[:,1],X[:,2],"o"); title("Centers")
-    end
-
-
-
     function fitlinear(V)
         try
             DEBUG && assert(isa(V,Matrix))
@@ -119,14 +88,12 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         return J
     end
 
-
-
     function predictionerror(z)
         znl = RbfNonlinearParameters(z,n_state,n_centers)
-        psi = getΨ(deepcopy(Ψ), znl, state, n_points, n_state, normalized)
-        v = getLinearRegressor(deepcopy(V),psi,A,state,outputnet,na,n_state,n_centers,n_points)
-        zl = fitlinear(v);
-        prediction = v*zl
+        psi = getΨ(Ψ, znl, state, n_points, n_state, normalized)
+        getLinearRegressor(V,psi,A,state,outputnet,na,n_state,n_centers,n_points)
+        zl = fitlinear(V);
+        prediction = V*zl
         error = prediction-y
         return error
     end
@@ -153,7 +120,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         n_centers = nc
     end
-
     Ψ = Array(Float64,(n_points,n_centers+1))
     Ψ[:,end] = 1.0
     V = outputnet ? V = Array(Float64,(n_points, n_state* (n_centers+1)* (na)+1)) : V = Array(Float64,(n_points, (na)*(n_centers+1)+1))
@@ -161,7 +127,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     if initialcenters == :equidistant
         Znl = getcentersEq(state,nc); debug("gotcentersEq")
     else
-        Znl = getcentersKmeans(state,nc); debug("gotcentersKmeans")
+        Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
     end
     @ddshow Znl
     getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
@@ -174,19 +140,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     prediction = V*Zl
     error = y - prediction
    errors = zeros(liniters+1)
-
-    Lb = zeros(Znl.x)
-    Ub = zeros(Znl.x)
-    mas = maximum(state,1)'
-    mis = minimum(state,1)'
-    for i = 1:2n_state:n_centers*2n_state
-        Lb[i:i+n_state-1] = mis
-        Ub[i:i+n_state-1] = mas
-        Lb[i+n_state:i+2n_state-1] = 0.000001
-        Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
-    end
-    @show Lb
-    @show Ub
+    Lb,Ub = getbounds(Znl, state, n_state, n_centers)
 
 # ============= Main loop ================================================
     debug("Calculating initial error")
@@ -209,14 +163,16 @@
                                             timeout = 60,
                                             n_state = n_state)
         X0 = deepcopy(res.minimum)
-        assert(X0 == res.minimum)
-        @show rms(f(X0))
+        DEBUG && assert(X0 == res.minimum)
+        DEBUG && @show ff1 = rms(f(X0))
         if DEBUG
             _V = deepcopy(V)
             _Ψ = deepcopy(Ψ)
         end
-        @show rms(f(res.minimum))
+        DEBUG && @show ff2 = rms(f(res.minimum))
+        if DEBUG
+            @assert ff1 == ff2
         @show res.minimum == X0
         @show _V == V
         @show _Ψ == Ψ
@@ -232,7 +188,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
                                             n=50,
                                             pa=0.25,
                                             Tol=1.0e-5,
-                                            max_iter = i < liniters-1 ? 80 : 200,
+                                            max_iter = i < liniters-1 ? cuckooiter : 2cuckooiter,
                                             timeout = 120)
         debug("cuckoo_search done")
         X0 = deepcopy(bestnest)
@@ -389,3 +345,48 @@ function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points
     end
     return V
 end
+
+
+function plotcenters(Z)
+    X = zeros(Z.n_centers,2)
+    for (i,Zi) in enumerate(Z)
+        X[i,:] = Zi[1:2]'
+    end
+    newplot(X[:,1],X[:,2],"o"); title("Centers")
+end
+
+function getbounds(Znl, state, n_state, n_centers)
+    Lb = zeros(Znl.x)
+    Ub = zeros(Znl.x)
+    mas = maximum(state,1)'
+    mis = minimum(state,1)'
+    for i = 1:2n_state:n_centers*2n_state
+        Lb[i:i+n_state-1] = mis
+        Ub[i:i+n_state-1] = mas
+        Lb[i+n_state:i+2n_state-1] = 0.000001
+        Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
+    end
+    return Lb,Ub
+end
+
+
+function getcentersKmeans(state, nc::Int, f::Function, n_state::Int)
+    iters = 21
+    errorvec = zeros(iters)
+    params = Array(Float64,(nc*2*n_state,iters))
+    methods = [:rand;:kmpp]
+    for iter = 1:iters
+        clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
+        for i = 1:nc
+            si = 1+(i-1)n_state*2
+            params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
+            C = cov(state[clusterresult.assignments .== i,:])
+            params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
+            @assert !any(diag(inv(C)) .< 0)
+        end
+        errorvec[iter] = rms(f(params[:,iter]))
+    end
+    println("Std in errors among initial centers: ", round(std(errorvec),3))
+    ind = indmin(errorvec)
+    return RbfNonlinearParameters(params[:,ind],n_state, nc)
+end
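
Note (not part of the patch): a minimal usage sketch of the newly exported forward_kalman/kalman_smoother pair for tracking slowly drifting AR coefficients. Everything below is an assumption-labelled illustration rather than package documentation: the synthetic data and the choices of R1 (parameter random-walk covariance), R2 (measurement-noise variance) and P0 (initial parameter covariance) are made up, and the only signatures relied on are getARregressor(y, na) and kalman_smoother(y, A, R1, R2, P0) as they appear in the diff above.

    # Hypothetical example, not included in this patch
    using SystemIdentification

    N = 500
    y = zeros(N)
    for t = 3:N                              # AR(2) process with slowly drifting dynamics
        a1 = 1.5 + 0.1*sin(2pi*t/N)
        y[t] = a1*y[t-1] - 0.7*y[t-2] + 0.1*randn()
    end

    yr, A = getARregressor(y, 2)             # regressor matrix, one row per sample
    R1 = 1e-4*eye(2)                         # assumed parameter drift (state-noise) covariance
    R2 = 0.1^2                               # assumed measurement-noise variance
    P0 = 100*eye(2)                          # assumed vague initial parameter covariance

    xkn = kalman_smoother(yr, A, R1, R2, P0) # smoothed parameter trajectories, one column per sample

The reason for smoothing rather than filtering here is that each parameter estimate then uses both past and future data, which is exactly what the backward pass in kalman_smoother adds on top of forward_kalman.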