Commit b0c08636 authored by Fredrik Bagge Carlson's avatar Fredrik Bagge Carlson

Extracted many methods from train_RBF. Moving towards several different RBF-network implementations
parent 55740f37
......@@ -13,7 +13,7 @@ FitResult,
IdData,
# Functions
ar,arx,getARregressor,getARXregressor,find_na,
-toeplitz, kalman, kalman_smoother
+toeplitz, kalman, kalman_smoother, forward_kalman
## Fit Methods =================
:LS
......
......@@ -105,3 +105,15 @@ function find_na(y::Vector{Float64},n::Int)
plotsub(error,"-o")
show()
end
import PyPlot
function PyPlot.plot(y,m::AR)
na = length(m.a)
y,A = getARregressor(y,na)
yh = A*m.a
error = y-yh
newplot(y,"k")
plot(yh,"b")
plot(error,"r")
title("Fitresult, AR, n_a: $na, RMSE = $(rms(error)) Fit = $(fit(y,yh))")
end
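A usage sketch (hypothetical call: it assumes `ar` fits an AR model of order `na` and returns an `AR` object carrying the coefficient vector `m.a` that the method above uses):
m = ar(y, 4)  # hypothetical: fit an AR(4) model to the signal y
plot(y, m)    # dispatches to the method above; plots y, the prediction, and the residual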
......@@ -15,7 +15,7 @@ Based on implementation by
}
http://www.mathworks.com/matlabcentral/fileexchange/29809-cuckoo-search--cs--algorithm
"""
-function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e5, timeout = Inf)
+function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e3, timeout = Inf)
X00 = deepcopy(X0)
nd=size(X0,1);
X0t = X0'
......
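A minimal usage sketch of `cuckoo_search`, assuming `f` maps a parameter vector to a scalar cost and that the search returns the best nest together with its cost, as in the referenced MATLAB implementation (the call in `trainRBF_ARX` below only shows that `bestnest` is among the outputs):
# Hypothetical example: minimize the Rosenbrock function inside the box [-5,5]^2
rosenbrock(x) = (1-x[1])^2 + 100(x[2]-x[1]^2)^2
X0 = [-1.0, 2.0]                 # initial nest
Lb = -5*ones(2); Ub = 5*ones(2)  # lower and upper bounds
bestnest, fmin = cuckoo_search(rosenbrock, X0, Lb, Ub; n=25, pa=0.25, max_iter=1e3)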
......@@ -24,35 +24,37 @@ function forward_kalman(y,A,R1,R2, P0)
xk = xkk
Pk = Pkk
i = 1
Hk = A[i,:]
e = y[i]-Hk*xk[:,i]
S = Hk*Pk[:,:,i]*Hk' + R2
-K = (Pk[:,:,i]*H')/S
+K = (Pk[:,:,i]*Hk')/S
xkk[:,i] = xk[:,i] + K*e
Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
for i = 2:N
Fk = 1
-Hk = A[i,:]'
+Hk = A[i,:]
xk[:,i] = Fk*xkk[:,i-1]
Pk[:,:,i] = Fk*Pkk[:,:,i-1]*Fk' + R1
e = y[i]-Hk*xk[:,i]
S = Hk*Pk[:,:,i]*Hk' + R2
-K = (Pk[:,:,i]*H')/S
+K = (Pk[:,:,i]*Hk')/S
xkk[:,i] = xk[:,i] + K*e
Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
end
return xkk,xk,Pkk,Pk
end
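A usage sketch of `forward_kalman` on synthetic data (hypothetical example: the filter tracks the parameter vector of the regression y = A*x + e, with random-walk covariance `R1`, scalar measurement-noise variance `R2` and initial parameter covariance `P0`):
N = 200
A = randn(N, 2)              # regressor matrix, one row per sample
xtrue = [1.0, -0.5]          # true parameters
y = A*xtrue + 0.1*randn(N)   # noisy measurements
R1 = 1e-4*eye(2)             # parameter drift covariance
R2 = 0.01                    # measurement noise variance
P0 = 100*eye(2)              # initial parameter covariance
xkk, xk, Pkk, Pk = forward_kalman(y, A, R1, R2, P0)  # filtered and one-step-ahead estimates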
"""A kalman parameter smoother"""
using Debug
# """A kalman parameter smoother"""
function kalman_smoother(y, A, R1, R2, P0)
na = size(R1,1)
N = length(y)
xkk,xk,Pkk,Pk = forward_kalman(y,A,R1,R2, P0)
xkn = zeros(xkk)
-Pkn = zeros(P)
+Pkn = zeros(Pkk)
for i = N-1:-1:1
Ai = A[i,:]
Fk = 1
Hk = A[i,:]'
C = Pkk[:,:,i]/Pk[:,:,i+1]
......@@ -60,6 +62,5 @@ function kalman_smoother(y, A, R1, R2, P0)
Pkn[:,:,i] = Pkk[:,:,i] + C*(Pkn[:,:,i+1] - Pk[:,:,i+1])*C'
end
return xkn, Pkn
-return xkn
end
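Continuing the sketch above, a backward pass refines the filtered estimates (a Rauch-Tung-Striebel-style smoother, as the gain `C = Pkk/Pk` indicates):
xkn, Pkn = kalman_smoother(y, A, R1, R2, P0)  # smoothed parameter trajectories and covariances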
sse(x) = (x'x)[1] # sum of squared errors, i.e. the squared L2 norm of x
using Optim
"""
`levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf)`\n
# finds argmin sum(f(x).^2) using the Levenberg-Marquardt algorithm
# x
# The function f should take an input vector of length n and return an output vector of length m
# The function g is the Jacobian of f, and should be an m x n matrix
# x0 is an initial guess for the solution
# fargs is a tuple of additional arguments to pass to f
# available options:
# tolX - search tolerance in x
# tolG - search tolerance in gradient
# maxIter - maximum number of iterations
# lambda - (inverse of) initial trust region radius
# show_trace - print a status summary on each iteration if true
# returns: x, J
# x - least squares solution for x
# J - estimate of the Jacobian of f at x
"""
function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf, n_state = 1)
# other constants
const MAX_LAMBDA = 1e16 # largest allowed lambda (smallest trust region radius)
const MIN_LAMBDA = 1e-16 # smallest allowed lambda (largest trust region radius)
const MIN_STEP_QUALITY = 1e-4
const MIN_DIAGONAL = 1e-6 # lower bound on values of diagonal matrix used to regularize the trust region step
converged = false
x_converged = false
g_converged = false
need_jacobian = true
iterCt = 0
x = x0
delta_x = deepcopy(x0)
f_calls = 0
g_calls = 0
t0 = time()
fcur = f(x)
f_calls += 1
residual = sse(fcur)
# Maintain a trace of the system.
tr = Optim.OptimizationTrace()
if show_trace
d = {"lambda" => lambda}
os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
push!(tr, os)
println("Iter:0, f:$(round(os.value,5)), ||g||:$(0)), λ:$(round(lambda,5))")
end
while ( ~converged && iterCt < maxIter )
if need_jacobian
J = g(x)
g_calls += 1
need_jacobian = false
end
# we want to solve:
#    argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2
# Solving for the minimum gives:
#    (J'*J + lambda*DtD) * delta_x == -J^T * f(x), where DtD = diagm(sum(J.^2,1))
# where we have used the equivalence diagm(J'*J) = diagm(sum(J.^2, 1)).
# (N.B. the solve below uses sqrt(lambda) as the damping factor.)
# It is additionally useful to bound the elements of DtD below to help
# prevent "parameter evaporation".
DtD = diagm(Float64[max(x, MIN_DIAGONAL) for x in sum(J.^2,1)])
delta_x = ( J'*J + sqrt(lambda)*DtD ) \ ( -J'*fcur )
# if the linear assumption is valid, our new residual should be:
predicted_residual = sse(J*delta_x + fcur)
# check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller
# than the current residual
if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual))
warn("""Problem solving for delta_x: predicted residual increase.
$predicted_residual (predicted_residual) >
$residual (residual) + $(eps(predicted_residual)) (eps)""")
end
# try the step and compute its quality
trial_x = saturatePrecision(x + delta_x,n_state)
trial_f = f(trial_x)
f_calls += 1
trial_residual = sse(trial_f)
# step quality = residual change / predicted residual change
rho = (trial_residual - residual) / (predicted_residual - residual)
if rho > MIN_STEP_QUALITY
x = trial_x
fcur = trial_f
residual = trial_residual
# increase trust region radius
lambda = max(0.1*lambda, MIN_LAMBDA)
need_jacobian = true
else
# decrease trust region radius
lambda = min(10*lambda, MAX_LAMBDA)
end
iterCt += 1
# show state
if show_trace && (iterCt < 20 || iterCt % 10 == 0)
gradnorm = norm(J'*fcur, Inf)
d = {"lambda" => lambda}
os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
push!(tr, os)
println("Iter:$iterCt, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5)), λ:$(round(lambda,5))")
end
# check convergence criteria:
# 1. Small gradient: norm(J^T * fcur, Inf) < tolG
# 2. Small step size: norm(delta_x) < tolX
if norm(J' * fcur, Inf) < tolG
@show g_converged = true
elseif norm(delta_x) < tolX*(tolX + norm(x))
@show x_converged = true
end
converged = g_converged | x_converged
if time()-t0 > timeout
display("levenberg_marquardt: timeout $(timeout)s reached ($(time()-t0)s)")
break
end
end
Optim.MultivariateOptimizationResults("Levenberg-Marquardt", x0, x, rms(fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
end
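A minimal usage sketch of `levenberg_marquardt` (hypothetical model: fitting a two-parameter exponential to noisy data with an analytic Jacobian; note that with the default `n_state = 1`, `saturatePrecision` below keeps every second parameter nonnegative, which happens to suit the decay rate here):
t = linspace(0, 1, 50)
ydata = 2.0*exp(-1.5*t) + 0.01*randn(50)
fres(p) = p[1]*exp(-p[2]*t) - ydata                  # residual vector of length 50
gjac(p) = hcat(exp(-p[2]*t), -p[1]*t.*exp(-p[2]*t))  # 50 × 2 Jacobian
res = levenberg_marquardt(fres, gjac, [1.0, 1.0]; maxIter=50)
res.minimum  # least-squares estimate of [amplitude; decay rate]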
function saturatePrecision(x,n_state)
for i = 1:2n_state:length(x)
range = i+n_state:i+2n_state-1
x[range] = abs(x[range])
end
return x
end
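`saturatePrecision` assumes the parameter layout used by the RBF training: for each center, `n_state` coordinates followed by `n_state` precisions. It mirrors each precision block into the nonnegative orthant (in place):
# Worked example, n_state = 2, one center: x = [c1, c2, lambda1, lambda2]
saturatePrecision([0.5, -0.3, -4.0, 2.0], 2)  # returns [0.5, -0.3, 4.0, 2.0]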
......@@ -28,31 +28,10 @@ The number of centers is equal to `nc` if Kmeans is used to get initial centers,
`n_state` is equal to the state dimension, possibly reduced to `inputpca` dimensions if so desired.\n
The number of nonlinear parameters is `2 × n_centers × n_state` (coordinates and precisions for each center)\n
The number of linear parameters is `outputnet ? n_state × (n_centers+1) × na + 1 : na × (n_centers+1) + 1`"""
-function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false)
+function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false, cuckooiter=100)
n_points = length(y)
na = isa(A,Matrix) ? size(A,2) : 1
-function getcentersKmeans(state, nc)
-iters = 21
-errorvec = zeros(iters)
-params = Array(Float64,(nc*2*n_state,iters))
-methods = [:rand;:kmpp]
-for iter = 1:iters
-clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
-for i = 1:nc
-si = 1+(i-1)n_state*2
-params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
-C = cov(state[clusterresult.assignments .== i,:])
-params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
-@assert !any(diag(inv(C)) .< 0)
-end
-errorvec[iter] = rms(predictionerror(params[:,iter]))
-end
-println("Std in errors among initial centers: ", round(std(errorvec),3))
-ind = indmin(errorvec)
-return RbfNonlinearParameters(params[:,ind],n_state, nc)
-end
function saturatePrecision(x,n_state)
for i = 1:2n_state:length(x)
......@@ -62,16 +41,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
return x
end
-function plotcenters(Z)
-X = zeros(Z.n_centers,2)
-for (i,Zi) in enumerate(Z)
-X[i,:] = Zi[1:2]'
-end
-newplot(X[:,1],X[:,2],"o"); title("Centers")
-end
function fitlinear(V)
try
DEBUG && assert(isa(V,Matrix))
......@@ -119,14 +88,12 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
return J
end
function predictionerror(z)
znl = RbfNonlinearParameters(z,n_state,n_centers)
-psi = getΨ(deepcopy(Ψ), znl, state, n_points, n_state, normalized)
-v = getLinearRegressor(deepcopy(V),psi,A,state,outputnet,na,n_state,n_centers,n_points)
-zl = fitlinear(v);
-prediction = v*zl
+psi = getΨ(Ψ, znl, state, n_points, n_state, normalized)
+getLinearRegressor(V,psi,A,state,outputnet,na,n_state,n_centers,n_points)
+zl = fitlinear(V);
+prediction = V*zl
error = prediction-y
return error
end
......@@ -153,7 +120,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
n_centers = nc
end
Ψ = Array(Float64,(n_points,n_centers+1))
Ψ[:,end] = 1.0
V = outputnet ? Array(Float64,(n_points, n_state*(n_centers+1)*(na)+1)) : Array(Float64,(n_points, (na)*(n_centers+1)+1))
......@@ -161,7 +127,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
if initialcenters == :equidistant
Znl = getcentersEq(state,nc); debug("gotcentersEq")
else
-Znl = getcentersKmeans(state,nc); debug("gotcentersKmeans")
+Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
end
@ddshow Znl
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
......@@ -174,19 +140,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
prediction = V*Zl
error = y - prediction
errors = zeros(liniters+1)
-Lb = zeros(Znl.x)
-Ub = zeros(Znl.x)
-mas = maximum(state,1)'
-mis = minimum(state,1)'
-for i = 1:2n_state:n_centers*2n_state
-Lb[i:i+n_state-1] = mis
-Ub[i:i+n_state-1] = mas
-Lb[i+n_state:i+2n_state-1] = 0.000001
-Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
-end
-@show Lb
-@show Ub
+Lb,Ub = getbounds(Znl, state, n_state, n_centers)
# ============= Main loop ================================================
debug("Calculating initial error")
......@@ -209,14 +163,16 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
timeout = 60,
n_state = n_state)
X0 = deepcopy(res.minimum)
-assert(X0 == res.minimum)
-@show rms(f(X0))
+DEBUG && assert(X0 == res.minimum)
+DEBUG && @show ff1 = rms(f(X0))
+if DEBUG
+_V = deepcopy(V)
+_Ψ = deepcopy(Ψ)
+end
-@show rms(f(res.minimum))
+DEBUG && @show ff2 = rms(f(res.minimum))
+if DEBUG
+@assert ff1 == ff2
+@show res.minimum == X0
+@show _V == V
+@show _Ψ == Ψ
......@@ -232,7 +188,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
n=50,
pa=0.25,
Tol=1.0e-5,
-max_iter = i < liniters-1 ? 80 : 200,
+max_iter = i < liniters-1 ? cuckooiter : 2cuckooiter,
timeout = 120)
debug("cuckoo_search done")
X0 = deepcopy(bestnest)
......@@ -389,3 +345,48 @@ function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points
end
return V
end
function plotcenters(Z)
X = zeros(Z.n_centers,2)
for (i,Zi) in enumerate(Z)
X[i,:] = Zi[1:2]'
end
newplot(X[:,1],X[:,2],"o"); title("Centers")
end
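`plotcenters` gives a quick visual sanity check of center placement using only the first two state dimensions:
plotcenters(Znl)  # scatter plot of the first two coordinates of every center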
function getbounds(Znl, state, n_state, n_centers)
Lb = zeros(Znl.x)
Ub = zeros(Znl.x)
mas = maximum(state,1)'
mis = minimum(state,1)'
for i = 1:2n_state:n_centers*2n_state
Lb[i:i+n_state-1] = mis
Ub[i:i+n_state-1] = mas
Lb[i+n_state:i+2n_state-1] = 0.000001
Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
end
return Lb,Ub
end
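`getbounds` builds the box constraints that are later handed to `cuckoo_search`: center coordinates are bounded by the componentwise extrema of the state, precisions by 1e-6 from below and ten times the initial precisions from above. A sketch of the call matching the site inside `trainRBF_ARX`:
# Znl.x holds n_centers blocks of [n_state coordinates; n_state precisions]
Lb, Ub = getbounds(Znl, state, n_state, n_centers)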
function getcentersKmeans(state, nc::Int, f::Function, n_state::Int)
iters = 21
errorvec = zeros(iters)
params = Array(Float64,(nc*2*n_state,iters))
methods = [:rand;:kmpp]
for iter = 1:iters
clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
for i = 1:nc
si = 1+(i-1)n_state*2
params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
C = cov(state[clusterresult.assignments .== i,:])
params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
@assert !any(diag(inv(C)) .< 0)
end
errorvec[iter] = rms(f(params[:,iter]))
end
println("Std in errors among initial centers: ", round(std(errorvec),3))
ind = indmin(errorvec)
return RbfNonlinearParameters(params[:,ind],n_state, nc)
end
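The extracted `getcentersKmeans` now takes the prediction-error function and the state dimension explicitly. It runs `kmeans` 21 times with alternating `:rand`/`:kmpp` initializations (`:kmcen` on the final run), scores every candidate parameter set by its RMS prediction error, and returns the best as `RbfNonlinearParameters`. A sketch matching the call site inside `trainRBF_ARX`:
# state is an n_points × n_state matrix; predictionerror maps params -> residuals
Znl = getcentersKmeans(state, nc, predictionerror, n_state)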