Commit fddafd73 authored by Fredrik Bagge Carlson's avatar Fredrik Bagge Carlson

fixed huge issue with the RBF function which didn't perform the sum in the quadratic form but instead only picked the first quadratic term
parent 970d0e81
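Note (not part of the commit): a minimal sketch of what the fix changes, using illustrative values. For more than one state dimension, indexing the quadratic form with [1] keeps only its first term, while sum evaluates the full exponent of the Gaussian basis function.

# Illustration only; x, mu and gamma are hypothetical values, not identifiers from this repository
x = [1.0, 2.0]     # state vector
mu = [0.0, 0.0]    # RBF center
gamma = [1.0, 0.5] # precisions (inverse squared bandwidths)
rbf_old(x, mu, gamma) = exp(-(((x-mu).^2 .* gamma)[1])) # old behaviour: only the first quadratic term
rbf_new(x, mu, gamma) = exp(-sum((x-mu).^2 .* gamma))   # fixed: the full quadratic form
rbf_old(x, mu, gamma) # exp(-1) ≈ 0.37
rbf_new(x, mu, gamma) # exp(-3) ≈ 0.05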
@@ -46,7 +46,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12
# Maintain a trace of the system.
tr = Optim.OptimizationTrace()
if show_trace
- d = {"lambda" => lambda}
+ d = Dict("lambda" => lambda)
os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
push!(tr, os)
@@ -101,7 +101,7 @@ function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12
# show state
if show_trace && (iterCt < 20 || iterCt % 10 == 0)
gradnorm = norm(J'*fcur, Inf)
- d = {"lambda" => lambda}
+ d = Dict("lambda" => lambda)
os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
push!(tr, os)
println("Iter:$iterCt, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5)), λ:$(round(lambda,5))")
@@ -11,9 +11,6 @@ type RbfNonlinearParameters
end
# for op = (:+, :*, :\, :/)
# @eval ($op)(a::RbfNonlinearParameters,b) = ($op)(a.x,b)
# @eval ($op)(b,a::RbfNonlinearParameters) = ($op)(b,a.x)
@@ -44,17 +41,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
# Get initial centers ================================
Znl::RbfNonlinearParameters
- if isa(inputpca, Int)
- if inputpca > size(state,2)
- warn("inputpca must be <= n_state")
- inputpca = size(state,2)
- end
- state-= repmat(mean(state,1),n_points,1)
- state./= repmat(var(state,1),n_points,1)
- C,score,latent,W0 = PCA(state,true)
- state = score[:,1:inputpca]
- end
- n_state = size(state,2)
+ state,n_state, TT = inputtransform(state, inputpca)
if lowercase(initialcenters) == "equidistant"
initialcenters = :equidistant
@@ -73,7 +60,8 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
else
Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
end
- @ddshow Znl
+ @show Znl
+ @show size(state)
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
@ddshow sum(!isfinite(Ψ))
getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points); debug("Got linear regressor V")
@@ -85,6 +73,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
error = y - prediction
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
+ # 5 Hz takes 5:09 min, 10 Hz 7:10 min and 40 Hz 16
# ============= Main loop ================================================
debug("Calculating initial error")
@@ -93,7 +82,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
function g(z)
znl = RbfNonlinearParameters(z,n_state,n_centers)
w = fitlinear(V,y)
- return outputnet ? jacobian_outputnet(znl,Ψ, w, V) : jacobian_no_outputnet(znl,Ψ, w, V)
+ return outputnet ? jacobian_outputnet(znl,Ψ, w, V, state) : jacobian_no_outputnet(znl,Ψ, w, V, state)
end
f(z) = predictionerror(z)
X0 = deepcopy(Znl.x)
@@ -162,12 +151,161 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
# Exit ===============================================
println("trainRBF_ARX done. Centers: $(Znl.n_centers), Nonlinear parameters: $(length(Znl.x)), Linear parameters: $(length(Zl)), RMSE: $(rms(error))")
end
function trainRBF(y, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, cuckoosearch = false, cuckooiter=100)
n_points = length(y)
function predictionerror(z)
znl = RbfNonlinearParameters(z,n_state,n_centers)
psi = getΨ(Ψ, znl, state, n_points, n_state, normalized)
zl = fitlinear(Ψ,y);
prediction = Ψ*zl
error = prediction-y
return error
end
# Get initial centers ================================
Znl::RbfNonlinearParameters
state, n_state, TT = inputtransform(state, inputpca)
# n_state = size(state,2)
if lowercase(initialcenters) == "equidistant"
initialcenters = :equidistant
n_centers = nc^n_state
else
initialcenters = :kmeans
n_centers = nc
end
Ψ = Array(Float64,(n_points,n_centers+1))
Ψ[:,end] = 1.0
if initialcenters == :equidistant
Znl = getcentersEq(state,nc); debug("gotcentersEq")
else
Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
end
# Znl.x += 0.1*randn(size(Znl.x))
@ddshow Znl
getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
# matshow(deepcopy(Ψ));axis("tight"); colorbar()
# plotparams(Znl)
@ddshow sum(!isfinite(Ψ))
w = fitlinear(Ψ,y); debug("fitlinear")
newplot(w,"o"); title("Linear parameters")
@ddshow sum(!isfinite(w))
prediction = Ψ*w
error = y - prediction
# newplot([y prediction error]);title("inbetween")
errors = zeros(liniters+1)
Lb,Ub = getbounds(Znl, state, n_state, n_centers)
# ============= Main loop ================================================
debug("Calculating initial error")
errors[1] = rms(predictionerror(Znl.x))
println("Training RBF_ARX Centers: $(Znl.n_centers), Nonlinear parameters: $(length(Znl.x)), Linear parameters: $(length(w))")
function g(z)
znl = RbfNonlinearParameters(z,n_state,n_centers)
w = fitlinear(Ψ,y)
return jacobian(znl,Ψ, w, state)
end
f(z) = predictionerror(z)
X0 = deepcopy(Znl.x)
for i = 1:liniters
if i%2 == 1 || !cuckoosearch
@time res = levenberg_marquardt(f, g, X0,
maxIter = nonliniters,
tolG = 1e-7,
tolX = 1e-10,
show_trace=true,
timeout = 60,
n_state = n_state)
X0 = deepcopy(res.minimum)
DEBUG && assert(X0 == res.minimum)
DEBUG && @show ff1 = rms(f(X0))
if DEBUG
Ψold = deepcopy(Ψ) # stand-in name; the original identifier is not recoverable from the diff
end
DEBUG && @show ff2 = rms(f(res.minimum))
if DEBUG
@assert ff1 == ff2
@show res.minimum == X0
@show Ψold == Ψ
end
assert(X0 == res.minimum)
# Znl = RbfNonlinearParameters(saturatePrecision(copy(res.minimum),n_state),n_state,n_centers)
Znl = RbfNonlinearParameters(deepcopy(res.minimum),n_state,n_centers)
errors[i+1] = res.f_minimum
# show(res.trace)
else
display("Using cuckoo search to escape local minimum")
@time (bestnest,fmin) = cuckoo_search(x -> rms(f(x)),X0, Lb, Ub;
n=50,
pa=0.25,
Tol=1.0e-5,
max_iter = i < liniters-1 ? cuckooiter : 2cuckooiter,
timeout = 120)
debug("cuckoo_search done")
X0 = deepcopy(bestnest)
@ddshow rms(f(X0))
Znl = RbfNonlinearParameters(deepcopy(bestnest),n_state,n_centers)
errors[i+1] = fmin
end
if abs(errors[i+1]-errors[i]) < 1e-10
display("No significant change in function value")
break
end
end
# Test ===============================================
getΨ(Ψ, Znl, state, n_points, n_state, normalized)
w = fitlinear(Ψ,y); debug("fitlinear")
prediction = Ψ*w
error = y - prediction
if PYPLOT || WINSTON
newplot(y,"k");
plot(prediction,"b");
plot(error,"r");title("Fitresult, RBF, n_a: $na, n_c: $(Znl.n_centers), Nonlin params: $(length(Znl.x)), Lin params: $(length(w)) RMSE = $(rms(error)) Fit = $(fit(y,prediction))")
plotcenters(Znl)
newplot(errors,"o"); title("Errors");
end
# Exit ===============================================
println("tainRBF done. Centers: $(Znl.n_centers), Nonlinear parameters: $(length(Znl.x)), Linear parameters: $(length(w)), RMSE: $(rms(error))")
return prediction
end
type InputTransform
mean
variance
C
end
function inputtransform(state, inputpca)
n_points = size(state,1)
m = mean(state,1)
v = std(state,1)
state-= repmat(m,n_points,1)
state./= repmat(v,n_points,1)
if isa(inputpca, Int)
if inputpca > size(state,2)
warn("inputpca must be <= n_state")
inputpca = size(state,2)
end
C,score,latent,W0 = PCA(state,true)
state = score[:,1:inputpca]
else
C = 1
end
n_state = size(state,2)
# newplot(state); title("State after transformation")
return state, n_state, InputTransform(m,v,C)
end
function getcentersEq(state::VecOrMat, nc::Integer)
n_points = size(state,1)
@@ -182,7 +320,7 @@ function getcentersEq(state::VecOrMat, nc::Integer)
end
# add bandwidth parameters γ, give all centers the same bandwidth with Δc as a (hopefully) good initial guess
# display(Z)
- Z[:,n_state+1:end] = 1*repmat(4./(Δc.^2)',nc,1) # Spread the initial guess to all centers
+ Z[:,n_state+1:end] = ones(1*repmat(4./(Δc.^2)',nc,1)) # Spread the initial guess to all centers
assert(all(Z[:,n_state+1:end].> 0))
debug("Z done")
n_centers::Int64 = nc^n_state # new number of centers which considers gridding of 1D centers
@@ -204,6 +342,36 @@ function getcentersEq(state::VecOrMat, nc::Integer)
#error("Bias parameter!")
end
function getcentersKmeans(state, nc::Int, f::Function, n_state::Int)
iters = 21
errorvec = zeros(iters)
params = Array(Float64,(nc*2*n_state,iters))
methods = [:rand;:kmpp]
for iter = 1:iters
clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
for i = 1:nc
si = 1+(i-1)n_state*2
params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
C = cov(state[clusterresult.assignments .== i,:])
C = cond(C) > 1e4 ? eye(C) : C
params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
any(diag(inv(C)) .< 0) && show(C)
end
errorvec[iter] = rms(f(params[:,iter]))
end
println("Std in errors among initial centers: ", round(std(errorvec),6))
ind = indmin(errorvec)
return RbfNonlinearParameters(params[:,ind],n_state, nc)
end
function plotcenters(Z)
X = zeros(Z.n_centers,2)
for (i,Zi) in enumerate(Z)
X[i,:] = Zi[1:2]'
end
newplot(X[:,1],X[:,2],"o"); title("Centers")
end
@@ -216,7 +384,7 @@ function getΨ(Ψ, Znl, state, n_points, n_state, normalized::Bool)
end
function getΨnormalized(Ψ, Znl, state, n_points, n_state)
- RBF(x, Znl::VecOrMat,n_state::Integer) = exp(-(((x-Znl[1:n_state]).^2.*Znl[n_state+1:end])[1]))
+ RBF(x, Znl::VecOrMat,n_state::Integer) = exp(-(sum((x-Znl[1:n_state]).^2.*Znl[n_state+1:end])))
rowsum = ones(n_points)
for (j,Zin) in enumerate(Znl)
Zi = deepcopy(Zin)
@@ -240,45 +408,52 @@ function getΨnormalized(Ψ, Znl, state, n_points, n_state)
end
function getΨnonnormalized(Ψ, Znl, state, n_points, n_state)
- RBF(x, Znl::VecOrMat,n_state::Integer) = exp(-(((x-Znl[1:n_state]).^2.*Znl[n_state+1:end])[1]))
- for (j,Zin) in enumerate(Znl)
- Zi = deepcopy(Zin)
+ # RBF(x, Z,n_state) = exp(-(x-Z[1:n_state]).^2)[1]
+ RBF(x, Z,n_state) = exp(-(sum(((x-Z[1:n_state]).^2).*Z[n_state+1:end])))
+ for (j,Zi) in enumerate(Znl)
for i = 1:n_points
- statei = squeeze(state[i,:]',2)
+ statei = state[i,:]'
# statei = slice(state,i,:)
Ψ[i,j] = RBF(statei, Zi, n_state)
if DEBUG && !isfinite(Ψ[i,j])
@show i,j,statei, Zi, n_state, Ψ[i,j]
@show (statei-Zi[1:n_state]).^2
@show Zi[n_state+1:end]
- # @show exp(-(((statei-Zi[1:n_state]).^2.*Zi[n_state+1:end])[1]))
error("Stopping")
end
end
end
return Ψ
end
function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points)
- if outputnet
- ii = 1
- for i = 1:na
- for k = 1:n_centers+1
- for j = 1:n_state
- for l = 1:n_points
- V[l,ii] = Ψ[l,k]*A[l,i]*state[l,j]
- end
- ii = ii+1
- end
- end
- end
- else
- ii = 1
- for i = 1:na
- for k = 1:n_centers+1
- for l = 1:n_points
- V[l,ii] = Ψ[l,k]*A[l,i]
- end
- ii = ii+1
- end
+ outputnet ? getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points) : getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+ end
+ function getLinearRegressor_no_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+ ii = 1
+ for i = 1:na
+ for k = 1:n_centers+1
+ for l = 1:n_points
+ V[l,ii] = Ψ[l,k]*A[l,i]
+ end
+ ii = ii+1
+ end
+ end
+ if DEBUG && sum(!isfinite(V)) > 0
+ @show sum(!isfinite(V))
+ end
+ return V
+ end
+ function getLinearRegressor_outputnet(V,Ψ,A,state,na,n_state,n_centers,n_points)
+ ii = 1
+ for i = 1:na
+ for k = 1:n_centers+1
+ for j = 1:n_state
+ for l = 1:n_points
+ V[l,ii] = Ψ[l,k]*A[l,i]*state[l,j]
+ end
+ ii = ii+1
+ end
@@ -291,13 +466,7 @@ function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points
end
- function plotcenters(Z)
- X = zeros(Z.n_centers,2)
- for (i,Zi) in enumerate(Z)
- X[i,:] = Zi[1:2]'
- end
- newplot(X[:,1],X[:,2],"o"); title("Centers")
- end
function getbounds(Znl, state, n_state, n_centers)
Lb = zeros(Znl.x)
@@ -314,29 +483,10 @@ function getbounds(Znl, state, n_state, n_centers)
end
- function getcentersKmeans(state, nc::Int, f::Function, n_state::Int)
- iters = 21
- errorvec = zeros(iters)
- params = Array(Float64,(nc*2*n_state,iters))
- methods = [:rand;:kmpp]
- for iter = 1:iters
- clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
- for i = 1:nc
- si = 1+(i-1)n_state*2
- params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
- C = cov(state[clusterresult.assignments .== i,:])
- params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
- @assert !any(diag(inv(C)) .< 0)
- end
- errorvec[iter] = rms(f(params[:,iter]))
- end
- println("Std in errors among initial centers: ", round(std(errorvec),3))
- ind = indmin(errorvec)
- return RbfNonlinearParameters(params[:,ind],n_state, nc)
- end
- function jacobian_outputnet(Znl, Ψ, w, V)
+ function jacobian_outputnet(Znl, Ψ, w, V, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
@@ -365,7 +515,7 @@ function jacobian_outputnet(Znl, Ψ, w, V)
return J
end
- function jacobian_no_outputnet(Znl, Ψ, w,v)
+ function jacobian_no_outputnet(Znl, Ψ, w,v, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
@@ -392,6 +542,29 @@ function jacobian_no_outputnet(Znl, Ψ, w,v)
return J
end
function jacobian(Znl, Ψ, w, state)
n_points = size(Ψ,1)
n_state = Znl.n_state
n_centers = Znl.n_centers
J = Array(Float64,(n_points,length(Znl.x)))
ii = 1
for (k,Zi) in enumerate(Znl)
μ = Zi[1:n_state] # slice?
γ = Zi[n_state+1:end]
i1 = ii-1
for l = 1:n_points
Ψw = Ψ[l,k]*w[k]
for p = 1:n_state
x_μ = state[l,p]-μ[p]
J[l,i1+p] = 2*Ψw*x_μ*γ[p]
J[l,i1+n_state+p] = (-Ψw)*x_μ^2
end
end
ii += 2n_state
end
return J
end
function saturatePrecision(x,n_state)
for i = 1:2n_state:length(x)
range = i+n_state:i+2n_state-1
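Note (not part of the commit): the analytic Jacobian added above can be sanity-checked against central finite differences. A minimal sketch, assuming f returns the residual vector and g the analytic Jacobian, as the closures in trainRBF do; check_jacobian is a hypothetical helper, not a function of this repository.

# Hypothetical helper: compare an analytic Jacobian g(x0) against central finite differences of f
function check_jacobian(f, g, x0; h=1e-6)
    Jana = g(x0)
    Jfd = similar(Jana)
    for i = 1:length(x0)
        e = zeros(length(x0)); e[i] = h
        Jfd[:,i] = (f(x0+e) - f(x0-e))/(2h)
    end
    maximum(map(abs, Jana - Jfd)) # a small value indicates the analytic Jacobian matches
end
# e.g. check_jacobian(f, g, X0) inside trainRBF, before calling levenberg_marquardt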
@@ -412,3 +585,12 @@ function fitlinear(V,y)
error("Linear fitting failed")
end
end
function Base.show(p::RbfNonlinearParameters)
println(round(reshape(p.x,2p.n_state,p.n_centers),4))
end
function plotparams(p::RbfNonlinearParameters)
newplot(reshape(p.x,2p.n_state,p.n_centers)',"o")
title("Nonlinear parameters")
end