diff --git a/src/SystemIdentification.jl b/src/SystemIdentification.jl
index bb109dc2a3e1668cf96bebb8deca18202ad29e2c..444d1ec8e831481a61408354ffbd7abfb17eedc4 100644
--- a/src/SystemIdentification.jl
+++ b/src/SystemIdentification.jl
@@ -13,7 +13,7 @@ FitResult,
 IdData,
 # Functions
 ar,arx,getARregressor,getARXregressor,find_na,
-toeplitz, kalman, kalman_smoother
+toeplitz, kalman, kalman_smoother, forward_kalman
 
 ## Fit Methods =================
 :LS
diff --git a/src/armax.jl b/src/armax.jl
index 8ce9e24e156550e615dbc37ad823acca67a3d065..255572a693da881339170a3f5e26a9e84ab7eccd 100644
--- a/src/armax.jl
+++ b/src/armax.jl
@@ -105,3 +105,15 @@ function find_na(y::Vector{Float64},n::Int)
     plotsub(error,"-o")
     show()
 end
+
+import PyPlot
+function PyPlot.plot(y,m::AR)
+    na = length(m.a)
+    y,A = getARregressor(y,na) # y is trimmed to align with the regressor matrix A
+    yh = A*m.a                 # one-step-ahead prediction
+    e = y-yh                   # prediction residual
+    newplot(y,"k")
+    plot(yh,"b")
+    plot(e,"r")
+    title("Fit result, AR, n_a: $na, RMSE = $(rms(e)) Fit = $(fit(y,yh))")
+end
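+
+# Usage sketch (hedged; assumes `ar` returns an `AR` fit object, as exported
+# from SystemIdentification):
+#   m = ar(y, 4)        # fit an AR model of order na = 4
+#   PyPlot.plot(y, m)   # data in black, prediction in blue, residual in red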
diff --git a/src/cuckooSearch.jl b/src/cuckooSearch.jl
index 23751b5adc08a385f9ac62a9e33a3838e8c841eb..858249d2cab7ce4796e24b9367fac41c5bc23d54 100644
--- a/src/cuckooSearch.jl
+++ b/src/cuckooSearch.jl
@@ -15,7 +15,7 @@ Based on implementation by
 }
 http://www.mathworks.com/matlabcentral/fileexchange/29809-cuckoo-search--cs--algorithm
 """
-function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e5, timeout = Inf)
+function cuckoo_search(f,X0, Lb,Ub;n=25,pa=0.25, Tol=1.0e-5, max_iter = 1e3, timeout = Inf)
     X00 = deepcopy(X0)
     nd=size(X0,1);
     X0t = X0'
diff --git a/src/kalman.jl b/src/kalman.jl
index e2bca2f1126f3e07544dc4e290028362da1b712a..b78b88b395605d98ba3fb1e2a5b36088c98917fc 100644
--- a/src/kalman.jl
+++ b/src/kalman.jl
@@ -24,35 +24,37 @@ function forward_kalman(y,A,R1,R2, P0)
     xk = xkk
     Pk = Pkk
     i = 1
+    Hk = A[i,:]
     e = y[i]-Hk*xk[:,i]
     S = Hk*Pk[:,:,i]*Hk' + R2
-    K = (Pk[:,:,i]*H')/S
+    K = (Pk[:,:,i]*Hk')/S
     xkk[:,i] = xk[:,i] + K*e
     Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
     for i = 2:N
         Fk = 1
-        Hk = A[i,:]'
+        Hk = A[i,:]
         xk[:,i] = Fk*xkk[:,i-1]
         Pk[:,:,i] = Fk*Pkk[:,:,i-1]*Fk' + R1
         e = y[i]-Hk*xk[:,i]
         S = Hk*Pk[:,:,i]*Hk' + R2
-        K = (Pk[:,:,i]*H')/S
+        K = (Pk[:,:,i]*Hk')/S
         xkk[:,i] = xk[:,i] + K*e
         Pkk[:,:,i] = (I - K*Hk)*Pk[:,:,i]
     end
     return xkk,xk,Pkk,Pk
 end
 
-"""A kalman parameter smoother"""
+using Debug
+
+# """A kalman parameter smoother"""
 function kalman_smoother(y, A, R1, R2, P0)
     na = size(R1,1)
     N = length(y)
 
     xkk,xk,Pkk,Pk = forward_kalman(y,A,R1,R2, P0)
     xkn = zeros(xkk)
-    Pkn = zeros(P)
+    Pkn = zeros(Pkk)
     for i = N-1:-1:1
-        Ai = A[i,:]
         Fk = 1
         Hk = A[i,:]'
         C = Pkk[:,:,i]/Pk[:,:,i+1]
@@ -60,6 +62,5 @@ function kalman_smoother(y, A, R1, R2, P0)
         Pkn[:,:,i] = Pkk[:,:,i] + C*(Pkn[:,:,i+1] - Pk[:,:,i+1])*C'
     end
 
-    return xkn, Pkn
-
+    return xkn
 end
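+
+# Usage sketch (hedged): tracking time-varying AR coefficients, assuming A is
+# the N×na regressor matrix from getARregressor and that the parameters follow
+# a random walk with covariance R1:
+#   R1  = 1e-4*eye(na) # parameter drift covariance
+#   R2  = 1.0          # measurement noise variance
+#   P0  = 100*eye(na)  # initial parameter covariance
+#   xkn = kalman_smoother(y, A, R1, R2, P0)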
diff --git a/src/rbfmodels/levenberg_marquardt.jl b/src/rbfmodels/levenberg_marquardt.jl
new file mode 100644
index 0000000000000000000000000000000000000000..030d5cbe3415d11b6d9768ad4a6685f613d86ff9
--- /dev/null
+++ b/src/rbfmodels/levenberg_marquardt.jl
@@ -0,0 +1,133 @@
+sse(x) = (x'x)[1] # sum of squared errors (the squared L2 norm of x)
+using Optim
+"""
+`levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf)`\n
+    # finds argmin sum(f(x).^2) using the Levenberg-Marquardt algorithm
+    #          x
+    # The function f should take an input vector of length n and return an output vector of length m
+    # The function g is the Jacobian of f, and should be an m x n matrix
+    # x0 is an initial guess for the solution
+    # fargs is a tuple of additional arguments to pass to f
+    # available options:
+    #   tolX - search tolerance in x
+    #   tolG - search tolerance in gradient
+    #   maxIter - maximum number of iterations
+    #   lambda - (inverse of) initial trust region radius
+    #   show_trace - print a status summary on each iteration if true
+    # returns: x, J
+    #   x - least squares solution for x
+    #   J - estimate of the Jacobian of f at x
+"""
+function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf, n_state = 1)
+
+
+    # other constants
+    const MAX_LAMBDA = 1e16 # largest allowed damping factor (smallest trust region radius)
+    const MIN_LAMBDA = 1e-16 # smallest allowed damping factor (largest trust region radius)
+    const MIN_STEP_QUALITY = 1e-4
+    const MIN_DIAGONAL = 1e-6 # lower bound on values of diagonal matrix used to regularize the trust region step
+
+    converged = false
+    x_converged = false
+    g_converged = false
+    need_jacobian = true
+    iterCt = 0
+    x = x0
+    delta_x = deepcopy(x0)
+    f_calls = 0
+    g_calls = 0
+    t0 = time()
+
+    fcur = f(x)
+    f_calls += 1
+    residual = sse(fcur)
+
+    # Maintain a trace of the system.
+    tr = Optim.OptimizationTrace()
+    if show_trace
+        d = {"lambda" => lambda}
+        os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
+
+        push!(tr, os)
+        println("Iter:0, f:$(round(os.value,5)), ||g||:$(0)), λ:$(round(lambda,5))")
+    end
+
+    while !converged && iterCt < maxIter
+        if need_jacobian
+            J = g(x)
+            g_calls += 1
+            need_jacobian = false
+        end
+        # we want to solve:
+        #    argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2
+        # Solving for the minimum gives:
+        #    (J'*J + lambda*DtD) * delta_x == -J^T * f(x), where DtD = diagm(sum(J.^2,1))
+        # Where we have used the equivalence: diagm(J'*J) = diagm(sum(J.^2, 1))
+        # It is additionally useful to bound the elements of DtD below to help
+        # prevent "parameter evaporation".
+        DtD = diagm(Float64[max(v, MIN_DIAGONAL) for v in sum(J.^2,1)])
+        delta_x = ( J'*J + sqrt(lambda)*DtD ) \ ( -J'*fcur ) # note: sqrt(lambda) is used as the damping factor here
+        # if the linear assumption is valid, our new residual should be:
+        predicted_residual = sse(J*delta_x + fcur)
+        # check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller
+        # than the current residual
+        if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual))
+            warn("""Problem solving for delta_x: predicted residual increase.
+                             $predicted_residual (predicted_residual) >
+                             $residual (residual) + $(eps(predicted_residual)) (eps)""")
+        end
+        # try the step and compute its quality
+        trial_x = saturatePrecision(x + delta_x,n_state)
+        trial_f = f(trial_x)
+        f_calls += 1
+        trial_residual = sse(trial_f)
+        # step quality = residual change / predicted residual change
+        rho = (trial_residual - residual) / (predicted_residual - residual)
+
+        if rho > MIN_STEP_QUALITY
+            x = trial_x
+            fcur = trial_f
+            residual = trial_residual
+            # increase trust region radius
+            lambda = max(0.1*lambda, MIN_LAMBDA)
+            need_jacobian = true
+        else
+            # decrease trust region radius
+            lambda = min(10*lambda, MAX_LAMBDA)
+        end
+        iterCt += 1
+
+        # show state
+        if show_trace && (iterCt < 20 || iterCt % 10 == 0)
+            gradnorm = norm(J'*fcur, Inf)
+            d = {"lambda" => lambda}
+            os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
+            push!(tr, os)
+            println("Iter:$iterCt, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5)), λ:$(round(lambda,5))")
+        end
+
+        # check convergence criteria:
+        # 1. Small gradient: norm(J^T * fcur, Inf) < tolG
+        # 2. Small step size: norm(delta_x) < tolX*(tolX + norm(x))
+        if norm(J' * fcur, Inf) < tolG
+            g_converged = true
+        elseif norm(delta_x) < tolX*(tolX + norm(x))
+            x_converged = true
+        end
+        converged = g_converged || x_converged
+        if time()-t0 > timeout
+            println("levenberg_marquardt: timeout $(timeout)s reached ($(time()-t0)s)")
+            break
+        end
+    end
+    Optim.MultivariateOptimizationResults("Levenberg-Marquardt", x0, x, rms(fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
+end
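+
+# Usage sketch (hedged, hypothetical two-parameter exponential model; f returns
+# the residual vector and g its Jacobian):
+#   t = linspace(0,1,50); y = 2exp(-3t) + 0.01randn(50)
+#   f(x) = x[1]*exp(-x[2]*t) - y
+#   g(x) = [exp(-x[2]*t) -x[1]*t.*exp(-x[2]*t)]
+#   res = levenberg_marquardt(f, g, [1.0; 1.0], show_trace=true)
+# (with the default n_state = 1, saturatePrecision keeps x[2] non-negative)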
+
+
+"""Return `x` with every precision block replaced by its absolute value. The
+parameter vector is assumed to stack [center (n_state); precision (n_state)]
+blocks, one block per RBF center."""
+function saturatePrecision(x,n_state)
+    for i = 1:2n_state:length(x)
+        rng = i+n_state:i+2n_state-1 # the precision part of this center's block
+        x[rng] = abs(x[rng])
+    end
+    return x
+end
diff --git a/src/rbfmodels/trainRBF_ARX.jl b/src/rbfmodels/trainRBF_ARX.jl
index 429c2e800ec021dece8b4ddb22ac36c67ea0164f..b2db6efa7c0e40cb7c1ebef4678e8a4c409acb03 100644
--- a/src/rbfmodels/trainRBF_ARX.jl
+++ b/src/rbfmodels/trainRBF_ARX.jl
@@ -28,31 +28,10 @@ The number of centers is equal to `nc` if Kmeans is used to get initial centers,
 `n_state` is equal to the state dimension, possibly reduced to `inputpca` if so desired.\n
 The number of nonlinear parameters is `n_centers × n_state`\n
 The number of linear parameters is `outputnet ? n_state × (n_centers+1) × na + 1 : na × (n_centers+1) + 1`"""
-function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false)
+function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=false, initialcenters="equidistant", inputpca=false, outputnet = true, cuckoosearch = false, cuckooiter=100)
     n_points = length(y)
     na = isa(A,Matrix) ? size(A,2) : 1
 
-    function getcentersKmeans(state, nc)
-        iters = 21
-        errorvec = zeros(iters)
-        params = Array(Float64,(nc*2*n_state,iters))
-        methods = [:rand;:kmpp]
-        for iter = 1:iters
-            clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
-            for i = 1:nc
-                si = 1+(i-1)n_state*2
-                params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
-                C = cov(state[clusterresult.assignments .== i,:])
-                params[si+n_state:si+2n_state-1,iter] = diag(inv(C))
-                @assert !any(diag(inv(C)) .< 0)
-            end
-            errorvec[iter] = rms(predictionerror(params[:,iter]))
-        end
-        println("Std in errors among initial centers: ", round(std(errorvec),3))
-        ind = indmin(errorvec)
-        return RbfNonlinearParameters(params[:,ind],n_state, nc)
-    end
-
 
     function saturatePrecision(x,n_state)
         for i = 1:2n_state:length(x)
@@ -62,16 +41,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         return x
     end
 
-    function plotcenters(Z)
-        X = zeros(Z.n_centers,2)
-        for (i,Zi) in enumerate(Z)
-            X[i,:] = Zi[1:2]'
-        end
-        newplot(X[:,1],X[:,2],"o"); title("Centers")
-    end
-
-
-
     function fitlinear(V)
         try
             DEBUG && assert(isa(V,Matrix))
@@ -119,14 +88,12 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         return J
     end
 
-
-
     function predictionerror(z)
         znl = RbfNonlinearParameters(z,n_state,n_centers)
-        psi = getΨ(deepcopy(Ψ), znl, state, n_points, n_state, normalized)
-        v = getLinearRegressor(deepcopy(V),psi,A,state,outputnet,na,n_state,n_centers,n_points)
-        zl = fitlinear(v);
-        prediction = v*zl
+        psi = getΨ(Ψ, znl, state, n_points, n_state, normalized) # fills Ψ in place
+        v = getLinearRegressor(V,psi,A,state,outputnet,na,n_state,n_centers,n_points) # fills V in place
+        zl = fitlinear(v);
+        prediction = v*zl
         error = prediction-y
         return error
     end
@@ -153,7 +120,6 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
         n_centers = nc
     end
 
-
     Ψ = Array(Float64,(n_points,n_centers+1))
     Ψ[:,end] = 1.0
     V = outputnet ? V = Array(Float64,(n_points, n_state* (n_centers+1)* (na)+1)) : V = Array(Float64,(n_points, (na)*(n_centers+1)+1))
@@ -161,7 +127,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     if initialcenters == :equidistant
         Znl = getcentersEq(state,nc); debug("gotcentersEq")
     else
-        Znl = getcentersKmeans(state,nc); debug("gotcentersKmeans")
+        Znl = getcentersKmeans(state, nc, predictionerror, n_state); debug("gotcentersKmeans")
     end
     @ddshow Znl
     getΨ(Ψ, Znl, state, n_points, n_state, normalized); debug("Got Ψ")
@@ -174,19 +140,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
     prediction = V*Zl
     error = y - prediction
     errors = zeros(liniters+1)
-
-    Lb = zeros(Znl.x)
-    Ub = zeros(Znl.x)
-    mas = maximum(state,1)'
-    mis = minimum(state,1)'
-    for i = 1:2n_state:n_centers*2n_state
-        Lb[i:i+n_state-1] = mis
-        Ub[i:i+n_state-1] = mas
-        Lb[i+n_state:i+2n_state-1] = 0.000001
-        Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
-    end
-    @show Lb
-    @show Ub
+    Lb,Ub = getbounds(Znl, state, n_state, n_centers)
 
     # ============= Main loop  ================================================
     debug("Calculating initial error")
@@ -209,14 +163,16 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
                                             timeout = 60,
                                             n_state = n_state)
             X0 = deepcopy(res.minimum)
-            assert(X0 == res.minimum)
-            @show rms(f(X0))
+            DEBUG && assert(X0 == res.minimum)
+            DEBUG && @show ff1 = rms(f(X0))
             if DEBUG
                 _V = deepcopy(V)
                 _Ψ = deepcopy(Ψ)
             end
-            @show rms(f(res.minimum))
+            DEBUG && @show ff2 = rms(f(res.minimum))
+
             if DEBUG
+                @assert ff1 == ff2
                 @show res.minimum == X0
                 @show _V == V
                 @show _Ψ == Ψ
@@ -232,7 +188,7 @@ function trainRBF_ARX(y, A, state, nc; liniters=3,nonliniters=50, normalized=fal
                                                   n=50,
                                                   pa=0.25,
                                                   Tol=1.0e-5,
-                                                  max_iter = i < liniters-1 ? 80 : 200,
+                                                  max_iter = i < liniters-1 ? cuckooiter : 2cuckooiter,
                                                   timeout = 120)
             debug("cuckoo_search done")
             X0 = deepcopy(bestnest)
@@ -389,3 +345,48 @@ function getLinearRegressor(V,Ψ,A,state,outputnet,na,n_state,n_centers,n_points
     end
     return V
 end
+
+
+function plotcenters(Z)
+    X = zeros(Z.n_centers,2)
+    for (i,Zi) in enumerate(Z)
+        X[i,:] = Zi[1:2]'
+    end
+    newplot(X[:,1],X[:,2],"o"); title("Centers")
+end
+
+function getbounds(Znl, state, n_state, n_centers)
+    Lb = zeros(Znl.x)
+    Ub = zeros(Znl.x)
+    mas = maximum(state,1)'
+    mis = minimum(state,1)'
+    for i = 1:2n_state:n_centers*2n_state
+        Lb[i:i+n_state-1] = mis
+        Ub[i:i+n_state-1] = mas
+        Lb[i+n_state:i+2n_state-1] = 0.000001
+        Ub[i+n_state:i+2n_state-1] = 10*Znl.x[n_state+1:2n_state]
+    end
+    return Lb,Ub
+end
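+
+# The bound vectors interleave, for each center, the center coordinates
+# (bounded by the extrema of the state) and the precisions (bounded between
+# 1e-6 and ten times the initial precisions); they are passed as Lb,Ub to
+# cuckoo_search.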
+
+
+function getcentersKmeans(state, nc::Int, f::Function, n_state::Int)
+    iters = 21
+    errorvec = zeros(iters)
+    params = Array(Float64,(nc*2*n_state,iters))
+    methods = [:rand;:kmpp]
+    for iter = 1:iters
+        clusterresult = kmeans(state', nc; maxiter=200, display=:none, init=iter<iters ? methods[iter%2+1] : :kmcen)
+        for i = 1:nc
+            si = 1+(i-1)n_state*2
+            params[si:si+n_state-1,iter] = clusterresult.centers[:,i]
+            C = cov(state[clusterresult.assignments .== i,:])
+            prec = diag(inv(C)) # diagonal of the precision (inverse covariance) matrix
+            @assert !any(prec .< 0)
+            params[si+n_state:si+2n_state-1,iter] = prec
+        end
+        errorvec[iter] = rms(f(params[:,iter]))
+    end
+    println("Std in errors among initial centers: ", round(std(errorvec),3))
+    ind = indmin(errorvec)
+    return RbfNonlinearParameters(params[:,ind],n_state, nc)
+end
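+
+# Called from trainRBF_ARX as (see above):
+#   Znl = getcentersKmeans(state, nc, predictionerror, n_state)
+# where `predictionerror` is the closure evaluating the one-step prediction error.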