levenberg_marquardt.jl
```julia
using Optim
using LinearAlgebra

sse(x) = sum(abs2, x)               # sum of squared errors (the squared L2 norm of x)
rms(x) = sqrt(sse(x) / length(x))   # root-mean-square of x, used for trace reporting

"""
    levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12,
                        maxIter=100, lambda=100.0, show_trace=false,
                        timeout=Inf, n_state=1)

Finds `argmin_x sum(f(x).^2)` using the Levenberg-Marquardt algorithm.

- `f` should take an input vector of length n and return an output vector of length m.
- `g` is the Jacobian of `f`: given a length-n vector, it should return an m x n matrix.
- `x0` is an initial guess for the solution.

Available options:

- `tolX` - search tolerance in x
- `tolG` - search tolerance in the gradient
- `maxIter` - maximum number of iterations
- `lambda` - initial damping factor (inverse of the initial trust region radius)
- `show_trace` - print a status summary on each iteration if true
- `timeout` - wall-clock time limit in seconds
- `n_state` - block size used by `saturatePrecision` to keep precision entries non-negative

Returns an `Optim.MultivariateOptimizationResults` holding the least squares solution
for x together with convergence information.
"""
function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12,
                             maxIter=100, lambda=100.0, show_trace=false,
                             timeout=Inf, n_state=1)
    # algorithm constants
    MAX_LAMBDA = 1e16           # maximum damping (minimum trust region radius)
    MIN_LAMBDA = 1e-16          # minimum damping (maximum trust region radius)
    MIN_STEP_QUALITY = 1e-4
    MIN_DIAGONAL = 1e-6         # lower bound on the diagonal matrix used to regularize the trust region step

    converged = false
    x_converged = false
    g_converged = false
    need_jacobian = true
    iterCt = 0
    x = x0
    delta_x = deepcopy(x0)
    f_calls = 0
    g_calls = 0
    t0 = time()

    fcur = f(x)
    f_calls += 1
    residual = sse(fcur)

    # Maintain a trace of the system.
    tr = Optim.OptimizationTrace()
    if show_trace
        d = Dict("lambda" => lambda)
        os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
        push!(tr, os)
        println("Iter:0, f:$(round(os.value, digits=5)), ||g||:$(0), λ:$(round(lambda, digits=5))")
    end

    while !converged && iterCt < maxIter
        if need_jacobian
            J = g(x)
            g_calls += 1
            need_jacobian = false
        end
        # We want to solve:
        #    argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2
        # Solving for the minimum gives:
        #    (J'*J + lambda*DtD) * delta_x == -J' * f(x), where DtD = diagm(sum(J.^2, 1))
        # where we have used the equivalence diagm(J'*J) = diagm(sum(J.^2, 1)).
        # It is additionally useful to bound the elements of DtD below to help
        # prevent "parameter evaporation".
        DtD = Diagonal(max.(vec(sum(abs2, J, dims=1)), MIN_DIAGONAL))
        delta_x = (J'*J + lambda*DtD) \ (-J'*fcur)
        # If the linear assumption is valid, our new residual should be:
        predicted_residual = sse(J*delta_x + fcur)
        # Check for numerical problems in solving for delta_x by ensuring that the
        # predicted residual is smaller than the current residual.
        if predicted_residual > residual + 2max(eps(predicted_residual), eps(residual))
            @warn("""Problem solving for delta_x: predicted residual increase.
                  $predicted_residual (predicted_residual) > $residual (residual) + $(eps(predicted_residual)) (eps)""")
        end
        # Try the step and compute its quality.
        trial_x = saturatePrecision(x + delta_x, n_state)
        trial_f = f(trial_x)
        f_calls += 1
        trial_residual = sse(trial_f)
        # step quality = residual change / predicted residual change
        rho = (trial_residual - residual) / (predicted_residual - residual)
        if rho > MIN_STEP_QUALITY
            x = trial_x
            fcur = trial_f
            residual = trial_residual
            # decrease damping, i.e. increase the trust region radius
            lambda = max(0.1*lambda, MIN_LAMBDA)
            need_jacobian = true
        else
            # increase damping, i.e. decrease the trust region radius
            lambda = min(10*lambda, MAX_LAMBDA)
        end
        iterCt += 1

        # show state
        if show_trace && (iterCt < 20 || iterCt % 10 == 0)
            gradnorm = norm(J'*fcur, Inf)
            d = Dict("lambda" => lambda)
            os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
            push!(tr, os)
            println("Iter:$iterCt, f:$(round(os.value, digits=5)), ||g||:$(round(gradnorm, digits=5)), λ:$(round(lambda, digits=5))")
        end

        # Check convergence criteria:
        #   1. small gradient:  norm(J' * fcur, Inf) < tolG
        #   2. small step size: norm(delta_x) < tolX
        if norm(J'*fcur, Inf) < tolG
            g_converged = true
        elseif norm(delta_x) < tolX*(tolX + norm(x))
            x_converged = true
        end
        converged = g_converged || x_converged

        if time() - t0 > timeout
            println("levenberg_marquardt: timeout $(timeout)s reached ($(time()-t0)s)")
            break
        end
    end

    Optim.MultivariateOptimizationResults("Levenberg-Marquardt", x0, x, rms(fcur), iterCt,
                                          !converged, x_converged, 0.0, false, 0.0,
                                          g_converged, tolG, tr, f_calls, g_calls)
end

# For a parameter vector laid out in blocks of 2n_state entries, take the absolute
# value of the second half of each block (entries i+n_state : i+2n_state-1), forcing
# those "precision" parameters to stay non-negative.
function saturatePrecision(x, n_state)
    for i = 1:2n_state:length(x)
        idx = i+n_state : i+2n_state-1
        x[idx] = abs.(x[idx])
    end
    return x
end
```
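
A minimal usage sketch, for reference. It assumes the file above has been saved as `levenberg_marquardt.jl` and that the installed Optim version accepts the `OptimizationTrace`/`OptimizationState`/`MultivariateOptimizationResults` calls made above; the exponential model, data, and starting point are hypothetical, chosen only to exercise the solver:

```julia
# Hypothetical example: fit y ≈ a*exp(-b*t) to noisy samples by least squares.
include("levenberg_marquardt.jl")

t = range(0, 2, length=20)
y = 3.0 .* exp.(-1.5 .* t) .+ 0.01 .* randn(20)

f(p) = p[1] .* exp.(-p[2] .* t) .- y                          # length-20 residual vector
g(p) = hcat(exp.(-p[2] .* t), -p[1] .* t .* exp.(-p[2] .* t)) # 20 x 2 Jacobian

res = levenberg_marquardt(f, g, [1.0, 1.0]; maxIter=200)
```

Note that with the default `n_state=1`, `saturatePrecision` takes the absolute value of every second parameter in each trial step, which in this toy problem conveniently keeps the decay rate `p[2]` non-negative.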