using Optim

sse(x) = (x'x)[1]                # sum of squared entries of x (squared L2 norm)
rms(x) = sqrt(sse(x)/length(x))  # root-mean-square of x, used for the optimization trace
"""
`levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf)`\n
    # finds argmin sum(f(x).^2) using the Levenberg-Marquardt algorithm
    #          x
    # The function f should take an input vector of length n and return an output vector of length m
    # The function g is the Jacobian of f, and should be an m x n matrix
    # x0 is an initial guess for the solution
    # fargs is a tuple of additional arguments to pass to f
    # available options:
    #   tolX - search tolerance in x
    #   tolG - search tolerance in gradient
    #   maxIter - maximum number of iterations
    #   lambda - (inverse of) initial trust region radius
    #   show_trace - print a status summary on each iteration if true
    # returns: x, J
    #   x - least squares solution for x
    #   J - estimate of the Jacobian of f at x
"""
function levenberg_marquardt(f::Function, g::Function, x0; tolX=1e-8, tolG=1e-12, maxIter=100, lambda=100.0, show_trace=false, timeout=Inf, n_state = 1)


    # algorithm constants
    MAX_LAMBDA = 1e16 # maximum lambda, i.e. minimum trust region radius
    MIN_LAMBDA = 1e-16 # minimum lambda, i.e. maximum trust region radius
    MIN_STEP_QUALITY = 1e-4
    MIN_DIAGONAL = 1e-6 # lower bound on entries of the diagonal matrix used to regularize the trust region step

    converged = false
    x_converged = false
    g_converged = false
    need_jacobian = true
    iterCt = 0
    x = x0
    delta_x = deepcopy(x0)
    f_calls = 0
    g_calls = 0
    t0 = time()

    fcur = f(x)
    f_calls += 1
    residual = sse(fcur)

    # Maintain a trace of the system.
    tr = Optim.OptimizationTrace()
    if show_trace
        d = Dict("lambda" => lambda)
        os = Optim.OptimizationState(iterCt, rms(fcur), NaN, d)
        push!(tr, os)
        println("Iter:0, f:$(round(os.value,5)), ||g||:0, λ:$(round(lambda,5))")
    end

    while !converged && iterCt < maxIter
        if need_jacobian
            J = g(x)
            g_calls += 1
            need_jacobian = false
        end
        # we want to solve:
        #    argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*delta_x'*DtD*delta_x
        # where DtD = diagm(diag(J'*J)) = diagm(sum(J.^2, 1)).
        # Setting the gradient to zero gives the damped normal equations:
        #    (J'*J + lambda*DtD) * delta_x == -J'*f(x)
        # (note: the solve below damps with sqrt(lambda)*DtD rather than lambda*DtD;
        #  a standalone numerical sketch of this step follows the function).
        # It is additionally useful to bound the elements of DtD below to help
        # prevent "parameter evaporation".
        DtD = diagm(Float64[max(x, MIN_DIAGONAL) for x in sum(J.^2,1)])
        delta_x = ( J'*J + sqrt(lambda)*DtD ) \ ( -J'*fcur )
        # if the linear assumption is valid, our new residual should be:
        predicted_residual = sse(J*delta_x + fcur)
        # check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller
        # than the current residual
        if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual))
            warn("""Problem solving for delta_x: predicted residual increase.
                             $predicted_residual (predicted_residual) >
                             $residual (residual) + $(eps(predicted_residual)) (eps)""")
        end
        # try the step and compute its quality
        trial_x = saturatePrecision(x + delta_x, n_state)
        trial_f = f(trial_x)
        f_calls += 1
        trial_residual = sse(trial_f)
        # step quality = residual change / predicted residual change
        rho = (trial_residual - residual) / (predicted_residual - residual)

        if rho > MIN_STEP_QUALITY
            x = trial_x
            fcur = trial_f
            residual = trial_residual
            # increase trust region radius
            lambda = max(0.1*lambda, MIN_LAMBDA)
            need_jacobian = true
        else
            # decrease trust region radius
            lambda = min(10*lambda, MAX_LAMBDA)
        end
        iterCt += 1

        # show state
        if show_trace && (iterCt < 20 || iterCt % 10 == 0)
            gradnorm = norm(J'*fcur, Inf)
            d = Dict("lambda" => lambda)
            os = Optim.OptimizationState(iterCt, rms(fcur), gradnorm, d)
            push!(tr, os)
            println("Iter:$iterCt, f:$(round(os.value,5)), ||g||:$(round(gradnorm,5)), λ:$(round(lambda,5))")
        end

        # check convergence criteria:
        # 1. Small gradient: norm(J^T * fcur, Inf) < tolG
        # 2. Small step size: norm(delta_x) < tolX
        if norm(J' * fcur, Inf) < tolG
            g_converged = true
        elseif norm(delta_x) < tolX*(tolX + norm(x))
            x_converged = true
        end
        converged = g_converged || x_converged
        if time()-t0 > timeout
            println("levenberg_marquardt: timeout $(timeout)s reached ($(time()-t0)s)")
            break
        end
    end
    Optim.MultivariateOptimizationResults("Levenberg-Marquardt", x0, x, rms(fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, tr, f_calls, g_calls)
end
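
# ---------------------------------------------------------------------------
# Standalone numerical sketch of the damped normal-equations step used inside
# the loop above. This is illustrative only: the 2-parameter residual below is
# hypothetical and not part of the solver; the sketch just shows how DtD and
# delta_x are formed for a single iteration.
# ---------------------------------------------------------------------------
function lm_step_sketch(lambda=100.0, MIN_DIAGONAL=1e-6)
    p = [1.0, 1.0]
    fcur = [p[1] + p[2] - 3, p[1]*p[2] - 2]    # hypothetical residual f(p)
    J = [1.0 1.0; p[2] p[1]]                   # its 2 x 2 Jacobian at p
    # DtD = diag(J'J), bounded below to prevent "parameter evaporation"
    DtD = diagm(Float64[max(s, MIN_DIAGONAL) for s in sum(J.^2, 1)])
    # damped normal equations, matching the solve in the main loop
    delta_x = (J'*J + sqrt(lambda)*DtD) \ (-J'*fcur)
    return p + delta_x
end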


# Force selected components of the parameter vector to be non-negative.
# The vector is treated as consecutive blocks of length 2n_state; within each
# block the second n_state entries are replaced by their absolute values
# (the name suggests these are precision-like parameters that must stay >= 0).
function saturatePrecision(x, n_state)
    for i = 1:2n_state:length(x)
        idx = i+n_state:i+2n_state-1
        x[idx] = abs(x[idx])
    end
    return x
end
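
# ---------------------------------------------------------------------------
# Usage sketch (hypothetical example, not called anywhere in this file):
# fit the model y = p1*exp(-p2*t) to synthetic data by passing the residual
# and its analytic Jacobian to levenberg_marquardt. The model, data, and the
# name `example_fit` are illustrative assumptions, not part of the solver API.
# ---------------------------------------------------------------------------
function example_fit()
    t = collect(0.0:0.5:5.0)
    y = 2.0*exp(-0.7*t)                               # noise-free synthetic data
    f(p) = p[1]*exp(-p[2]*t) - y                      # residual vector of length m
    g(p) = hcat(exp(-p[2]*t), -p[1]*t.*exp(-p[2]*t))  # m x 2 Jacobian of f
    # with the default n_state=1, saturatePrecision keeps p[2] non-negative,
    # which is consistent with a decay-rate parameter
    return levenberg_marquardt(f, g, [1.0, 1.0]; show_trace=true)
end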