From 06300846a7781192dc208cf162bf2bb9c28d8e35 Mon Sep 17 00:00:00 2001
From: Johan Gronqvist <johan.gronqvist@control.lth.se>
Date: Wed, 20 Nov 2024 17:14:03 +0100
Subject: [PATCH] Work on ToolboxExamples. Bias fix, more training, new
 examples

---
 examples/Manifest.toml      |   6 +-
 examples/Project.toml       |   1 +
 examples/ToolboxExamples.jl | 362 ++++++++++++++++++++++++++++++++++--
 src/NeuralNetworks.jl       |  30 ++-
 4 files changed, 381 insertions(+), 18 deletions(-)

diff --git a/examples/Manifest.toml b/examples/Manifest.toml
index 80529a6..e270839 100644
--- a/examples/Manifest.toml
+++ b/examples/Manifest.toml
@@ -2,7 +2,7 @@
 julia_version = "1.10.5"
 manifest_format = "2.0"
-project_hash = "61120cda448e8e4205b4a56b75fb97e2678121f2"
+project_hash = "46766fae1a150e893edd42270999623a41bcac33"
 
 [[deps.ADTypes]]
 git-tree-sha1 = "99a6f5d0ce1c7c6afdb759daa30226f71c54f6b0"
@@ -1236,9 +1236,9 @@
 uuid = "81d17ec3-03a1-5e46-b53e-bddc35a13473"
 version = "3.0.1+0"
 
 [[deps.LaTeXStrings]]
-git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec"
+git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
 uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
-version = "1.3.1"
+version = "1.4.0"
 
 [[deps.LambertW]]
 git-tree-sha1 = "c5ffc834de5d61d00d2b0e18c96267cffc21f648"
diff --git a/examples/Project.toml b/examples/Project.toml
index caf42bd..cf817bf 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -7,6 +7,7 @@ Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2"
 IQC = "03cba634-6a1b-4966-83c4-4a7cd72c5d48"
 JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
+LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
 MosekTools = "1ec41992-ff65-5c91-ac43-2df89e9693a4"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
diff --git a/examples/ToolboxExamples.jl b/examples/ToolboxExamples.jl
index 28472f1..6743834 100644
--- a/examples/ToolboxExamples.jl
+++ b/examples/ToolboxExamples.jl
@@ -29,6 +29,10 @@ using LinearAlgebra
 using Plots
 using ProgressMeter
 import Random
+using LaTeXStrings
+
+# %%
+ProgressMeter.ijulia_behavior(:clear)
 
 # %%
 import MosekTools
@@ -1130,6 +1134,9 @@ cost = γ²_x + 100*γ²_1
 
 ## value(cost) = 52.757927422444304
 
+# %%
+[ layer.bias for layer in nn1.layers ]
+
 # %%
 vcat(vec.(value.(IQC.NeuralNetworks.params(nn2_IQC)))...)
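+
+# %% [markdown]
+# The cell above flattens all parameter arrays into one vector. A minimal
+# self-contained sketch of the same `vcat(vec.(...)...)` pattern on a plain
+# Flux chain (`demo` is an illustrative name, not part of the toolbox):
+
+# %%
+let demo = Flux.Chain(Flux.Dense(1, 4, Flux.relu), Flux.Dense(4, 1)) |> f64
+    # Collect the weight and bias arrays, flatten each, and concatenate.
+    θ = vcat(vec.(collect(Flux.params(demo)))...)
+    length(θ)  # 4 + 4 + 4 + 1 = 13 parameters
+end
+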
 pgfplotsx()
 
 # %%
+plts_a = []
+
 for scale in [ 1, .1, .01 ]
     xs = -scale:scale/100:scale
     Δ = sqrt.(value(γ²_x) .* xs .^ 2 .+ value(γ²_1))
-    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = "NN")
-    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, label = "Bound")
+    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = L"$\mathrm{NN}(f(x))$")
+    plot!(xs, xs, c = :green, label = L"$x$")
+    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, line = nothing, label = "Bound")
     plot!(legend = :topleft)
     savefig("NN_bound_$(scale).tikz")
     savefig("NN_bound_$(scale).tex")
     savefig("NN_bound_$(scale).pdf")
+    push!(plts_a, plot!())
     plot!() |> display
 end
 
@@ -1177,19 +1188,27 @@
 ## value(cost) = 2.3892430883958777
 
 # %%
-for scale in [ 1, .1, .01 ]
+plts_b = []
 
+for scale in [ 1, .1, .01 ]
     xs = -scale:scale/100:scale
     Δ = sqrt.(value(γ²_x) .* xs .^ 2 .+ value(γ²_1))
-    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = "NN")
-    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, label = "Bound")
+    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = L"$\mathrm{NN}(f(x))$")
+    plot!(xs, xs, c = :green, label = L"$x$")
+    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, line = nothing, label = "Bound")
     plot!(legend = :topleft)
     savefig("trained_NN_bound_$(scale).tikz")
     savefig("trained_NN_bound_$(scale).tex")
     savefig("trained_NN_bound_$(scale).pdf")
+    push!(plts_b, plot!())
     plot!() |> display
 end
 
 # %%
+plot(plts_a[1], plts_a[2], plts_b[1], plts_b[2], layout = (2,2))
+savefig("merged_NN_bound.tikz")
+savefig("merged_NN_bound.tex")
+savefig("merged_NN_bound.pdf")
+plot!()
+
+# %%
 ps = IQC.NeuralNetworks.params(nn2_IQC)
 
@@ -1248,6 +1267,9 @@ value.(ps)
 
 # %% [markdown]
 # **Plan:** Use an NN controller. Train to stability without bias terms, then train on a gain bound with a constant term.
 
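+# %% [markdown]
+# The plant used in the controller examples below is the unstable first-order
+# system x⁺ = 1.1x + u (see the `@assume` blocks). A minimal rollout sketch for
+# sanity-checking a controller candidate; `rollout` is a hypothetical helper,
+# not a toolbox function:
+
+# %%
+# Simulate the closed loop x⁺ = 1.1x + nn(x) for a number of steps from x0.
+function rollout(nn, x0; steps = 50)
+    xs = [x0]
+    for _ in 1:steps
+        push!(xs, 1.1*xs[end] + only(nn([xs[end]])))
+    end
+    xs
+end
+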
+# %%
+z = ControlSystems.tf("z", Δt)
+
 # %% [markdown]
 # ## Affine
@@ -1341,6 +1363,9 @@ value(k), value(b)
 # %% [markdown]
 # ## NN
 
+# %%
+pgfplotsx()
+
 # %%
 # A difficult seed, used stepsize 2e-4
 Random.seed!(17)
@@ -1359,11 +1384,19 @@ end
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_a = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nn_controller_100.tikz")
+savefig("initial_nn_controller_100.tex")
+savefig("initial_nn_controller_100.pdf")
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_b = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nn_controller_1.tikz")
+savefig("initial_nn_controller_1.tex")
+savefig("initial_nn_controller_1.pdf")
+plot!()
 
 # %%
 sys_cr = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
@@ -1414,9 +1447,7 @@ cost = γ²_w + 100*γ²_1
 convergence_rate(sys_cr, x_cr)
 
 # %%
-@assert solve!(sys_gb)
-
-value(cost)
+solve!(sys_gb)
 
 # %%
 ps = params(nn_IQC)
@@ -1491,11 +1522,20 @@ cs[end]
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_c = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("conv_rate_trained_nn_controller_100.tikz")
+savefig("conv_rate_trained_nn_controller_100.tex")
+savefig("conv_rate_trained_nn_controller_100.pdf")
+
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_d = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("conv_rate_trained_nn_controller_1.tikz")
+savefig("conv_rate_trained_nn_controller_1.tex")
+savefig("conv_rate_trained_nn_controller_1.pdf")
+plot!()
 
 # %%
 value(cost)
@@ -1542,11 +1582,233 @@ cs[end]
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_e = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nn_controller_100.tikz")
+savefig("trained_nn_controller_100.tex")
+savefig("trained_nn_controller_100.pdf")
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_f = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nn_controller_1.tikz")
+savefig("trained_nn_controller_1.tex")
+savefig("trained_nn_controller_1.pdf")
+plot!()
+
+# %%
+plot!(plt_a, legend = nothing)
+plot!(plt_e, legend = nothing)
+
+plot(plt_a, plt_b, plt_e, plt_f, layout = (2,2))
+savefig("nn_controller_merged.tikz")
+savefig("nn_controller_merged.pdf")
+savefig("nn_controller_merged.tex")
+plot!()
+
+# %%
+value(cost)
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux([x])) for x in xs ]
+
+# %%
+ps
+
+# %%
+plot(rs, xaxis = "Gradient Steps", yaxis = "Convergence rate")
+
+# %%
+plot(1.0 ./ rs, xaxis = "Gradient Steps", yaxis = "Growth rate")
+
+# %%
+plot(cs, marker = :dot, line = nothing, xaxis = "Gradient Steps", yaxis = "Cost")
+
+# %%
+plot!(yaxis = :log10)
+
+# %%
+value(γ²_1)
+
+# %%
+value(γ²_w)
+
+# %% [markdown]
+# ## With f
+
+# %%
+# A difficult seed, used stepsize 2e-4
+Random.seed!(17)
+# A simpler seed, used stepsize 1e-3
+Random.seed!(41)
+# This seems promising
+Random.seed!(13)
+
+# %%
+nn_flux = Flux.Chain(Flux.Dense(1, 4, IQC.NeuralNetworks.Relu()), Flux.Dense(4, 1, IQC.NeuralNetworks.Identity())) |> f64
+
+# %%
+for ℓ in nn_flux.layers
+    ℓ.bias .= randn(length(ℓ.bias))
+end
+
+# %%
+xs = range(-100, 100, length=100)
+plt_a = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nnf_controller_100.tikz")
+savefig("initial_nnf_controller_100.tex")
+savefig("initial_nnf_controller_100.pdf")
+plot!()
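+
+# %% [markdown]
+# The tikz/tex/pdf `savefig` triple recurs in every plotting cell. A small
+# hypothetical helper (shown for reference only; the cells above spell it out):
+
+# %%
+# Save the current plot as basename.tikz, basename.tex and basename.pdf.
+save_all(basename) = foreach(ext -> savefig("$basename.$ext"), ("tikz", "tex", "pdf"))
+# e.g. save_all("initial_nnf_controller_100")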
+
+# %%
+xs = range(-1, 1, length=100)
+plt_b = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nnf_controller_1.tikz")
+savefig("initial_nnf_controller_1.tex")
+savefig("initial_nnf_controller_1.pdf")
+plot!()
+
+# %%
+sys_cr = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
+
+@signals sys_cr x_cr y_cr u_cr
+
+@NN sys_cr Absolute nn_IQC=nn_flux nnf_IQC=nn1 nobias begin
+    QuadEq
+    CombLin
+    RateLimit
+end
+
+@assume sys_cr begin
+    x_cr = (1/z)*(1.1x_cr + u_cr)
+    u_cr = nn_IQC(y_cr)
+    y_cr = nnf_IQC(x_cr)
+end
+
+
+# %%
+sys_gb = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
+
+@signals sys_gb x_gb y_gb u_gb w
+
+@NN sys_gb Absolute nn_IQC_bias=nn_flux nnf_IQC_bias=nn1 begin
+    QuadEq
+    CombLin
+    RateLimit
+end
+
+@assume sys_gb begin
+    x_gb = (1/z)*(1.1x_gb + u_gb)
+    y_gb = nnf_IQC_bias(x_gb)
+    u_gb = nn_IQC_bias(y_gb + w)
+end
+
+@variables sys_gb begin
+    γ²_w >= 0
+    γ²_1 >= 0
+end
+
+cost = γ²_w + 100*γ²_1
+
+@objective sys_gb Min cost
+
+@require sys_gb x_gb^2 <= γ²_w*w^2 + γ²_1*sys_gb.one^2
+
+
+# %%
+convergence_rate(sys_cr, x_cr)
+
+# %%
+solve!(sys_gb)
+
+# %%
+ps = params(nn_IQC)
+rs = []
+
+# %%
+progress = ProgressUnknown(desc="Gradient Steps:")
+
+while !solve!(sys_gb, silent = true)
+    (; conv_rate, grads ) = convergence_rate_gradient(sys_cr, x_cr, ps)
+    push!(rs, conv_rate)
+
+    step = 1e-2/norm(grads[p] for p in ps)
+
+    for p in ps
+        @set p = p + step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(conv_rate, sigdigits = 3))])
+end
+finish!(progress)
+
+rs[end]
+
+# %%
+plot(rs)
+
+# %%
+xs = range(-100, 100, length=100)
+plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ])
+
+# %%
+xs = range(-1, 1, length=100)
+plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ])
+
+# %%
+solve!(sys_gb)
+
+# %%
+ps = params(nn_IQC_bias)
+cs = []
+
+# %%
+ps
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux(nn1([x]))) for x in xs ]
+
+# %%
+n_steps = 1000
+progress = Progress(n_steps)
+parameter_change = 1e-3
+
+for _ in 1:n_steps
+    if solve!(sys_gb, silent=true)
+        push!(cs, value(cost))
+    else
+        push!(cs, missing)
+    end
+    push!(rs, convergence_rate(sys_cr, x_cr))
+    grads = gradient(sys_gb, ps)
+    step = parameter_change/norm(grads[p] for p in ps)
+
+    for p in ps
+        @set p = value(p) - step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(rs[end], sigdigits=3)), (:gain_bound, round(cs[end], sigdigits = 3))])
+end
+
+# %%
+cs[end]
+
+# %%
+xs = range(-100, 100, length=100)
+plt_c = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_a_nnf_controller_100.tikz")
+savefig("trained_a_nnf_controller_100.tex")
+savefig("trained_a_nnf_controller_100.pdf")
+
+plot!()
+
+# %%
+xs = range(-1, 1, length=100)
+plt_d = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("conv_rate_trained_a_nnf_controller_1.tikz")
+savefig("conv_rate_trained_a_nnf_controller_1.tex")
+savefig("conv_rate_trained_a_nnf_controller_1.pdf")
+plot!()
 
 # %%
 value(cost)
 
@@ -1567,6 +1829,78 @@ plot(cs, marker = :dot, line = nothing)
 # %%
 plot!(yaxis = :log10)
 
+# %%
+n_steps = 3000
+progress = Progress(n_steps)
+parameter_change = 1e-3
+
+for _ in 1:n_steps
+    if solve!(sys_gb, silent=true)
+        push!(cs, value(cost))
+    else
+        push!(cs, missing)
+    end
+    push!(rs, convergence_rate(sys_cr, x_cr))
+    grads = gradient(sys_gb, ps)
+    step = parameter_change/norm(grads[p] for p in ps)
+
+    for p in ps
+        @set p = value(p) - step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(rs[end], sigdigits=3)), (:gain_bound, round(cs[end], sigdigits = 3))])
+end
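+
+# %% [markdown]
+# The loop above takes normalized gradient steps: each update moves the
+# parameters a fixed distance `parameter_change` in parameter space, whatever
+# the gradient magnitude. The same update in isolation, on plain vectors
+# (`θ` and `g` are illustrative names, not toolbox state):
+
+# %%
+# One descent step of length Δ along the direction of g.
+normalized_step(θ, g; Δ = 1e-3) = θ .- (Δ/norm(g)) .* g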
+
+# %%
+cs[end]
+
+# %%
+xs = range(-100, 100, length=100)
+plt_e = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nnf_controller_100.tikz")
+savefig("trained_nnf_controller_100.tex")
+savefig("trained_nnf_controller_100.pdf")
+plot!()
+
+# %%
+xs = range(-1, 1, length=100)
+plt_f = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nnf_controller_1.tikz")
+savefig("trained_nnf_controller_1.tex")
+savefig("trained_nnf_controller_1.pdf")
+plot!()
+
+# %%
+plot!(plt_a, legend = nothing)
+plot!(plt_e, legend = nothing)
+
+plot(plt_a, plt_b, plt_e, plt_f, layout = (2,2))
+savefig("nnf_controller_merged.tikz")
+savefig("nnf_controller_merged.pdf")
+savefig("nnf_controller_merged.tex")
+plot!()
+
+# %%
+value(cost)
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux([x])) for x in xs ]
+
+# %%
+ps
+
+# %%
+plot(rs, xaxis = "Gradient Steps", yaxis = "Convergence rate")
+
+# %%
+plot(1.0 ./ rs, xaxis = "Gradient Steps", yaxis = "Growth rate")
+
+# %%
+plot(cs, marker = :dot, line = nothing, xaxis = "Gradient Steps", yaxis = "Cost")
+
+# %%
+plot!(yaxis = :log10)
+
 # %%
 value(γ²_1)
 
diff --git a/src/NeuralNetworks.jl b/src/NeuralNetworks.jl
index ad332fa..88ace69 100644
--- a/src/NeuralNetworks.jl
+++ b/src/NeuralNetworks.jl
@@ -117,6 +117,25 @@ struct Chain
     refs :: Union{Vector{Float64}, Nothing}
 end
 
+function layer_has_bias(layer)
+    if hasfield(typeof(layer), :bias)
+        if layer.bias isa Bool
+            layer.bias
+        elseif layer.bias isa AbstractVector
+            !isempty(layer.bias)
+        else
+            @warn "bias not recognized"
+            dump(layer.bias)
+            true # Be conservative
+        end
+    else
+        false
+    end
+end
+
+has_bias(chains) = any([ layer_has_bias(ℓ) for chain in chains for ℓ in chain.flux_chain.layers ])
+
+
 function Chain(sys :: System, flux_chain :: Flux.Chain; refs = nothing, bias = true)
 
     input_size = size(flux_chain.layers[begin].weight, 2)
@@ -209,10 +228,19 @@ macro NN(sys, setting, args...)
             end
         end
         ::Relu => begin
-
+            ηs = [ η for (η, ζ) in inout]
             ζs = [ ζ for (η, ζ) in inout]
 
+            if has_bias(chains) && !($(nobias))
+                @warn "Using sys.one as a chain has bias terms and nobias is not set"
+
+                push!(ηs, $(esc(sys)).one)
+                push!(ζs, $(esc(sys)).one)
+            else
+                @warn "Skipping sys.one as no chain has bias terms or nobias is set"
+            end
+
             if $(esc(sys)) isa StaticSystem
                 IQCs.add_selection_of_qcs($(esc(sys)), :Absolute, ηs, ζs, nothing, $choices)
             else
-- 
GitLab
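
A usage note on the new bias detection (placed below the trailer, where git am
ignores it): assuming Flux's `Dense` conventions, where `bias = false` is stored
as the Bool `false` and a trainable bias is a vector, `layer_has_bias` behaves as

    julia> layer_has_bias(Flux.Dense(2, 3))
    true

    julia> layer_has_bias(Flux.Dense(2, 3; bias = false))
    false

and `has_bias` reduces this over all layers of all chains with `any`.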