From 06300846a7781192dc208cf162bf2bb9c28d8e35 Mon Sep 17 00:00:00 2001
From: Johan Gronqvist <johan.gronqvist@control.lth.se>
Date: Wed, 20 Nov 2024 17:14:03 +0100
Subject: [PATCH] Work on ToolboxExamples. Bias fix, more training, new
 examples

---
 examples/Manifest.toml      |   6 +-
 examples/Project.toml       |   1 +
 examples/ToolboxExamples.jl | 362 ++++++++++++++++++++++++++++++++++--
 src/NeuralNetworks.jl       |  30 ++-
 4 files changed, 381 insertions(+), 18 deletions(-)

diff --git a/examples/Manifest.toml b/examples/Manifest.toml
index 80529a6..e270839 100644
--- a/examples/Manifest.toml
+++ b/examples/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.5"
 manifest_format = "2.0"
-project_hash = "61120cda448e8e4205b4a56b75fb97e2678121f2"
+project_hash = "46766fae1a150e893edd42270999623a41bcac33"
 
 [[deps.ADTypes]]
 git-tree-sha1 = "99a6f5d0ce1c7c6afdb759daa30226f71c54f6b0"
@@ -1236,9 +1236,9 @@ uuid = "81d17ec3-03a1-5e46-b53e-bddc35a13473"
 version = "3.0.1+0"
 
 [[deps.LaTeXStrings]]
-git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec"
+git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
 uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
-version = "1.3.1"
+version = "1.4.0"
 
 [[deps.LambertW]]
 git-tree-sha1 = "c5ffc834de5d61d00d2b0e18c96267cffc21f648"
diff --git a/examples/Project.toml b/examples/Project.toml
index caf42bd..cf817bf 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -7,6 +7,7 @@ Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2"
 IQC = "03cba634-6a1b-4966-83c4-4a7cd72c5d48"
 JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
+LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
 MosekTools = "1ec41992-ff65-5c91-ac43-2df89e9693a4"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
diff --git a/examples/ToolboxExamples.jl b/examples/ToolboxExamples.jl
index 28472f1..6743834 100644
--- a/examples/ToolboxExamples.jl
+++ b/examples/ToolboxExamples.jl
@@ -29,6 +29,10 @@ using LinearAlgebra
 using Plots
 using ProgressMeter
 import Random
+using LaTeXStrings
+
+# %%
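+# Overwrite the progress-bar output in IJulia instead of appending a new line per update.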
+ProgressMeter.ijulia_behavior(:clear)
 
 # %%
 import MosekTools
@@ -1130,6 +1134,9 @@ cost = γ²_x + 100*γ²_1
 ## value(cost) = 52.757927422444304
 
 
+# %%
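+# Inspect the bias terms of each layer in nn1.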
+[ layer.bias for layer in nn1.layers ]
+
 # %%
 vcat(vec.(value.(IQC.NeuralNetworks.params(nn2_IQC)))...)
 
@@ -1143,15 +1150,19 @@ vcat(vec.(value.(IQC.NeuralNetworks.params(nn2_IQC)))...)
 pgfplotsx()
 
 # %%
+plts_a = []
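+# Collect the per-scale plots so they can be merged into one figure below.
+# Each panel shows the composed network nn2(nn1(x)) against the identity line,
+# with the certified bound √(γ²_x·x² + γ²_1) drawn as a ribbon.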
+
 for scale in [ 1, .1, .01 ] 
     xs = -scale:scale/100:scale
     Δ = sqrt.(value(γ²_x) .* xs .^ 2 .+ value(γ²_1))
-    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = "NN")
-    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, label = "Bound")
+    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = L"$\mathrm{NN}(f(x))$")
+    plot!(xs, xs, c = :green, label = L"$x$")
+    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, line = nothing, label = "Bound")
     plot!(legend = :topleft)
     savefig("NN_bound_$(scale).tikz")
     savefig("NN_bound_$(scale).tex")
     savefig("NN_bound_$(scale).pdf")
+    push!(plts_a, plot!())
     plot!() |> display
 end
 
@@ -1177,19 +1188,27 @@ end
 ## value(cost) = 2.3892430883958777
 
 # %%
-for scale in [ 1, .1, .01 ] 
+plts_b = []
+for scale in [ 1, .1, .01 ]
     xs = -scale:scale/100:scale
     Δ = sqrt.(value(γ²_x) .* xs .^ 2 .+ value(γ²_1))
-    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = "NN")
-    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, label = "Bound")
+    plot(xs, [ [x] |> nn1 |> nn2 |> only for x in xs ], c = :red, label = L"$\mathrm{NN}(f(x))$")
+    plot!(xs, xs, c = :green, label = L"$x$")
+    plot!(xs, xs, ribbon = Δ, c = :green, fillalpha = .2, line = nothing, label = "Bound")
     plot!(legend = :topleft)
     savefig("trained_NN_bound_$(scale).tikz")
     savefig("trained_NN_bound_$(scale).tex")
     savefig("trained_NN_bound_$(scale).pdf")
+    push!(plts_b, plot!())
     plot!() |> display
 end
 
 # %%
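+# Merge the scale-1 and scale-0.1 panels before (plts_a) and after (plts_b)
+# the training step above into a single 2×2 figure.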
+plot(plts_a[1], plts_a[2], plts_b[1], plts_b[2], layout = (2,2))
+savefig("merged_NN_bund.tikz")
+savefig("merged_NN_bund.tex")
+savefig("merged_NN_bund.pdf")
+plot!()
 
 # %%
 ps = IQC.NeuralNetworks.params(nn2_IQC)
@@ -1248,6 +1267,9 @@ value.(ps)
 # %% [markdown]
 # **Plan:** Use an NN controller. Train to stability without bias terms, then train on gain bound with constant term.
 
+# %%
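+# Discrete-time shift operator used in the plant updates below, e.g. x = (1/z)*(1.1x + u).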
+z = ControlSystems.tf("z", Δt)
+
 # %% [markdown]
 # ## Affine
 
@@ -1341,6 +1363,9 @@ value(k), value(b)
 # %% [markdown]
 # ## NN
 
+# %%
+pgfplotsx()
+
 # %%
 # A difficult seed, used stepsize 2e-4
 Random.seed!(17)
@@ -1359,11 +1384,19 @@ end
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_a = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nn_controller_100.tikz")
+savefig("initial_nn_controller_100.tex")
+savefig("initial_nn_controller_100.pdf")
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_b = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nn_controller_1.tikz")
+savefig("initial_nn_controller_1.tex")
+savefig("initial_nn_controller_1.pdf")
+plot!()
 
 # %%
 sys_cr = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
@@ -1414,9 +1447,7 @@ cost = γ²_w + 100*γ²_1
 convergence_rate(sys_cr, x_cr)
 
 # %%
-@assert solve!(sys_gb)
-
-value(cost)
+solve!(sys_gb)
 
 # %%
 ps = params(nn_IQC)
@@ -1491,11 +1522,20 @@ cs[end]
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_c = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nn_controller_100.tikz")
+savefig("trained_nn_controller_100.tex")
+savefig("trained_nn_controller_100.pdf")
+
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_d = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("conv_rate_trained_nn_controller_1.tikz")
+savefig("conv_rate_trained_nn_controller_1.tex")
+savefig("conv_rate_trained_nn_controller_1.pdf")
+plot!()
 
 # %%
 value(cost)
@@ -1542,11 +1582,233 @@ cs[end]
 
 # %%
 xs = range(-100, 100, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_e = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nn_controller_100.tikz")
+savefig("trained_nn_controller_100.tex")
+savefig("trained_nn_controller_100.pdf")
+plot!()
 
 # %%
 xs = range(-1, 1, length=100)
-plot(xs, [ only(nn_flux([x])) for x in xs ])
+plt_f = plot(xs, [ only(nn_flux([x])) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nn_controller_1.tikz")
+savefig("trained_nn_controller_1.tex")
+savefig("trained_nn_controller_1.pdf")
+plot!()
+
+# %%
+plot!(plt_a, legend = nothing)
+plot!(plt_e, legend = nothing)
+
+plot(plt_a, plt_b, plt_e, plt_f, layout = (2,2))
+savefig("nn_controller_merged.tikz")
+savefig("nn_controller_merged.pdf")
+savefig("nn_controller_merged.tex")
+plot!()
+
+# %%
+value(cost)
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux([x])) for x in xs ]
+
+# %%
+ps
+
+# %%
+plot(rs, xaxis = "Gradient Steps", yaxis = "Convergence rate")
+
+# %%
+plot(1.0 ./ rs, xaxis = "Gradient Steps", yaxis = "Growth rate")
+
+# %%
+plot(cs, marker = :dot, line = nothing, xaxis = "Gradient Steps", yaxis = "Cost")
+
+# %%
+plot!(yaxis = :log10)
+
+# %%
+value(γ²_1)
+
+# %%
+value(γ²_w)
+
+# %% [markdown]
+# ## With f
+
+# %%
+# A difficult seed, used stepsize 2e-4
+Random.seed!(17)
+# A simpler seed, used stepsize 1e-3
+Random.seed!(41)
+# This seems promising
+Random.seed!(13)
+
+# %%
+nn_flux = Flux.Chain(Flux.Dense(1, 4, IQC.NeuralNetworks.Relu()), Flux.Dense(4, 1, IQC.NeuralNetworks.Identity())) |> f64
+
+# %%
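+# Flux initializes Dense bias vectors to zero; randomize them so the controller
+# starts with nonzero bias terms.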
+for ℓ in nn_flux.layers
+    ℓ.bias .= randn(length(ℓ.bias))
+end
+
+# %%
+xs = range(-100, 100, length=100)
+plt_a = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nnf_controller_100.tikz")
+savefig("initial_nnf_controller_100.tex")
+savefig("initial_nnf_controller_100.pdf")
+plot!()
+
+# %%
+xs = range(-1, 1, length=100)
+plt_b = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (initial)")
+savefig("initial_nnf_controller_1.tikz")
+savefig("initial_nnf_controller_1.tex")
+savefig("initial_nnf_controller_1.pdf")
+plot!()
+
+# %%
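+# Convergence-rate setup: plant x⁺ = 1.1x + u with NN controller u = nn_IQC(y)
+# acting on the nonlinear measurement y = nnf_IQC(x) = nn1(x); the nobias flag
+# leaves the constant signal sys.one out of the quadratic constraints.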
+sys_cr = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
+
+@signals sys_cr x_cr y_cr u_cr
+
+@NN sys_cr Absolute nn_IQC=nn_flux nnf_IQC=nn1 nobias begin
+    QuadEq
+    CombLin
+    RateLimit
+end
+
+@assume sys_cr begin
+    x_cr = (1/z)*(1.1x_cr + u_cr)
+    u_cr = nn_IQC(y_cr)
+    y_cr = nnf_IQC(x_cr)
+end
+
+
+# %%
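+# Gain-bound setup: the same loop with a disturbance w added to the controller input;
+# minimize γ²_w + 100·γ²_1 subject to x_gb² ≤ γ²_w·w² + γ²_1.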
+sys_gb = DiscreteTimeSystem(opt = MosekTools.Optimizer, sampling_time = 1/30)
+
+@signals sys_gb x_gb y_gb u_gb w
+
+@NN sys_gb Absolute nn_IQC_bias=nn_flux nnf_IQC_bias=nn1 begin
+    QuadEq
+    CombLin
+    RateLimit
+end
+
+@assume sys_gb begin
+    x_gb = (1/z)*(1.1x_gb + u_gb)
+    y_gb = nnf_IQC_bias(x_gb)
+    u_gb = nn_IQC_bias(y_gb + w)
+end
+
+@variables sys_gb begin
+    γ²_w >= 0
+    γ²_1 >= 0
+end
+
+cost = γ²_w + 100*γ²_1
+
+@objective sys_gb Min cost
+
+@require sys_gb x_gb^2 <= γ²_w*w^2  + γ²_1*sys_gb.one^2
+
+
+# %%
+convergence_rate(sys_cr, x_cr)
+
+# %%
+solve!(sys_gb)
+
+# %%
+ps = params(nn_IQC)
+rs = []
+
+# %%
+progress = ProgressUnknown(desc="Gradient Steps:")
+
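+# Until the gain-bound problem becomes feasible, take normalized gradient steps
+# that increase the closed-loop convergence rate.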
+while !solve!(sys_gb, silent = true)
+    (; conv_rate, grads ) = convergence_rate_gradient(sys_cr, x_cr, ps)
+    push!(rs, conv_rate)
+    
+    step = 1e-2/norm(grads[p] for p in ps)
+    
+    for p in ps
+        @set p = p + step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(conv_rate, sigdigits = 3))])
+end
+finish!(progress)
+
+rs[end]
+
+# %%
+plot(rs)
+
+# %%
+xs = range(-100, 100, length=100)
+plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ])
+
+# %%
+xs = range(-1, 1, length=100)
+plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ])
+
+# %%
+solve!(sys_gb)
+
+# %%
+ps = params(nn_IQC_bias)
+cs = []
+
+# %%
+ps
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux(nn1([x]))) for x in xs ]
+
+# %%
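+# Gain-bound training: normalized gradient steps on the cost, recording the cost
+# (missing whenever the solve fails) and the convergence rate at every step.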
+n_steps = 1000
+progress = Progress(n_steps)
+parameter_change = 1e-3
+
+for _ in 1:n_steps
+    if solve!(sys_gb, silent=true)
+        push!(cs, value(cost))
+    else
+        push!(cs, missing)
+    end
+    push!(rs, convergence_rate(sys_cr, x_cr))
+    grads = gradient(sys_gb, ps)
+    step = parameter_change/norm(grads[p] for p in ps)
+    
+    for p in ps
+        @set p = value(p) - step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(rs[end], sigdigits=3)), (:gain_bound, round(cs[end], sigdigits = 3))])
+end
+
+# %%
+cs[end]
+
+# %%
+xs = range(-100, 100, length=100)
+plt_c = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_a_nnf_controller_100.tikz")
+savefig("trained_a_nnf_controller_100.tex")
+savefig("trained_a_nnf_controller_100.pdf")
+
+plot!()
+
+# %%
+xs = range(-1, 1, length=100)
+plt_d = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("conv_rate_trained_a_nnf_controller_1.tikz")
+savefig("conv_rate_trained_a_nnf_controller_1.tex")
+savefig("conv_rate_trained_a_nnf_controller_1.pdf")
+plot!()
 
 # %%
 value(cost)
@@ -1567,6 +1829,78 @@ plot(cs, marker = :dot, line = nothing)
 # %%
 plot!(yaxis = :log10)
 
+# %%
+n_steps = 3000
+progress = Progress(n_steps)
+parameter_change = 1e-3
+
+for _ in 1:n_steps
+    if solve!(sys_gb, silent=true)
+        push!(cs, value(cost))
+    else
+        push!(cs, missing)
+    end
+    push!(rs, convergence_rate(sys_cr, x_cr))
+    grads = gradient(sys_gb, ps)
+    step = parameter_change/norm(grads[p] for p in ps)
+    
+    for p in ps
+        @set p = value(p) - step*grads[p]
+    end
+    next!(progress, showvalues = [(:conv_rate, round(rs[end], sigdigits=3)), (:gain_bound, round(cs[end], sigdigits = 3))])
+end
+
+# %%
+cs[end]
+
+# %%
+xs = range(-100, 100, length=100)
+plt_e = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nnf_controller_100.tikz")
+savefig("trained_nnf_controller_100.tex")
+savefig("trained_nnf_controller_100.pdf")
+plot!()
+
+# %%
+xs = range(-1, 1, length=100)
+plt_f = plot(xs, [ only(nn_flux(nn1([x]))) for x in xs ], label = L"$\mathrm{NN}(x)$ (trained)")
+savefig("trained_nnf_controller_1.tikz")
+savefig("trained_nnf_controller_1.tex")
+savefig("trained_nnf_controller_1.pdf")
+plot!()
+
+# %%
+plot!(plt_a, legend = nothing)
+plot!(plt_e, legend = nothing)
+
+plot(plt_a, plt_b, plt_e, plt_f, layout = (2,2))
+savefig("nnf_controller_merged.tikz")
+savefig("nnf_controller_merged.pdf")
+savefig("nnf_controller_merged.tex")
+plot!()
+
+# %%
+value(cost)
+
+# %%
+xs = -2:1/2:2
+[ only(nn_flux(nn1([x]))) for x in xs ]
+
+# %%
+ps
+
+# %%
+plot(rs, xaxis = "Gradient Steps", yaxis = "Convergence rate")
+
+# %%
+plot(1.0 ./ rs, xaxis = "Gradient Steps", yaxis = "Growth rate")
+
+# %%
+plot(cs, marker = :dot, line = nothing, xaxis = "Gradient Steps", yaxis = "Cost")
+
+# %%
+plot!(yaxis = :log10)
+
 # %%
 value(γ²_1)
 
diff --git a/src/NeuralNetworks.jl b/src/NeuralNetworks.jl
index ad332fa..88ace69 100644
--- a/src/NeuralNetworks.jl
+++ b/src/NeuralNetworks.jl
@@ -117,6 +117,25 @@ struct Chain
     refs :: Union{Vector{Float64}, Nothing}
 end
 
+function layer_has_bias(layer)
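+    # Flux stores `bias = false` (a Bool) when a layer is built without bias,
+    # and a bias vector otherwise; anything unrecognized is treated as biased.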
+    if hasfield(typeof(layer), :bias)
+        if layer.bias isa Bool
+            layer.bias
+        elseif layer.bias isa AbstractVector
+            !isempty(layer.bias)
+        else
+            @warn "bias not recognized"
+            dump(layer.bias)
+            true # Be conservative
+        end
+    else
+        false
+    end
+end
+
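+# A network has bias terms if any layer of any chain does.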
+has_bias(chains) = any(layer_has_bias(ℓ) for chain in chains for ℓ in chain.flux_chain.layers)
+
+
 function Chain(sys :: System, flux_chain :: Flux.Chain; refs = nothing, bias = true)
     input_size = size(flux_chain.layers[begin].weight, 2)
 
@@ -209,10 +228,19 @@ macro NN(sys, setting, args...)
                                 end
                             end
                             ::Relu => begin
-    
+                                
                                 ηs = [ η for (η, ζ) in inout]
                                 ζs = [ ζ for (η, ζ) in inout]
 
+                                if has_bias(chains) && !($(nobias))
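+                                    # Bias terms enter as constant offsets, so include the constant
+                                    # signal sys.one among the quadratic-constraint signals.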
+                                    @warn "Using sys.one as a chain has bias terms and nobias is not set"
+
+                                    push!(ηs, $(esc(sys)).one)
+                                    push!(ζs, $(esc(sys)).one)
+                                else
+                                    @warn "Skipping sys.one as no chain has bias terms or nobias is set"
+                                end
+
                                 if $(esc(sys)) isa StaticSystem
                                     IQCs.add_selection_of_qcs($(esc(sys)), :Absolute, ηs, ζs, nothing, $choices)
                                 else
-- 
GitLab