updates

e117c2da · Fredrik Bagge Carlson · 4c0c7e32 · e117c2da · e117c2da · e117c2da
Commit e117c2da authored Dec 12, 2017 by Fredrik Bagge Carlson
--- a/HRL/bibtexfile.bib
+++ b/HRL/bibtexfile.bib
+@article{DQN,
+  title={Human-level control through deep reinforcement learning},
+  author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
+  journal={Nature},
+  volume={518},
+  number={7540},
+  pages={529--533},
+  year={2015},
+  publisher={Nature Research}
+}
+@article{lillicrap2015continuous,
+  title={Continuous control with deep reinforcement learning},
+  author={Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
+  journal={arXiv preprint arXiv:1509.02971},
+  year={2015}
+}
--- a/TDAlambda/bibtexfile.bib
+++ b/TDAlambda/bibtexfile.bib
+@article{DQN,
+  title={Human-level control through deep reinforcement learning},
+  author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
+  journal={Nature},
+  volume={518},
+  number={7540},
+  pages={529--533},
+  year={2015},
+  publisher={Nature Research}
+}
+@article{lillicrap2015continuous,
+  title={Continuous control with deep reinforcement learning},
+  author={Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
+  journal={arXiv preprint arXiv:1509.02971},
+  year={2015}
+}
--- a/flux/flux_forward_inverse_robot.jl
+++ b/flux/flux_forward_inverse_robot.jl
+# error("Perhaps forward/inverse regularization is better for simulation than forward only. Nothing significant shows up in prediction.")
 @everywhere begin
    # @everywhere include("/var/tmp/fredrikb/v0.6/DynamicMovementPrimitives/src/two_link.jl")
    # @everywhere include(Pkg.dir("DynamicMovementPrimitives","src","two_link.jl"))
@@ -9,7 +10,7 @@
    using Flux: back!, truncate!, treelike, train!, mse, testmode!, combine, params
    using Flux.Optimise: weightdecay, Param, optimiser, RMSProp
-    N  = 2000
+    N  = 500
    n  = 2
    ns = n
    h = 0.02
@@ -36,8 +37,8 @@
        sol = solve(prob,Tsit5(),reltol=1e-8,abstol=1e-8)
        x = hcat(sol(t)...)
        # y = hcat([time_derivative(x[:,t], u[:,t])[3:4] for t in 1:N]...)
-        y = diff(x[1:2,:],2)
+        y = x[3:4,2:N+1]#diff(x[1:2,:],2)
-        x = x[1:2,1:N]
+        x = [x[1:2,1:N]; x[3:4,1:N]]
        u = u[:,1:N]
        # x[1:2,:] = x[1:2,:].% 2π
@@ -52,6 +53,18 @@
        x,u,y
    end
+    function simulate(m,x) # roll the model m forward along the columns of x and return the simulated copy
+        Flux.testmode!.(m, true) # test mode on for the duration of the rollout
+        xsim = copy(x) # work on a copy so the caller's x is not modified; column 1 is kept as the start
+        for t = 2:size(x,2)
+            xsimt = m(xsim[:,t-1]) # model output for the previous simulated column
+            xsim[3:4,t] = mean(xsimt).data # rows 3:4 are taken from the averaged model output (presumably velocities — confirm)
+            xsim[1:2,t] = xsim[1:2,t-1] + h*xsim[3:4,t-1] # rows 1:2 advance by h times the previous rows 3:4; h is not a parameter, it comes from the enclosing scope
+        end
+        Flux.testmode!.(m, false) # test mode back off; not reached if the loop above throws
+        xsim
+    end
    function fit_model(opt, loss, models, x, y, xv, yv;
        iters                 = 2000,
        doplot                = true,
@@ -79,7 +92,7 @@
            testmode!(ms)
            l = loss(x, y).data[1]
            push!(trace,iter,l)
-            if iter % 5 == 0
+            if iter % 10 == 0
                push!(vtrace, iter, loss(xv,yv).data[1])
                push!(ftrace, iter, loss_forwardo(xv,yv).data[1])
                push!(itrace, iter, loss_inverse(x,y).data[1])
@@ -87,7 +100,7 @@
                    println("Iter: $iter, Loss: ", l)
                    plot(trace,reuse=true,show=false, lab="Train", layout=5, subplot=1, size=(1400,1000))
                    plot!(vtrace,show=false, lab="Validation", subplot=1)
-                    plot!(ftrace,show=false, lab="Forward validation", subplot=1)
+                    plot!(ftrace,show=false, lab="Forward validation", subplot=1, yscale=:log10)
                    # plot!(itrace,show=false, lab="Inverse validation", subplot=1)
                    plot!(y', subplot=2:3, title="Forward model")
                    plot!(my(x).data', l=:dash, subplot=2:3)

--- a/flux/nn_prior/nn_prior.jl
+++ b/flux/nn_prior/nn_prior.jl
@@ -91,19 +91,19 @@ results1 = pmap(1:n_bootstrap) do it
    println("Done: ", it)
    results
 end
-plotresults(results1)
 Jm, Js, Jtrue = all_jacobians(results1)
 # Jtrue .-= [I zeros(4,2][:]'
+l = @layout [  [a{1.0w}; b{1.0w}] c{0.5w}]
 plot(plot_jacobians(Jm, Js, Jtrue),
-plot_eigvals(results1, systype == DiffSystem()),
+    plotresults(results1),
-plotresults(results1))
+    plot_eigvals(results1, false),layout=l)
 gui()
+plot(reuse=false) # keyword was misspelled `resue`; presumably meant to start a new window rather than reuse the current one
 systype = DiffSystem()
 seed = 1
 x,y,u = generate_data(sys, systype, seed)
@@ -121,13 +121,13 @@ results2 = pmap(1:n_bootstrap) do it
    println("Done: ", it)
    results
 end
-plotresults(results2)
 Jm, Js, Jtrue = all_jacobians(results2)
 # Jtrue .-= [I zeros(4,2][:]'
-plot(plot_jacobians(Jm, Js, Jtrue, systype == DiffSystem()),
+l = @layout [  [a{1.0w}; b{1.0w}] c{0.5w}]
-plot_eigvals(results2, systype == DiffSystem()),
+plot(plot_jacobians(Jm, Js, Jtrue, true),
-plotresults(results2))
+    plotresults(results2),
+    plot_eigvals(results2, true), layout=l)
 gui()

--- a/flux/nn_prior/utilities.jl
+++ b/flux/nn_prior/utilities.jl
@@ -97,19 +97,24 @@ function plot_jacobians(Jm, Js, Jtrue, shift=false)
 end
 function plot_eigvals(results, shift=false) # scatter eigenvalues of the true (red) and model (blue) Jacobians taken from results
-    @unpack xv,uv,sys = results[1]
+    @unpack x,u,sys = results[1]
-    N = size(xv,2)
+    N = size(x,2)
-    plot()
+    plot(layout=(2,1), ratio=:equal)
    for evalpoint = 1:10:N # evaluate at every 10th column of x
-        e = eigvals(true_jacobian(sys,evalpoint,xv,uv)[1:4,1:4])
+        e = eigvals(true_jacobian(sys,evalpoint,x,u)[1:4,1:4])
-        scatter!(real.(e), imag.(e), c=:red, show=false)
+        scatter!(real.(e), imag.(e), c=:red, show=false, subplot=1)
+        e = log.(complex.(e))/sys.h # complex.() avoids a DomainError when eigvals returns a real vector with a negative entry; presumably maps to continuous time — confirm
-        e = eigvals(jacobian(models(results), xv[:,evalpoint])[1][1:4,1:4])
+        scatter!(real.(e), imag.(e), c=:red, show=false, subplot=2, legend=false)
-        scatter!(real.(e+shift), imag.(e), c=:blue, show=false)
+        e = eigvals(jacobian(models(results), x[:,evalpoint])[1][1:4,1:4])
+        e .+= shift # shift is added to every model eigenvalue before plotting
+        scatter!(real.(e), imag.(e), c=:blue, show=false, subplot=1)
+        e = log.(complex.(e))/sys.h # same complex promotion as for the true Jacobian above
+        scatter!(real.(e), imag.(e), c=:blue, show=false, subplot=2, legend=false)
        plot!(title="Eigenvalue spectrum")
    end
    phi = linspace(0,2pi,300)
-    plot!(real.(exp.(phi.*im)), imag.(exp.(phi.*im)), show=true, legend=false, c=:black, l=:dash)
+    plot!(real.(exp.(phi.*im)), imag.(exp.(phi.*im)), show=true, legend=false, c=:black, l=:dash, subplot=1) # dashed unit circle as reference in subplot 1
 end
@@ -122,7 +127,7 @@ end
 function plotresults(results)
    @unpack x,u,y,xv,uv,yv,systype = results[1]
-    fig = plot(xv[1:4,:]', lab="True", layout=4)
+    fig = plot(x[1:4,:]', lab="True", layout=4)
    ms = models(results)
    testmode!.(ms, true)
    plot_prediction(fig, results)
@@ -135,7 +140,7 @@ end
 function plot_prediction(fig, results)
    @unpack x,u,y,xv,uv,yv,systype = results[1]
    ms = models(results)
-    yh, bounds = predict(systype, ms, xv)
+    yh, bounds = predict(systype, ms, x)
    for i = 1:4
        plot!(fig, yh[i,:], fillrange = getindex.(bounds,i,:), fillalpha=0.3, subplot=i, lab="Prediction")
    end
@@ -144,7 +149,7 @@ end
 function plot_simulation(fig, results)
    @unpack x,u,y,xv,uv,yv,systype = results[1]
    ms = models(results)
-    yh = simulate(systype, ms, xv)
+    yh = simulate(systype, ms, x)
    for i = 1:4
        plot!(fig, yh[i,:], subplot=i, lab="Simulation")
    end