Merge branch 'master' of gitlab.control.lth.se:cont-frb/reinforcementlearning

49534e58 · Fredrik Bagge Carlson · db637b81 · 82c1290b · 49534e58
Commit 49534e58 authored 7 years ago by Fredrik Bagge Carlson
--- a/pf_vec.jl
+++ b/pf_vec.jl
-using StatsBase, Plots, Distributions, StaticArrays
+using StatsBase, Plots, Distributions, StaticArrays, Base.Test, TimerOutputs
-function init_pf(N, p0)
+function init_pf{T}(x0::AbstractVector{T}, N, p0)
-    xprev = rand(p0,N)
+    xprev = Vector{SVector{length(x0),T}}([x0 .+ rand(p0) for n=1:N])
-    x = similar(x)
+    x = similar(xprev)
    w = fill(log(1/N), N)
    x,xprev,w
 end
@@ -22,6 +22,8 @@ function pf!(x, xprev, w, u, y, g, f)
    x
 end
+shouldresample(w) = rand() < 0.5
 function weigthed_mean(x,w)
    xh = zeros(size(x[1]))
    @inbounds @simd  for i = eachindex(x)
@@ -30,15 +32,28 @@ function weigthed_mean(x,w)
    return xh
 end
-@inline logsumexp!(w) = w .-= log(sum(exp, w))
+@testset "weigthed_mean" begin
-# function logsumexp!(w)
+x = [randn(3) for i = 1:10000]
-#     offset = maximum(wT)
+w = ones(10000) |> logsumexp!
-#     normConstant = 0.0
+@test sum(abs, weigthed_mean(x,w)) < 0.05
-#     for i = 1:N
+end
-#         normConstant += exp(w[i,t]-offset)
-#     end
+# @inline logsumexp!(w) = w .-= log(sum(exp, w))
-#     wT .-= log(normConstant) + offset
+function logsumexp!(w)
-# end
+    offset = maximum(w)
+    normConstant = zero(eltype(w))
+    for i = eachindex(w)
+        normConstant += exp(w[i]-offset)
+    end
+    w .-= log(normConstant) + offset
+end
+@testset "logsumexp" begin
+w = randn(10)
+wc = copy(w)
+@test logsumexp!(w) ≈ wc.-log(sum(exp, wc))
+@test logsumexp!(ones(10)) ≈ fill(log(1/10),10)
+end
 function resample(w)
    N = length(w)
@@ -48,8 +63,7 @@ function resample(w)
    for i = 2:N
        bins[i] = bins[i-1] + exp(w[i])
    end
-    s = (rand()/N+0):1/N:bins[end]
+    s = (rand()/N):(1/N):bins[end]
    bo = 1
    for i = 1:N
        @inbounds for b = bo:N
@@ -63,39 +77,56 @@ function resample(w)
    return j
 end
-const pg = Distributions.Normal(0,1.0)
+@testset "resample" begin
-const pf = Distributions.Normal(0,1.0)
+w = logsumexp!(ones(10))
-const p0 = Distributions.Normal(0,2.0)
+@test resample(w) ≈ 1:10
+@test [1.,1,1,2,2,2,3,3,3] |> logsumexp! |> resample |> sum >= 56
+@test length(resample(w)) == length(w)
+for i = 1:10000
+    j = randn(100) |> logsumexp! |> resample
+    @test maximum(j) <= 100
+    @test minimum(j) >= 1
+end
+end
-n = 7
+n = 2
 m = 2
 p = 2
+const pg = Distributions.MvNormal(p,1.0)
+const pf = Distributions.MvNormal(n,1.0)
+const p0 = Distributions.MvNormal(n,2.0)
 T = randn(n,n)
 const A = SMatrix{n,n}(T*diagm(linspace(0.5,0.99,n))/T)
 const B = @SMatrix randn(n,m)
 const C = @SMatrix randn(p,n)
 function f(x,xp,u,j)
+    Bu = B*u
    @inbounds for i = eachindex(x)
-        x[i] =  A*xp[j[i]] + B*u + rand(pf)
+        x[i] =  A*xp[j[i]] + Bu + rand(pf)
    end
+    x
 end
 function f(x,u)
-    x[i] =  A*xp[j[i]] + B*u + rand(pf)
+    Bu = B*u
+    @inbounds for i = eachindex(x)
+        x[i] =  A*x[i] .+ Bu .+ rand(pf)
+    end
+    x
 end
-function g(w,y::Float64,x)
+function g(w,y,x)
    @inbounds for i = 1:length(w)
-        w[i] -= logpdf(pg, y-C*x[i])
+        w[i] += logpdf(pg, Vector(y-C*x[i]))
+        w[i] = ifelse(w[i] < -1000, -1000, w[i])
    end
+    w
 end
-rms(x) = sqrt(mean(abs2, x))
 function run_test()
-    particle_count = [5 10 20 50 100 200 500 1000 10_000]
+    particle_count = [5, 10, 20, 50, 100, 200, 500, 1000, 10_000]
    time_steps = [20, 50, 100, 200]
    RMSE = zeros(length(particle_count),length(time_steps)) # Store the RMS errors
    propagated_particles = 0
@@ -103,20 +134,19 @@ function run_test()
        for (Ni, N) in enumerate(particle_count)
            montecarlo_runs = maximum(particle_count)*maximum(time_steps) / T / N
            #             montecarlo_runs = 1
-            xp = Array{Float64}(n,N,T)
+            x = zeros(n,T)
-            w = Array{Float64}(n,N,T)
+            y = zeros(p,T)
-            x = Array{Float64}(n,T)
-            y = Array{Float64}(p,T)
-            E = sum(1:montecarlo_runs) do
-                xh,xhprev,w = init_pf(N, p0)
+            E = sum(1:montecarlo_runs) do mc_run
                u = randn(m,T)
                x[:,1] = rand(p0)
                y[:,1] = C*x[:,1] + rand(pg)
+                xh,xhprev,w = init_pf(x[:,1], N, p0)
                error = 0.0
-                @inbounds for t = 1:T-1
+                @timeit "pf" @inbounds for t = 1:T-1
-                    x[:,t+1] = f(x[:,t],u[:,T]) + rand(pf)
+                    # plot_particles2(xh,w,y,x,t)
+                    x[:,t+1] = f([x[:,t]],u[:,t])[]
                    y[:,t+1] = C*x[:,t+1]  + rand(pg)
                    pf!(xh, xhprev, w, u[:,t], y[:,t], g, f)
                    error += sum(abs2,x[:,t]-weigthed_mean(xh,w))
@@ -136,10 +166,8 @@ function run_test()
    return RMSE
 end
-@time pf!(eye(4),eye(4), ones(4), 4, g, f, σw )
+# @enter pf!(zeros(4),zeros(4), ones(4), ones(4), ones(4), g, f)
-gc()
+reset_timer!()
-Profile.clear()
 @time RMSE = run_test()
 # Profile.print()
@@ -148,8 +176,24 @@ function plotting(RMSE)
    particle_count = [5, 10, 20, 50, 100, 200, 500, 1000, 10_000]
    nT             = length(time_steps)
    leg            = reshape(["$(time_steps[i]) time steps" for i = 1:nT], 1,:)
    plot(particle_count,RMSE,xscale=:log10, ylabel="RMS errors", xlabel=" Number of particles", lab=leg)
 end
 plotting(RMSE)
+function plot_particles(x,w,y,xt)
+    xa = reinterpret(Float64, x, (length(x[1]), length(x)))
+    scatter(xa[1,:],xa[2,:], title="Particles", reuse=true, xlims=(-15,15), ylims=(-15,15), grid=false, size=(1000,1000))
+    scatter!([y[1]], [y[2]], m = (:red, 5))
+    scatter!([xt[1]], [xt[2]], m = (:green, 5))
+    sleep(0.2)
+end
+function plot_particles2(x,w,y,xt,t)
+    xa = reinterpret(Float64, x, (length(x[1]), length(x)))
+    plot(xt', title="Particles", reuse=true,  grid=false, size=(1000,1000), layout=(2,1), ylims=(-15,15))
+    plot!(y', l = (:red, 2))
+    scatter!(t*ones(size(xa,2)), xa[1,:], subplot=1)
+    scatter!(t*ones(size(xa,2)), xa[2,:], subplot=2)
+    sleep(0.2)
+end