Skip to content
Snippets Groups Projects
Commit 49534e58 authored by Fredrik Bagge Carlson's avatar Fredrik Bagge Carlson
Browse files

Merge branch 'master' of gitlab.control.lth.se:cont-frb/reinforcementlearning

parents db637b81 82c1290b
No related branches found
No related tags found
No related merge requests found
using StatsBase, Plots, Distributions, StaticArrays using StatsBase, Plots, Distributions, StaticArrays, Base.Test, TimerOutputs
function init_pf(N, p0) function init_pf{T}(x0::AbstractVector{T}, N, p0)
xprev = rand(p0,N) xprev = Vector{SVector{length(x0),T}}([x0 .+ rand(p0) for n=1:N])
x = similar(x) x = similar(xprev)
w = fill(log(1/N), N) w = fill(log(1/N), N)
x,xprev,w x,xprev,w
end end
...@@ -22,6 +22,8 @@ function pf!(x, xprev, w, u, y, g, f) ...@@ -22,6 +22,8 @@ function pf!(x, xprev, w, u, y, g, f)
x x
end end
shouldresample(w) = rand() < 0.5
function weigthed_mean(x,w) function weigthed_mean(x,w)
xh = zeros(size(x[1])) xh = zeros(size(x[1]))
@inbounds @simd for i = eachindex(x) @inbounds @simd for i = eachindex(x)
...@@ -30,15 +32,28 @@ function weigthed_mean(x,w) ...@@ -30,15 +32,28 @@ function weigthed_mean(x,w)
return xh return xh
end end
@inline logsumexp!(w) = w .-= log(sum(exp, w)) @testset "weigthed_mean" begin
# function logsumexp!(w) x = [randn(3) for i = 1:10000]
# offset = maximum(wT) w = ones(10000) |> logsumexp!
# normConstant = 0.0 @test sum(abs, weigthed_mean(x,w)) < 0.05
# for i = 1:N end
# normConstant += exp(w[i,t]-offset)
# end # @inline logsumexp!(w) = w .-= log(sum(exp, w))
# wT .-= log(normConstant) + offset function logsumexp!(w)
# end offset = maximum(w)
normConstant = zero(eltype(w))
for i = eachindex(w)
normConstant += exp(w[i]-offset)
end
w .-= log(normConstant) + offset
end
@testset "logsumexp" begin
w = randn(10)
wc = copy(w)
@test logsumexp!(w) wc.-log(sum(exp, wc))
@test logsumexp!(ones(10)) fill(log(1/10),10)
end
function resample(w) function resample(w)
N = length(w) N = length(w)
...@@ -48,8 +63,7 @@ function resample(w) ...@@ -48,8 +63,7 @@ function resample(w)
for i = 2:N for i = 2:N
bins[i] = bins[i-1] + exp(w[i]) bins[i] = bins[i-1] + exp(w[i])
end end
s = (rand()/N+0):1/N:bins[end] s = (rand()/N):(1/N):bins[end]
bo = 1 bo = 1
for i = 1:N for i = 1:N
@inbounds for b = bo:N @inbounds for b = bo:N
...@@ -63,39 +77,56 @@ function resample(w) ...@@ -63,39 +77,56 @@ function resample(w)
return j return j
end end
const pg = Distributions.Normal(0,1.0) @testset "resample" begin
const pf = Distributions.Normal(0,1.0) w = logsumexp!(ones(10))
const p0 = Distributions.Normal(0,2.0) @test resample(w) 1:10
@test [1.,1,1,2,2,2,3,3,3] |> logsumexp! |> resample |> sum >= 56
@test length(resample(w)) == length(w)
for i = 1:10000
j = randn(100) |> logsumexp! |> resample
@test maximum(j) <= 100
@test minimum(j) >= 1
end
end
n = 7 n = 2
m = 2 m = 2
p = 2 p = 2
const pg = Distributions.MvNormal(p,1.0)
const pf = Distributions.MvNormal(n,1.0)
const p0 = Distributions.MvNormal(n,2.0)
T = randn(n,n) T = randn(n,n)
const A = SMatrix{n,n}(T*diagm(linspace(0.5,0.99,n))/T) const A = SMatrix{n,n}(T*diagm(linspace(0.5,0.99,n))/T)
const B = @SMatrix randn(n,m) const B = @SMatrix randn(n,m)
const C = @SMatrix randn(p,n) const C = @SMatrix randn(p,n)
function f(x,xp,u,j) function f(x,xp,u,j)
Bu = B*u
@inbounds for i = eachindex(x) @inbounds for i = eachindex(x)
x[i] = A*xp[j[i]] + B*u + rand(pf) x[i] = A*xp[j[i]] + Bu + rand(pf)
end end
x
end end
function f(x,u) function f(x,u)
x[i] = A*xp[j[i]] + B*u + rand(pf) Bu = B*u
@inbounds for i = eachindex(x)
x[i] = A*x[i] .+ Bu .+ rand(pf)
end
x
end end
function g(w,y::Float64,x) function g(w,y,x)
@inbounds for i = 1:length(w) @inbounds for i = 1:length(w)
w[i] -= logpdf(pg, y-C*x[i]) w[i] += logpdf(pg, Vector(y-C*x[i]))
w[i] = ifelse(w[i] < -1000, -1000, w[i])
end end
w
end end
rms(x) = sqrt(mean(abs2, x))
function run_test() function run_test()
particle_count = [5 10 20 50 100 200 500 1000 10_000] particle_count = [5, 10, 20, 50, 100, 200, 500, 1000, 10_000]
time_steps = [20, 50, 100, 200] time_steps = [20, 50, 100, 200]
RMSE = zeros(length(particle_count),length(time_steps)) # Store the RMS errors RMSE = zeros(length(particle_count),length(time_steps)) # Store the RMS errors
propagated_particles = 0 propagated_particles = 0
...@@ -103,20 +134,19 @@ function run_test() ...@@ -103,20 +134,19 @@ function run_test()
for (Ni, N) in enumerate(particle_count) for (Ni, N) in enumerate(particle_count)
montecarlo_runs = maximum(particle_count)*maximum(time_steps) / T / N montecarlo_runs = maximum(particle_count)*maximum(time_steps) / T / N
# montecarlo_runs = 1 # montecarlo_runs = 1
xp = Array{Float64}(n,N,T) x = zeros(n,T)
w = Array{Float64}(n,N,T) y = zeros(p,T)
x = Array{Float64}(n,T)
y = Array{Float64}(p,T)
E = sum(1:montecarlo_runs) do
xh,xhprev,w = init_pf(N, p0)
E = sum(1:montecarlo_runs) do mc_run
u = randn(m,T) u = randn(m,T)
x[:,1] = rand(p0) x[:,1] = rand(p0)
y[:,1] = C*x[:,1] + rand(pg) y[:,1] = C*x[:,1] + rand(pg)
xh,xhprev,w = init_pf(x[:,1], N, p0)
error = 0.0 error = 0.0
@inbounds for t = 1:T-1 @timeit "pf" @inbounds for t = 1:T-1
x[:,t+1] = f(x[:,t],u[:,T]) + rand(pf) # plot_particles2(xh,w,y,x,t)
x[:,t+1] = f([x[:,t]],u[:,t])[]
y[:,t+1] = C*x[:,t+1] + rand(pg) y[:,t+1] = C*x[:,t+1] + rand(pg)
pf!(xh, xhprev, w, u[:,t], y[:,t], g, f) pf!(xh, xhprev, w, u[:,t], y[:,t], g, f)
error += sum(abs2,x[:,t]-weigthed_mean(xh,w)) error += sum(abs2,x[:,t]-weigthed_mean(xh,w))
...@@ -136,10 +166,8 @@ function run_test() ...@@ -136,10 +166,8 @@ function run_test()
return RMSE return RMSE
end end
@time pf!(eye(4),eye(4), ones(4), 4, g, f, σw ) # @enter pf!(zeros(4),zeros(4), ones(4), ones(4), ones(4), g, f)
gc() reset_timer!()
Profile.clear()
@time RMSE = run_test() @time RMSE = run_test()
# Profile.print() # Profile.print()
...@@ -148,8 +176,24 @@ function plotting(RMSE) ...@@ -148,8 +176,24 @@ function plotting(RMSE)
particle_count = [5, 10, 20, 50, 100, 200, 500, 1000, 10_000] particle_count = [5, 10, 20, 50, 100, 200, 500, 1000, 10_000]
nT = length(time_steps) nT = length(time_steps)
leg = reshape(["$(time_steps[i]) time steps" for i = 1:nT], 1,:) leg = reshape(["$(time_steps[i]) time steps" for i = 1:nT], 1,:)
plot(particle_count,RMSE,xscale=:log10, ylabel="RMS errors", xlabel=" Number of particles", lab=leg) plot(particle_count,RMSE,xscale=:log10, ylabel="RMS errors", xlabel=" Number of particles", lab=leg)
end end
plotting(RMSE) plotting(RMSE)
function plot_particles(x,w,y,xt)
xa = reinterpret(Float64, x, (length(x[1]), length(x)))
scatter(xa[1,:],xa[2,:], title="Particles", reuse=true, xlims=(-15,15), ylims=(-15,15), grid=false, size=(1000,1000))
scatter!([y[1]], [y[2]], m = (:red, 5))
scatter!([xt[1]], [xt[2]], m = (:green, 5))
sleep(0.2)
end
function plot_particles2(x,w,y,xt,t)
xa = reinterpret(Float64, x, (length(x[1]), length(x)))
plot(xt', title="Particles", reuse=true, grid=false, size=(1000,1000), layout=(2,1), ylims=(-15,15))
plot!(y', l = (:red, 2))
scatter!(t*ones(size(xa,2)), xa[1,:], subplot=1)
scatter!(t*ones(size(xa,2)), xa[2,:], subplot=2)
sleep(0.2)
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment