From 227824e835751a34c1d16eca896ab2a41504150e Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <cont-frb@ulund.org>
Date: Sat, 16 Mar 2019 12:32:23 +0100
Subject: [PATCH] update TDlambda to julia 1

---
 tdlambda.jl       | 45 ++++++++++++++++++++++++++---------------------
 tdlambda_setup.jl | 18 +++++++++---------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/tdlambda.jl b/tdlambda.jl
index 6f33e8a..aea7fd3 100644
--- a/tdlambda.jl
+++ b/tdlambda.jl
@@ -1,4 +1,4 @@
-
+using Distributed
 cd(@__DIR__)
 @everywhere begin
     num_episodes = 150
@@ -12,27 +12,30 @@ end
 
 @everywhere include("tdlambda_setup.jl")
 
 functions = [TDλlearning, TDOλlearning]
 
+function runit()
+    for fun in functions
+        mc_runs = 1
+        λvec = linspace(0,1,5)
+        λvecMC = repeat(λvec',mc_runs)[:]
+        n = length(λvec)
+        res = map(λvecMC) do λ
+            fun(num_episodes,α,λ)
+        end
+        rewards = getindex.(res,1)
+        evals = getindex.(res,2)
-for fun in functions
-    mc_runs = 1
-    λvec = linspace(0,1,5)
-    λvecMC = repmat(λvec',mc_runs)[:]
-    n = length(λvec)
-    @time res = pmap(λvecMC) do λ
-        fun(num_episodes,α,λ)
-    end
-    rewards = getindex.(res,1)
-    evals = getindex.(res,2)
-
-    function average(x)
-        data = reshape(x,mc_runs,n) |> vec
-        mean(data,1)[:], std(data,1)[:]
-    end
+        function average(x)
+            data = reshape(x,mc_runs,n) |> vec
+            mean(data,1)[:], std(data,1)[:]
+        end
 
-    # @show average_reward,ae = [mean(r.values[end-3:end]) for r in rewards] |> average
-    # average_eval,aee = [mean(r.values) for r in evals] |> average
-    # max_eval,me = [maximum(r.values) for r in evals] |> average
-    #
-    # scatter(λvec.+0.1rand(n,3)-0.05,[average_reward average_eval max_eval], xlabel="λ", lab=["Average reward" "Average eval" "Max eval"], title=string(fun), yerror=[ae aee me])
+        @show average_reward,ae = [mean(r.values[end-3:end]) for r in rewards] |> average
+        average_eval,aee = [mean(r.values) for r in evals] |> average
+        max_eval,me = [maximum(r.values) for r in evals] |> average
+        #
+        scatter(λvec.+0.1rand(n,3)-0.05,[average_reward average_eval max_eval], xlabel="λ", lab=["Average reward" "Average eval" "Max eval"], title=string(fun), yerror=[ae aee me])
+    end
 end
+
+runit()
diff --git a/tdlambda_setup.jl b/tdlambda_setup.jl
index 14dc99f..5b755d1 100644
--- a/tdlambda_setup.jl
+++ b/tdlambda_setup.jl
@@ -1,13 +1,13 @@
 using OpenAIGym, BasisFunctionExpansions, ValueHistories
-
+using Base.Iterators
 const env = GymEnv("CartPole-v0")
-typealias SARS Tuple{Vector{Float64},Int,Float64,Vector{Float64}}
-typealias V64 Vector{Float64}
-
-const ϕ = MultiUniformRBFE([linspace(-0.3,0.3) linspace(-2,2) linspace(-0.2,0.2) linspace(-3.2,3.2) linspace(0,1)], [5,5,5,5,2])
+const SARS = Tuple{Vector{Float64},Int,Float64,Vector{Float64}}
+const V64 = Vector{Float64}
+const linspace = LinRange
+const ϕ = MultiUniformRBFE([linspace(-0.3,0.3,3) linspace(-2,2,3) linspace(-0.2,0.2,3) linspace(-3.2,3.2,3) linspace(0,1,3)], [5,5,5,5,2])
 const P = length(ϕ(zeros(5)))
 
-type Qfun
+struct Qfun
     θ::Vector{Float64}
     ϕ::MultiUniformRBFE
 end
@@ -19,8 +19,8 @@ function Base.setindex!(Q::Qfun, q, s, a)
     Q.θ .+= Q.ϕ([s;a])* q
 end
 
-type GreedyPolicy <: AbstractPolicy end
-type ϵGreedyPolicy <: AbstractPolicy
+struct GreedyPolicy <: AbstractPolicy end
+mutable struct ϵGreedyPolicy <: AbstractPolicy
     ϵ::Float64
     decay_rate::Float64
 end
@@ -57,7 +57,7 @@ function TDλlearning(num_episodes,α,λ)
             t += 1
             δ::Float64 = r + γ*max_a(Q, s1) - Q(s,a)
             E = γ*λ*E + ϕ([s;a])::V64
-            Q.θ += α*δ*E
+            Q.θ .+= α*δ*E
         end
         push!(reward_history, i, ep.total_reward)
         evaluate(i,eval_history)
-- 
GitLab
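
Note: the `average` helper carried over into `runit()` above still calls `mean(data,1)` and `std(data,1)`; on Julia 1 `mean` and `std` live in the `Statistics` standard library and take a `dims` keyword, so those calls would error. A minimal Julia 1 sketch, assuming the intent is per-λ mean and standard deviation across the `mc_runs` Monte-Carlo runs (the explicit `mc_runs`/`n` arguments are introduced here only for illustration; the original closes over them):

using Statistics  # mean/std are no longer in Base on Julia 1

# Hypothetical Julia-1 version of the `average` helper above.
# Assumes x holds one result per (MC run, λ) pair, grouped by λ.
function average(x, mc_runs, n)
    data = reshape(x, mc_runs, n)              # one column per λ value
    vec(mean(data, dims=1)), vec(std(data, dims=1))
end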