From 227824e835751a34c1d16eca896ab2a41504150e Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <cont-frb@ulund.org>
Date: Sat, 16 Mar 2019 12:32:23 +0100
Subject: [PATCH] update TDlambda to julia 1

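Julia 1.0 removes or renames several constructs this code relied on:
@everywhere and pmap now come from the Distributed stdlib, mean/std live
in the Statistics stdlib and take a dims keyword, linspace/repmat are
replaced by LinRange/repeat, and `type` is now `struct`/`mutable struct`.
A rough sketch of the renamed calls, with illustrative variable names
that are not taken from the files:

    using Distributed, Statistics         # stdlibs in Julia 1
    xs = LinRange(0, 1, 5)                # was linspace(0, 1, 5)
    xm = repeat(collect(xs)', 2)          # was repmat(xs', 2)
    mean(xm, dims=1), std(xm, dims=1)     # was mean(xm, 1), std(xm, 1)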
---
 tdlambda.jl       | 45 ++++++++++++++++++++++++---------------------
 tdlambda_setup.jl | 18 +++++++++---------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/tdlambda.jl b/tdlambda.jl
index 6f33e8a..aea7fd3 100644
--- a/tdlambda.jl
+++ b/tdlambda.jl
@@ -1,4 +1,4 @@
-
+using Distributed, Statistics, Plots # Statistics for mean/std, Plots for scatter
 cd(@__DIR__)
 @everywhere begin
     num_episodes        = 150
@@ -12,27 +12,30 @@ end
 @everywhere include("tdlambda_setup.jl")
 
 functions = [TDλlearning, TDOλlearning]
+function runit()
+    for fun in functions
+        mc_runs = 1
+        λvec = linspace(0,1,5)
+        λvecMC = repeat(λvec',mc_runs)[:]
+        n = length(λvec)
+        res = pmap(λvecMC) do λ   # pmap so the λ sweep can use Distributed workers
+            fun(num_episodes,α,λ)
+        end
+        rewards = getindex.(res,1)
+        evals = getindex.(res,2)
 
-for fun in functions
-    mc_runs = 1
-    λvec = linspace(0,1,5)
-    λvecMC = repmat(λvec',mc_runs)[:]
-    n = length(λvec)
-    @time res = pmap(λvecMC) do λ
-        fun(num_episodes,α,λ)
-    end
-    rewards = getindex.(res,1)
-    evals = getindex.(res,2)
-
-    function average(x)
-        data = reshape(x,mc_runs,n) |> vec
-        mean(data,1)[:], std(data,1)[:]
-    end
+        function average(x)
+            data = reshape(x,mc_runs,n)               # one row per MC run, one column per λ
+            mean(data,dims=1)[:], std(data,dims=1)[:] # Julia 1: reductions take dims=
+        end
 
-    # @show average_reward,ae = [mean(r.values[end-3:end]) for r in rewards] |> average
-    # average_eval,aee = [mean(r.values) for r in evals] |> average
-    # max_eval,me = [maximum(r.values) for r in evals] |> average
-    #
-    # scatter(λvec.+0.1rand(n,3)-0.05,[average_reward average_eval max_eval], xlabel="λ", lab=["Average reward" "Average eval" "Max eval"], title=string(fun),    yerror=[ae aee me])
+        @show average_reward,ae = [mean(r.values[end-3:end]) for r in rewards] |> average
+        average_eval,aee = [mean(r.values) for r in evals] |> average
+        max_eval,me = [maximum(r.values) for r in evals] |> average
+        #
+        scatter(λvec .+ 0.1rand(n,3) .- 0.05, [average_reward average_eval max_eval], xlabel="λ", lab=["Average reward" "Average eval" "Max eval"], title=string(fun), yerror=[ae aee me])
 
+    end
 end
+
+runit()
diff --git a/tdlambda_setup.jl b/tdlambda_setup.jl
index 14dc99f..5b755d1 100644
--- a/tdlambda_setup.jl
+++ b/tdlambda_setup.jl
@@ -1,13 +1,13 @@
 using OpenAIGym, BasisFunctionExpansions, ValueHistories
-
+using Base.Iterators
 const env = GymEnv("CartPole-v0")
-typealias SARS Tuple{Vector{Float64},Int,Float64,Vector{Float64}}
-typealias V64 Vector{Float64}
-
-const ϕ = MultiUniformRBFE([linspace(-0.3,0.3) linspace(-2,2) linspace(-0.2,0.2) linspace(-3.2,3.2) linspace(0,1)], [5,5,5,5,2])
+const SARS = Tuple{Vector{Float64},Int,Float64,Vector{Float64}}
+const V64 = Vector{Float64}
+const linspace = LinRange
+const ϕ = MultiUniformRBFE([linspace(-0.3,0.3,3) linspace(-2,2,3) linspace(-0.2,0.2,3) linspace(-3.2,3.2,3) linspace(0,1,3)], [5,5,5,5,2])
 const P = length(ϕ(zeros(5)))
 
-type Qfun
+struct Qfun
     θ::Vector{Float64}
     ϕ::MultiUniformRBFE
 end
@@ -19,8 +19,8 @@ function Base.setindex!(Q::Qfun, q, s, a)
     Q.θ .+= Q.ϕ([s;a])* q
 end
 
-type GreedyPolicy <: AbstractPolicy end
-type ϵGreedyPolicy <: AbstractPolicy
+struct GreedyPolicy <: AbstractPolicy end
+mutable struct ϵGreedyPolicy <: AbstractPolicy
     ϵ::Float64
     decay_rate::Float64
 end
@@ -57,7 +57,7 @@ function TDλlearning(num_episodes,α,λ)
             t += 1
             δ::Float64 = r + γ*max_a(Q, s1) - Q(s,a)
             E          = γ*λ*E + ϕ([s;a])::V64
-            Q.θ       += α*δ*E
+            Q.θ      .+= α*δ*E
         end
         push!(reward_history, i, ep.total_reward)
         evaluate(i,eval_history)
-- 
GitLab