From bc0906b59230929e3631c10b109dc8050bf53934 Mon Sep 17 00:00:00 2001 From: baggepinnen <cont-frb@ulund.org> Date: Mon, 14 May 2018 13:11:30 +0200 Subject: [PATCH] update slides --- jump_lin_id/bibtexfile.bib | 22 ++ jump_lin_id/pres/beamerthemeRegler2.sty | 162 +------------- jump_lin_id/pres/pres_idpaper.tex | 271 ++++++++++++++++++------ 3 files changed, 229 insertions(+), 226 deletions(-) diff --git a/jump_lin_id/bibtexfile.bib b/jump_lin_id/bibtexfile.bib index b1bf7be..d74999c 100644 --- a/jump_lin_id/bibtexfile.bib +++ b/jump_lin_id/bibtexfile.bib @@ -171,3 +171,25 @@ XXpages={1445--1450}, year={1965} } + +@ARTICLE{svensson2014identification, + author = {{Svensson}, A. and {Sch{\"o}n}, T.~B. and {Lindsten}, F.}, + title = "{Identification of jump Markov linear models using particle filters}", + journal = {ArXiv e-prints}, +archivePrefix = "arXiv", + eprint = {1409.7287}, + primaryClass = "stat.CO", + keywords = {Statistics - Computation, Mathematics - Optimization and Control, Statistics - Machine Learning}, + year = 2014, + month = sep, + adsurl = {http://adsabs.harvard.edu/abs/2014arXiv1409.7287S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@book{gustafsson2000adaptive, + title={Adaptive filtering and change detection}, + author={Gustafsson, Fredrik}, + volume={1}, + year={2000}, + publisher={Citeseer} +} diff --git a/jump_lin_id/pres/beamerthemeRegler2.sty b/jump_lin_id/pres/beamerthemeRegler2.sty index 2147726..8d2bd77 100644 --- a/jump_lin_id/pres/beamerthemeRegler2.sty +++ b/jump_lin_id/pres/beamerthemeRegler2.sty @@ -1,45 +1,3 @@ -\DeclareOption{lionbackground}{\def\@beamer@option{% -\AtBeginDocument {% - \pgfdeclareimage[width=70mm]{lionwhite}{LionSealWhite} -}% - -\usebackgroundtemplate{{% - \color{palegray}\vrule height\paperheight width\paperwidth - \kern -\paperwidth - \vbox to \paperheight{% - \vss\kern2mm\hbox to \paperwidth{\hss\pgfuseimage{lionwhite}\hss}\vss}% - }% -} - 
-\useframetitletemplate{\par\kern-1mm - \vbox to 10mm{\leavevmode\kern-\beamer@leftmargin - \colorbox{header}{\hbox to \paperwidth{\hss - \color{white} - \Large\bfseries\vrule height 7mm - depth 3mm width0mm\insertframetitle\strut\hss}}\kern -30mm\par\vss}% -}% - -\useinnertheme[shadow=true]{rounded} -\setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} - -\usefoottemplate{% - \vbox{\tiny% - \hbox{% - \setbox\beamer@linebox=\hbox to\paperwidth{% - \hbox to.5\paperwidth{\tiny\color{white}\frame@numbers\hfill\textbf{\insertshortauthor}\hskip.3cm}% - \hbox to.5\paperwidth{\hskip.3cm\tiny\color{white}\textbf{\insertshorttitle}\hfill}\hfill}% - \ht\beamer@linebox=2.625ex% - \dp\beamer@linebox=0pt% - \setbox\beamer@linebox=\vbox{\box\beamer@linebox\vskip1.125ex}% - \color{header}\hskip-\Gm@lmargin\vrule width.5\paperwidth - height\ht\beamer@linebox\color{structure}\vrule width.5\paperwidth - height\ht\beamer@linebox\hskip-\paperwidth% - \hbox{\box\beamer@linebox\hfill}\hfill\hskip-\Gm@rmargin}}} - -\setbeamercovered{transparent} - -}} % end \DeclareOption{lionbackground} - \DeclareOption{liontopcorner}{\def\@beamer@option{% \pgfdeclareimage[width=14mm]{lionsealwhitesmall}{LionSealWhite} \useframetitletemplate{\par\kern-1mm @@ -59,9 +17,9 @@ \hbox{% \setbox\beamer@linebox=\hbox to\paperwidth{% \hbox to.5\paperwidth{\tiny\color{white}\frame@numbers - \hfill\textbf{\insertshortauthor}\hskip.3cm}% + \hfill\textbf{\shadowtext{\insertshortauthor}}\hskip.3cm}% \hbox to.5\paperwidth{\hskip.3cm\tiny\color{white}% - \textbf{\insertshorttitle}\hfill}\hfill}% + \textbf{\shadowtext{\insertshorttitle \quad \insertshortdate}}\hfill}\hfill}% \ht\beamer@linebox=2.625ex% \dp\beamer@linebox=0pt% \setbox\beamer@linebox=\vbox{\box\beamer@linebox\vskip1.125ex}% @@ -74,118 +32,6 @@ }} % end \DeclareOption{liontopcorner} -\DeclareOption{lionheader}{\def\@beamer@option{% - - \useinnertheme[shadow=true]{rounded} - \setbeamercolor{block 
title}{use=structure,fg=white,bg=structure.fg} - \pgfdeclareimage[width=14mm]{lionsealbronzewhite}{LionSealBronzeWhite} - \useframetitletemplate{% - \vskip 2mm - {\leftskip15mm% - \rightskip-\beamer@rightmargin plus1fill\relax - \advance\rightskip by0.3cm\leavevmode - \color{black}\Large\bfseries\insertframetitle\par \small\insertframesubtitle\vspace{-3mm}\par} - \vbox to 2mm{\leavevmode\kern-\beamer@leftmargin \fboxsep=0.7mm\colorbox{header}{\hbox to \paperwidth{\hss}}\vss} - \moveleft 3mm \vbox to 0pt{\kern-20mm\pgfuseimage{lionsealbronzewhite}\vss} - \vskip -7mm -} - -\usefoottemplate{% - \vbox to 3mm{\hbox to \textwidth{\tiny - \hbox to 0pt{\kern -10mm\frame@numbers\hss} - \insertshortauthor: \insertshorttitle\hfill}\vss}} - -\setbeamercovered{transparent} -}} %end \DeclareOption{lionheader} - - -\DeclareOption{lionheaderLCCC}{\def\@beamer@option{% - - \useinnertheme[shadow=true]{rounded} - \setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} - \pgfdeclareimage[width=14mm]{lionsealbronzewhite}{LionSealBronzeWhite} - \pgfdeclareimage[width=9mm]{logo-lccc}{logo-lccc} - \useframetitletemplate{% - \vskip 2mm - {\leftskip15mm% - \rightskip-\beamer@rightmargin plus1fill\relax - \advance\rightskip by0.3cm\leavevmode - \color{black}\Large\bfseries\insertframetitle\par} - \vbox to 2mm{\leavevmode\kern-\beamer@leftmargin \fboxsep=0.7mm\colorbox{header}{\hbox to \paperwidth{\hss}}\vss} - \moveleft 5mm \vbox to 0pt{\kern-20mm\pgfuseimage{lionsealbronzewhite}\vss} - \moveright 105mm\vbox to 0pt{\kern-25mm\pgfuseimage{logo-lccc}\vss} - \vskip -7mm -} - -\usefoottemplate{% - \vbox to 3mm{\hbox to \textwidth{\tiny - \hbox to 0pt{\kern -10mm\frame@numbers\hss} - \insertshortauthor: \insertshorttitle\hfill}\vss}} - -\setbeamercovered{transparent} -}} %end \DeclareOption{lionheaderLCCC} - -\DeclareOption{lionheaderLCCCold}{\def\@beamer@option{% -\newsavebox\lionheadbox -\AtBeginDocument {% - \pgfdeclareimage[width=116mm]{blueline}{blueline} - 
\pgfdeclareimage[width=14mm]{lionbronzejpg}{lionbronzejpg} - \pgfdeclareimage[width=10mm]{logo-lccc}{logo-lccc} - \sbox{\lionheadbox}{\vbox to 0pt{\vss \parskip=0pt\noindent - \pgfuseimage{lionbronzejpg}\par - \kern -5.3mm - \hbox to \textwidth{\hss\pgfuseimage{blueline}\hss} - \kern -13mm - \hbox{\kern 96mm\pgfuseimage{logo-lccc}} - }% - }% -} - -\useinnertheme[shadow=true]{rounded} -\setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} - -\useframetitletemplate{% - \vskip 5mm - {\leftskip17mm% - \rightskip-\beamer@rightmargin plus1fill\relax - \advance\rightskip by0.3cm\leavevmode - \color{black}\Large\bfseries\insertframetitle\par - }\usebox{\lionheadbox}\par} - -\usefoottemplate{% - \vbox to 3mm{\hbox to \textwidth{\tiny - \hbox to 0pt{\kern -10mm\frame@numbers\hss} - \insertshortauthor: \insertshorttitle\hfill}\vss}} - -\setbeamercovered{transparent} -}} %end \DeclareOption{lionheaderLCCCold} - - -\DeclareOption{lioncorner}{\def\@beamer@option{% -\newsavebox\lioncornerbox -\AtBeginDocument {% - \pgfdeclareimage[width=30mm]{lionbronze}{LionSealBronze} - \sbox{\lioncornerbox}{\vbox to 0pt{\vskip3mm\hbox to \textwidth{% - \hskip2mm\pgfuseimage{lionbronze}\hss}% - \vss}% - }% -} - -\useframetitletemplate{% -\vbox to 0pt{% - \kern68mm\hbox to 0pt{\kern93mm\pgfuseimage{lionbronze}% - \hss}\vss} - \kern-2mm - {\leftskip\z@ plus1fill \rightskip\z@ plus1fill - \color{LUblue}\Large\bfseries\insertframetitle\par} - {\color{bronze}\medskip\hrule height 2pt \vspace{1mm}}} - -\usefoottemplate{% - \vbox to 3mm{\hbox to \textwidth{\tiny - \hbox to 0pt{\kern -10mm\frame@numbers\hss} - \insertshortauthor: \insertshorttitle\hfill}\vss}} - -}} %end \DeclareOption{lioncorner} \DeclareOption{handout}{\def\@beamer@option{% \useframetitletemplate{\par\kern-1mm @@ -242,7 +88,9 @@ \vspace{5mm} {\normalsize\textbf\insertauthor\par} \vspace{5mm} - {\scriptsize\insertinstitute\par}%\par\vskip1em + {\scriptsize\insertinstitute\par} + \vspace{5mm} + 
{\scriptsize\insertdate\par}%\par\vskip1em \end{centering} \vss } diff --git a/jump_lin_id/pres/pres_idpaper.tex b/jump_lin_id/pres/pres_idpaper.tex index a996519..dfae9ee 100644 --- a/jump_lin_id/pres/pres_idpaper.tex +++ b/jump_lin_id/pres/pres_idpaper.tex @@ -1,4 +1,4 @@ -\documentclass[10pt,handout]{beamer} +\documentclass[10pt]{beamer} % \usepackage{pgfpages} \pgfpagesuselayout{8 on 1}[a4paper,border shrink=5mm] \usetheme[liontopcorner,framenumbers]{Regler2} \usepackage{graphicx} @@ -14,6 +14,7 @@ \addbibresource{../bibtexfile.bib} \usepackage{siunitx} \usepackage{color} +\usepackage{shadowtext}\shadowoffset{0.05mm}\shadowcolor{black!80!white} \usepackage{pgfplots} \usepackage{booktabs}\usepackage{multirow} \usepgfplotslibrary{groupplots} @@ -48,9 +49,9 @@ label=center:{{$\sum$}}, minimum width=2em}} \title[Identification of LTV Models]{Identification of LTV Dynamical Models with\\ Smooth or Discontinuous Time Evolution \\by means of Convex Optimization} -\date{\today} \author[Fredrik Bagge Carlson]{\textbf{\large Fredrik Bagge Carlson}, \textnormal{Anders Robertsson, Rolf Johansson}} \institute{Lund University, Department of Automatic Control} +\date[]{ICCA Anchorage, June 2018} \definecolor{red}{rgb}{0.7,0.2,0.2} @@ -93,29 +94,64 @@ label=center:{{$\sum$}}, minimum width=2em}} \newcommand{\bmatrixx}[1]{\begin{bmatrix}#1\end{bmatrix}} +\usepgfplotslibrary{external} +\tikzexternalize +\tikzsetexternalprefix{figs/} \begin{document} +\setbeamercolor{footnote}{bg=mintgreen} \newlength\figureheight \newlength\figurewidth % \setbeamercolor{background canvas}{bg=goldishlight} \maketitle +%==================================================================== +%==================================================================== +\begin{frame}{Slides and Code}{} + Slides, code, figures and examples available at + + \centering + \includegraphics[width=0.3\linewidth]{figs/qr.png} + + \url{github.com/baggepinnen/LTVModels.jl} +\end{frame} + + 
+%==================================================================== +%==================================================================== +\begin{frame}{Outline}{} + \begin{enumerate} + \item LTI identification + \item LTV identification (Main topic) + \item Examples + \end{enumerate} +\end{frame} + %==================================================================== %==================================================================== \begin{frame}{LTI identification} - We start by considering the case of identification of the parameters in an LTI model on the form + LTI model \begin{equation} x_{t+1} = A x_t + B u_t + v_t, \quad t \in [1,T] \end{equation} where $x\inspace{n}$ and $u\inspace{m}$ are the state and input respectively. + \pause + + \begin{itemize} + \item Control design + \item Prediction + \item Simulation + \end{itemize} \end{frame} -\begin{frame}{}{} - $\y = \A\w$, and arrange the data according to + + +\begin{frame}{LTI identification}{} + Linear in the parameters, can be written $\y = \A\w$ \begin{align*} \y &= \begin{bmatrix} @@ -131,9 +167,11 @@ label=center:{{$\sum$}}, minimum width=2em}} \end{bmatrix} & &\in \mathbb{R}^{Tn\times K} \end{align*} + \pause + Closed form solution \begin{align} - \w^* &= \argmin_{\w} \normt{\A \w - \y}^2 \label{eq:lscost}\\ + \w^* &= \argmin_{\w} \normt{\y - \A \w}^2 \label{eq:lscost}\\ ~ &= \PI \y \label{eq:ls} \end{align} @@ -144,17 +182,20 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== -\begin{frame}{Time-varying dynamics} - We now extend our view to systems where the dynamics change with time. 
We limit the scope of this article to models on the form - \begin{equation} - \label{eq:tvk} - \begin{split} - x_{t+1} &= A_t x_t + B_t u_t + v_t\\ - \w_t &= \vec{\bmatrixx{A_t\T & B_t\T}} - \end{split} - \end{equation} +\begin{frame}{Problem formulation} + Estimate a model on the form + \begin{block}{Linear Time-Varying (LTV) dynamics} + \begin{equation} + \label{eq:tvk} + \begin{split} + x_{t+1} &= A_t x_t + B_t u_t + v_t\\ + \w_t &= \vec{\bmatrixx{A_t\T & B_t\T}} + \end{split} + \end{equation} + \end{block} \pause - where the parameters $\w$ are assumed to evolve according to the dynamical system + + Parameters $\w$ are assumed to evolve according to the dynamical system \begin{equation} \label{eq:dynsys} \begin{split} @@ -162,6 +203,28 @@ label=center:{{$\sum$}}, minimum width=2em}} y_t &= \big(I_n \otimes \bmatrixx{x_t\T & u_t\T}\big) \w_t \end{split} \end{equation} + \pause + + Free to choose $H$, e.g., $H = I$ +\end{frame} + + +%==================================================================== +%==================================================================== +\begin{frame}{Identification of LTV models}{} + \begin{block}{Previous research} + \begin{itemize} + \item Kalman filter with restarts\footfullcite{gustafsson2000adaptive} + \item Segmented least-squares\footfullcite{bellman1969curve} + \item EM and particle filtering\footfullcite{svensson2014identification} + \end{itemize} + \end{block} + \pause + + \begin{itemize} + \item Review of trend filtering + \item Review of regularization properties of norms + \end{itemize} \end{frame} @@ -170,12 +233,14 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Trend filtering} - An important class of identification methods that has been popularized lately is \emph{trend filtering} methods~\footfullcite{kim2009ell_1, tibshirani2014adaptive}. 
+ Class of identification methods for signal reconstruction, \emph{trend filtering}~\footfullcite{kim2009ell_1, tibshirani2014adaptive}. + \pause As a simple example, consider the reconstruction $\hat y$ of a noisy signal $y = \{y_t\inspace{}\}_{t=1}^T$ with piecewise constant segments. \begin{equation*} \label{eq:tf} \minimize{\hat{y}} \normt{y-\hat{y}}^2 + \lambda\sum_t |\hat{y}_{t+1} - \hat{y}_t| \end{equation*} + \pause \begin{itemize} \item Fitness function \item (Sparsity promoting) Regularization @@ -186,29 +251,48 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== -\begin{frame}{Regularization term intuition}{} - figure +\begin{frame}{Norms for Regularization}{} + \begin{description} + \item[$\norm{k}_1$ 1-norm] \emph{Sparsity-promoting} penalty.\\ A solution with a small number of non-zero entries in $k$ is favored.\vspace{2mm} + \item[$\norm{k}_2$ 2-norm] Penalizes large entries in $k$. \\Does not care about small, non-zero entries. + \end{description} - The 1-norm is a \emph{sparsity-promoting} penalty, hence a solution in which only a small number of non-zero first-order time differences in the model parameters is favored, i.e., a piecewise constant dynamics evolution. 
\end{frame} +%==================================================================== +%==================================================================== +\begin{frame}{Identification methods}{} + \begin{itemize} + \item Introduce a number of optimization problems + \item The difference lies in the regularization term + \item The regularization term can be given statistical interpretation + \end{itemize} +\end{frame} + + %==================================================================== %==================================================================== \begin{frame}{Low-frequency time evolution} A slowly varying signal is characterized by \emph{small first-order time differences}. \pause + Assume parameters $k$ evolve according to Brownian motion ($H = I$) + \begin{equation} \label{eq:slow} \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda^2\sum_t \normt{\w_{t+1} - \w_{t}}^2 \end{equation} \pause + Closed-form solution available \only<4->{\bad{(intractable)}} \begin{align}\label{eq:closedform} \tilde{\w}^* &= (\tilde{\A}\T\tilde{\A} + \lambda^2 D_1\T D_1)^{-1}\tilde{\A}\T \tilde{Y}\\ \tilde{\w} &= \operatorname{vec}(\w_1, ...\,, \w_T)\nonumber \end{align} + \pause + + Parameters found efficiently by Dynamic programming! \end{frame} @@ -220,9 +304,16 @@ label=center:{{$\sum$}}, minimum width=2em}} A smoothly varying signal is characterized by \emph{small second-order time differences}. 
\pause + Parameters $k$ are integrated twice (inertia) \begin{equation} \label{eq:smooth} \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda^2\sum_t \normt{\w_{t+2} -2 \w_{t+1} + \w_t}^2 \end{equation} + \pause + + \begin{itemize} + \item Closed-form solution (intractable) + \item Dynamic programming solution (efficient) + \end{itemize} \end{frame} @@ -234,7 +325,7 @@ label=center:{{$\sum$}}, minimum width=2em}} \begin{equation} \label{eq:pwconstant} - \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda\sum_t \normt{ \w_{t+1} - \w_t} + \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda\sum_t \normt{ \w_{t+1} - \w_t}^{\only<3>{\bad{2}}} \end{equation} \pause @@ -245,7 +336,7 @@ label=center:{{$\sum$}}, minimum width=2em}} \end{frame} -\begin{frame}{}{} +\begin{frame}{Piecewise constant time evolution}{} At a first glance, one might consider the formulation \begin{equation} \label{eq:pwconstant_naive} @@ -253,14 +344,14 @@ label=center:{{$\sum$}}, minimum width=2em}} \end{equation} \pause - changes to different entries of $\w_t$ would not occur at the same time instants. + Changes to different entries of $\w_t$ would not occur at the same time instants. \end{frame} %==================================================================== %==================================================================== -\begin{frame}{Implementation} - Due to the non-squared norm penalty $\sum_t \normt{ \w_{t+1} - \w_t}$, problem \labelcref{eq:pwconstant} is significantly harder to solve than \labelcref{eq:smooth}. +\begin{frame}{Piecewise constant time evolution -- Implementation} + Due to the non-squared norm penalty $$\sum_t \normt{ \w_{t+1} - \w_t}$$ problem \labelcref{eq:pwconstant} is significantly harder to solve than \labelcref{eq:smooth}. An efficient implementation using the linearized ADMM algorithm \footfullcite{parikh2014proximal} is made available in the accompanying repository. 
@@ -272,11 +363,9 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Summary} - The proposed optimization problems are summarized in~\cref{tab:opts}. - \begin{table}[] \centering - \caption{Summary of optimization problem formulations. $D_n$ refers to parameter vector time-differentiation of order $n$.} + \caption{Summary of optimization problem formulations. \hspace{\textwidth} $D_n$ refers to parameter vector time-differentiation of order $n$.} \label{tab:opts} \begin{tabular}{@{}lll@{}} \toprule @@ -297,7 +386,9 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Example -- Jump-linear system} - We now consider a simulated example. Change in dynamics, from + We now consider a simulated example. 
+ + Change in dynamics, from $$A_t = \left[ \begin{array}{cc} 0.95 & 0.1 \\ @@ -326,7 +417,7 @@ label=center:{{$\sum$}}, minimum width=2em}} \begin{description} \item[Input] $u \sim \N(0, 1)$ - \item[state transition noise and measurement noise] $\N(0, 0.2^2)$ + \item[State transition / measurement noise] $\N(0, 0.2^2)$ \end{description} \end{frame} @@ -341,6 +432,9 @@ label=center:{{$\sum$}}, minimum width=2em}} \label{fig:ss} \end{figure} + {\color{gray} + Code to reproduce: \href{https://github.com/baggepinnen/LTVModels.jl}{github.com/baggepinnen/LTVModels.jl} + } \end{frame} @@ -348,11 +442,29 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Example -- Robot arm} - \begin{itemize} - \item Non-smooth dynamics - \item Discontinuous Coulomb friction - \item Stiff contact with environment - \end{itemize} + + \begin{columns} + \begin{column}{0.5\textwidth} + \begin{itemize} + \item Non-smooth dynamics + \item Discontinuous Coulomb friction + \item Stiff contact with environment + \item Learn LTV model for trajectory optimization + \item Reinforcement learning + \end{itemize} + \end{column} + \begin{column}{0.5\textwidth} + \centering + \includegraphics[width=\linewidth]{figs/robot_draw.jpg} + % \input{figs/robot.tex} + \end{column} + \end{columns} + + \vspace{1cm} + {\color{gray} + Code to reproduce: \href{https://github.com/baggepinnen/LTVModels.jl/blob/master/examples/two_link.jl}{github.com/baggepinnen/LTVModels.jl/blob/master/examples/two\_link.jl}} + + % The state of the robot arm consists of two joint coordinates, $q$, and their time derivatives, $\dot q$. \Cref{fig:robot_train} illustrates the state trajectories, control torques and simulations of a model estimated by solving~\labelcref{eq:pwconstant}. 
The figure clearly illustrates that the model is able to capture the dynamics both during the non-smooth sign change of the velocity, but also during the establishment of the stiff contact. The learned dynamics of the contact is however time-dependent, which is, in some situations, a drawback of the model and is illustrated in \Cref{fig:robot_val}, where the model is used on a validation trajectory where a different noise sequence was added to the control torque. Due to the novel input signal, the contact is established at a different time-instant and as a consequence, there is an error transient in the simulated data. @@ -360,7 +472,7 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Robot -- Training trajectory}{} - + \vspace*{-6mm} \begin{figure} \centering \pgfplotsset{every axis/.append style={ @@ -377,17 +489,18 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Robot -- Validation trajectory}{} + \vspace*{-6mm} \begin{figure} \centering \pgfplotsset{every axis/.append style={ - label style={font=\tiny}, - legend style={font=\tiny, draw=none}, - tick label style={font=\tiny} + label style={font=\scriptsize}, + legend style={font=\scriptsize, draw=none}, + tick label style={font=\scriptsize} }} \setlength{\figurewidth}{0.495\linewidth} \setlength{\figureheight }{3cm} \input{../figs/robot_val.tex} - \caption{Validation data vs. sample time index. The dashed lines indicate the event times for the training data, highlighting that the model is able to deal effortless with the non-smooth friction, but inaccurately predicts the time evolution around the contact event which now occurs at a slightly different time instance.} + \caption{Validation data vs. 
sample time index. The dashed lines indicate the event times for the training data.} \label{fig:robot_val} \end{figure} \end{frame} @@ -398,40 +511,49 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== \begin{frame}{Example -- Reinforcement learning} \label{sec:rl} \begin{itemize} - \item Identify LTV dynamics models for reinforcement learning + \item Identify LTV models for reinforcement learning \item Dampen oscillations of a pendulum on a cart \item Quadratic cost on states and control \begin{enumerate} - \item fit a dynamics model along the last obtained trajectory - \item optimize the cost function under - the model using iterative LQG (differential dynamic programming)\footnote{Implementation made available at + \item Fit model along trajectory + \item Optimize the cost function under + model using iterative LQG (DDP)\footnote{Implementation made available at \href{github.com/baggepinnen/DifferentialDynamicProgramming.jl}{github.com/baggepinnen/DifferentialDynamicProgramming.jl}} - \item In order to stay close to the validity region of the linear model, we put bounds on the deviation between each new trajectory and the last trajectory. + \item Bounds on the deviation between optimized trajectory and previous trajectory. 
\end{enumerate} \end{itemize} - + \centering + \vspace{-5mm}\hspace{5cm}\includegraphics[width=0.3\linewidth]{figs/pendcart_draw.jpg} \end{frame} %==================================================================== %==================================================================== \begin{frame}{Example -- Reinforcement learning}{} + \footnotetext{$\normt{\w_{t+1} - \w_t}^2$} + \small + \begin{columns} + \begin{column}{0.5\textwidth} + We compare three different models + \begin{itemize} + \item The ground truth system model + \item LTV model \labelcref{eq:slow}\footnotemark + \item LTI model + \end{itemize} + \end{column} + \pause + \begin{column}{0.6\textwidth} + \begin{figure}[htp] + \centering + \setlength{\figurewidth}{0.99\linewidth} + \setlength{\figureheight }{5cm} + \pgfplotsset{every axis/.append style={ + legend style={draw=black!20!white, yshift=1mm}, + title style={yshift=1.3mm}, + }} + \input{../figs/ilc.tex} + \end{figure} + \end{column} + \end{columns} - We compare three different models - \begin{itemize} - \item The ground truth system model - \item LTV model (obtained by solving \labelcref{eq:smooth}) - \item LTI model - \end{itemize} - - The total cost over $T=500$ time steps is shown as a function of learning iteration. 
- \begin{figure}[htp] - \centering - \setlength{\figurewidth}{0.99\linewidth} - \setlength{\figureheight }{4.5cm} - \pgfplotsset{every axis/.append style={ - legend style={draw=black!20!white} - }} - \input{../figs/ilc.tex} - \end{figure} \end{frame} @@ -451,9 +573,10 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== \begin{frame}{Discussion -- Reinforcement learning}{} \begin{itemize} - \item For iterative learning control and trajectory centric reinforcement learning, a first-order approximation to the dynamics is used for efficient optimization - \item Validity of the approximation is ensured by incorporating penalties or constraints between two consecutive trajectories. - \item[\nice{+}] This makes the proposed identification methods attractive in applications such as guided policy search (GPS)~\footfullcite{levine2013guided, levine2015learning} and non-linear iterative learning control (ILC)~\footfullcite{bristow2006survey}, where they can lead to dramatically decreased sample complexity. + \item In ILC and trajectory-centric reinforcement learning, a first-order approximation to the dynamics is used for efficient optimization + \item Validity of the approximation ensured by constraints between two consecutive trajectories. + \item[\nice{+}] Proposed identification methods attractive in applications such as guided policy search (GPS)~\footfullcite{levine2013guided, levine2015learning} and non-linear ILC~\footfullcite{bristow2006survey}. \\ + Can lead to dramatically decreased sample complexity. 
\end{itemize} \end{frame} @@ -464,7 +587,12 @@ label=center:{{$\sum$}}, minimum width=2em}} \begin{frame}{Conclusions}{} \begin{itemize} \item Framework for identification of linear, time-varying models along trajectories of nonlinear dynamical systems using convex optimization - \item Applications within trajectory-centric, model-based reinforcement learning, iterative learning control (ILC), and jump-linear system identification + \item Applications within + \begin{itemize} + \item Trajectory-centric, model-based reinforcement learning, ILC + \item Jump-linear system identification + \item Change-point detection + \end{itemize} \end{itemize} \pause @@ -479,8 +607,13 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== \begin{frame}{Open source}{} - Code to train the models and reproduce examples presented in this talk available at - \url{https://github.com/baggepinnen/LTVModels.jl} + More examples and code to train the models and reproduce examples presented in this talk available at + + \centering + \url{github.com/baggepinnen/LTVModels.jl} + + \includegraphics[width=0.4\linewidth]{figs/qr.png} + \end{frame} -- GitLab