diff --git a/jump_lin_id/id_paper.tex b/jump_lin_id/id_paper.tex index ae89c0a528a9985a49357077d21fb8bfbac17574..4c99f223c8a24d5a9e94341be0ba3004a19560ae 100644 --- a/jump_lin_id/id_paper.tex +++ b/jump_lin_id/id_paper.tex @@ -517,7 +517,7 @@ The state of the robot arm consists of two joint coordinates, $q$, and their tim \setlength{\figurewidth}{0.495\linewidth} \setlength{\figureheight }{4cm} \input{figs/robot_val.tex} - \caption{Simulation of non-smooth robot dynamics with stiff contact -- validation data vs. sample time index. The dashed lines indicate the event times for the training data, highlighting that the model is able to deal effortless with the non-smooth friction, but inaccurately predicts the time evolution around the contact event which now occurs at a slightly different time instance.} + \caption{Simulation of non-smooth robot dynamics with stiff contact -- validation data vs. sample time index. The dashed lines indicate the event times for the training data, highlighting that the model is able to deal effortlessly with the non-smooth friction, but inaccurately predicts the time evolution around the contact event which now occurs at a slightly different time instance.} \label{fig:robot_val} \end{figure*} diff --git a/jump_lin_id/pres/beamerthemeRegler2.sty b/jump_lin_id/pres/beamerthemeRegler2.sty new file mode 100644 index 0000000000000000000000000000000000000000..2147726293e58365f557151430d738ebf183768a --- /dev/null +++ b/jump_lin_id/pres/beamerthemeRegler2.sty @@ -0,0 +1,249 @@ +\DeclareOption{lionbackground}{\def\@beamer@option{% +\AtBeginDocument {% + \pgfdeclareimage[width=70mm]{lionwhite}{LionSealWhite} +}% + +\usebackgroundtemplate{{% + \color{palegray}\vrule height\paperheight width\paperwidth + \kern -\paperwidth + \vbox to \paperheight{% + \vss\kern2mm\hbox to \paperwidth{\hss\pgfuseimage{lionwhite}\hss}\vss}% + }% +} + +\useframetitletemplate{\par\kern-1mm + \vbox to 10mm{\leavevmode\kern-\beamer@leftmargin + \colorbox{header}{\hbox to 
\paperwidth{\hss + \color{white} + \Large\bfseries\vrule height 7mm + depth 3mm width0mm\insertframetitle\strut\hss}}\kern -30mm\par\vss}% +}% + +\useinnertheme[shadow=true]{rounded} +\setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} + +\usefoottemplate{% + \vbox{\tiny% + \hbox{% + \setbox\beamer@linebox=\hbox to\paperwidth{% + \hbox to.5\paperwidth{\tiny\color{white}\frame@numbers\hfill\textbf{\insertshortauthor}\hskip.3cm}% + \hbox to.5\paperwidth{\hskip.3cm\tiny\color{white}\textbf{\insertshorttitle}\hfill}\hfill}% + \ht\beamer@linebox=2.625ex% + \dp\beamer@linebox=0pt% + \setbox\beamer@linebox=\vbox{\box\beamer@linebox\vskip1.125ex}% + \color{header}\hskip-\Gm@lmargin\vrule width.5\paperwidth + height\ht\beamer@linebox\color{structure}\vrule width.5\paperwidth + height\ht\beamer@linebox\hskip-\paperwidth% + \hbox{\box\beamer@linebox\hfill}\hfill\hskip-\Gm@rmargin}}} + +\setbeamercovered{transparent} + +}} % end \DeclareOption{lionbackground} + +\DeclareOption{liontopcorner}{\def\@beamer@option{% +\pgfdeclareimage[width=14mm]{lionsealwhitesmall}{LionSealWhite} +\useframetitletemplate{\par\kern-1mm + \vbox to 10mm{\leavevmode\kern-\beamer@leftmargin + \colorbox{header}{\hbox to \paperwidth{\hss + \color{white} + \Large\bfseries\vrule height 7mm depth 3mm width0mm\relax + \insertframetitle\strut\hss}}\kern -30mm\par\vss}% + \vbox to 0pt{\kern-18mm\hbox to 0pt{\kern-12mm\relax + \pgfuseimage{lionsealwhitesmall}\hss}\vss} +}% +\useinnertheme[shadow=true]{rounded} +\setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} + +\usefoottemplate{% + \vbox{\tiny% + \hbox{% + \setbox\beamer@linebox=\hbox to\paperwidth{% + \hbox to.5\paperwidth{\tiny\color{white}\frame@numbers + \hfill\textbf{\insertshortauthor}\hskip.3cm}% + \hbox to.5\paperwidth{\hskip.3cm\tiny\color{white}% + \textbf{\insertshorttitle}\hfill}\hfill}% + \ht\beamer@linebox=2.625ex% + \dp\beamer@linebox=0pt% + \setbox\beamer@linebox=\vbox{\box\beamer@linebox\vskip1.125ex}% + 
\color{header}\hskip-\Gm@lmargin\vrule width.5\paperwidth + height\ht\beamer@linebox\color{structure}\vrule width.5\paperwidth + height\ht\beamer@linebox\hskip-\paperwidth% + \hbox{\box\beamer@linebox\hfill}\hfill\hskip-\Gm@rmargin}}} + +\setbeamercovered{transparent} + +}} % end \DeclareOption{liontopcorner} + +\DeclareOption{lionheader}{\def\@beamer@option{% + + \useinnertheme[shadow=true]{rounded} + \setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} + \pgfdeclareimage[width=14mm]{lionsealbronzewhite}{LionSealBronzeWhite} + \useframetitletemplate{% + \vskip 2mm + {\leftskip15mm% + \rightskip-\beamer@rightmargin plus1fill\relax + \advance\rightskip by0.3cm\leavevmode + \color{black}\Large\bfseries\insertframetitle\par \small\insertframesubtitle\vspace{-3mm}\par} + \vbox to 2mm{\leavevmode\kern-\beamer@leftmargin \fboxsep=0.7mm\colorbox{header}{\hbox to \paperwidth{\hss}}\vss} + \moveleft 3mm \vbox to 0pt{\kern-20mm\pgfuseimage{lionsealbronzewhite}\vss} + \vskip -7mm +} + +\usefoottemplate{% + \vbox to 3mm{\hbox to \textwidth{\tiny + \hbox to 0pt{\kern -10mm\frame@numbers\hss} + \insertshortauthor: \insertshorttitle\hfill}\vss}} + +\setbeamercovered{transparent} +}} %end \DeclareOption{lionheader} + + +\DeclareOption{lionheaderLCCC}{\def\@beamer@option{% + + \useinnertheme[shadow=true]{rounded} + \setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} + \pgfdeclareimage[width=14mm]{lionsealbronzewhite}{LionSealBronzeWhite} + \pgfdeclareimage[width=9mm]{logo-lccc}{logo-lccc} + \useframetitletemplate{% + \vskip 2mm + {\leftskip15mm% + \rightskip-\beamer@rightmargin plus1fill\relax + \advance\rightskip by0.3cm\leavevmode + \color{black}\Large\bfseries\insertframetitle\par} + \vbox to 2mm{\leavevmode\kern-\beamer@leftmargin \fboxsep=0.7mm\colorbox{header}{\hbox to \paperwidth{\hss}}\vss} + \moveleft 5mm \vbox to 0pt{\kern-20mm\pgfuseimage{lionsealbronzewhite}\vss} + \moveright 105mm\vbox to 0pt{\kern-25mm\pgfuseimage{logo-lccc}\vss} + 
\vskip -7mm +} + +\usefoottemplate{% + \vbox to 3mm{\hbox to \textwidth{\tiny + \hbox to 0pt{\kern -10mm\frame@numbers\hss} + \insertshortauthor: \insertshorttitle\hfill}\vss}} + +\setbeamercovered{transparent} +}} %end \DeclareOption{lionheaderLCCC} + +\DeclareOption{lionheaderLCCCold}{\def\@beamer@option{% +\newsavebox\lionheadbox +\AtBeginDocument {% + \pgfdeclareimage[width=116mm]{blueline}{blueline} + \pgfdeclareimage[width=14mm]{lionbronzejpg}{lionbronzejpg} + \pgfdeclareimage[width=10mm]{logo-lccc}{logo-lccc} + \sbox{\lionheadbox}{\vbox to 0pt{\vss \parskip=0pt\noindent + \pgfuseimage{lionbronzejpg}\par + \kern -5.3mm + \hbox to \textwidth{\hss\pgfuseimage{blueline}\hss} + \kern -13mm + \hbox{\kern 96mm\pgfuseimage{logo-lccc}} + }% + }% +} + +\useinnertheme[shadow=true]{rounded} +\setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} + +\useframetitletemplate{% + \vskip 5mm + {\leftskip17mm% + \rightskip-\beamer@rightmargin plus1fill\relax + \advance\rightskip by0.3cm\leavevmode + \color{black}\Large\bfseries\insertframetitle\par + }\usebox{\lionheadbox}\par} + +\usefoottemplate{% + \vbox to 3mm{\hbox to \textwidth{\tiny + \hbox to 0pt{\kern -10mm\frame@numbers\hss} + \insertshortauthor: \insertshorttitle\hfill}\vss}} + +\setbeamercovered{transparent} +}} %end \DeclareOption{lionheaderLCCCold} + + +\DeclareOption{lioncorner}{\def\@beamer@option{% +\newsavebox\lioncornerbox +\AtBeginDocument {% + \pgfdeclareimage[width=30mm]{lionbronze}{LionSealBronze} + \sbox{\lioncornerbox}{\vbox to 0pt{\vskip3mm\hbox to \textwidth{% + \hskip2mm\pgfuseimage{lionbronze}\hss}% + \vss}% + }% +} + +\useframetitletemplate{% +\vbox to 0pt{% + \kern68mm\hbox to 0pt{\kern93mm\pgfuseimage{lionbronze}% + \hss}\vss} + \kern-2mm + {\leftskip\z@ plus1fill \rightskip\z@ plus1fill + \color{LUblue}\Large\bfseries\insertframetitle\par} + {\color{bronze}\medskip\hrule height 2pt \vspace{1mm}}} + +\usefoottemplate{% + \vbox to 3mm{\hbox to \textwidth{\tiny + \hbox to 0pt{\kern 
-10mm\frame@numbers\hss} + \insertshortauthor: \insertshorttitle\hfill}\vss}} + +}} %end \DeclareOption{lioncorner} + +\DeclareOption{handout}{\def\@beamer@option{% +\useframetitletemplate{\par\kern-1mm + \vbox to 10mm{\leavevmode\kern-\beamer@leftmargin + \colorbox{white}{\hbox to \paperwidth{\hss + \color{header} + \Large\bfseries\vrule height 7mm + depth 2mm width0mm\insertframetitle\strut\hss}}\kern + -30mm\par + \color{structure}\hrule height 1mm\vss}% +}% +% +}} %end \DeclareOption{handout} + +\def\@beamer@option{\PackageError{'Regler'}{No theme variant specified}{}} + +\DeclareOption{framenumbers}{\def\frame@numbers{% + \quad\insertframenumber/\inserttotalframenumber}} +\let\frame@numbers=\relax + +\ProcessOptions +\usepackage{lmodern} +\usepackage[scaled=0.9]{helvet} +\usepackage{amsmath} +\newenvironment{gmatrix}{\left\lgroup\begin{matrix}}{\end{matrix}\right\rgroup} +% \usefonttheme{professionalfonts} +% \usefonttheme[onlymath]{serif} + +\definecolor{bronze}{rgb}{0.61,0.38,0.08} +\definecolor{palegray}{rgb}{0.97,0.95,0.95} +\definecolor{header}{rgb}{0,0,0.5} +\definecolor{LUblue}{rgb}{0,0,0.5} + +\setbeamercolor{structure}{fg=bronze} + +\parskip=\medskipamount +\userightsidebartemplate{0pt}{} + + +\AtBeginDocument {% + \pgfdeclareimage[width=70mm]{liongrey}{LionSealGrey} +}% + + +\@beamer@option + +\usetitlepagetemplate{ + \vbox to 0pt{\kern11.2mm\relax + \hbox to \hsize{\hss\pgfuseimage{liongrey}\hss}\vss} + \vbox to \textheight{ + \vss + \begin{centering} + {\Large\color{LUblue}\bfseries\inserttitle\par} + \vspace{5mm} + {\normalsize\textbf\insertauthor\par} + \vspace{5mm} + {\scriptsize\insertinstitute\par}%\par\vskip1em + \end{centering} + \vss + } +} diff --git a/jump_lin_id/pres/beamerthemeliontopcorner.sty b/jump_lin_id/pres/beamerthemeliontopcorner.sty new file mode 100644 index 0000000000000000000000000000000000000000..9759a8aa13ed00cc0b753467e02a304daffa1895 --- /dev/null +++ b/jump_lin_id/pres/beamerthemeliontopcorner.sty @@ -0,0 +1,5 @@ + 
+\PassOptionsToPackage{\CurrentOption}{beamerthemeRegler2} +\PassOptionsToPackage{liontopcorner}{beamerthemeRegler2} + +\RequirePackage{beamerthemeRegler2} diff --git a/jump_lin_id/pres/pres_idpaper.tex b/jump_lin_id/pres/pres_idpaper.tex index 4c519ea1a424b2977267d40acae0bdbaab613c33..a996519e72bdcd2886026864e6dc38d16ceed5b8 100644 --- a/jump_lin_id/pres/pres_idpaper.tex +++ b/jump_lin_id/pres/pres_idpaper.tex @@ -15,6 +15,7 @@ \usepackage{siunitx} \usepackage{color} \usepackage{pgfplots} +\usepackage{booktabs}\usepackage{multirow} \usepgfplotslibrary{groupplots} \pgfplotsset{compat=newest} \usepackage{tikz} @@ -45,10 +46,10 @@ label=center:{{$\sum$}}, minimum width=2em}} \setbeamercolor{item}{fg=actualbronze} % Change color of item bullet \setbeamercolor{block title}{use=structure,fg=white,bg=structure.fg} -\title[Neural-Networks for Dynamical System Modeling]{Tangent-Space Regularization for Dynamical System Modeling using Neural Networks} +\title[Identification of LTV Models]{Identification of LTV Dynamical Models with\\ Smooth or Discontinuous Time Evolution \\by means of Convex Optimization} \date{\today} -% \author[Fredrik Bagge Carlson]{\textbf{\large Fredrik Bagge Carlson}, \textnormal{Anders Robertsson, Rolf Johansson}} +\author[Fredrik Bagge Carlson]{\textbf{\large Fredrik Bagge Carlson}, \textnormal{Anders Robertsson, Rolf Johansson}} \institute{Lund University, Department of Automatic Control} @@ -68,19 +69,29 @@ label=center:{{$\sum$}}, minimum width=2em}} \newcommand{\cmt}[1]{{\color{yellow}{\textbf{Comment:} #1}}} \newcommand{\T}{^{\hspace{-0.1mm}\scriptscriptstyle \mathsf{T}}\hspace{-0.2mm}} \newcommand{\iT}{^{-T}\hspace{-0.6mm}} -\newcommand{\norm}[1]{\begin{Vmatrix}#1\end{Vmatrix}_2} +\newcommand{\normt}[1]{\begin{Vmatrix}#1\end{Vmatrix}_2} +\newcommand{\norm}[1]{\begin{Vmatrix}#1\end{Vmatrix}} \newcommand{\inspace}[1]{\in \mathbb{R}^{#1}} \newcommand{\incspace}[1]{\in \mathbb{C}^{#1}} \newcommand{\card}[1]{\text{card}(#1)} \renewcommand{\v}{v} 
\renewcommand{\a}{\dot{v}} \newcommand{\amp}{A} -\newcommand{\A}{\mathbf{A}} -\newcommand{\w}{k} +\newcommand{\A}{\Phi} +\newcommand{\y}{y} \newcommand{\PI}{\left(\A \hspace{-0.2mm}\T\hspace{-0.1mm}\A\right)^{\hspace{-0.4mm}-1} \hspace{-1mm} \A\hspace{-0.3mm}\T} \newcommand{\tA}{\tilde{\mathbf{A}}} +%\newcommand{\A}{\} +\newcommand{\w}{k} +\newcommand{\N}{\mathcal{N}} \DeclareMathOperator{\sign}{sign} \DeclareMathOperator*{\argmin}{arg\,min} +\newcommand{\minimize}[1]{\underset{#1}{\text{minimize} }} +\newcommand{\subjto}{\text{subject to }} +\renewcommand{\vec}[1]{\operatorname{vec}{(#1)}} +\newcommand{\diag}[1]{\operatorname{diag}{(#1)}} +\newcommand{\bmatrixx}[1]{\begin{bmatrix}#1\end{bmatrix}} + \begin{document} @@ -94,31 +105,381 @@ label=center:{{$\sum$}}, minimum width=2em}} %==================================================================== %==================================================================== -\begin{frame}{Introduction} - Dynamical control systems are often described by differential state-equations - $$\dot x(t) = f_c(x(t), u(t))$$ - where $x$ is the state, $u$ is the input - \begin{block}{Example -- Robot} - $$\ddot x = M^{-1}(x) \big( C(x,\dot x)\dot x + G(x) + F(\dot x) - u \big)$$ - \end{block} +\begin{frame}{LTI identification} + We start by considering the case of identification of the parameters in an LTI model on the form + \begin{equation} + x_{t+1} = A x_t + B u_t + v_t, \quad t \in [1,T] + \end{equation} + where $x\inspace{n}$ and $u\inspace{m}$ are the state and input respectively. 
+\end{frame} + + +\begin{frame}{}{} + $\y = \A\w$, and arrange the data according to + \begin{align*} + \y &= + \begin{bmatrix} + {x_1} \\ \vdots \\ {x_T} + \end{bmatrix} + & &\inspace{Tn} \\ + \w &= \vec{\bmatrixx{A\T & B\T}} & &\inspace{K}\\[0.2em] + \A &= + \begin{bmatrix} + I_n \otimes x_0\T & I_n \otimes u_0\T \\ + \vdots & \vdots\\ + I_n \otimes x_{T-1}\T & I_n \otimes u_{T-1}\T + \end{bmatrix} + & &\in \mathbb{R}^{Tn\times K} + \end{align*} + + \begin{align} + \w^* &= \argmin_{\w} \normt{\A \w - \y}^2 \label{eq:lscost}\\ + ~ &= \PI \y \label{eq:ls} + \end{align} + +\end{frame} + + + + +%==================================================================== +%==================================================================== +\begin{frame}{Time-varying dynamics} + We now extend our view to systems where the dynamics change with time. We limit the scope of this article to models on the form + \begin{equation} + \label{eq:tvk} + \begin{split} + x_{t+1} &= A_t x_t + B_t u_t + v_t\\ + \w_t &= \vec{\bmatrixx{A_t\T & B_t\T}} + \end{split} + \end{equation} \pause + where the parameters $\w$ are assumed to evolve according to the dynamical system + \begin{equation} + \label{eq:dynsys} + \begin{split} + k_{t+1} &= H_t k_t + w_t\\ + y_t &= \big(I_n \otimes \bmatrixx{x_t\T & u_t\T}\big) \w_t + \end{split} + \end{equation} +\end{frame} - Discretization (sampling) leads to - $$x_{t+1} = f(x_t, u_t)$$ - \begin{block}{Objective 1} - Learn the function $f$ - $$x_{t+1} = f(x_t, u_t)$$ - \end{block} + +%==================================================================== +%==================================================================== +\begin{frame}{Trend filtering} + An important class of identification methods that has been popularized lately is \emph{trend filtering} methods~\footfullcite{kim2009ell_1, tibshirani2014adaptive}. 
+ + As a simple example, consider the reconstruction $\hat y$ of a noisy signal $y = \{y_t\inspace{}\}_{t=1}^T$ with piecewise constant segments. + \begin{equation*} \label{eq:tf} + \minimize{\hat{y}} \normt{y-\hat{y}}^2 + \lambda\sum_t |\hat{y}_{t+1} - \hat{y}_t| + \end{equation*} + \begin{itemize} + \item Fitness function + \item (Sparsity promoting) Regularization + \item Convex + \end{itemize} \end{frame} +%==================================================================== +%==================================================================== +\begin{frame}{Regularization term intuition}{} + figure + + The 1-norm is a \emph{sparsity-promoting} penalty, hence a solution in which only a small number of non-zero first-order time differences in the model parameters is favored, i.e., a piecewise constant dynamics evolution. +\end{frame} + + + +%==================================================================== +%==================================================================== +\begin{frame}{Low-frequency time evolution} + A slowly varying signal is characterized by \emph{small first-order time differences}. + \pause + + \begin{equation} \label{eq:slow} + \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda^2\sum_t \normt{\w_{t+1} - \w_{t}}^2 + \end{equation} + \pause + + \begin{align}\label{eq:closedform} + \tilde{\w}^* &= (\tilde{\A}\T\tilde{\A} + \lambda^2 D_1\T D_1)^{-1}\tilde{\A}\T \tilde{Y}\\ + \tilde{\w} &= \operatorname{vec}(\w_1, ...\,, \w_T)\nonumber + \end{align} +\end{frame} + + + + +%==================================================================== +%==================================================================== +\begin{frame}{Smooth time evolution} + A smoothly varying signal is characterized by \emph{small second-order time differences}. 
+ \pause + + \begin{equation} \label{eq:smooth} + \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda^2\sum_t \normt{\w_{t+2} -2 \w_{t+1} + \w_t}^2 + \end{equation} +\end{frame} + + +%==================================================================== +%==================================================================== +\begin{frame}{Piecewise constant time evolution}\label{sec:pwconstant} + A signal which is mostly flat, with a small number of distinct level changes, is characterized by a \emph{sparse first-order time difference}. + \pause + + + \begin{equation} \label{eq:pwconstant} + \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda\sum_t \normt{ \w_{t+1} - \w_t} + \end{equation} + \pause + + We can give \labelcref{eq:pwconstant} an interpretation as a \emph{grouped-lasso} cost function. + + Penalty on the 1-norm on the \emph{length} of the difference vectors $\w_{t+1} - \w_t$ since $\norm{\normt{\cdot}}_1 = \normt{\cdot}$. + \pause + +\end{frame} + +\begin{frame}{}{} + + At a first glance, one might consider the formulation + \begin{equation} \label{eq:pwconstant_naive} + \minimize{\w} \normt{\y-\hat{\y}}^2 + \lambda\sum_t \norm{\w_{t+1} - \w_t}_1 + \end{equation} + \pause + + changes to different entries of $\w_t$ would not occur at the same time instants. +\end{frame} + + +%==================================================================== +%==================================================================== +\begin{frame}{Implementation} + Due to the non-squared norm penalty $\sum_t \normt{ \w_{t+1} - \w_t}$, problem \labelcref{eq:pwconstant} is significantly harder to solve than \labelcref{eq:smooth}. + + An efficient implementation using the linearized ADMM algorithm \footfullcite{parikh2014proximal} is made available in the accompanying repository. 
+ + \url{https://github.com/baggepinnen/LTVModels.jl} +\end{frame} + + + +%==================================================================== +%==================================================================== +\begin{frame}{Summary} + The proposed optimization problems are summarized in~\cref{tab:opts}. + + \begin{table}[] + \centering + \caption{Summary of optimization problem formulations. $D_n$ refers to parameter vector time-differentiation of order $n$.} + \label{tab:opts} + \begin{tabular}{@{}lll@{}} + \toprule + Norm & $D_n$ & Result \\ \midrule + 1 & 1 & Small number of steps (piecewise constant) \\ + 1 & 2 & Small number of bends (piecewise affine) \\ + 2 & 1 & Small steps (slowly varying) \\ + 2 & 2 & Small bends (smooth) \\ \bottomrule + \end{tabular} + \end{table} + +\end{frame} + + + + + +%==================================================================== +%==================================================================== +\begin{frame}{Example -- Jump-linear system} + We now consider a simulated example. Change in dynamics, from + $$A_t = \left[ + \begin{array}{cc} + 0.95 & 0.1 \\ + 0.0 & 0.95 \\ + \end{array} + \right], \quad B_t = \left[ + \begin{array}{c} + 0.2 \\ + 1.0 \\ + \end{array} + \right] + $$ + to $$A_t = \left[ + \begin{array}{cc} + 0.5 & 0.05 \\ + 0.0 & 0.5 \\ + \end{array} + \right], \quad B_t = \left[ + \begin{array}{c} + 0.2 \\ + 1.0 \\ + \end{array} + \right] + $$ + occurred at $t=200$. + + \begin{description} + \item[Input] $u \sim \N(0, 1)$ + \item[state transition noise and measurement noise] $\N(0, 0.2^2)$ + \end{description} + +\end{frame} + +\begin{frame}{Example -- Jump-linear system} + \begin{figure} + \centering + \setlength{\figurewidth}{0.99\linewidth} + \setlength{\figureheight }{5.5cm} + \input{../figs/ss.tex} + \caption{True values are shown with dashed, black lines. 
Gaussian state-transition and measurement noise with $\sigma = 0.2$ were added.} + \label{fig:ss} + \end{figure} + +\end{frame} + + + +%==================================================================== +%==================================================================== +\begin{frame}{Example -- Robot arm} + \begin{itemize} + \item Non-smooth dynamics + \item Discontinuous Coulomb friction + \item Stiff contact with environment + \end{itemize} + + % The state of the robot arm consists of two joint coordinates, $q$, and their time derivatives, $\dot q$. \Cref{fig:robot_train} illustrates the state trajectories, control torques and simulations of a model estimated by solving~\labelcref{eq:pwconstant}. The figure clearly illustrates that the model is able to capture the dynamics both during the non-smooth sign change of the velocity, but also during the establishment of the stiff contact. The learned dynamics of the contact is however time-dependent, which is, in some situations, a drawback of the model and is illustrated in \Cref{fig:robot_val}, where the model is used on a validation trajectory where a different noise sequence was added to the control torque. Due to the novel input signal, the contact is established at a different time-instant and as a consequence, there is an error transient in the simulated data. 
+ +\end{frame} +%==================================================================== +%==================================================================== +\begin{frame}{Robot -- Training trajectory}{} + + \begin{figure} + \centering + \pgfplotsset{every axis/.append style={ + label style={font=\tiny}, + legend style={font=\tiny, draw=none}, + tick label style={font=\tiny} + }} + \setlength{\figurewidth}{0.495\linewidth} + \setlength{\figureheight }{2.7cm} + \input{../figs/robot_train.tex} + \label{fig:robot_train} + \end{figure} +\end{frame} +%==================================================================== +%==================================================================== +\begin{frame}{Robot -- Validation trajectory}{} + \begin{figure} + \centering + \pgfplotsset{every axis/.append style={ + label style={font=\tiny}, + legend style={font=\tiny, draw=none}, + tick label style={font=\tiny} + }} + \setlength{\figurewidth}{0.495\linewidth} + \setlength{\figureheight }{3cm} + \input{../figs/robot_val.tex} + \caption{Validation data vs. sample time index. 
The dashed lines indicate the event times for the training data, highlighting that the model is able to deal effortlessly with the non-smooth friction, but inaccurately predicts the time evolution around the contact event which now occurs at a slightly different time instance.} + \label{fig:robot_val} + \end{figure} +\end{frame} + + + +%==================================================================== +%==================================================================== +\begin{frame}{Example -- Reinforcement learning} \label{sec:rl} + \begin{itemize} + \item Identify LTV dynamics models for reinforcement learning + \item Dampen oscillations of a pendulum on a cart + \item Quadratic cost on states and control + \begin{enumerate} + \item fit a dynamics model along the last obtained trajectory + \item optimize the cost function under + the model using iterative LQG (differential dynamic programming)\footnote{Implementation made available at + \href{https://github.com/baggepinnen/DifferentialDynamicProgramming.jl}{github.com/baggepinnen/DifferentialDynamicProgramming.jl}} + \item In order to stay close to the validity region of the linear model, we put bounds on the deviation between each new trajectory and the last trajectory. + \end{enumerate} + \end{itemize} + +\end{frame} +%==================================================================== +%==================================================================== +\begin{frame}{Example -- Reinforcement learning}{} + + We compare three different models + \begin{itemize} + \item The ground truth system model + \item LTV model (obtained by solving \labelcref{eq:smooth}) + \item LTI model + \end{itemize} + + The total cost over $T=500$ time steps is shown as a function of learning iteration. 
+ \begin{figure}[htp] + \centering + \setlength{\figurewidth}{0.99\linewidth} + \setlength{\figureheight }{4.5cm} + \pgfplotsset{every axis/.append style={ + legend style={draw=black!20!white} + }} + \input{../figs/ilc.tex} + \end{figure} +\end{frame} + + +%==================================================================== +%==================================================================== +\begin{frame}{Discussion}{} + \begin{itemize} + \item[\nice{+}] The methods presented extend directly to nonlinear models that remain \emph{linear in the parameters}. + \item[\bad{-}] A first-order approximation to a nonlinear system is not guaranteed to generalize well as deviations from the trajectory become large. + \item All assumptions over time + \end{itemize} + +\end{frame} + + +%==================================================================== +%==================================================================== +\begin{frame}{Discussion -- Reinforcement learning}{} + \begin{itemize} + \item For iterative learning control and trajectory-centric reinforcement learning, a first-order approximation to the dynamics is used for efficient optimization + \item Validity of the approximation is ensured by incorporating penalties or constraints between two consecutive trajectories. + \item[\nice{+}] This makes the proposed identification methods attractive in applications such as guided policy search (GPS)~\footfullcite{levine2013guided, levine2015learning} and non-linear iterative learning control (ILC)~\footfullcite{bristow2006survey}, where they can lead to dramatically decreased sample complexity. 
+ \end{itemize} +\end{frame} + + + +%==================================================================== +%==================================================================== +\begin{frame}{Conclusions}{} + \begin{itemize} + \item Framework for identification of linear, time-varying models along trajectories of nonlinear dynamical systems using convex optimization + \item Applications within trajectory-centric, model-based reinforcement learning, iterative learning control (ILC), and jump-linear system identification + \end{itemize} + \pause + + In the paper + \begin{itemize} + \item Analysis of identifiability + \item Kalman smoother for efficient identification + \end{itemize} +\end{frame} + %==================================================================== %==================================================================== \begin{frame}{Open source}{} - Code to train the models presented in this talk available at + Code to train the models and reproduce examples presented in this talk is available at \url{https://github.com/baggepinnen/LTVModels.jl} \end{frame}