From 58ee65c55c7891d1ad22a0ee3447b2b2420c55a5 Mon Sep 17 00:00:00 2001
From: Sven Robertz <sven@cs.lth.se>
Date: Fri, 17 May 2013 13:30:31 +0200
Subject: [PATCH] started compiling a tech report

---
 doc/tech_report.tex | 314 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 doc/tech_report.tex

diff --git a/doc/tech_report.tex b/doc/tech_report.tex
new file mode 100644
index 0000000..47dcf5b
--- /dev/null
+++ b/doc/tech_report.tex
@@ -0,0 +1,314 @@
+% *** en embryo of a technical report describing the labcomm design rationale and implementation ***
+
+\documentclass[a4paper]{article}
+%\usepackage{verbatims}
+%\usepackage{todo}
+
+\begin{document}
+\title{Labcomm tech report}
+\author{Anders Blomdell and Sven Gesteg\aa{}rd Robertz }
+\date{embryo of draft, \today}
+
+\maketitle 
+
+\begin{abstract}
+
+LabComm is a binary protocol suitable for transmitting and storing samples of
+process data. It is self-describing and independent of programming language,
+processor, and network used (e.g., byte order, etc).  It is primarily intended
+for situations where the overhead of communication has to be kept at a minimum,
+hence LabComm only requires one-way communication to operate. The one-way
+operation also has the added benefit of making LabComm suitable as a storage
+format. 
+
+LabComm provides self-describing channels, as communication starts with the
+transmission of an encoded description of all possible sample types that can
+occur, followed by any number of actual samples in any order the sending
+application sees fit.
+
+The LabComm system is based on a binary protocol and
+and a compiler that generates encoder/decoder routines for popular languages
+including C, Java, and Python.
+
+The LabComm compiler accepts type and sample declarations in a small language
+that is similar to C or Java type-declarations. 
+\end{abstract}
+\section{Introduction}
+
+%[[http://rfc.net/rfc1057.html|Sun RPC]]
+%[[http://asn1.org|ASN1]].
+
+LabComm has got it's inspiration from Sun RPC~\cite{SunRPC}
+and ASN1~\cite{ANS1}. LabComm is primarily intended for situations
+where the overhead of communication has to be kept at a minimum, hence LabComm
+only requires one-way communication to operate. The one-way operation also has
+the added benefit of making LabComm suitable as a storage format. 
+
+\section{Communication model}
+
+LabComm provides self-describing communication channels, by always transmitting
+a machine readable description of the data before actual data is sent.
+Therefore, communication on a LabComm channel has two phases
+
+\begin{enumerate}
+\item the transmission of signatures (an encoded description including data
+types and names, see appendix~\ref{sec:ProtocolGrammar} for details) for all sample types
+that can be sent on the channel
+\item the transmission of any number of actual samples in any order
+\end{enumerate}
+
+During operation, LabComm will ensure (i.e., monitor) that a communication
+channel is fully configured, meaning that both ends agree on what messages that
+may be passed over that channel.  If an unregistered sample type is sent or
+received, the LabComm encoder or decoder will detect it and take action.
+
+The roles in setting up, and maintaining, the configuration of a channel are as follows:
+
+\paragraph{The application software} (or higher-level protocol) is required to
+
+\begin{itemize}
+\item register all samples to be sent on a channel with the encoder
+\item register handlers for all samples to be received  on a channel with the decoder
+\end{itemize}
+
+\paragraph{The transmitter (encoder)}
+
+\begin{itemize}
+ \item ensures that the signature of a sample is transmitted on the channel before samples are 
+       written to that channel
+\end{itemize}
+
+\paragraph{The receiver (decoder)}
+
+\begin{itemize}
+ \item checks, for each signature, that the application has registered a handler for that sample type
+ \item if an unhandled signature is received, pauses the channel and informs the application
+\end{itemize}
+
+\section{The Labcomm language}
+
+The following examples do not cover the entire language
+specification (see appendix~\ref{language_grammar}), but might serve as a
+gentle introduction to the LabComm language.
+
+\subsection{Primitive types}
+
+\begin{verbatim}
+  sample boolean a_boolean;
+  sample byte a_byte;
+  sample short a_short;
+  sample int an_int;
+  sample long a_long;
+  sample float a_float;
+  sample double a_double;
+  sample string a_string;
+\end{verbatim}
+
+\subsection{Arrays}
+
+\begin{verbatim}
+  sample int fixed_array[3];
+  sample int variable_array[_];                // Note 1
+  sample int fixed_array_of_array[3][4];       // Note 2
+  sample int fixed_rectangular_array[3, 4];    // Note 2
+  sample int variable_array_of_array[_][_];    // Notes 1 & 2
+  sample int variable_rectangular_array[_, _]; // Notes 1 & 2
+\end{verbatim}
+
+\begin{enumerate}
+\item In contrast to C, LabComm supports both fixed and variable (denoted
+by \verb+_+) sized arrays.
+
+\item In contrast to Java, LabComm supports multidimensional arrays and not
+only arrays of arrays.
+
+\end{enumerate}
+
+\subsection{Structures}
+
+\begin{verbatim}
+  sample struct {
+    int an_int_field;
+    double a_double_field;
+  } a_struct;
+\end{verbatim}
+
+\section{User defined types}
+
+\begin{verbatim}
+  typedef struct {
+    int field_1;
+    byte field_2;
+  } user_type[_];
+  sample user_type a_user_type_instance;
+  sample user_type another_user_type_instance;
+\end{verbatim}
+
+\section{User actions}
+
+User actions (similar to ioctl()) allowing the application or a higher level
+protocol to communicate with the underlying transport layer through the LabComm
+encoder A special case of this is a specific action informing the underlying
+transport that a signature is being sent (to allow handshaking)
+
+
+\section{LabComm is not...}
+
+\begin{itemize}
+\item a protocol for two-way connections
+\item intrinsically supporting reliable communication 
+\item providing semantic service-descriptions
+\end{itemize}
+
+But
+
+\begin{itemize}
+\item it is suitable for the individual channels of a structured connection
+\item the user action mechanism allows using feature of different transport layers
+  through labcomm (i.e., it allows encapsulation of the transport layer)
+\item the names of samples can be chosen and mapped according to a suitable taxonomy or ontology
+\end{itemize}
+
+
+
+\section{Example and its encoding}
+
+With the following `example.lc` file:
+
+\begin{verbatim}
+sample struct {
+  int sequence;
+  struct {
+    boolean last;
+    string data;
+  } line[_];
+} log_message;
+sample float data;
+\end{verbatim}
+
+and this \verb+example_encoder.c+ file 
+
+\begin{verbatim}
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <labcomm_fd_reader.h>
+#include <labcomm_fd_writer.h>
+#include "example.h"
+
+int main(int argc, char *argv[]) {
+  int fd;
+  struct labcomm_encoder *encoder;
+  int i, j;
+
+  fd = open("example.encoded", O_WRONLY|O_CREAT|O_TRUNC, 0644);
+  encoder = labcomm_encoder_new(labcomm_fd_writer, &fd);
+  labcomm_encoder_register_example_log_message(encoder);
+  labcomm_encoder_register_example_data(encoder);
+  for (i = 0 ; i < argc ; i++) {
+    example_log_message message;
+
+    message.sequence = i + 1;
+    message.line.n_0 = i;
+    message.line.a = malloc(message.line.n_0*sizeof(message.line));
+    for (j = 0 ; j < i ; j++) {
+      message.line.a[j].last = (j == message.line.n_0 - 1);
+      message.line.a[j].data = argv[j + 1];
+    }
+    labcomm_encode_example_log_message(encoder, &message);
+    free(message.line.a);
+  }
+  for (i = 0 ; i < argc ; i++) {
+    float f = i;
+    labcomm_encode_example_data(encoder, &f);
+  }
+}
+\end{verbatim}
+
+Running \verb+./example_encoder one two+, will yield the following result in example.encoded:
+
+
+\begin{verbatim}
+00000000  02 40 0b 6c 6f 67 5f 6d  65 73 73 61 67 65 11 02  |.@.log_message..|
+00000010  08 73 65 71 75 65 6e 63  65 23 04 6c 69 6e 65 10  |.sequence#.line.|
+00000020  01 00 11 02 04 6c 61 73  74 20 04 64 61 74 61 27  |.....last .data'|
+00000030  02 41 04 64 61 74 61 25  40 00 00 00 01 00 40 00  |.A.data%@.....@.|
+00000040  00 00 02 01 01 03 6f 6e  65 40 00 00 00 03 02 00  |......one@......|
+00000050  03 6f 6e 65 01 03 74 77  6f 41 00 00 00 00 41 3f  |.one..twoA....A?|
+00000060  80 00 00 41 40 00 00 00                           |...A@...|
+00000068
+\end{verbatim}
+
+
+\section{Ideas/Discussion}:
+
+The labcomm language is more expressive than its target languages regarding data types.
+E.g., labcomm can declare both arrays of arrays and matries where Java only has arrays of arrays
+In the generated Java code, a labcomm matrix is implemented as an array of arrays.
+
+Another case (not yet included) is unsigned types, which Java doesn't have. If we include
+unsigned long in labcomm, that has a larger range of values than is possible to express using
+Java primitive types. However, it is unlikely that the entire range is actually used, so one
+way of supporting the common cases is to include run-time checks for overflow in the Java encoders
+and decoders.
+
+\appendix
+
+\section{The LabComm protocol}
+\label{sec:ProtocolGrammar}
+
+\begin{verbatim}
+<packet> := ( <type_decl> | <sample_decl> | <sample_data> )*
+<type_decl> := 0x01 ''(packed)'' <user_id> <string> <type>
+<sample_decl> := 0x02 ''(packed)''<user_id> <string> <type>
+<user_id> := 0x60..0xffffffff  ''(packed)''
+<string> := <string_length> <char>*
+<string_length> := 0x00..0xffffffff  ''(packed)''
+<char> := any UTF-8 char
+<type> := ( <basic_type> | <user_id> | <array_decl> | <struct_decl> )
+<basic_type> := ( <boolean_type> | <byte_type> | <short_type> |
+                  <integer_type> | <long_type> | <float_type> | 
+                  <double_type> | <string_type> )
+<boolean_type> := 0x20 ''(packed)''
+<byte_type> := 0x21 ''(packed)''
+<short_type> := 0x22 ''(packed)''
+<integer_type> := 0x23 ''(packed)''
+<long_type> := 0x24 ''(packed)''
+<float_type> := 0x25 ''(packed)''
+<double_type> := 0x26 ''(packed)''
+<string_type> := 0x27 ''(packed)''
+<array_decl> := 0x10 ''(packed)'' <number_of_indices> <indices> <type>
+<number_of_indices> := 0x00..0xffffffff  ''(packed)''
+<indices> := ( <variable_index> | <fixed_index> )*
+<variable_index> := 0x00  ''(packed)''
+<fixed_index> := 0x01..0xffffffff  ''(packed)''
+<struct_decl> := 0x11 ''(packed)'' <number_of_fields> <field>*
+<number_of_fields> := 0x00..0xffffffff  ''(packed)''
+<field> := <string> <type>
+<sample_data> := <user_id> <packed_sample_data>
+<packed_sample_data> := is sent in network order, sizes are as follows:
+
+||Type           ||Encoding/Size                                         ||
+||---------------||------------------------------------------------------||
+||boolean        ||  8 bits                                              ||
+||byte           ||  8 bits                                              ||
+||short          || 16 bits                                              ||
+||integer        || 32 bits                                              ||
+||long           || 64 bits                                              ||
+||float          || 32 bits                                              ||
+||double         || 64 bits                                              ||
+||string         || length ''(packed)'', followed by UTF8 encoded string ||
+||array          || each variable index ''(packed)'',                    ||
+||               || followed by encoded elements                         ||
+||struct         || concatenation of encoding of each element            ||
+||               || in declaration order                                 ||
+\end{verbatim}
+
+Type fields, user IDs, number of indices and lengths are sent in a packed, or
+variable length, form.  An integer is sent as a sequence of bytes where the
+lower seven bits contain a chunk of the actual number and the high bit
+indicates if more chunks follow. The sequence of chunks are sent with the least
+significant chunk first.  (The numbers encoded in this form are indicated above
+with \textit{(packed)}.)
+
+\end{document}
-- 
GitLab