%% This file: http://user.it.uu.se/~embe8573/hs-linux/docs/report/report.tex \documentclass[a4paper, twoside, 12pt]{report} \usepackage{enumerate} %% \noindent\begin{minipage}{\textwidth} %% \end{minipage} \usepackage{longtable} %% insert text file % verbatim \usepackage{verbatim} \newlength\myverbindent \setlength\myverbindent{1cm} \makeatletter \def\verbatim@processline{% \hspace{\myverbindent}\the\verbatim@line\par} \makeatother % alltt \usepackage{alltt} %% insert pdf \usepackage{pdfpages} %% url \usepackage{hyperref} %%% DEFINITIONS %% index \usepackage[font=footnotesize]{idxlayout} \usepackage{makeidx} \makeindex %% boxes, captions, etc. \usepackage[labelformat=empty,font=bf]{caption} \usepackage{color} \definecolor{light-gray}{gray}{0.95} \setlength{\fboxsep}{0.67\baselineskip} \newcommand\boxdef[2]{% \vspace{\baselineskip}% \begin{center}% \colorbox{light-gray}{\parbox{0.80\linewidth}{% \index{#1 \emph{def.}}\captionof{figure}{#1}#2}}% \end{center}% \vspace{\baselineskip}% } %% page and section refs \newcommand\spref[1]{[section \ref{#1}, page \pageref{#1}]} %% misc \usepackage[T1]{fontenc} %% equations \newcounter{eqnumber} \setcounter{eqnumber}{0} \newcommand{\beq}[2]{ \addtocounter{eqnumber}{1} \vspace{0.33cm} \begin{tabular}{p{0.7\linewidth}p{0.3\linewidth}} \index{#2@\texttt{#2} \emph{eq.} (\arabic{eqnumber})} \begin{center}$#1$\end{center}& \begin{center}\tiny\texttt{eq. (\arabic{eqnumber}), \textbf{#2}}\end{center} \end{tabular} \vspace{0.33cm} } \def\relate{0 < r < p < l} \def\topscheduler{g = (l, p, r, S, a)} \def\taskscheduler{S = \{s_{i}\} = \{(m_{i}, b_{i}, P_{i}, \alpha_{i})\}} \def\sporadic{P_{i} = \{t_{j}\} = \{(C_{j}, D_{j}, T_{j})\}} \def\sporadictask{t_{j} = (C_{j}, D_{j}, T_{j})} \def\responsetime{r_{j} = Q_{j} + o_{j} + i_{j} + C_{j}} \def\zperiods{z = \frac{r_{j}}{p}} \def\schedulingoverhead{o_{j} = \left\lfloor\frac{r_{j}}{r}\right\rfloor o} \def\interference{i_{j} = \lceil z\rceil m_{i}58.5\ ns} \def\queuing{Q_{j} = Q^{g}_{j} + Q^{s}_{j}} \def\groupqueuing{Q^{g}_{j} = \sum\limits_{\{t_{j}, t_{k}\}\subseteq P_{i}, t_{k} \in hp(t_{j})}\left\lceil\frac{r_{j}}{T_{k}}\right\rceil C_{k}} \def\systemqueuing{Q^{s}_{j} = (\lceil z\rceil + 1)(p - b_{i})} % (\sum\limits_{s_{k} \in S, k\neq j}b_{k}) \def\interval{I = \Delta\theta = \theta_{2} - \theta_{1}} \def\supplycompleteperiods{w = \left\lfloor\frac{I}{p}\right\rfloor - 1} \def\supplytimecompleteperiods{B_{i} = wb_{i}} \def\supplylengthincompleteperiods{R = I - wp} \def\supplylengthincompleteoutside{U = p - \frac{R}{2}} \def\supplyincompleteperiods{\beta_{i} = 2 max(0, b_{i} - U)} \def\supplyreduction{\rho = (w + 2)m_{i}58.5\ ns} \def\supply{sbf(s_{i}, I) = B_{i} + \beta_{i} - \rho} \def\demandincompleteperiod{h = min(I - fT_{j},C_{j})} \def\demandtask{dbf(t_{j}, I) = fC_{j} + h} \def\demandgroup{dbf(P_{i}, I) = \sum\limits_{t_{j} \in P_{i}}dbf(t_{j}, I)} \def\demandcompleteperiods{f = \left\lfloor\frac{I}{T_{j}}\right\rfloor} %% to-be-boxed definitions \def\hierarchicalscheduler{A hierarchical scheduler is a scheduler that consists of several schedulers that are organized hierarchically, in a tree structure. The leaf schedulers are not themselves hierarchical, but ordinary: they schedule processes (or tasks). 
All schedulers above them, however, are hierarchical schedulers: they do not schedule processes, but the schedulers at the level immediately below.} \def\WCET{WCET (Worst Case Execution Time) is the wall-clock time, or response time, of a program: the amount of time that passes from invocation to completion, in the \emph{worst} possible case (i.e., it is impossible that it could ever take any longer to successfully terminate the task). The WCET parameter is helpful in models, to do computation, and it can also steer software behavior. Nonetheless, it is most often not possible to derive the WCET, in spite of much Computer Science research devoted to methods to that end. \cite[p. 110]{rtss} \\ There are several reasons WCET may be incomputable: task-execution conditions may vary -- externally (the physical context), but also internally (e.g., system load); also, tasks may depend on other tasks to complete before them, or to release resources they have monopolized; and, the system may be a multicore, or even distributed with asynchronous message passing in between. \cite[p. 3]{rtssva} \\ In this project, both in software and in analysis, it is assumed the WCET is correct, wherever it appears. Actually, WCETs are simply assigned by the user, with no questions asked as to whether they are realistic or not. Wherever available, input verification is done only in terms of \emph{other} input parameters: the associated software is not examined.} \def\edf{EDF (Earliest Deadline First) is a real-time scheduling algorithm. It requires that each task be annotated with a deadline (typically an integer). This is a static, a-priori property; it should be interpreted as the time interval between the release of the task and when it must be completed, lest it be considered delayed (i.e., a failure for hard real-time systems). When scheduling, the EDF-scheduler will select for execution the task with the most immediate absolute deadline -- that is, the earliest deadline, first. Because the absolute deadline is a function not only of the deadline task parameter, but also of the run-time instant the task was released, EDF is a dynamic scheduling algorithm.} \def\resources{The \emph{resources} of a computer can be either in hardware or software. Hardware resources are the memory, the CPU, the disk, etc. Such resources can be either static -- e.g., the disk size -- or dynamic -- e.g., the available memory at the time of a certain workload. Resources can also be in software. If a program has been assigned a certain number of memory accesses, this budget is a resource: if 0, it is a depleted resource until it gets resupplied.} \def\responsetimedef{The \emph{response time} is the wall-clock time elapsed from when the user invoked a command up until the completion of the program that is associated with that command. The user does not have to be a human: it can be another program, e.g., a real-time scheduler.} \def\schedulability{For any task set (where the tasks adhere to a specific task model), and a particular scheduling algorithm, there is a test that can determine \emph{schedulability}. That test answers the question: ``Can the algorithm make a schedule that involves all the tasks in the set, so that, if they are executed accordingly, all tasks will complete before their deadlines?'' If so, the task set has passed the schedulability test for that algorithm: the task set is \emph{schedulable}.} \def\scheduler{Computers are capable of executing several programs concurrently -- or seemingly so.
Actually, the CPU switches from program to program, executing each just so much as to create the illusion that everything happens in parallel. \cite[p. 117]{linux} This makes for responsiveness to the user, but also better use of hardware resources: for example, while one program waits for I/O, it yields the CPU to another, computation-intensive program (as the program-put-on-hold cannot make use of the CPU anyway, lacking I/O data). A special program decides what program executes, and when: the \emph{scheduler}. This program is part of the kernel as it needs memory and other hardware access. There are several scheduling algorithms that the scheduler can employ to make its choice of what program to run. Programs are associated with a run-time data structure -- the process control block, or PCB -- that will provide the scheduler with uniform metadata on the state of the program in execution. By contrast, on multicores, true concurrency does exist, as programs are executed in parallel, on different CPUs (or cores).} \def\rtos{The \emph{real-time operating system} (RTOS) is the software that makes sure that the real-time tasks are executed in such a way that they are all guaranteed to complete before their deadlines expire. In principle, there should be several ways to implement such a system. In practice, the most common solution is to have a real-time scheduler instead of the ordinary scheduler, and then to annotate each task with real-time metadata, such as worst-case execution time and deadline. The real-time scheduler will process the metadata, as well as the overall system state (including the time), so that tasks are enqueued and selected for execution in a manner that will ensure compliance with the real-time constraints.} \def\realtimesystem{``An operating system capable of responding to an external event in a predictable and appropriate way every time the external event occurs.'' \cite[p. 374]{dyson}} \def\deadline{In real-time computing, a task (typically a small program instance) must not only be implemented to produce a correct result (like any other piece of software), it must also complete its computation before a certain time limit. Such a limit can be expressed in different ways. The sporadic task model used for this implementation expresses time constraints in terms of a \emph{deadline} for each task. The deadline is relative to the task arrival time: it is thus a static parameter, attached to the task much like the source code that is to be executed. If the arrival of task $t_i$ is at time $c$, and $t_{i}$ has deadline $D_{i}$, that means in absolute time, $t_i$ must be completed no later than $c + D_{i}$, otherwise $t_{i}$ is considered a faulty piece of software, no less so than if there were a bug in its code, making it crash or produce an incorrect result. (In some settings, a correct result -- which is delivered late -- can have even worse consequences than an incorrect result that was delivered in time. \cite[p. 9]{stigge})} \def\predictable{Computer systems are deterministic by definition. If they are \emph{predictable}, that determinism is lifted to a human level: the user is able to make a statement -- that something particular will happen, and not later than at a certain time. The statement is then proved to hold by use of formal methods. If the statement holds in theory as well as practice, the system is predictable, at least with respect to that particular statement.
A predictable system consists of a set of likewise predictable statements/behavior patterns.} %% TOC %% %% does not work: %% \usepackage[toc]{multitoc} %% \renewcommand*{zlticolumntoc}{2} %% \setlength{\columnseprule}{0.5pt} %% \addtocontents{toc}{\protect\enlargethispage{6cm}} %% does not work: %% \def\contentsname{\empty} \usepackage{tocloft} %% \renewcommand\cftchapfont{\tiny} %% \renewcommand\cftsecfont{\tiny} %% \renewcommand\cftsubsecfont{\tiny} \renewcommand\cftsubsubsecfont{\small} %% \renewcommand\cftchappagefont{\tiny} %% \renewcommand\cftsecpagefont{\tiny} %% \renewcommand\cftsubsecpagefont{\tiny} %% \renewcommand\cftsubsubsecpagefont{\tiny} %% GFX \usepackage{graphicx} \graphicspath{{./pics/}} \newcommand{\baimgspace}{\vspace{0.5cm}} \usepackage{listings} \usepackage{enumitem} %% TOC \renewcommand\thesection{\arabic{section}} %% use \tt as the math font \usepackage{mathastext} \MTfamily{\ttdefault}\Mathastext %% new page for each section \usepackage{titlesec} \newcommand{\sectionbreak}{\clearpage} %% paragraph \titleformat{\paragraph} {\normalfont\normalsize\bfseries} {\theparagraph} {1em} {} \titlespacing*{\paragraph} {0pt} {3.25ex plus 1ex minus .2ex} {1.5ex plus .2ex} \raggedbottom \usepackage[english]{babel} \usepackage{csquotes} \usepackage[backend=biber]{biblatex} \newbibmacro{in:}{} \addbibresource{report.bib} \defbibheading{mybibliography}[\bibname]{% \section{#1}% } \DeclareFieldFormat{isbn}{\mkbibacro{ISBN}\addcolon\addnbspace\mbox{#1}} \DeclareFieldFormat{volume}{\bibstring{volume}\addnbspace#1} \DefineBibliographyStrings{english}{ volume = {Volume}, volumes = {Volumes}, } \usepackage{scrextend} %% footnotes \newcommand{\fnw}{1.8em} \deffootnote[\fnw]{0pt}{1.6em}{\makebox[\fnw][l]{\thefootnotemark.}} \renewcommand{\footnoterule}{ \kern -3pt \hrule width \textwidth height 1pt \kern 2pt } \interfootnotelinepenalty=10000 \usepackage[parfill]{parskip} \setcounter{tocdepth}{4} \setcounter{secnumdepth}{4} \usepackage[no-math]{fontspec} \makeatletter \newcommand{\ttinput}[1] {{\begingroup\lccode`~32\lowercase{\endgroup\let~\@xobeysp}% \begingroup\lccode`~`\^^M\lowercase{\endgroup\def~{\\}}% \def\arraystretch{0}\offinterlineskip \ttfamily\catcode`\^^M\active\obeyspaces \begin{tabular}{l}\@@input#1\relax\end{tabular}}} \makeatother \begin{document} \begingroup \makeatletter \def\@makeschapterhead#1{ {\parindent \z@ \raggedright \normalfont \interlinepenalty\@M \Huge \bfseries #1\par\nobreak \vskip 40\p@ }} \makeatother \endgroup \tableofcontents \section{Introduction} Report update: \today The purpose of this project is to implement a \emph{hierarchical scheduler} that manages a real-time, mixed-criticality task system. A mixed-criticality task system is a system where tasks of different criticality levels are executed on the same physical platform. The tasks are divided into groups according to their assigned criticality levels. \boxdef{Real-time System}{\realtimesystem} \boxdef{Hierarchical Scheduler}{\hierarchicalscheduler} The hierarchical scheduler and the hard real-time tasks all execute on a single, separate core: the \emph{critical} core. In parallel, best-effort software runs on a dedicated core as well: the \emph{best-effort} core. The cores share main memory, the DRAM, as well as the memory bus. The theoretic scope of the project is to give methods to compute the response times for the critical tasks, considering that the best-effort core software can interfere because the hardware that is shared between the two cores. 
\boxdef{Response Time}{\responsetimedef} The criticality groups must be isolated from each other so that the groups can be separately executed and analyzed. What is more, the critical core must be isolated from the best-effort core lest the best-effort core monopolize the memory, shutting out the critical tasks from execution and thus making them miss their deadlines. \boxdef{Deadline}{\deadline} The solution is two resource servers. On the critical core a CPU-time resource server monitors and limits the CPU time of individual groups so as to prevent starvation. Likewise, a resource server limits the allowed number of best-effort core memory accesses. The purpose is to realize an architecture and implement a memory throttling method that will make it possible to construct modular real-time computer systems whose parts can be analyzed in isolation. \subsection{goal} The goal of the project is to be able to: \begin{enumerate} \item execute tasks of different criticality levels on the critical core; while \item executing best-effort software on the best-effort core; so that \item software from either core can access the shared DRAM; but \item best-effort core memory interference still does not break critical-task schedulability analysis. \end{enumerate} \subsection{criticality groups} A critical group is a subset of all the real-time tasks in the system. The critical groups are mutually exclusive: every task belongs to one (and only one) critical group. The reason to divide the critical tasks into groups is to be able to express and employ a more flexible and fine-grained real-time system. In this project, there are other parameters apart from the criticality level attached to each group. The task classifications are to be used by the real-time operating system to guide its actions. Real-time analysis is to be applied to the groups as isolated entities, and for an entire system one group at a time, as if the other groups did not exist. \boxdef{Real-time Operating System}{\rtos} The reason to create a task-based system is that real-time systems interact with the outside, physical world. Because there are so many aspects of the physical world that can be measured, and in so many ways -- and all has to be done continuously, virtually in parallel -- a powerful solution is to employ a set of tasks, wherein each task is responsible for collecting and processing data of one particular such aspect. For example, one task reports temperature, another volume, yet a third the system's proximity to other objects, and so on. This model mimics the general-purpose process-based computing model, which in itself has many advantages. There are several advantages to having several criticality groups: \begin{itemize} \item The system is more \textbf{modular}: tasks can be brought in and out as groups, and each group can be analyzed for real-time schedulability independently of the other groups. \item The system is more \textbf{well-defined}, and \textbf{ordered}. For example, by assigning different CPU-budgets to different groups, the respective importance of the groups as subsystems is made explicit: in turn, this will affect the behavior of the system in execution. \item The system is more \textbf{flexible}: the different CPU-budgets assigned to different groups can be used to assign a budget that is adapted to the typical execution patterns of the tasks of that group.
\boxdef{Schedulability}{\schedulability} \end{itemize} \subsection{real-time isolation} The critical groups must be isolated from each other in real-time analysis. If any one group is deemed schedulable, this must hold for any run-time scenario which can involve the tasks of that group, as well as those of all others. That is, in order to provide guarantees for a predictable execution of the tasks of any one critical group, it is necessary to process the runtime behavior of the tasks of all other groups as well. \boxdef{Predictable}{\predictable} In practice, the resource server solution is an attempt to formalize and simplify such considerations to the point where an individual group does not have to be analyzed in a way that actively has to consider the peculiarities of the other groups, their tasks, and statuses. Whenever needed, the system must intervene to prevent unruly behavior, right before it happens. If the tasks of a certain group collectively have depleted their allocated resources, the group must be hindered from further activity, lest it jeopardize the predictability of tasks that belong to other groups. \boxdef{Resources}{\resources} \subsection{different kinds of computer systems} The genealogy of computer systems -- from a desktop OS to a fully-fledged real-time system in the physical world -- can be sketched like this: \begin{itemize} \item \emph{interactive} non-timesharing -- This is a typical computer system for a user to use for various, mundane purposes. The software that runs on this system does not have any associated deadlines: everything is simply executed without any guarantees, but also without any cumbersome real-time layer or metadata attached to executing programs. Still, in some sense, even such systems are ``real-time'', because human-computer interaction would be unbearable if for example a keystroke sometimes took several seconds to process before the character appeared on the screen. \item \emph{timesharing} -- A timesharing system is an interactive system, but it is also a multiprogrammed and multitasked system that allows several active users to be logged in at the same time. The implementations of such systems often employ the process-based architecture, which is also what is found in many real-time systems. This shows the proximity between timesharing and real-time systems, the difference being the absence of deadlines in a timesharing system. In general, a timesharing system is oriented toward active \emph{humans}, who provide instructions on what the computer should do, while a real-time system is oriented toward the outside \emph{physical} world, and whatever goes on there is sampled and processed as input to trigger various responses. \cite[p. 3]{unix_intro} \item \emph{soft} real-time -- Here, software \emph{is} associated with deadlines. If a deadline cannot be met, however, that does not imply system-wide dysfunction. Rather it is up to the system designers (and sometimes the user) to figure out what to do in such an event. \item \emph{firm} real-time -- Here, deadlines are soft in the sense that breaches are not encouraged, but allowed; on the other hand, deadlines are hard in the sense that computation whose result arrives late has zero value, and is discarded. (In some settings, delayed-computation data can even be dangerous if used as if it had arrived in time.) \item \emph{hard} real-time -- Here, a deadline breach implies a system fault.
\item \emph{critical} real-time -- Here, a deadline breach implies a hazard to people's health. Critical real-time obviously implies hard real-time. What is not as obvious is that hard real-time does not imply critical real-time. For example, consider a computer game: a fast-paced, interactive game is for all practical purposes hard real-time, even though the implementation probably lacks explicit deadlines -- still, if the game does not react virtually instantly to inputs and in-game events, it is unplayable. However, an unplayable game is not a hazard to the health of any living thing, so for this reason it is not considered critical real-time. \end{itemize} \subsubsection{hybrids and gray zones} The real-time system classifications are not set in stone. Many systems consist of subsystems of different natures. Actually, there can be combinations within a subsystem itself: consider a real-time system where software is associated with \emph{both} a soft and a hard deadline. \cite[pp. 2-3]{rtspls} If the program does not complete before the soft deadline, the system reallocates resources so the program will be more likely to complete, and sooner. Only when the task still has not completed at the time of the hard deadline is a system fault flagged. \subsubsection{where does this project fit in?} The hierarchical scheduler schedules pieces of software which all have parameter values -- the real-time metadata. Thus, any system that runs on top of \texttt{hs} will be at least a soft real-time system. But it could also be a firm or hard real-time system, or something in between, with no or minimal changes to the code required. Ultimately, it is the user who decides what software will run and how that software will behave, and when. Those decisions will shape the entire system, including its place on the real-time ladder. As for now, what is hard-coded is the sporadic task model: software tasks are annotated with the parameters WCET, deadline, and period. \boxdef{WCET}{\WCET} The criticality side of it is even less restricted. While the ``criticality'' groups indeed can be arranged to express criticality (as in physical hazard) they can just as well be thought of as labels of task groups that belong to a certain local scheduler, with no criticality implication whatsoever -- in execution, the parameter values of the scheduler are what count, not any qualitative interpretation that the human mind is fond of making. \boxdef{Scheduler}{\scheduler} The hierarchical scheduler does not do anything by itself, but in combination with different software sets, it can schedule just as many different kinds of systems. \subsection{why a real-time multicore?} Multicore architectures are one way the CPU industry brings faster computation to computers. Also, with multicores, there is more computation out of less power. This is important not least for embedded systems, which often have to operate on a limited power supply. Embedded systems -- because they apply to problems outside the world of computers -- are a huge implementation field for real-time systems. Real-time systems on multicores are still rare, in part because of the increased unpredictability due to the multiple sources of computation. This poses difficulties both in theory -- guaranteeing real-time behavior analytically -- and in enforcing it in practice, because distributed computation implies communication and synchronization delays that must be bounded.
\subsection{meta} The writer of this report as well as the programmer of all new related software can be reached at \texttt{embe8573@student.uu.se}. This is a BibTeX entry for this report: \tiny \begin{verbatim}
@techreport{multicore-mixed-criticality-with-a,
  author      = {Emanuel Berg},
  title       = {Multicore mixed-criticality with a hierarchical
                 real-time scheduler and resource servers},
  institution = {Department of Information Technology,
                 Uppsala University, Sweden},
  type        = {\unskip\space},
  year        = 2015
}
\end{verbatim} \normalsize The report as well as the source code can be found at the author's home page: \texttt{http://user.it.uu.se/$\sim$embe8573} \section{Related work} The classical paper on hierarchical scheduling is \cite{ieee_hs}. However, the scheduler implementation of this project is not perfectly layered. Ideally, any hierarchical design relies on a one-way communication flow, where superior entities never ask (but only tell) the lesser components what to do (and, whenever necessary, provide them with what they need to do it). For a hierarchical scheduler, that translates to the root scheduler scheduling the schedulers below solely based on their properties, without looking any further below, and only after one particular scheduler has been favored would that scheduler decide which of its tasks to execute, without the interference of the above scheduler. Instead, because of practical considerations when implementing the EDF algorithm, the root scheduler is not ignorant of what happens at the task level: rather than just barking orders, it must poll the layers below for information that will determine its decision. \boxdef{EDF}{\edf} Previous work to a large extent considers only two criticality levels: i.e., one critical level and one level which is best effort. \cite{heechul} \cite[pp. 299-308]{heechul2} In this project, it is possible to implement and analyze an arbitrary number of critical groups, and have best-effort software run in parallel. In this project, unlike \cite{heechul}, there is not a best-effort sphere of possible memory access once the allocated budget has been depleted. And, unlike \cite{heechul2}, this implementation is userspace only. In \cite{memory-architectures-for-noc-based}, the unusual memory architecture of a network-on-a-chip system (a NoC) is used to provide isolation between criticality levels. Here, specialized hardware is used, including a predictable multiprocessor, whereas our project uses commodity components which are by original design unpredictable. In \cite{workload-aware-shaping-of-shared}, a monitor (in part) instructs a scheduler what process to execute from the suggestions of four queues. The queues are a division based on qualitative properties of the processes, so that such properties also influence what process to run. The algorithm is ``slack'': context switches are not always carried out at times of priority inversion. Instead, to decrease overhead and improve best-effort quality-of-service, preemption occurs only at the very moment it has to, lest a critical process be unable to make its deadline. Our project is similar to theirs, but their solution is more sophisticated (and complicated). In our project, the critical groups (which can come in any number) are holders of scheduling and real-time metadata that is arbitrary and cannot be computationally derived from properties of the processes which make up the groups.
Also, our monitor serves only to freeze the best-effort core -- it doesn't influence any scheduler on either core. And, freezing \emph{always} happens when the maximum allowed DRAM accesses have been accounted, so our algorithm is not slack, either. One of the most sensible suggestions to the problem how to uphold predictability while sharing resources is presented in \cite{mapping-mixed-criticality-applications-on}. Here, the shared memory is divided into banks. However, the banks are not merely distributed over the critical tasks -- while that would indeed provide isolation between criticality levels, it would do so at the price of no longer sharing the memory and thus have the whole system constantly operate at the bottom end of what it is capable of. Instead, the tasks are dynamically mapped to the banks by means of the scheduling algorithm. Consequently, the entire scope of the capabilities of the system can be employed: at the very least, there is the subdivision and distribution of memory; at most, there is unrestricted access. The challenge is to find or (re)design an algorithm that maps the tasks to the banks in such a way that the system can run on the most cylinders the most of the time. In effect, the once contradictory poles -- predictability vs. resource sharing -- are transformed into an optimization problem. Also, in \cite{privatization-and-fixed-priority-scheduling} the solution is a subdivision of the DRAM between the critical tasks. Each critical task is assigned a DRAM bank. There is one critical task for each bank. This means the critical tasks do not share memory anymore: instead, they use their designated ``virtual memory'' bank which is a fraction in size to the original DRAM. Nonetheless, what is innovative about this project is that for each bank, alongside the singular critical task, there can be an arbitrary number of best-effort tasks. Access to the bank memory is granted according to a priority algorithm. Priorities are fixed with the critical task in each bank having the highest priority. This uncomplicated scheme has the benefit of providing complete isolation for the critical tasks, both from each other and from the best-effort tasks. The drawback is that resource utilization is sensitive to both the setup and the dynamic situation of every execution. Another impressive suggestion is \cite{scheduling-of-mixed-criticality-applications}. Here, there are no dedicated cores -- all critical tasks, of any and all criticality levels, coexist across the cores. Nor are there any budgets to be distributed by resource servers. This makes for minimal a-priori arrangements. Also, there is no reliance on specialized hardware. The solution is based on subdividing all cores as well as the shared memory along the criticality levels. A dynamic scheduling algorithm synchronizes the cores so that only tasks from the same criticality level access memory at the same time. Interestingly, the authors comment on the type of solution that is presented in our paper: \begin{itemize} \item Our solution is flexible in terms of managing processes -- e.g., to include arrivals. This is true, especially compared to monotonic scheduling. However, their solution is even more flexible as all cores share all work. The lack of a-priori metadata markup and budget assignments is a two-sided sword. On the one hand, there is no room for humans to screw up the algorithm. On the other hand, no expert human can tweak it to perfection. 
In general, we say their solution is better, assuming a time-tested dynamic algorithm to do the allocations. \item Our solution requires ``a high design and implementation overhead''. Here, it is unclear if this aims at the scheduling and throttling software, or the a-priori setup of task systems and resource budgets. If it aims at the software, our project shows such software is not insurmountable to create. However, if it aims at the setup, we agree, and especially if compared to their project, which does not require any of that. \item Our solution cannot be done with COTS components. COTS is ``commercial off the shelf'' computer hardware. What truth there is to that of course depends on the definition of ``COTS''. In fact, a performance monitor counter (PMC) is all specialized hardware that is required, and such are found in many desktop PCs. \end{itemize} \section{System overview: two cores, shared DRAM} The system consists of two cores: the \emph{critical} core, and the \emph{best-effort} core: Those cores share DRAM and memory bus. \vspace{2\baselineskip} \index{multicore architecture \emph{fig.}} {\setmonofont{DejaVu Sans Mono}\centerline{\ttinput{figures/multicore.txt}}} \vspace{\baselineskip} \begin{center}\textbf{Multicore Architecture}\end{center} \subsection{the critical core} The critical core runs critical tasks. The mixed-criticality scheduler, as well as all the critical groups, reside on the critical core. \subsection{isolation between the cores} Whatever happens on the best-effort core cannot be allowed to influence the execution of critical tasks to the extent that even a single critical task cannot complete before its deadline. To that end, the interference from the best-effort core is bounded by a parameter for each critical group. The value specifies how many memory accesses the best-effort core is allowed to do for a specific time span, while a task of that group is executing. \subsection{the best-effort core} The best-effort core runs a media player. To run a media player as the best-effort task is interesting because a media player task does not lend itself to static real-time analysis. A media player is user-oriented software. Some downgrade in quality can be tolerated. If the media content is streamed from a network, data may be sketchy, and arrival spotty; and, the service is most often designed to work on different end-hardware platforms. \cite[pp. 4-13]{media} Also, any substantial quality improvement can be detected by inspection. As the real-time system can block the best-effort software from accessing memory, the performance of the best-effort software can be much worse compared to if it did not have to subject itself to the demands of the real-time system. The extent of this performance downgrade is evaluated by executing and measuring the media player, when it runs in parallel with the critical task system. \section{The Linux implementation} \subsection{the real-time system} The hierarchical scheduler, or \texttt{hs}, that runs the critical tasks is written in C++: C++ is fast, which matters because it is desirable to reduce the scheduling overhead. The object oriented support in by C++ is also helpful to express the different components of the hierarchical scheduler, which is modular by concept. More generally, C++ is an extension of C, which is the system programming language of choice since the early-mid 70s. \cite[pp. v-vi, 6, 41-42]{cpp} Programmers of real-time systems have discussed what makes for a good implementation language for such systems. 
\cite[pp. 20-22]{lang} provides a list which in terms of technology amounts to: \begin{itemize} \item The language should come with debugging tools, including a compiler which provides compile-time warnings and error messages, in order to detect problems at the earliest stage possible. \item The language should offer features for modular design and development, e.g. user-defined datatypes (or classes). \item The language should be portable, fast, and vast with respect to what problems it can express and solve. \end{itemize} C++ does that, of course. There is also a discussion whether such a programming language should in itself include real-time functionality, and provide it transparently on any platforms for which it is ported, \emph{or} if real-time functionality can be delegated to the underlying OS, and just be accessed through an API. \cite[pp. 22, 131]{lang} (Bear in mind that those approaches are not mutually exclusive.) In this project no attempt is made to avoid using OS functionality, and the C++ used is all vanilla. Both critical and best-effort cores run on Debian GNU/Linux. Linux is a good all-around platform for programming; it comes with both general and specific tools that were indispensable while developing the hierarchical scheduler, and while setting up the overall dedicated-core architecture as well as the framework to carry out the experiments. The Linux kernel and distribution used for this project are: \begin{verbatim}
$ uname -a
Linux debian 3.16-2-amd64 #1 SMP Debian 3.16.3-2
(2014-09-20) x86_64 GNU/Linux
\end{verbatim} \subsection{the critical tasks} The software of the critical tasks is written in C++. The task software consists of hard-coded functions that are compiled together with the hierarchical scheduler -- but the task systems are independently defined, in text files that are read at run-time. The tasks can run synthetic workloads or benchmarks to collectively produce a behavior similar to a real system. \texttt{hs} can also run ordinary Linux processes. If so, they are forked and controlled by signals. The interface to define and later execute such systems is the same as for the hard-coded task software, only, as for now, task software and Linux processes cannot be mixed in a single system. \subsection{\texttt{perf\_event\_open(2)}} \begin{sloppypar} At every global tick, the hierarchical scheduler invokes the Linux \texttt{perf\_event\_open} tools to poll the number of last level cache (LLC) misses that originate from the best-effort core. An LLC miss implies a DRAM fetch and use of the memory bus to communicate both the request and the data. (Details on the caches are in \spref{caches}.) \end{sloppypar} By booking the number of such requests, it is possible to calculate the number of LLC misses since the beginning of the global period. The outcome is at every tick compared to the maximum-allowed accesses for the executing critical task. If the performed best-effort accesses exceed the number allowed, the best-effort core is frozen: all software there will be suspended, and only resume whenever a task executes that belongs to a task scheduler with a non-depleted memory budget. \label{poll} Because data is \emph{polled}, and polled periodically, the best-effort core will often be able to exceed the number allowed, as it can only be frozen on a scheduling interrupt. This problem can be minimized by increasing the frequency of the global scheduling interrupts.
However, there are other problems associated with a high frequency: \spref{high-frequency-problem} The problem of periodic, non-immediate enforcement of memory budgets could possibly be avoided if over-use instead would trigger a kernel interrupt, that would immediately freeze the best-effort core. In such an implementation, the memory budgets could be communicated to the kernel by means of a syscall. \subsection{\texttt{cgroups}} To freeze and thaw the best-effort core, \texttt{cgroups} is used. This method stems from the high-performance computing world (HPC) where it is often desired to collectively suspend or activate process groups. \section{Real-time Linux and Unix} Conventional wisdom has it that UNIX and Unix-like systems like Linux cannot host real-time systems without considerable changes to their kernels. It is argued, because of the layered architecture -- a kernel space, a userspace, and asynchronous syscalls in between -- that any userspace-only application is inherently unpredictable. \cite[pp. 17-18]{advunix} Furthermore, many Unix-like systems do not have a scheduler suitable for shuffling real-time processes. (Linux has such features, but they need to be enabled explicitly.) While those arguments are true to form, whether they are true in essence depends on the preferred definition of a real-time system. The definition used for this project -- \realtimesystem\ -- is very much attainable on Unix-like systems, with or without kernel surgery. \subsection{Linux real-time schedulers} As for kernel version 3.14, Linux provides four schedulers that may be more fit for real-time purposes than the default one, which for its part maximizes average throughput to benefit a varied dose of interactive computer use (e.g., editing). The real-time schedulers are: \begin{itemize} \item \texttt{SCHED\_FIFO} is a fixed-priority scheduler, only real-time processes do not get preempted by common processes. If several real-time processes have equal priority, the one who has the CPU will not let it go until completed at what time another contender monopolizes the CPU. \item \texttt{SCHED\_RR} is like \texttt{SCHED\_FIFO}, only when several real-time processes have equal priority, they do Round Robin, still excluding every other process from the CPU. \item \texttt{SCHED\_OTHER} is the round-robin, time-sharing algorithm where processes execute for certain timeslices. \item \texttt{SCHED\_DEADLINE} is the EDF algorithm. This can likely be used to enhance this project, which employs a somewhat modified brand of EDF, and -- when dealing with real Linux processes -- enforces it with userspace signals. \end{itemize} For this project, the ordinary Linux scheduler is used. To use a real-time scheduling policy complicates matters, especially when \texttt{hs} forks processes and then attempts to control them through signals. But in all fairness, Linux has many features that ultimately can transform it to just about any system -- including a hard real-time system. For example, \texttt{mlock(2)} can be used to lock the virtual address space of a process into the RAM. As nothing of that has been touched upon, scheduling is left alone as well. \subsection{the Linux and C++ clocks} \label{clocks} \index{clocks} To uphold an even rate of periodic scheduling, the C++ library function \texttt{std::this\_thread::sleep\_until} is used, which however may ``block for longer [than what has been specified] due to scheduling or resource contention delays''. 
\footnote{http://en.cppreference.com/w/cpp/thread/sleep\_until} Actually, there can be widely diverging results due to many factors. To illustrate the extent of imperfect periodicity \texttt{-l} (or equivalently \texttt{--log}) can be used to have the hierarchical scheduler output the current time in nanoseconds, at every tick. (Again, the same library functions are used.) It is likely that using a real-time scheduling policy for \texttt{hs} would result in a more even tick rate, especially if \texttt{hs} is intended to run among many other processes that are not real-time. Here is a sample output for a period of one millisecond: \begin{verbatim} 3594454136974 3594456742892 3594456842067 3594459262486 3594459326740 3594459349369 3594460102258 3594461102664 3594462098601 3594463098448 3594464103883 3594465098703 3594466102182 3594467103985 3594468097687 ... \end{verbatim} Then, the following method is used to calculate the offsets from the intended tick times: \begin{verbatim} offset_0 = time_1 - time_0 - DESIRED_TICK offset_1 = time_2 - time_1 - DESIRED_TICK ... \end{verbatim} This produces, for the example trace: \begin{verbatim} 1524064 -958095 -488762 323073 -289016 1605918 -900825 1420419 -935746 -977371 -247111 406 -4063 -153 5435 ... \end{verbatim} Last, for all offsets acquired, some statistical data is computed: \begin{verbatim} readings: 9999 mean: 7.000000 # drift size variance: 1828079556 standard deviation: 42756.047011 # drift stability min: -977371 # observed worst cases max: 1605918 \end{verbatim} \textbf{Note:} It has been observed that the early tick times of a run are much more inexact than those of the rest of the trace. This is the case in the above example as well. It is telling that the first mere 15 readings contain both the minimum value ($-977371$), and the maximum value ($1605918$), of all 9999 readings, whose mean is only seven! (For a longer trace, the mean would be even smaller.) As for now, this behavior remains a mystery. To exemplify, here are the offsets found at the end-most part of another trace -- a trace that likewise has a specified rate of a single millisecond. As is typical, these offsets are much smaller than their early brethren. \begin{verbatim} 10743 -431 -4902 3480 -4622 -153 6832 2642 -7696 3480 127 -152 -4622 -153 \end{verbatim} \subsection{hardware} The hardware platform must be considered because the real-time task superstructure and all the individual tasks share hardware resources. Specifically, the critical tasks -- including the hierarchical scheduler and the task software, and all forked processes -- share the critical core for CPU-execution; and, they share the DRAM and the memory bus with the best-effort core. That has to be accounted for both in analysis, \emph{and} in practice, where monitoring and throttling is employed to hinder over-use of shared resources. The CPU is a dual core \texttt{x86\_64}: \begin{verbatim} $ lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 2 On-line CPU(s) list: 0,1 Thread(s) per core: 1 Core(s) per socket: 2 Socket(s): 1 NUMA node(s): 1 Vendor ID: AuthenticAMD CPU family: 15 Model: 35 Stepping: 2 CPU MHz: 1000.000 BogoMIPS: 1989.83 L1d cache: 64K L1i cache: 64K L2 cache: 512K NUMA node0 CPU(s): 0,1 \end{verbatim} DRAM and memory bus usage is monitored with a PMC, which is accessed by means of the \texttt{perf\_event\_open} Linux tools. 
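To make the mechanism concrete, the following is a minimal sketch of how a per-core counter of this kind can be opened and read through \texttt{perf\_event\_open(2)}. It is not the \texttt{hs} source: the generic \texttt{PERF\_COUNT\_HW\_CACHE\_MISSES} event, the CPU number, and the helper names are assumptions made for the illustration only.

\begin{verbatim}
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstring>
#include <cstdint>

// Open a hardware cache-miss counter for one CPU
// (pid = -1: count all processes on that CPU).
// Illustrative sketch only -- not the hs implementation.
static int open_llc_miss_counter(int cpu)
{
    struct perf_event_attr attr;
    std::memset(&attr, 0, sizeof(attr));
    attr.size     = sizeof(attr);
    attr.type     = PERF_TYPE_HARDWARE;
    attr.config   = PERF_COUNT_HW_CACHE_MISSES;
    attr.disabled = 1;
    return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}

// Read the accumulated number of misses (called at every tick).
static uint64_t read_misses(int fd)
{
    uint64_t count = 0;
    read(fd, &count, sizeof(count));
    return count;
}
\end{verbatim}

In such a scheme, the counter would be enabled and reset with \texttt{ioctl} (\texttt{PERF\_EVENT\_IOC\_ENABLE}, \texttt{PERF\_EVENT\_IOC\_RESET}) at the start of every global period, and the value read at every tick would be compared against the $m_{i}$ of the currently executing group, as described above in the \texttt{perf\_event\_open(2)} subsection.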
\index{caches} \label{caches} The \texttt{L1} and \texttt{L2} caches are not shared between the cores but appear pair-wise, with one pair for each core. Save for the CPU registers, \texttt{L1} is the smallest memory storage unit, with $64K$ for data, and the same amount for instructions. Below \texttt{L1} is the $512K$ \texttt{L2}, which is the last level cache, or LLC, right above the shared DRAM. Thus, a failed memory lookup at the second level is indicative of an imminent DRAM access. In this project, LLC misses are counted as a way to count fetches from the shared DRAM. \begin{center} \index{memory architecture \emph{fig.}} \includegraphics[width=\textwidth]{figures/llc/llc.pdf} \end{center} \textbf{Memory Architecture. First a miss in the last-level-cache (LLC), then a DRAM fetch.} \clearpage \subsection{isolation} All critical groups must be isolated from all other critical groups, but they must also be isolated from the best-effort software. Here, the intersection is not the CPU, but the shared DRAM and the shared memory bus. \index{isolation \emph{fig.}} \vspace{\baselineskip} \begin{center} \includegraphics[width=\textwidth]{figures/isolation-figure.png} \textbf{Isolate us! Resource sharing interference.} \end{center} Isolation must be implemented in practice, as well as digested into something that is formally computable, so it can be used to determine task response times, and, later on, overall system schedulability. In this, software as well as hardware are considered. It is important to note that the word ``isolation'' is not used in the general sense, but in terms of real time schedulability analysis and task response-time computation. If $A$ is isolated from $B$ in terms of $i$, that does not mean $A$ is physically separated from $B$ so that $B$ cannot ever influence $A$. On the contrary, it means that $A$ \emph{is} influenced by $B$ precisely through $i$, only the designers and implementors of the system has accounted for this, for example by providing throttling mechanisms that in practice will provide a bound for the worst-case interference $B$ can have on $A$ (through $i$). This bound is then used in the response-time calculations, which is the theory mirror image of the real-time implementation. \subsubsection{starvation} \label{starvation} Because the best-effort core and the critical core share the DRAM, and because the mechanism that throttles the best-effort core resides on the critical core, it is possible that the best-effort core can overflow the memory, shutting out the critical core to the extent that the critical core cannot throttle the best-effort core, whose memory budget thus cannot be enforced. In terms of real-time, that would immediately break the system. %% In practice, such a situation is highly unlikely. For %% the purposes of this project, the assumption is that %% the best-effort core will not ever starve the %% critical-core from enforcing a budget. \subsubsection{isolating the cores on Linux} The cores on the project computer are originally general-purpose: the Linux scheduler can execute any process on either. In order to turn these cores into one critical core and one best-effort core, a kernel parameter is set that will stop the Linux scheduler from using the best-effort core; then, a userspace tool is used to start best-effort processes so they will only execute on their designated core. 
This turns the system into one where there are dedicated cores for specific software: distribution of processes over the cores can be a function of a-priori policy decisions. \label{grub} \index{GRUB} To isolate one of the cores from the Linux scheduler until explicitly told otherwise, in: \begin{verbatim}
/etc/default/grub
\end{verbatim} put (or change) \texttt{GRUB\_CMDLINE\_LINUX\_DEFAULT} into: \begin{verbatim}
GRUB_CMDLINE_LINUX_DEFAULT="quiet isolcpus=1"
\end{verbatim} Then, run \begin{verbatim}
sudo update-grub
\end{verbatim} and reboot. After that, here is how to execute a program, say, \texttt{forever}, exclusively on CPU 1: \begin{verbatim}
taskset -c 1 forever

# to confirm:
ps -o psr,comm -p `pgrep forever` # psr = processor = core
\end{verbatim} This method is used to invoke all best-effort software. \label{remaining-processes} The method to isolate the best-effort core by setting a kernel parameter in a GRUB configuration file does isolate the core from most processes that are not explicitly told otherwise. Still, a few system processes remain on the best-effort core, unaffected by the GRUB configuration. This is why, on the critical core, the hierarchical scheduler and the critical-task software co-exist with some modest Linux system software: \begin{verbatim}
watchdog/1
migration/1
ksoftirqd/1
kworker/1:0
kworker/1:0H
kworker/1:1
kworker/1:2
kworker/1:1H
\end{verbatim} Fortunately, these processes are low-key: they demand CPU or memory resources in quantities that are zero or negligible in the face of userspace processes that are intentionally and explicitly invoked to be executed exclusively on the best-effort core. To illustrate, one execution of a media player on the best-effort core for 10 seconds booked $70464921$ memory accesses. During this execution, no activity whatsoever could be detected from the above processes, which were monitored with \texttt{top(1)}. A consecutive run of \texttt{hs} for another 10 seconds but with an idle best-effort core (i.e., without the media player) booked the number of memory accesses as: $0$. \section{Scheduling} There is a single top-scheduler that schedules task-schedulers at the level below. The task-schedulers schedule tasks. At the down-most level reside the individual tasks. The tasks are grouped and assigned one unique task-scheduler: such a group of tasks, along with its scheduler, is the implementation of a criticality group. Scheduling is done \emph{hierarchically}. The top-scheduler is the highest entity. The task-schedulers are at the level below, themselves at the same level: they all belong to the top-scheduler. Scheduling is carried out at two levels: at the level of the top-scheduler, and at the level of the task-schedulers. Scheduling is also, conceptually, done in parallel, or vertically: every task-scheduler schedules its task set. The tasks are themselves at the same, down-most level, below the task-schedulers to which they belong. But, unlike the task-schedulers, the tasks do not all belong to the same above scheduler: they are grouped, together with their designated task-scheduler, into criticality groups. There can be an arbitrary number of task-schedulers and each can have a task set which contains an arbitrary number of tasks. The task system can be visualized as a tree: the root (or top node) is the top-scheduler; the $n$ internal nodes immediately below are the task-schedulers; and, at the third and lowest level, the $k$ leaves (or terminal nodes) are the individual tasks.
\vspace{2\baselineskip} \index{hierarchical scheduler \emph{fig.}} {\setmonofont{DejaVu Sans Mono}\centerline{\ttinput{figures/scheduler.txt}}} \vspace{\baselineskip} \begin{center}\textbf{Hierarchical Scheduler}\end{center} \subsection{algorithm} The scheduling algorithm is global, budgeted, and preemptive EDF. \begin{itemize} \item It is \textbf{global} as it considers the critical tasks of \emph{all} task-schedulers, and compares them on a scheduler as well as task-by-task basis. \item It is \textbf{budgeted} as all task-schedulers are assigned a CPU-budget. For each global period, the tasks of a particular task-scheduler can execute for a total time not exceeding that budget. \item It is \textbf{preemptive} as a task, once assigned the CPU, can later be replaced, and put back into the ready queue, before it has completed. Here, the algorithm does not differ from the commonplace preemptive EDF of real-time scheduling, except for one implementation-derived detail: preemption can only be done at the time of a global scheduling interrupt. Those occur regularly and frequently according to a system parameter, which is set beforehand and thereafter does not change. Arrival of new tasks also takes place at those interrupts, so a task with the highest priority that just arrived will immediately preempt the CPU, as long as the task-scheduler it belongs to has not depleted its budget. So the algorithm is ``polled-preemptive EDF''. \end{itemize} \subsection{step-by-step description} Scheduling is defined as follows: \begin{enumerate} \item Execution regularly comes to a halt in order to do scheduling. \item Whenever so, the top-scheduler tells all task-schedulers to do scheduling of their respective task sets. In this implementation, all task-schedulers do the same, commonplace EDF. The task states are updated. The task-schedulers all propose a task to the top-scheduler. \item Of the proposed tasks, the task with the most immediate deadline is favored, as long as its task-scheduler has a non-depleted CPU-budget. \end{enumerate} \subsection{task priority} EDF is a priority scheduling algorithm where the priority is a dynamic property. For this project, \emph{locally}, at the level of an individual task-scheduler, the highest-priority task is the task with the most immediate absolute deadline, just like commonplace EDF. However, \emph{globally}, the highest-priority task must also belong to a task-scheduler that has not depleted its CPU-budget. If the task-scheduler has a depleted budget, its proposal to the top-scheduler is academic as no matter what deadline, no task of that task-scheduler will ever be favored. If a task belongs to a depleted task-scheduler, that is equivalent to the task having the lowest possibly priority. (If all schedulers are depleted the CPU will idle.) Note that a depleted scheduler has a zero remaining CPU-budget. If the budget is non-zero, it is not depleted and its task will be considered, even if that task requires more than the remaining CPU-budget to terminate. (This is a possible window for optimization that has not been explored.) \section{The task finite state machine} Each individual task belongs to one (and only one) state, always. 
\vspace{2\baselineskip} \index{sporadic task implementation \emph{fig.}} {\setmonofont{DejaVu Sans Mono}\centerline{\ttinput{figures/task-states.txt}}} \vspace{\baselineskip} \begin{center}\textbf{Sporadic Task Implementation}\end{center} \subsection{transitions} Transitions occur regularly, and at the same time for all tasks, namely when the global scheduler interrupts execution to re-asses the state of the system. The transitions are: \begin{enumerate}[label=(\alph*)] \item The task arrives: it is employed. This happens after a semi-random delay: in practice, the task should arrive sooner rather than later. (The random delay can be disabled with the \texttt{-i} or \texttt{--immediate} option.) \item The task has the most immediate deadline in the system of all tasks that belong to non-depleted schedulers and is thus passed to the CPU for execution. \item The task is preempted by another task from a non-depleted task scheduler, a task with a more immediate deadline. \item The task is completed and is released by the CPU. \item The minimum inter-arrival time has passed, so a new task instance can be released. \item Timeout -- the absolute deadline of the task is in the past, but the task still has not completed. \end{enumerate} \subsection{states} \begin{longtable}{p{0.2\linewidth}p{0.7\linewidth}} \texttt{UNEMPLOYED} & Either of: \vspace{\baselineskip} \begin{itemize} \item The task has never executed, and has not yet been placed in the ready queue (where it would have state \texttt{READY}). \vspace{\baselineskip} \item The task has had at least one instance of itself executing (the \texttt{RUNNING} state); it completed its computation, and has been put on hold (the \texttt{HOLD} state). However, the time of necessary holding (due to the sporadic task model) has transpired, so another, new instance of the task can indeed be placed in the ready queue -- only this has not happened, yet. \end{itemize} \vspace{\baselineskip} Which one of these situations is at hand is not of practical or analytical importance, but they illustrate the cyclic nature of the life of a task in this system. \vspace{\baselineskip} The reason for this state is to get the sporadic, semi-random behavior of task arrivals. \vspace{\baselineskip} Note that an unemployed task has not arrived so it does not have a pending deadline, and it cannot be selected for execution. \\\\ \texttt{READY} & The task is queuing. It has been deployed -- it has arrived -- and may in part have been executed, only, if so, it has been preempted -- either way, it is not completed so it waits, queuing. All time spent in this state is added to the response time of the task. \\\\ \texttt{RUNNING} & The task is executing: the scheduler that holds the task has not depleted its CPU-budget, and the task has the most immediate deadline for all tasks that run on non-depleted schedulers. When executing, the task is closing in on the absolute deadline as much as when it idles in the ready queue. However, contrary to when idling, the task is also closing in (at the same speed) on its own termination, at what point it has received enough CPU time to complete its computation and terminate successfully. \\\\ \texttt{HOLD} & The previous task instance has completed, and before another instance of the same task can arrive, it must hold until its period is over, in accordance with the minimal-interarrival, one-instance-at-a-time policy of the sporadic task model. 
\\\\ \texttt{DELAYED} & The task has missed its deadline: the absolute deadline is in the past, and the task has not been assigned the CPU (state \texttt{RUNNING}) for a time that equals or exceeds the WCET of the task. In this system, a delayed task will not ever be completed, nor will any further instances of that task be released. \end{longtable} \section{System description} \textbf{Terminology:} This section is a \emph{description} of the hierarchical scheduler as a piece of software. The parameters correspond directly to C++ data types: in some cases classes, in some cases primitives. Most parameters are part of the interface and can thus be specified by the user without any changes to the software required. Only the parameter \emph{names} are simplified, most often to single letters; and OO relationships are expressed as parenthesized lists: this is to make the description more accessible, and, later, to facilitate the use of the parameters in formulas. Nonetheless this remains a description of actual software and is not a model to be verified by formal methods -- which, alas, at best would verify only the model itself. \subsection{the top-scheduler} $g$ is the singular, or root, top-scheduler: \beq{\topscheduler}{top\_scheduler} \begin{longtable}{p{0.15\linewidth}p{0.15\linewidth}p{0.15\linewidth}p{0.50\linewidth}} parameter & name & (unit) & description \\ \hline \\ $l$ & lifetime & ms & The system lifetime. This is the amount of time that the top-scheduler will execute, from the time of invocation. As the top-scheduler is the topmost body of the system, $l$ equals the system lifetime. \\\\ $p$ & period & ms & The global period. At the beginning of each period the task-schedulers have their budgets (re)supplied to their individual, pre-assigned levels. \\\\ $r$ & rate & ms & The scheduling rate by which $g$ will schedule $S$. At every $r$, execution comes to a halt: scheduling is done, and execution resumes. \\\\ $S$ & schedulers & & The set of task schedulers that the top-scheduler schedules. \\\\ $a$ & algorithm & & The scheduling algorithm that $g$ applies to $S$, in order to select what task to execute (for all $s \in S$). The algorithm updates and maintains the statuses of all other tasks and schedulers as well. \end{longtable} \subsection{how the time parameters relate} \beq{\relate}{relate} If not $0 < r$ there will be constant preemption and not any task execution at all. If not $r < p$, then at each tick, the budgets of all $s \in S$ will have been resupplied, and what happened before the tick will not influence the selection of the next $s'$, as then (and always), every $s' \in S$ (including $s$) is equally ready. If so, there will not be any isolation between the critical groups as that is expressed and implemented as the CPU-budgets of the schedulers. If the budgets cannot be depleted, they cannot play a role in this system. If not $p < l$, there will not be any resupplies of budgets. \subsection{task scheduler} $S$ is the set of task-schedulers. The task-schedulers are the implementation of the critical groups. $S \neq \emptyset$ as, unless so, there is not any $s \in S$ for $g$ to schedule (or any tasks to execute). The set of task schedulers is: \beq{\taskscheduler}{task\_scheduler} \textbf{Notation:} The subscripts are only given to indicate that the properties belong to a certain task scheduler. A single task-scheduler $s$ would be fully described as $s = (m, b, P, \alpha)$. 
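To make the four-tuple concrete, the sketch below shows how such a record could look in C++. The names are hypothetical and do not mirror the actual \texttt{hs} types; the four components are explained one by one in the following subsections.

\begin{lstlisting}[language=c++]
#include <cstdint>
#include <vector>

// Illustrative sketch of a task-scheduler record s = (m, b, P, alpha).
struct SporadicTask { uint64_t C, D, T; };   // WCET, deadline, min. inter-release time

enum class Algorithm { EDF };                // alpha: the local scheduling algorithm

struct TaskScheduler {
    uint64_t m;                              // max best-effort memory accesses per period
    uint64_t b;                              // CPU-budget per global period (ms)
    std::vector<SporadicTask> P;             // the sporadic task set
    Algorithm alpha = Algorithm::EDF;        // here, all task-schedulers do EDF
};
\end{lstlisting}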
\subsubsection{$m_{i}$, the maximum best-effort core memory accesses} For each global period (of time $p$), the software that runs on the best-effort core must not exceed a certain number of memory accesses -- $m_{i}$ -- during the execution of the critical tasks of $s_{i}$. That way interference from the best-effort core vis-a-vis a specific critical group can be quantified, and thus bounded. The bound is used in response-time analysis of the critical tasks on a task-scheduler basis. \subsubsection{$b_{i}$, the CPU-budget} $b_{i}$ is the budget assigned to $s_{i}$. The time the computer spends executing a $t \in P_{i}$ will reduce the working budget of $s_{i}$ by the same amount of time. When the working budget is depleted for $s_{i}$, $g$ will not consider $s_{i}$; however, $g$ will restore the working budget of $s_{i}$ to $b_{i}$ at the transitions between global periods. $b_{i} > 0$ as otherwise $g$ will never consider $s_{i}$. The CPU-budget feature makes it possible to isolate the critical groups from each other. For each global period, the tasks of $s_i$ -- $P_i$ -- cannot delay any task that belongs to another group by any more than $b_i$, the budget of $s_i$. \subsubsection{$\alpha_{i}$, the (task) scheduling algorithm} $\alpha_{i}$ is the scheduling algorithm $s_{i}$ uses to propose a $t \in P_{i}$ for execution. In principle, $\alpha_{i}$ can be an altogether different algorithm than the global scheduling algorithm $a$. $\alpha_{i}$ can also be different from one task-scheduler to another. In this project, all task-schedulers do commonplace EDF, while the top-scheduler does a slightly modified EDF, the most notable addition being the ruling-out of schedulers that have depleted budgets. \subsection{$P_{i}$, the sporadic task set} \beq{\sporadic}{sporadic\_task\_set} $P_{i} \neq \emptyset$ as otherwise there are not any tasks to schedule. \subsection{the sporadic task model} \label{sporadic-task-model} \index{sporadic task model} The sporadic task model is a small-but-important extension to Liu and Layland's \emph{periodic} model in which each task has only two parameters: WCET, and period. \cite[pp. 46-61]{periodicmodel} The sporadic model is different from the periodic model because it decouples the deadline from the period \cite{mok}, while the period remains an upper bound on the deadline and also determines the frequency of task-instance releases. The sporadic task model of this system does not differ from the sporadic task model commonly used in real-time scheduling: \beq{\sporadictask}{sporadic\_task} \subsubsection{task parameters} \begin{itemize} \item $C_{j}$ is the WCET. \item $D_{j}$ is the deadline, relative to the arrival time. \item $T_{j}$ is the minimal inter-release time: i.e., there can be at most one instance of $t_{j}$ for each $T_{j}$. \item $0 < C_{j} \leq D_{j} \leq T_{j}$ \end{itemize} \clearpage \subsubsection{task properties} \begin{itemize} \item the tasks do not share any task-specific resources \item there are not any task-precedence constraints \item there is not any communication between tasks \end{itemize}
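The task parameters and their constraint can be checked mechanically when a task system is read. A minimal sketch, with hypothetical names (this is not the actual \texttt{hs} indata verification):

\begin{lstlisting}[language=c++]
#include <cstdint>

// Illustrative sketch of a sporadic task t_j = (C_j, D_j, T_j).
struct SporadicTask {
    uint64_t C;   // WCET
    uint64_t D;   // deadline, relative to the arrival time
    uint64_t T;   // minimal inter-release time
};

// The task model requires 0 < C_j <= D_j <= T_j.
bool well_formed(const SporadicTask& t)
{
    return 0 < t.C && t.C <= t.D && t.D <= t.T;
}
\end{lstlisting}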
\section{Schedulability method: response time} One method to ensure schedulability is to assume the worst-case scenario: i.e., that the best-effort core software produces a maximum number of memory accesses, at all times, and at the \emph{worst} times with respect to the critical tasks completing before their deadlines -- while, simultaneously, critical tasks are released in the most unfavorable way -- the way that causes maximum queuing. \subsection{$r_{j}$, the worst-case response-time for $t_{j}$} $r_{j}$ is the worst-case response-time for the task $t_{j}$: it is the maximum-length time span from the arrival of $t_{j}$, to its completion. The following terms add to the delay of $t_{j}$. The sum of those, plus $C_{j}$, the WCET of $t_{j}$, is equal to the worst-case response time: \beq{\responsetime}{response\_time} \subsubsection{$Q_{j}$, queuing} $Q_{j}$ is the worst-case time spent queuing for $t_{j}$. When a task is queuing, it is in the \texttt{READY} state. The definition of queuing is whenever $t_{j}$ cannot execute, while some other task not only can, but is. (As a consequence, it is not considered queuing when \emph{all} tasks are inactive, as is the case during a global scheduling interrupt.) There are two (or three) situations when $t_{j}$ can be queued, which together give a total queuing delay of: \beq{\queuing}{queuing} \subsubsection{$z$, the response time in global periods} The worst-case (largest) number of global periods that can transpire during the existence of a task is, without rounding: \beq{\zperiods}{periods} \subsubsection{$Q^{g}_{j}$, group deadline queuing} Here, $t_{j}$ is queuing because it has a less immediate deadline than another task in the same group -- a task that belongs to the same scheduler as $t_{j}$. Note that just about any task can preempt and/or queue $t_{j}$ at some point: a higher priority is equivalent to a more immediate \emph{absolute} deadline. This is a dynamic property that does not imply $D_{i} \le D_{j}$ for a higher priority task $t_{i}$: if $t_{i}$ was released before $t_{j}$, the absolute deadline of $t_{i}$ may still be more immediate than that of $t_{j}$. The set $hp(t_{j})$ is all tasks that at some point can have a higher priority than $t_{j}$. The worst-case group queuing is: \beq{\groupqueuing}{group\_queuing} \textbf{Note: } $hp(t_{j})$ is effectively \emph{all} tasks except for $t_{j}$ itself! Because of that, this schedulability method is reduced to that of a fixed-priority system, only worse, as here, all tasks would have equal priority. While there exist methods to compute the response time for individual tasks under EDF, those are complicated and typically not worth the effort as long as the purpose is to compute general schedulability. Still, the formulas here are included to illuminate the mechanics of this implementation. \subsubsection{$Q^{s}_{j}$, cross-group (system) deadline queuing} The task $t_{j}$ of the scheduler $s_{i}$ can be delayed by a higher-priority task that belongs to another scheduler. In the worst case, \emph{all} other schedulers will spend \emph{all} of their budgets before $t_{j}$ can execute: \beq{\systemqueuing}{system\_queuing} \subsubsection{depletion queuing} During depletion queuing, $t_{j}$ cannot execute because its scheduler $s_{i}$ has depleted its CPU-budget. Fortunately, the worst-case time $t_{j}$ can be delayed for this reason is already included in $Q^{s}_{j}$. \subsubsection{$o_{j}$, the scheduling overhead} The worst-case time required to do a single execution of global scheduling is denoted $o$ (just $o$, without an index). It is the scheduling overhead. As the top-scheduler controls all of the task-schedulers, $o$ includes task-scheduling overhead as well. $o_{j}$ is the worst-case scheduling overhead during a single execution of any one instance of $t_{j}$.
Because scheduling occurs once every global tick ($r$), the worst-case delay is: \beq{\schedulingoverhead}{schedulability\_overhead} \subsection{$i_{j}$, interference from the best-effort core} Interference from the best-effort core is due to best-effort core software contention for the shared DRAM and the shared memory bus. For each period $p$, with respect to the critical tasks of scheduler $s_{i}$, the worst-case number of memory accesses that can arise from the best-effort core is bounded by $m_{i}$. The worst-case interference for any task is a situation in which the best-effort core makes \emph{all} its allowed memory accesses during the employment of that task, for \emph{all} global periods during which that task instance exists. The assumption is that during a best-effort memory fetch, the critical-core task cannot do anything but idle. (This assumption is probably very pessimistic.) \label{timed-memory-access} If the worst-case response time for a DRAM access (for either core) is 58.5 nanoseconds \cite{hyoseung}, then the worst-case memory interference from the best-effort core is: \beq{\interference}{interference}
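Because $r_{j}$ appears on both sides of the response-time equation (through $z$, $Q_{j}$, and $o_{j}$), the value can be found by fixed-point iteration: start from $C_{j}$ and recompute until the value stabilizes, or exceeds the deadline. The sketch below is illustrative only; the names are hypothetical, all times are in nanoseconds, and it is not part of \texttt{hs}:

\begin{lstlisting}[language=c++]
#include <cmath>
#include <vector>

// Illustrative sketch: hypothetical names, all times in nanoseconds.
struct SporadicTask { double C, D, T; };   // WCET, relative deadline, min. inter-release

// Worst-case response time of t_j, by fixed-point iteration over
// r_j = Q^g_j + Q^s_j + o_j + i_j + C_j.
double response_time(const SporadicTask& tj,
                     const std::vector<SporadicTask>& hp,  // hp(t_j)
                     double p,      // global period
                     double b_i,    // CPU-budget of the scheduler of t_j
                     double r,      // global scheduling rate (tick length)
                     double o,      // worst-case cost of one global scheduling pass
                     double m_i)    // best-effort memory accesses allowed per period
{
    double rj = tj.C;                                    // start from the WCET
    for (;;) {
        double z  = rj / p;
        double Qg = 0;                                   // group deadline queuing
        for (const auto& tk : hp)
            Qg += std::ceil(rj / tk.T) * tk.C;
        double Qs = (std::ceil(z) + 1) * (p - b_i);      // cross-group queuing
        double oj = std::floor(rj / r) * o;              // scheduling overhead
        double ij = std::ceil(z) * m_i * 58.5;           // best-effort interference (ns)
        double next = Qg + Qs + oj + ij + tj.C;
        if (next == rj || next > tj.D) return next;      // converged, or deadline missed
        rj = next;
    }
}
\end{lstlisting}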
\section{Schedulability method: resource server} Another method to guarantee schedulability is to consider an individual task-scheduler a \emph{resource server}. The resource provided is CPU-time; it is consumed by the tasks that belong to that particular scheduler. This method involves less advanced computation than the response-time method, because here, there is not any reliance on the dynamic set of tasks with higher priorities than the task whose response time is calculated. (Another implementation of this idea is described in \cite[pp. 301-324]{baruah}. There, only one processor is considered, and the notation is different.) This method relies on two functions: a \emph{supply} bound function, \texttt{sbf}, and a \emph{demand} bound function, \texttt{dbf}. Both functions have two parameters. $\Delta\theta$ is the same for both: it is the length of the time interval for which supply or demand is computed. This interval can occur anywhere in time, and during that interval the executing system can be in any sound state. The other parameters, for \texttt{sbf} and \texttt{dbf}, are the supplier and the demander themselves, respectively. If the functions are used to compare supply and demand, they must be used pair-wise: if $s_{i}$ is the argument to \texttt{sbf}, $P_{i}$ is the argument to \texttt{dbf}. Indeed, the tasks of $P_{i}$ are the only tasks that can demand CPU-time supplied by $s_{i}$. $sbf(s_{i}, \Delta\theta)$ is the \emph{minimum} possible CPU-time supplied by $s_{i}$ during any time period of length $\Delta\theta$. Correspondingly, $dbf(P_{i}, \Delta\theta)$ is the \emph{maximum} possible CPU-time demanded. The worst-case scenario is: the maximum possibly demanded vs. the minimum possibly supplied. For any task-scheduler, schedulability is guaranteed if the maximum demand for any particular interval is below or equal to the minimum supply for all intervals of the same length. The system is schedulable if all task-schedulers are. \clearpage \subsection{interval length} If the interval starts at $\theta_{1}$ and ends at $\theta_{2}$ the length of the interval is: \beq{\interval}{interval} \subsection{\texttt{sbf}} During each complete global period $p$, the task-scheduler $s_{i}$ supplies exactly $b_{i}$ of CPU-time for its tasks. The number of complete periods is: \beq{\supplycompleteperiods}{supply\_complete\_periods} The CPU-time supplied by $s_{i}$ for those periods is: \beq{\supplytimecompleteperiods}{supply\_time\_complete\_periods} Moreover, there can be zero, one or two incomplete periods that are part of the interval. If so, they exist before and/or after the consecutive $w$ complete periods. The incomplete periods have the total length: \beq{\supplylengthincompleteperiods}{supply\_length\_incomplete\_periods} The worst case, when the task-scheduler supplies the least, is when there are two incomplete periods of the same length. Then, in the worst case, the task-scheduler supplies all of its CPU-budget, both times, during the stretches of the incomplete periods that are outside the interval. Each of the two time spans that belong to incomplete periods but lie outside the interval has length: \beq{\supplylengthincompleteoutside}{supply\_length\_incomplete\_outside} If $U$ is at least as long as $b_{i}$, the minimum-supply case for $s_{i}$ is when all of $b_{i}$ is supplied during $U$, i.e., outside the interval. However, if $b_{i}$ exceeds $U$, the part of $b_{i}$ that exceeds $U$ must be supplied inside the interval: \beq{\supplyincompleteperiods}{supply\_incomplete\_periods} However, because of the best-effort core interference, the actual CPU-time supplied is less. As seen, there are $w$ complete periods in each interval. In addition, there can be two more periods that are incomplete: one at each side of the consecutive, complete periods. In all, there can be a maximum of $(w + 2)$ periods. The task-scheduler $s_{i}$ allows $m_{i}$ best-effort core LLC misses for each period, and for the entire interval: $(w + 2)m_{i}$. If the data and assumptions in \spref{timed-memory-access} are reused, the total supply reduction is: \beq{\supplyreduction}{supply\_reduction} All in all, the minimum supply from $s_{i}$ to all tasks $t \in P_{i}$ is: \beq{\supply}{supply} \clearpage \subsection{\texttt{dbf}} The number of complete periods of $t_{j}$ that exist within the interval is: \beq{\demandcompleteperiods}{demand\_complete\_periods} Once the complete periods have been accounted for, it is possible that an incomplete period remains, during which $t_{j}$ can demand CPU-time. However, it cannot request more than either the computation time of the task, or the entire incomplete period: \beq{\demandincompleteperiod}{demand\_incomplete\_period} Note that this is the opposite of the supply function. There, what is desired is the minimum supply possible, so the spill-over global period is divided into two. Here, the aim is maximum demand, so it is assumed the spill-over task period is not spread over two periods. The maximum demand made by the task is: \beq{\demandtask}{demand\_task} The maximum demand made by $P_{i}$ is the sum of the demands made by all $t \in P_{i}$: \beq{\demandgroup}{demand\_group}
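Taken together, the two bound functions and the resulting test (demand never exceeding supply for any interval length) can be sketched as follows. This is only an illustration with hypothetical names; in particular, the choice of interval lengths to test (every multiple of the scheduling rate, up to the system lifetime) is an assumption, not how the analysis is necessarily carried out:

\begin{lstlisting}[language=c++]
#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative sketch with hypothetical names; times in milliseconds.
struct SporadicTask  { double C, D, T; };
struct TaskScheduler { double m, b; std::vector<SporadicTask> P; };

// Minimum CPU-time supplied by s_i during any interval of length I.
// (Very short intervals are not treated specially; the formulas are used as given.)
double sbf(const TaskScheduler& s, double I, double p)
{
    double w    = std::floor(I / p) - 1;              // complete periods
    double B    = w * s.b;
    double R    = I - w * p;                          // length of the incomplete periods
    double U    = p - R / 2;                          // per incomplete period, outside I
    double beta = 2 * std::max(0.0, s.b - U);
    double rho  = (w + 2) * s.m * 58.5e-6;            // 58.5 ns per access, in ms
    return B + beta - rho;
}

// Maximum CPU-time demanded by the task set P_i during an interval of length I.
double dbf(const TaskScheduler& s, double I)
{
    double demand = 0;
    for (const auto& t : s.P) {
        double f = std::floor(I / t.T);
        double h = std::min(I - f * t.T, t.C);
        demand += f * t.C + h;
    }
    return demand;
}

// s_i is schedulable if demand never exceeds supply for any interval length.
bool schedulable(const TaskScheduler& s, double p, double r, double lifetime)
{
    for (double I = r; I <= lifetime; I += r)
        if (dbf(s, I) > sbf(s, I, p))
            return false;
    return true;
}
\end{lstlisting}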
\section{Experiments} This project carries out several experiments. Many other experiments could be devised as well, not necessarily less interesting ones. Because of a building-block approach, it is quick to set up and execute an experiment. Moreover, most data is collected automatically. Rather, the challenge is to grasp the meaning of certain combinations, as well as to correctly analyze the results. \subsection{building blocks} All experiments involve the same technology and the same tools, but they are employed in different combinations, and with different parameter values. The building blocks are: \begin{itemize} \item \texttt{hs} is the hierarchical scheduler. It is specifically developed for this project. \texttt{hs} can be invoked in different ways. Apart from executing task systems, \texttt{hs} can also output the global tick trace, which can be included in an experiment, if desired. \item \texttt{taskset(1)} is used to isolate a process to a specific hardware CPU, or core. How to isolate a core from most other processes is described in \spref{grub}. \item \texttt{stress(1)} is used to generate memory traffic. It can be used on any and all cores. For most experiments, \texttt{stress} is used to simulate interfering software: on the best-effort core, \texttt{stress} is used to simulate arbitrary non-critical software. On the critical core, \texttt{stress} is used to simulate processor and memory usage of the software that would be executed by the tasks that are scheduled by \texttt{hs}. Unless \texttt{hs} is told to fork processes, the tasks that \texttt{hs} schedules do not utilize computer resources to the extent that real software would. \textbf{Note:} While \texttt{stress} can be made to behave virtually like any piece of software or software set -- and this makes it useful in simulations -- it is just as easy to run ordinary Linux software, on either core, to attain natural contention for computer resources. Simulation may be useful to create certain scenarios that would otherwise require extensive setups of ordinary software, but most scenarios are just as easily attained without simulation. What to do depends on the nature of the experiment: \texttt{stress} can be used to show that something actually can happen, under rare-but-possible conditions. On the other hand, ordinary software can be used to produce a realistic setting, say when the aim of the experiment is to illustrate every-day system use. \label{stress-warning} \textbf{Warning:} \texttt{stress} does not behave like a normal piece of software. Instead, \texttt{stress} can be made to congest the entire memory, at which point the computer is nonresponsive. After all memory has been congested, it is not enough to kill the \texttt{stress} processes to remedy the situation. While the memory is thus released, any running application still needs to reassert its footprint before it can resume proper work and respond to user IO. This usually takes a couple of seconds for the application immediately grasped for -- but for dormant processes, the effect can remain hidden, and appear unexpectedly. This makes \texttt{stress} even less reliable to use in experiments as the memory state of one experiment instance can linger on to affect another, where perhaps \texttt{stress} is not even used. \item \texttt{perf\_event\_open} is used to measure the memory usage. It does so by booking LLC misses, as they imply DRAM fetches and memory-bus traffic. \texttt{perf\_event\_open} can be used on a single process, on a set of processes, or on any and all cores, collectively. If need be, there can be parallel instances with different focus. Here, \texttt{perf\_event\_open} is used by \texttt{hs} so that at the end of its execution it outputs the total number of memory accesses that stem from the best-effort core. \item \texttt{perf\_event\_open} is also the way that \texttt{hs} monitors the best-effort core, to decide whether the software that runs there should be frozen, thawed, or left alone. If \texttt{hs} decides to change the status of the best-effort core, this is effectuated by means of a Linux \texttt{cgroup}: such a group must be initialized before \texttt{hs} is run. (A sketch of both mechanisms follows after this list.) \item To measure the execution time of a process, \texttt{time(1)} is used. This tool can be used to measure performance downgrade by clocking the completion time of identical commands, only in different setups. \end{itemize}
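The following sketch shows roughly how the LLC-miss counting and the cgroup freeze/thaw could be done. It is a simplification: error handling is largely omitted, a cgroup-v1 freezer is assumed, and the group name \texttt{be} as well as the helper names are hypothetical. It is not the actual \texttt{hs} code.

\begin{lstlisting}[language=c++]
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdint>
#include <cstring>
#include <fstream>

// Open a counter for LLC (last-level cache) read misses on one core,
// for all processes on that core.
static int open_llc_miss_counter(int cpu)
{
    perf_event_attr attr;
    std::memset(&attr, 0, sizeof attr);
    attr.size   = sizeof attr;
    attr.type   = PERF_TYPE_HW_CACHE;
    attr.config = PERF_COUNT_HW_CACHE_LL
                | (PERF_COUNT_HW_CACHE_OP_READ     << 8)
                | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
    return (int)syscall(SYS_perf_event_open, &attr, -1 /* any pid */, cpu, -1, 0);
}

// Read the current value of the counter.
static uint64_t read_llc_misses(int fd)
{
    uint64_t count = 0;
    if (read(fd, &count, sizeof count) != (ssize_t)sizeof count)
        return 0;
    return count;
}

// Freeze or thaw the best-effort cgroup (cgroup-v1 freezer assumed; the
// group, here called "be", must exist before hs runs).
static void set_best_effort_state(bool freeze)
{
    std::ofstream state("/sys/fs/cgroup/freezer/be/freezer.state");
    state << (freeze ? "FROZEN" : "THAWED");
}
\end{lstlisting}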
There is some material appended to this document -- \spref{code} -- to facilitate interaction with not only \texttt{hs} but the associated tools and entities (including the best-effort core) as well as the setup and execution of experiments. \subsection{design} An experiment can be anything that employs \texttt{hs} with a task system and any combination of the mentioned tools. This checklist for how to arrange and execute an experiment should cover most cases: \begin{enumerate} \item the setup: what happens on the best-effort core, and what happens on the critical core (except for \texttt{hs}), while the experiment runs \item the \texttt{hs} task system \item a time interval for how long the experiment will run -- recall that an \texttt{hs} system has a lifetime parameter, which can fill this purpose \item what processes are investigated with \texttt{time} and \texttt{perf\_event\_open} \item the experiment should also have a verbalized rationale -- ``we do this because...'' (implementation details of an experiment can contradict the purpose of the experiment: if deemed acceptable still, the problem should be mentioned as well as what is exciting about the experiment) \item results (after the experiment is done) \item a possible post-processing of the results by some number-crunching tool \item conclusion \end{enumerate} \subsection{zsh wrappers} To manage the experiments, and make sure they stay the same for different input values (e.g., task systems), zsh functions are used to wrap the specific commands that make up the experiments. This method is one of clearly defined functions which are reproducible, highly automatized, and easily re-executed after, say, a change to the involved software. There is one function for each experiment. Such a function can, for example, execute several input task systems, while booking their results, and later comparing them. The experiment functions: \spref{functions} \section{Contention experiment} \label{contention-experiment} \index{contention experiment} zsh function: \texttt{run-contention-experiment} \spref{functions} This experiment attempts to show that processing, even though separated on different cores, still affects, and is affected by, simultaneous processing. \subsection{setup} The experiment consists of compiling some 50 Elisp files on the critical core, in order to measure how long it takes to complete compilation for different setups. The changes involve the critical core as well as the best-effort core. In parallel with compilation, \texttt{hs} runs, likewise exclusively on the critical core. The purpose of the compilation is to simulate the memory accesses and the CPU strain that the task software of \texttt{hs} would produce, if indeed the tasks carried real software and not mock-ups. On the best-effort core, for certain setups of this experiment, \texttt{mplayer2(1)} runs: whenever so, it displays a three-gigabyte mp4 file. To display such a file on the experiment computer is deemed expensive, but not excessively or unrealistically so. In addition, \texttt{stress(1)} sometimes runs on the critical core to simulate the resource demand and consumption of would-be critical software.
The results are, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/cr.log} \subsection{conclusion} Intuitively, the more contention, the more time is needed to complete the compilation. Contrary to this, in this experiment, neither \texttt{hs} nor the media player influences the speed of the parallel compilation process. \section{Best-effort core memory experiment} \label{memory-experiment} \index{memory experiment} zsh function: \texttt{run-memory-experiment} \spref{functions} The contention experiment shows that the critical core is affected by best-effort core activity: \spref{contention-experiment} Now, the intention is to show that the influence goes both ways: in particular, how the memory budgets of task systems affect the best-effort core when \texttt{hs} is instructed to freeze or thaw it as appropriate. The influence is quantified as the number of memory accesses that originate from the best-effort core during executions of task systems with different memory budgets. The best-effort core executes the media player. Here, \texttt{hs} is invoked with \texttt{--memory-budget-add}, which adds its argument to all memory budgets. Thus, the same system is executed repeatedly, only with different memory budgets. At termination, the number of best-effort core memory accesses is output. \subsection{the importance of this experiment: delays and overheads} This project describes a problem, and looking at the big picture, the implementation attempts to solve it by controlling the number of best-effort core memory accesses. The degree of success can be quantified by executing this very experiment. Likewise, later on, the experiment in \spref{audio-experiment} will quantify the ``small picture'' success, which directly relates to the real-time deadlines of the involved tasks. In this experiment, the interpretation of the result is: if $x$ best-effort core memory accesses are allowed for an execution of \texttt{hs}, and $y$ are those actually performed, then the difference $y - x$ should be as close to zero as possible (and not less than zero, for a busy system). \label{black-list} A perfect execution, where $x = y$, is unrealistic for several reasons, some of which appear at every global tick: \begin{itemize} \item the \texttt{hs} C++ scheduling and freeze/thaw overhead \item the \texttt{hs} inexact tick, discussed in \spref{clocks} \item the PMC data fetch overhead \item the polled EDF, discussed in \spref{poll}, and otherwise the userspace implementation, notably the signal control of forked processes \end{itemize} In addition, some unpredictable factors appear at most once for every task scheduler and global period: \begin{itemize} \item cgroup freezing and thawing overhead (an experiment indicates that this overhead is all but nonexistent) \end{itemize} While all those factors do reduce the quality of the outcome, their respective weights are at this point uncertain, as is whether they actually do influence enough to qualify as scapegoats, should the experiment outcome be unsatisfactory. \subsection{systems} The experiment includes two systems. One has a single task scheduler; the other has two. The reason to do the experiment twice is to see if the fallout is responsive to setup details, in which case the causality should be further explored.
Both systems have full budgets, with implicit deadlines ($D_{i} = T_{i}$) for all tasks, and less than full utilization: \begin{center} \vspace{1.5\baselineskip} $U_{T} = \sum\limits_{t_{i} \in T}\frac{C_{i}}{T_{i}} \leq 1$ \end{center} The experiment results are shown below in two figures; also, in \spref{memory-experiment-fallout} are the complete tables of data from which the figures are generated. \subsubsection{one-scheduler system: \texttt{base-faculty-1}} System: \verbatiminput{/home/incal/public_html/hs-linux/sys/base-faculty-1} Fallout: \begin{center} \includegraphics[width=\textwidth]{pics/plot/1.png} \end{center} \subsubsection{two-scheduler system: \texttt{base-faculty-2}} System: \verbatiminput{/home/incal/public_html/hs-linux/sys/base-faculty-2} Fallout: \begin{center} \includegraphics[width=\textwidth]{pics/plot/2.png} \end{center} \clearpage \subsection{conclusion} The correspondence between memory budgets and best-effort core memory accesses should be: the bigger the budgets, the more accesses -- up until the point where budgets are no longer depleted because the best-effort core software does not generate that much memory traffic. (What is beyond that point is not included in the above plots.) The linear tendency in both plots shows that in general, memory throttling works as expected. However, the high error percentages are discouraging. In a real-time system, the specific, worst case has to be considered, not the general tendency. \section{\texttt{hs} task systems experiment} \label{task-system-experiment} \index{task system experiment} zsh function: \texttt{run-task-system-experiment} \spref{functions} All instances of this experiment are of the same form. Only one input parameter differs: the \texttt{hs} task system. Everything else is the same to facilitate comparisons of the fallout data: if the setup does not change, different results are a consequence of different task systems. \subsection{setup} On the critical core, \texttt{hs} runs, as well as the Elisp compilation, which is also timed. On the best-effort core, the media player runs. What is different from the first experiment is the behavior of \texttt{hs}: it will now freeze, and thaw, the best-effort core. What happens is a function of how many memory fetches the best-effort core produces, as well as the dynamic state of the \texttt{hs} task system. The only way to affect the results is thus to change the parameter values of the task system -- and those of the scheduler itself (those parameters are in the same task system file). \begin{sloppypar} The method is to execute several task systems, with widely different parameter values at key points, and then examine if and how that affects the outcome. To this end, each system is given a personality name which describes what property is emphasized: e.g., one system is called \texttt{short\_rate}, another \texttt{small\_memory\_budgets}, and so on. There can be many such personalities and combinations thereof: only a small subset is tested here. \end{sloppypar} Because there are so many parameters, it is a challenge to assign neutral values that will not affect the outcome and thus neither morph nor hide the part of the outcome that is a consequence of the spelled-out personality. To circumvent this problem, the systems have identical values except for their defining traits.
\subsection{what is a task system?} A task system contains parameter values for each task, according to the sporadic task model: for each task there is a WCET, a deadline, and a minimum-interarrival time, or period, as described in \spref{sporadic-task-model}. But there is more than tasks to a task system. The system definition also specifies the parameter values for each task \emph{scheduler}, or critical group: e.g., the memory budget that \texttt{hs} uses to freeze and thaw the best-effort core. Also, a system definition contains parameter values of the top-most scheduler: e.g., the scheduling rate and the total system lifetime. So technically, a more exact phrasing would be a multicore task/scheduler system. Here, ``task system'' is used for simplicity. An example task system can be found in \spref{example-hs-task-system}. \subsection{simulation issues} This experiment is the first step toward an experiment that is more realistic, and less simulated. Although there is still heavy reliance on simulation, simulation is implemented as well-defined puzzle pieces with clear purposes. The thought is that they can be easily replaced, one by one, further down the line. The issues are: \begin{itemize} \item \texttt{hs} does real scheduling, only what it schedules is not real software in the sense that it actually does anything useful (it \emph{can} be hard-coded to do useful things, but as of now it is not). \item Although the compilation is an example of real, useful software in execution, it actually simulates all the instances of would-be critical-task software. \item The best-effort interference is measured in terms of how much it can delay the critical-core Elisp compilation, even though \texttt{hs} is in control of the best-effort software. That is, interference is not measured in terms of the real-time parameters -- the deadlines -- that belong to the \texttt{hs} tasks. \end{itemize} \subsection{the result data} After the completion of an instance of this experiment, there are four newly created files which contain all parts of the result. Those files are in a directory that bears the same name as the executed task system. \begin{minipage}{\textwidth} The result is organized like this: \begin{longtable}{p{0.33\linewidth}p{0.67\linewidth}} file name & description \\ \hline \\ \texttt{hs\_output.log} & The \texttt{hs} run: all the task state changes, as well as any task software output. It is the execution trace of the task system as scheduled, and executed, by \texttt{hs}. Also, this file contains the number of best-effort core memory accesses during the system run. \\\\ \texttt{lisp.log} & The time required to compile the Elisp source files. \\\\ \texttt{tick\_times.log} & The time readings in nanoseconds for every global tick. Details and examples are in \spref{clocks}. \\\\ \texttt{stats.log} & This file contains the offset from the specified global tick rate, for every tick. It is computed from the \texttt{tick\_times.log} file; likewise, it is discussed elsewhere. \end{longtable} There are also the files \texttt{stats.txt} and \texttt{BE.txt} that are convenience parsings of data already available in the above files. \end{minipage} \subsection{systems and results} The first system is \texttt{base}. It is a basic system on which the other systems are modeled, save for some characteristic trait being changed. The purpose is to examine how any step away from \texttt{base} makes -- or does not make -- its way into the result data, and then to analyze how this comes to happen.
\subsubsection{\texttt{base}} \begin{itemize} \item The system: \verbatiminput{/home/incal/public_html/hs-linux/sys/base} \item Lisp compilation time, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/base/lisp.log} \item Tick stats: \verbatiminput{/home/incal/public_html/hs-linux/results/base/stats.txt} \item Best-effort core memory accesses: \verbatiminput{/home/incal/public_html/hs-linux/results/base/BE.txt} \end{itemize} \subsubsection{\texttt{long-ticks}} The \texttt{long-ticks} system has a much slower global scheduling rate than the \texttt{base} system. This presumably affects all aspects of any system execution, because it is only at the scheduling interrupts that one task can replace another on the CPU; moreover, it is only at the scheduling interrupts that \texttt{hs} freezes or thaws the best-effort core. \begin{itemize} \item The system: \verbatiminput{/home/incal/public_html/hs-linux/sys/long-ticks} \item Lisp compilation time, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/long-ticks/lisp.log} \item Tick stats: \verbatiminput{/home/incal/public_html/hs-linux/results/long-ticks/stats.txt} \item Best-effort core memory accesses: \verbatiminput{/home/incal/public_html/hs-linux/results/long-ticks/BE.txt} \end{itemize} \subsubsection{\texttt{long-period}} The \texttt{long-period} system has a much longer global period than the \texttt{base} system. \begin{itemize} \item The system: \verbatiminput{/home/incal/public_html/hs-linux/sys/long-period} \item Lisp compilation time, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/long-period/lisp.log} \item Tick stats: \verbatiminput{/home/incal/public_html/hs-linux/results/long-period/stats.txt} \item Best-effort core LLC misses: \verbatiminput{/home/incal/public_html/hs-linux/results/long-period/BE.txt} \end{itemize} \subsection{conclusion} The different outcomes of this experiment show that interference from the best-effort core can be throttled as a function of the memory access profile of the best-effort core software in combination with what happens on the critical core, down to the individual task that is currently executed. \section{Linux processes experiment} zsh function: \texttt{run-linux-process-experiment} \spref{functions} This experiment is similar to the previous one -- \spref{task-system-experiment} -- with the huge exception that \texttt{hs} does not run mock software anymore; instead, it spawns and schedules ordinary Linux processes. The purpose of the experiment is to show that real software is just as susceptible to interference as the software measured in the previous simulation. At best, the previously used hard-coded task software provides a simulation of an actual system. Here, that is replaced by Linux processes. The processes are scheduled exactly like the task software, only -- where it fits -- the processes are suspended and resumed from \texttt{hs}, by means of \texttt{kill(2)} signals. Those are sent on the basis of process group IDs (PGIDs), so that processes spawned down the ladder will be controlled by \texttt{hs} as well.
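One way to realize the suspension and resumption of whole process groups with standard signals is sketched below. Whether \texttt{hs} uses exactly these signals (\texttt{SIGSTOP}/\texttt{SIGCONT}) is an assumption here, as are the helper names; the sketch only illustrates the PGID mechanism.

\begin{lstlisting}[language=c++]
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>

// Suspend or resume a whole process group: a negative pid argument to
// kill(2) addresses every process in the group, so children spawned
// "down the ladder" are stopped and continued as well.
static int suspend_group(pid_t pgid) { return kill(-pgid, SIGSTOP); }
static int resume_group (pid_t pgid) { return kill(-pgid, SIGCONT); }

// A spawned task process is typically put into its own group first,
// e.g. right after fork(2) in the child:
//
//     setpgid(0, 0);      // the child becomes its own process-group leader
//     execvp(cmd, argv);  // then the task software is executed
\end{lstlisting}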
\subsection{advantages} \begin{itemize} \item Because real software is being executed, it puts a definite end to everything simulated or artificial about this project. \item Because the processes are now separated from \texttt{hs}, they can be monitored individually, isolated from each other as well as from \texttt{hs}. Recall that in the previous experiment, there was a compilation process that ran in parallel with \texttt{hs}. The slowdown in compilation due to interference from the best-effort core was a measure of how, collectively, critical-core software was affected. Here, with real processes, the processes are not representing anything but themselves, so any slowdown detected (for whatever reason) is actual slowdown and not a mere indication of such a tendency. \item Because the processes are ordinary Linux processes, when they are resumed (after being suspended), they pick up where they were preempted. That means total execution wall-clock time will not only be prolonged by the best-effort core interference, it will to a much higher degree depend on the associated real-time parameters of the software, because those determine how \texttt{hs} will schedule the processes. \end{itemize} \subsection{disadvantages} \begin{itemize} \item If a system employs lengthy background processes, but is still desired to be highly responsive with frequent context switches, the task parameters of the sporadic task model must be reinterpreted. The $C$ parameter for WCET, as well as the other parameters, no longer express times for an entire process, but rather the burst patterns in which it will execute until completion. It can be thought of as a version of the multiframe model \cite[pp. 635-645]{multiframe}; however, it does not come with fixed frames in terms of exactly what part of the software is being executed. (Actually, a hard-framed model would be easy to implement with the hard-coded task software: a static C++ variable -- as in persistent, not class -- to hold the current frame, and then a branch is all it takes.) \item Although slowdown is measured with actual software -- not some representation thereof -- slowdown is not measured in terms of the real-time parameters associated with the software. This further contrasts with the sporadic task model, in which everything is fine as long as all tasks complete before their deadlines. Even if deadlines are reinterpreted as bounds of bursts of processes in execution, this experiment still does not examine how the context in which processes execute either fails to interfere enough, or actually breaks task deadlines. \end{itemize} \subsection{systems and results} \subsubsection{\texttt{base-p}} The \texttt{base-p} system consists of real Linux processes. One is an infinite loop outputting a string; the other compiles the by now familiar Elisp files. However, compilation in this experiment is not equivalent to what happened in the previous experiment, with the \texttt{base} system -- then, there was a compilation process that ran interleaved with \texttt{hs} on the critical core; here, compilation is critical software that \texttt{hs} schedules. Thus, the compilation process is subject to its sporadic task representation and the values it holds. The by far biggest consequence of this is that the result data cannot be interpreted in terms of interference alone, although interference has not decreased. Rather, a much bigger factor is the competition from the other critical tasks scheduled by \texttt{hs}, and the task parameter values of the process itself.
The result for \texttt{base-p}: \begin{itemize} \item The system: \verbatiminput{/home/incal/public_html/hs-linux/sys/base-p} \item Lisp compilation time, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p/lisp.log} \item Tick stats: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p/stats.txt} \item Best-effort core LLC misses: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p/BE.txt} \end{itemize} \subsubsection{\texttt{base-p-no-memory-budget}} These are the results for an identical system, except that it has a virtually nonexistent memory budget. \begin{itemize} \item The system: \verbatiminput{/home/incal/public_html/hs-linux/sys/base-p-no-memory-budget} \item Lisp compilation time, in nanoseconds: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p-no-memory-budget/lisp.log} \item Tick stats: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p-no-memory-budget/stats.txt} \item Best-effort core LLC misses: \verbatiminput{/home/incal/public_html/hs-linux/results/base-p-no-memory-budget/BE.txt} \end{itemize} \subsection{conclusion} While the results of this experiment do highlight the inefficiency of the method, equally revealing was the process of setting up the experiment. The sporadic task model is not suited for continuous processes. In order to have an interactive system, the computation times (the WCET parameter values) must be small. On the other hand, the deadlines cannot be too tight, lest you risk having delayed processes (or delayed parts of processes). Yet, on the third hand, if deadlines are wide, so are periods, which means the processes will not get back to action right away but must idle until the period ends. This is sometimes acceptable, even welcome, as long as other processes can execute to fill the gap; if not, it is a waste of resources. Also, remember that the task model parameters are not alone in defining a system: there are also the global scheduler parameters, and those of every task scheduler. All in all, it is a jungle just to execute a program and have it complete within a reasonable amount of time. For this reason, there are only two systems included in this experiment, to show the mechanics. Of course, there is no ruling out that a system implemented this way could be functional, but that would have to involve experimentation and evaluation as to what to assign each and every parameter. And that is not a good idea: the parameters should reflect reality, or at the least intuition, and pretty much instantly so -- they are not to be tweaked just so a simple program can be executed. \section{Real real-time: audio experiment} \label{audio-experiment} \index{audio experiment} zsh function: \texttt{run-audio-experiment} \spref{functions} This experiment puts it all together. Just like the previous experiment, it uses real software: \texttt{hs} schedules Linux processes. But unlike the previous experiment, all tasks are expected to terminate within their deadline frames, as expressed by their sporadic task representations. This time, nothing is measured. Instead, everything is done in terms of the real-time task system, and the associated software. Furthermore, the process that is examined executes software that measures an external, physical property: the sound level. This is measured with a microphone plugged into the computer. (To make the system even more interesting, it can be augmented with all sorts of gadgets: a USB thermometer, for starters.)
The audio process executes a script that logs the sound level by appending it to a file: \verbatiminput{/home/incal/scripts/a-level} As seen, that script is not optimized but uses a combination of commonly-used utilities. The script is a server: it idles until it receives a \texttt{USR1} signal, which triggers the function that reads and stores the sound level. This scheme makes it unnecessary to create a new process for every instance released. The rationale of this experiment is that if realistic real-time parameter values are assigned to the sporadic task representation of the process that executes the audio script, then this system can be assessed not in terms of increased memory/CPU contention, but instead: failure is whenever the absolute deadline of the audio task is in the past, while the task still has not booked the audio level. If such a failure is deduced to be the result of best-effort core memory contention, an attempt will be made to nullify that influence in an execution run where \texttt{hs} throttles the best-effort core. If this changes the outcome, then it has been shown that this project addresses a real problem that it is also able to solve. \subsection{system} The strategy is to create a system that, on a virtually unloaded computer, will execute successfully: it will not suffer a single deadline miss. However, the margin of error should be the slimmest possible. That way, if the same system thereafter executes in parallel with best-effort core activity, the scale will tip and interference will be detectable in the form of missed deadlines. The third step is to execute the system anew, again along with interference, only this time with throttling enabled, and examine if that counteracts the interference, and has the system execute successfully once again, just as if the interference had not been there. The system is: \verbatiminput{/home/incal/public_html/hs-linux/sys/base-audio-2} \subsection{execution} In order to understand in what way a system is executed and how it is influenced by activity on the best-effort core, a framework of \texttt{tmux(1)} panes is used. The screenshot below shows this -- only the colors have been inverted, to facilitate reading on paper. In the comic-book reading-order -- left-to-right and top-to-bottom -- the panes are: \begin{itemize} \item The IO of \texttt{hs}, and that which \texttt{hs} schedules (be it the hard-coded task-software or, as here, Linux processes). \item The best-effort core policy: \texttt{no BE} means there is not any activity on the best-effort core; \texttt{tamed BE} means there is activity on the best-effort core but \texttt{hs} is instructed to throttle it; and, \texttt{wild BE} means that there is activity on the best-effort core, but it is never throttled. The parenthesis next to the best-effort core policy shows the system execution number. In order not to interpret uncommon events as recurring, it is beneficial to execute the same system several times, under the same circumstances. \item The next pane shows the state of the best-effort core: \texttt{FROZEN}, or \texttt{THAWED}. Here, a couple of PIDs are sometimes shown: those are the PIDs of the processes that are frozen, whenever the best-effort core is. There is only room to show three PIDs: if there are more processes on the best-effort core, they are treated the same as those with the PIDs shown. \item The number of signals sent to the audio task during the \texttt{no BE} run(s).
If there are several iterations, this number is the sum of all signals sent during past executions plus the current one, so far. \item The number of outputted audio levels during the \texttt{no BE} phase, so far. \item Ditto \texttt{tamed BE} (two consecutive panes). \item Ditto \texttt{wild BE}. \item The processes on the best-effort core: the data shown is core, PID, and command. ($1$ is the best-effort core.) \end{itemize} \baimgspace \begin{center} \includegraphics[width=\textwidth]{audio_results_i.png} \end{center} \baimgspace \label{high-frequency-problem} \index{high-frequency-problem} \textbf{Note:} In the course of carrying out this experiment, it was discovered that the global preemption rate cannot be set too low when scheduling real processes. If so, presumably, the overhead of handling processes -- interrupting and otherwise controlling them through signals -- will hinder actual software execution. All but instantly, that will result in missed deadlines. This situation can be remedied without changing any other parameter value save for the rate, which must be increased. \subsection{ideal fallout} A perfect fallout for this experiment would be: \begin{itemize} \item For a system that is schedulable, and executed without any best-effort core memory contention, the aim should be set that if there are $s$ signals sent, then there should be $o = s$ audio level outputs. \item Keeping the same system, but adding contention at the time of execution (contention which nonetheless can be throttled), then if there are $s'$ signals, there should still be $o' = s'$ outputs. The assumption is that the throttling mechanism is configured so it can, if necessary, put the best-effort core entirely out of business. \item Keeping the same system and the same (severe) contention, only this time with throttling disabled, then the number of outputted audio levels $o''$ should be considerably smaller than $o$ and $o'$. \end{itemize} \subsection{conclusion} This experiment shows that \texttt{hs} can control the best-effort core so that some critical tasks (though very few) will complete before their deadlines in spite of best-effort interference, which, without the \texttt{hs} throttle, would not have been the case. What is disappointing is the lack of precision. It appears as though \texttt{hs} can barely remedy a dysfunctional overall state, while it is far from sophisticated enough to tweak or improve a functional system with any granularity. It is telling that if a piece of normal software is substituted for \texttt{stress} -- e.g., the media player, which has a small footprint compared to the entire memory space available -- there are not any deadline misses due to memory contention, or, if occurring, they are never remedied by throttling. Another disappointment is the number of deadline misses even without any interference. A likely reason is that the audio process has an execution time that varies too much, so that often the WCET does not reflect reality. However, it is clear that this experiment fails not because of shortcomings in \texttt{hs} and/or the wider architecture, but rather due to the inability to present the critical core with best-effort core interference that in a realistic and sensible way affects the performance of the critical-core software.
Nevertheless, it is even more clear that \texttt{hs} is incapable of stopping \texttt{stress} from breaking the entire system: because \texttt{hs} is itself susceptible to best-effort core interference, the throttling mechanism reverts to a dysfunctional state along with the rest of the system as all memory becomes unavailable. \section{Conclusion} The implementation process showed that a hierarchical scheduler is suited to host a mixed-criticality system. Here, critical groups correspond one-to-one to the task schedulers: because of its very composition, the hierarchical scheduler provides isolation between critical groups. Because a hierarchical scheduler is modular by definition, and here serves as a system host, it was desired to use a fast, general-purpose, and object-oriented language. C++ worked well: the implementation process never ran into any obstacles that were perceived as insurmountable. Also, the many tools used in this project, all of them available on a common Linux system, worked as intended, and for the most part without complications. Among the tools and technologies used were the GRUB configuration to isolate the cores, then \texttt{taskset} to steer processes to the best-effort core; moreover, there was a cgroup to control the best-effort core software; \texttt{perf\_event\_open} to poll the number of best-effort core LLC misses; and, \texttt{tmux} and zsh to make an interface to automatize experiments. Last, the idea to specify task systems in text files that were parsed by \texttt{hs} at runtime proved powerful, not least in experiments that required fast employment of different systems. It could all be accounted for, and automatized. Regrettably, somewhere along the way, the whole of the project became less than the sum of its parts. While \texttt{hs} can reduce the number of best-effort core memory accesses, it fails to do so with sufficient precision, as is evident from the memory experiment. At this point, there is not any theory as to what part of the system is the weakest link of the chain. It is not even clear if the lack of precision is the consequence of one or a few such parts, or the overall architecture, and, if so, at what level(s). Moreover, there is a worse, even fatal system exposure, which nonetheless is more satisfying because the reason is known: because \texttt{hs} is itself susceptible to best-effort core interference, in the face of massive (but possible) best-effort core memory traffic, the throttling mechanism -- a part of \texttt{hs} -- reverts to a dysfunctional state along with \texttt{hs} and the rest of the system, as all memory becomes unavailable. Here, it should be noted that the relative success of the memory experiment is achieved in the face of \emph{moderate} best-effort contention -- most likely, it would not work at all if exposed to the same interference as the audio experiment. \textbf{Note:} Some of the experiments that were carried out in the course of this project are not mentioned here. The reason is that those experiments, while not exactly failing, did not offer any key insights that are not conveyed by subsequent experiments.
%% references
\renewcommand\bibname{References} \printbibliography[heading=mybibliography] \appendix \renewcommand\thesection{\Alph{section}} \renewcommand\thesubsection{\thesection.\alph{subsection}} \renewcommand\thesubsubsection{\thesubsection.\alph{subsubsection}} \section{Appendix A: Memory experiment fallout} \label{memory-experiment-fallout} \index{memory experiment fallout} \textbf{Note:} If and whenever the tables below are incomplete in places, that is because a failed hard real-time task system has made \texttt{hs} exit prematurely, without a result to output. Correspondingly, the figures actually output all stem from systems whose tasks completed before or at their deadlines, without fail. \input{/home/incal/public_html/hs-linux/results/memory-experiment-tables.log} \section{Appendix B: Formulas} \subsection{system} $\topscheduler$ where $\relate$ $\taskscheduler$ \subsection{sporadic task model} $\sporadic$ $\sporadictask$ \subsection{response time} $\responsetime$ $\zperiods$ $\schedulingoverhead$ $\interference$ \textbf{Queuing:} $\queuing$ $\groupqueuing$ $\systemqueuing$ \subsection{resource server} $\interval$ \subsubsection{supply} $\supplycompleteperiods$ $\supplytimecompleteperiods$ $\supplylengthincompleteperiods$ $\supplylengthincompleteoutside$ $\supplyincompleteperiods$ $\supply$ \subsubsection{demand} $\demandincompleteperiod$ $\demandtask$ $\demandgroup$ $\demandcompleteperiods$ \section{Appendix C: \texttt{hs} code} This appendix contains the complete C++ source code for \texttt{hs} as well as a groff man page which describes the use of all command-line options.
%% man page for hs
\refstepcounter{subsection} \addcontentsline{toc}{subsection}{%
\protect\numberline{\thesubsection}%
man page for \texttt{hs}}%
\texttt{\thesubsection\ man page for \texttt{hs}} \lstset{ basicstyle=\ttfamily, columns=fullflexible, keepspaces=true, showspaces=false, showstringspaces=false, commentstyle=\ttfamily } \lstinputlisting[language=bash]{../hs.txt} \clearpage \subsection{\texttt{ask}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/ask.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/ask.cc} \subsection{\texttt{be}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/be.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/be.cc} \subsection{\texttt{file\_io}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/file_io.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/file_io.cc} \subsection{\texttt{global\_scheduler}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/global_scheduler.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/global_scheduler.cc} \subsection{\texttt{llc}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/llc.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/llc.cc} \subsection{\texttt{log}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/log.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/log.cc} \subsection{\texttt{main}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/main.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/main.cc} \subsection{\texttt{options}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/options.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/options.cc}
\subsection{\texttt{program}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/program.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/program.cc} \subsection{\texttt{sporadic\_task}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/sporadic_task.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/sporadic_task.cc} \subsection{\texttt{task\_scheduler}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/task_scheduler.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/task_scheduler.cc} \subsection{\texttt{tcb}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/tcb.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/tcb.cc} \subsection{\texttt{time\_io}} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/include/time_io.hh} \lstinputlisting[language=c++]{/home/incal/public_html/hs-linux/src/time_io.cc} \section{Appendix D: experiment code and example data} \label{code} \index{code} %% example hs task system \refstepcounter{subsection} \addcontentsline{toc}{subsection}{% \protect\numberline{\thesubsection}% example \texttt{hs} system}% \texttt{\thesubsection\ example hs task system} \label{example-hs-task-system} \index{example hs task system} \verbatiminput{/home/incal/public_html/hs-linux/sys/base} \clearpage %% useful zsh commands \refstepcounter{subsection} \addcontentsline{toc}{subsection}{% \protect\numberline{\thesubsection}% useful zsh commands}% \texttt{\thesubsection\ useful zsh commands} \lstinputlisting[language=bash]{/home/incal/.zsh/be-task} \lstinputlisting[language=bash]{/home/incal/.zsh/be-monitor} \lstinputlisting[language=bash]{/home/incal/.zsh/be-paths} \lstinputlisting[language=bash]{/home/incal/.zsh/be-signals} \clearpage %% zsh wrapper to run experiments \refstepcounter{subsection} \addcontentsline{toc}{subsection}{% \protect\numberline{\thesubsection}% zsh wrapper to run experiments}% \texttt{\thesubsection\ zsh wrapper to run experiments} \label{functions} \index{functions} \lstinputlisting[language=bash]{/home/incal/.zsh/be-experiment} \lstinputlisting[language=bash]{/home/incal/.zsh/be-experiment-setup} \lstinputlisting[language=bash]{/home/incal/.zsh/be-compile-elisp} \lstinputlisting[language=bash]{/home/incal/.zsh/be-contention-experiment} \lstinputlisting[language=bash]{/home/incal/.zsh/be-memory-experiment} \lstinputlisting[language=bash]{/home/incal/.zsh/be-system-experiment} \lstinputlisting[language=bash]{/home/incal/.zsh/be-process-experiment} \lstinputlisting[language=bash]{/home/incal/.zsh/be-audio-experiment} \clearpage %% Elisp to crunch tick traces \refstepcounter{subsection} \addcontentsline{toc}{subsection}{% \protect\numberline{\thesubsection}% tick trace cruncher in Elisp}% \texttt{\thesubsection\ tick trace cruncher in Elisp} \lstinputlisting{/home/incal/public_html/hs-linux/src/times.el} %% index \refstepcounter{section} \addcontentsline{toc}{section}{% \protect\numberline{$\rightarrow$}% \indexname}% \renewcommand\indexname{Index} \printindex \end{document}