445 lines
13 KiB
TeX
445 lines
13 KiB
TeX
\documentclass{beamer}
|
|
\usecolortheme{beaver}
|
|
\beamertemplatenavigationsymbolsempty
|
|
|
|
% Fonts
|
|
\usepackage{fontspec}
|
|
\setmainfont{Source Serif Pro}[Ligatures=TeX]
|
|
\setsansfont{Source Sans Pro}[Ligatures=TeX]
|
|
\setmonofont{Source Code Pro}[
|
|
BoldFont={* Medium},
|
|
BoldItalicFont={* Medium Italic},
|
|
]
|
|
|
|
\usepackage[outputdir=out]{minted}
|
|
\usepackage{tikz}
|
|
\usetikzlibrary{positioning, fit}
|
|
|
|
\tikzset{
|
|
invisible/.style={opacity=0,text opacity=0},
|
|
highlight/.style={color=red},
|
|
intro/.code args={<#1>}{%
|
|
\only<#1>{\pgfkeysalso{highlight}}
|
|
\alt<#1->{}{\pgfkeysalso{invisible}}
|
|
},
|
|
}
|
|
|
|
\title{Miri}
|
|
\subtitle{An interpreter for Rust's mid-level intermediate representation}
|
|
\author{
|
|
Scott Olson
|
|
\texorpdfstring{\\ \scriptsize{Supervisor: Christopher Dutchyn}}{}
|
|
}
|
|
\institute{
|
|
CMPT 400 \\
|
|
University of Saskatchewan
|
|
}
|
|
\date{}
|
|
\titlegraphic{
|
|
\includegraphics[width=64px,height=64px]{rust-logo-512x512.png} \\
|
|
\scriptsize{\url{https://www.rust-lang.org}}
|
|
}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Intro slides
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\begin{document}
|
|
|
|
\maketitle
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{What is Rust? \small{[review]}}
|
|
|
|
According to the website\dots
|
|
|
|
\begin{quote}
|
|
\textbf{Rust} is a systems programming language that runs blazingly fast,
|
|
prevents nearly all segfaults, and guarantees thread safety.
|
|
\end{quote}
|
|
|
|
It's a new programming language from Mozilla, and it looks like this:
|
|
|
|
\begin{minted}[
|
|
autogobble,
|
|
fontsize=\footnotesize,
|
|
mathescape,
|
|
xleftmargin=.3in,
|
|
]{rust}
|
|
fn factorial(n: u64) -> u64 {
|
|
(1..n).fold(1, |a, b| a * b)
|
|
}
|
|
|
|
fn main() {
|
|
for x in 1..6 {
|
|
println!("{}", factorial(x));
|
|
}
|
|
// $\Rightarrow$ 1
|
|
// $\Rightarrow$ 1
|
|
// $\Rightarrow$ 2
|
|
// $\Rightarrow$ 6
|
|
// $\Rightarrow$ 24
|
|
}
|
|
\end{minted}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{How does Rust compile code? \onslide<-6>{\small{[review]}}}
|
|
|
|
\begin{center}
|
|
\begin{tikzpicture}[x=4cm, y=3.5cm, auto, rounded corners]
|
|
\tikzstyle{basic-stage}=[rectangle, draw, thick, align=center]
|
|
\tikzstyle{stage}=[basic-stage, font=\tiny]
|
|
\tikzstyle{pass}=[thick, -stealth]
|
|
\tikzstyle{pass-label}=[font=\footnotesize]
|
|
|
|
\node[basic-stage] (src) at (0,0) {Source\\Code};
|
|
\node[basic-stage] (mach) at (2,-1) {Machine\\Code};
|
|
|
|
\draw<1>[pass, out=0, in=180]
|
|
(src.east) to node[font=\Huge] {?} (mach.west);
|
|
|
|
\node[stage, intro=<2>] (ast) at (1,0)
|
|
{\normalsize{AST} \\ Abstract Syntax Tree};
|
|
\draw[pass, intro=<2>]
|
|
(src) -- node[pass-label] {Parse} (ast);
|
|
|
|
\node[stage, intro=<3>] (hir) at (2,0)
|
|
{\normalsize{HIR} \\ High-level Intermediate\\Representation};
|
|
\draw[pass, intro=<3>]
|
|
(ast) -- node[pass-label] {Simplify} (hir);
|
|
|
|
\node[stage, intro=<4>] (mir) at (0,-1)
|
|
{\normalsize{MIR} \\ Mid-level Intermediate\\Representation};
|
|
\path (hir.south) -- coordinate (middle) (mir.north);
|
|
\draw[pass, intro=<4>]
|
|
(hir.south) |- (middle) -| (mir.north);
|
|
\node[pass-label, above, intro=<4>] at (middle) {Lower};
|
|
|
|
\node[stage, intro=<5>] (llvm) at (1,-1)
|
|
{\normalsize{LLVM IR} \\ Low-level Intermediate\\Representation};
|
|
\draw[pass, intro=<5>]
|
|
(mir) -- node[pass-label] {Translate} (llvm);
|
|
|
|
\draw<6->[pass, intro=<6>]
|
|
(llvm) -- node[pass-label] {Magic} (mach);
|
|
|
|
\node[stage, intro=<7>] (exec) at (1,-1.75)
|
|
{\normalsize{Execution}};
|
|
\draw[pass, intro=<7>]
|
|
(mach) -- node[pass-label] {CPU} (exec);
|
|
|
|
\draw[pass, intro=<8>]
|
|
(mir) -- node[pass-label] {Miri} (exec);
|
|
\end{tikzpicture}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Why build Miri?}
|
|
\begin{itemize}
|
|
\item For fun and learning.
|
|
|
|
\item I originally planned to use it for testing the compiler and execution
|
|
of unsafe code, but shifted my goals along the way. \pause
|
|
|
|
\item Now it serves as an experimental implementation of the upcoming
|
|
compile-time function evaluation feature in Rust. \pause
|
|
|
|
\begin{itemize}
|
|
\item Similar to C++14's \mintinline{cpp}{constexpr} feature.
|
|
|
|
\item You can do complicated calculations at compile time and compile
|
|
their \emph{results} into the executable. \pause
|
|
|
|
\item For example, you can compute a ``perfect hash function'' for a
|
|
statically-known map at compile-time and have guaranteed no-collision
|
|
lookup at runtime. \pause
|
|
|
|
\item Miri actually supports far more of Rust than C++14's
|
|
\mintinline{cpp}{constexpr} does of C++ --- even heap allocation and
|
|
unsafe code.
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{How was it built?}
|
|
|
|
At first I wrote a naive version with a number of downsides:
|
|
|
|
\begin{itemize}
|
|
\item represented values in a traditional dynamic language format, where
|
|
every value was the same size.
|
|
|
|
\item didn't work well for aggregates (structs, enums, arrays, etc.).
|
|
|
|
\item made unsafe programming tricks that make assumptions about low-level
|
|
value layout essentially impossible.
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{How was it built?}
|
|
\begin{itemize}
|
|
\item Later, a Rust compiler team member proposed a ``Rust abstract
|
|
machine'' with specialized value layout which solved my previous problems.
|
|
\pause
|
|
|
|
\item His proposal was intended for a compile-time function evaluator in the
|
|
Rust compiler, so I effectively implemented an experimental version of
|
|
that. \pause
|
|
|
|
\item After this point, making Miri work well was primarily a software
|
|
engineering problem.
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Data layout}
|
|
\begin{itemize}
|
|
\item Memory in Miri is literally a HashMap from ``allocation IDs'' to
|
|
``abstract allocations''.
|
|
|
|
\item Allocations are represented by: \pause
|
|
\begin{enumerate}
|
|
\item An array of \textbf{raw bytes} with a size based on the type of
|
|
the value \pause
|
|
\item A set of \textbf{relocations} --- pointers into other abstract
|
|
allocations \pause
|
|
\item A mask determining which bytes are \textbf{undefined}
|
|
\end{enumerate}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{\texttt{square} example}
|
|
\begin{center}
|
|
\begin{minted}[autogobble,fontsize=\scriptsize]{rust}
|
|
// Rust
|
|
fn square(n: u64) -> u64 {
|
|
n * n
|
|
}
|
|
|
|
// Generated MIR
|
|
fn square(arg0: u64) -> u64 {
|
|
let var0: u64; // n // On function entry, Miri creates
|
|
// virtual allocations for all the
|
|
// arguments, variables, and
|
|
// temporaries.
|
|
|
|
bb0: {
|
|
var0 = arg0; // Copy the argument into `n`.
|
|
return = Mul(var0, var0); // Multiply `n` with itself.
|
|
goto -> bb1; // Jump to basic block `bb1`.
|
|
}
|
|
|
|
bb1: {
|
|
return; // Return from the current fn.
|
|
}
|
|
}
|
|
\end{minted}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{\texttt{sum} example}
|
|
\begin{center}
|
|
\begin{minted}[autogobble,fontsize=\tiny]{rust}
|
|
// Rust
|
|
fn sum() -> u64 {
|
|
let mut sum = 0; let mut i = 0;
|
|
while i < 10 { sum += i; i += 1; }
|
|
sum
|
|
}
|
|
|
|
// Generated MIR
|
|
fn sum() -> u64 {
|
|
let mut var0: u64; // sum
|
|
let mut var1: u64; // i
|
|
let mut tmp0: bool;
|
|
|
|
bb0: {
|
|
// sum = 0; i = 0;
|
|
var0 = const 0u64; var1 = const 0u64; goto -> bb1;
|
|
}
|
|
bb1: {
|
|
// if i < 10 { goto bb2; } else { goto bb3; }
|
|
tmp0 = Lt(var1, const 10u64);
|
|
if(tmp0) -> [true: bb2, false: bb3];
|
|
}
|
|
bb2: {
|
|
var0 = Add(var0, var1); // sum = sum + i;
|
|
var1 = Add(var1, const 1u64); // i = i + 1;
|
|
goto -> bb1;
|
|
}
|
|
bb3: {
|
|
return = var0; goto -> bb4;
|
|
}
|
|
bb4: { return; }
|
|
}
|
|
\end{minted}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{Heap allocations!}
|
|
\begin{minted}[autogobble,fontsize=\scriptsize]{rust}
|
|
fn make_vec() -> Vec<u8> {
|
|
// Empty array with space for 4 bytes - allocated on the heap!
|
|
let mut vec = Vec::with_capacity(4);
|
|
// Initialize the first two slots.
|
|
vec.push(1);
|
|
vec.push(2);
|
|
vec
|
|
}
|
|
|
|
// For reference:
|
|
// struct Vec<T> { capacity: usize, data: *mut T, length: usize }
|
|
|
|
// Resulting allocations (on 32-bit little-endian architectures):
|
|
// Region A:
|
|
// 04 00 00 00 00 00 00 00 02 00 00 00
|
|
// └───(B)───┘
|
|
//
|
|
// Region B:
|
|
// 01 02 __ __ (underscores denote undefined bytes)
|
|
\end{minted}
|
|
|
|
\footnotesize{Evaluating the above involves a number of compiler built-ins,
|
|
``unsafe'' code blocks, and more inside the standard library,
|
|
but Miri handles it all.}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{Unsafe code!}
|
|
\begin{minted}[autogobble,fontsize=\scriptsize]{rust}
|
|
fn out_of_bounds() -> u8 {
|
|
let mut vec = vec![1, 2]
|
|
unsafe { *vec.get_unchecked(5) }
|
|
}
|
|
|
|
// test.rs:3: error: pointer offset outside bounds of allocation
|
|
// test.rs:3: unsafe { *vec.get_unchecked(5) }
|
|
// ^~~~~~~~~~~~~~~~~~~~~
|
|
|
|
fn undefined_bytes() -> u8 {
|
|
let mut vec = Vec::with_capacity(10);
|
|
unsafe { *vec.get_unchecked(5) }
|
|
}
|
|
|
|
// test.rs:3: error: attempted to read undefined bytes
|
|
// test.rs:3: unsafe { *vec.get_unchecked(5) }
|
|
// ^~~~~~~~~~~~~~~~~~~~~
|
|
\end{minted}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{What can't Miri do?}
|
|
\begin{itemize}
|
|
\item Miri can't do all the stuff I didn't implement yet. :)
|
|
\begin{itemize}
|
|
\item non-trivial casts
|
|
\item function pointers
|
|
\item calling destructors and freeing memory
|
|
\item taking target architecture endianess and alignment information
|
|
into account when computing data layout
|
|
\item handling all constants properly (but, well, Miri might be
|
|
replacing the old constants system)
|
|
\end{itemize}
|
|
\pause
|
|
|
|
\item Miri can't do foreign function calls (e.g. calling functions defined
|
|
in C or C++), but there is a reasonable way it could be done with libffi.
|
|
\begin{itemize}
|
|
\item On the other hand, for constant evaluation in the compiler, you
|
|
want the evaluator to be deterministic and safe, so FFI calls might be
|
|
banned anyway.
|
|
\end{itemize}
|
|
\pause
|
|
|
|
\item Without quite some effort, Miri will probably never handle inline
|
|
assembly...
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\begin{center}
|
|
\LARGE{Questions?}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Extra slides
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{\texttt{varN} vs. \texttt{argN}}
|
|
\begin{center}
|
|
\begin{minted}[autogobble,fontsize=\scriptsize]{rust}
|
|
// Rust
|
|
type Pair = (u64, u64);
|
|
fn swap((a, b): Pair) -> Pair {
|
|
(b, a)
|
|
}
|
|
|
|
// Generated MIR
|
|
fn swap(arg0: (u64, u64)) -> (u64, u64) {
|
|
let var0: u64; // a
|
|
let var1: u64; // b
|
|
|
|
bb0: {
|
|
var0 = arg0.0; // get the 1st part of the pair
|
|
var1 = arg0.1; // get the 2nd part of the pair
|
|
return = (var1, var0); // build a new pair in the result
|
|
goto -> bb1;
|
|
}
|
|
|
|
bb1: {
|
|
return;
|
|
}
|
|
}
|
|
\end{minted}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]
|
|
\frametitle{\texttt{factorial} example}
|
|
\begin{center}
|
|
\begin{minted}[autogobble,fontsize=\tiny]{rust}
|
|
// Rust
|
|
fn factorial(n: u64) -> u64 {
|
|
(1..n).fold(1, |a, b| a * b)
|
|
}
|
|
|
|
// Generated MIR
|
|
fn factorial(arg0: u64) -> u64 {
|
|
let var0: u64; // n
|
|
let mut tmp0: Range<u64>; // Miri calculates sizes for generics like Range<u64>.
|
|
let mut tmp1: [closure];
|
|
|
|
bb0: {
|
|
var0 = arg0;
|
|
|
|
// tmp0 = 1..n
|
|
tmp0 = Range<u64> { start: const 1u64, end: var0 };
|
|
|
|
// tmp1 = |a, b| a * b
|
|
tmp1 = [closure];
|
|
|
|
// This loads the MIR for the `fold` fn from the standard library.
|
|
// In general, MIR for any function from any library can be loaded.
|
|
// return tmp0.fold(1, tmp1)
|
|
return = Range<u64>::fold(tmp0, const 1u64, tmp1) -> bb1;
|
|
}
|
|
|
|
bb1: {
|
|
return;
|
|
}
|
|
}
|
|
\end{minted}
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\end{document}
|