\documentclass{beamer}
\usepackage[latin1]{inputenc}
\usepackage{pgf}
\usepackage{pgfnodes}
\usepackage{graphicx}
\usepackage{colortbl}
\usepackage{tikz}

% User-defined beamer colours.
% uppercol / lowercol: passed as upper= / lower= to the beamerboxesrounded
% box drawn by \shell (the command-line transcript box).
\setbeamercolor{uppercol}{fg=black,bg=orange}%
\setbeamercolor{lowercol}{fg=green,bg=black}%
% tl: passed as both upper= and lower= to the title box drawn by \ti.
% tu: not referenced anywhere else in this file
%     (NOTE(review): possibly used by the theme -- unverified).
\setbeamercolor{tu}{fg=blue,bg=blue}
\setbeamercolor{tl}{fg=white,bg=blue}

% \shell{<transcript>}
% Typesets a fiw command-line transcript in a rounded, shadowed box titled
% "Fiw - Command Line Interface", coloured with uppercol (title) and
% lowercol (body), in a small typewriter font.
%   #1: the transcript text. Callers separate transcript lines with blank
%       lines (paragraph breaks), so this macro must stay an unstarred
%       \newcommand (a starred one would reject \par in its argument).
% Line ends inside the definition are protected with % so that no stray
% space tokens are inserted around the box.
\newcommand{\shell}[1]{%
  \begin{beamerboxesrounded}[upper=uppercol,lower=lowercol,shadow=true]{\textbf{Fiw - Command Line Interface}}%
    % \ttfamily replaces the obsolete LaTeX 2.09 switch \tt.
    {\ttfamily\small #1}%
  \end{beamerboxesrounded}%
}

%TODO choose right blue
% \ti{<title>}
% Typesets <title> in \huge type inside a rounded, shadowed box whose upper
% and lower parts both use the beamer colour tl.
%   #1: the title text.
\newcommand{\ti}[1]{%
  % beamerboxesrounded takes a mandatory title argument after its options.
  % Pass an explicit empty title instead of relying on the \par produced by
  % a blank line being picked up as that argument.
  \begin{beamerboxesrounded}[upper=tl,lower=tl,shadow=true]{}%
    % NOTE(review): \center is the begin-code of the center environment used
    % here as if it were a command, and its group is closed before the
    % paragraph ends, so the text is presumably not really centred (the
    % \hspace{1cm} looks like a manual nudge). Left unchanged to keep the
    % rendered slide as it is; confirm the intended alignment before fixing.
    { \center{\hspace{1cm}\huge #1}}%
  \end{beamerboxesrounded}%
}



% Presentation theme.
% NOTE(review): "Haegar" is not one of the stock beamer theme names; presumably
% a custom beamerthemeHaegar.sty has to be on the TeX search path -- confirm.
\usetheme{Haegar}
%\usetheme{default}
% Replace the navigation symbols template with nothing (hides the symbols).
\setbeamertemplate{navigation symbols}{}
%\pgfdeclaremask{tu}{beamer-tu-logo-mask}
%\pgfdeclaremask{ur}{beamer-ur-logo-mask}
% Logo images declared under the names csrrt, loria and inria. In this file
% they are only used by the commented-out \logo line below; the title frame
% includes the graphs/*.png files directly with \includegraphics instead.
\pgfdeclareimage[width=0.6cm]{csrrt}{./graphs/csrrt}
\pgfdeclareimage[width=1cm]{loria}{./graphs/loria}
\pgfdeclareimage[width=1cm]{inria}{./graphs/inria}

%\logo{\hbox{\hbox to 1cm{\hfil\pgfuseimage{loria}}\hskip0.1cm\hbox{\pgfuseimage{inria}}}\hskip0.1cm\hbox{\pgfuseimage{csrrt}}}
% Presentation date (also written out by hand on the title frame).
\date{October 19, 2007}


\title{Automated Malware Analysis}
\author[Wagener, State, Dulaunoy]{%
Gérard~Wagener\inst{1}\inst{2}
\and 
Alexandre~Dulaunoy\inst{2}
\and 
Radu~State\inst{1}\inst{3}
}
\institute[madynes csrrt-lu]{

\inst{1} \normalsize MADYNES - LORIA \\ \tiny{Laboratoire Lorrain de Recherche en Informatique et ses Applications} 
\and
\inst{2}\normalsize{CSRRT-LU} \\ \tiny{Computer Security Research and Response Team - Luxembourg}   
\and
\inst{3} \normalsize{INRIA} \\ \tiny Institut National de Recherche en Informatique et Automatique
\normalsize
}


%\AtBeginSection[] %
%{
%\frame{\frametitle{Outline}\tableofcontents[currentsection, %currentsubsection]}
%}


\begin{document}


%*********************************
\begin{frame}
%\small
%\maketitle
\ti{
Automated Malware Analysis
}

\begin{center}
\textbf{
\begin{tabular}{lll}
Gérard Wagener$^{12}$&
Alexandre Dulaunoy$^{2}$&
Radu State$^{13}$\\
\end{tabular}
}
\end{center}

\begin{center}
\vspace{0.1cm}
$^{1}$\textbf{MADYNES - LORIA} \\ \small{Laboratoire Lorrain de Recherche en Informatique et ses Applications} 

\normalsize
\vspace{0.1cm}

$^{2}$\textbf{CSRRT-LU} \\ \small{Computer Security Research and Response Team - Luxembourg}

\normalsize
\vspace{0.1cm}
$^{3}$\textbf{INRIA} \\ \small Institut National de Recherche en Informatique et Automatique

\normalsize
\vspace{0.1cm}
\textbf{October 19, 2007}
\end{center}

\begin{center}
%\begin{beamerboxesrounded}[upper=lowercol,lower=tl,shadow=true]


\includegraphics[scale=0.3]{graphs/Hl2007banner.png}
\hspace{0.1cm}
\includegraphics[scale=0.10]{graphs/loria.png}
\hspace{0.1cm}
\includegraphics[scale=0.4]{graphs/csrrt.png}
\hspace{0.1cm}
\includegraphics[scale=0.2]{graphs/inria.png}
\end{center}
%\end{beamerboxesrouned}
%\normalsize
\end{frame}
\begin{frame}
\frametitle{Outline}
\tableofcontents
\end{frame}
%*********************************
\section{Introduction}
\begin{frame}

\frametitle{Introduction}
\begin{itemize}
	\item According to~\cite{aycock}, there are two malware analysis techniques:
	\begin{itemize}
		\item Static Analysis: which is often hard to do due to packers and encryption techniques.
		\item Dynamic Analysis: which is often incomplete due to \alert{conditions} that are not fulfilled.
	\end{itemize}
	\item \cite{gerard} et al. proposed the usage of phylogenetic trees in order to identify malware families based on sequences of called system functions.
	\item \cite{gerard} et al. proposed a design of a framework for analyzing malware. (Automated aNalysis and Network Emulation, ANNE)

\end{itemize}	
\end{frame}


\begin{frame}
\frametitle{Introduction}
A piece of malware can have multiple behaviors.
\begin{center}
\includegraphics[scale=0.3]{./graphs/decision.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Introduction}
\framesubtitle{Contribution}
\begin{itemize}
	\item \cite{moser} et al. explored multiple execution paths of malware using snapshots from CPU emulators.
	\item We want to explore other strategies to unveil more information from malware.
	\item In order to discover such strategies we propose a tool fiw which:
		\begin{enumerate}
			\item provides an interactive sandbox $\to$ high-level debugger.
%			\item Merge debugging features with system emulators.
			\item tackles currently applied anti-debugging or anti-reverse-engineering techniques.
		\end{enumerate}
\end{itemize}
\end{frame}

\section{Fiw - the debugger}


\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Goal: Analyzing malware (unknown w32 binaries)}
\begin{itemize}
	\item High-level observations:
	\begin{itemize}
		\item File system \& registry modifications.
		\item Malware networking.
		\begin{itemize}
			\item Discover communication protocols of a malware sample.
		\end{itemize}
	\end{itemize}
	\item Low-level observations:
	\begin{itemize}
		\item System function call observation.
		\item Discovering packing / encryption techniques.
		\item Discovering vulnerabilities $\to$ dump \& study machine code.
	\end{itemize}
	\item Actions:
		\begin{itemize}
			\item Apply fuzzing techniques via fiw - script interface or plugins.
		\end{itemize}
	\end{itemize}
\end{frame}
\subsection{Features}
\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Features}
\begin{itemize}
	\item Traditional debugging actions:
	\begin{itemize}
		\item Step-by-step execution.
		\item Memory layout inspection.
		\item Replay of a debug session.
		\item Memory access facility.
	\end{itemize}
	\item Environmental changes:
	\begin{itemize}
		\item File system changes.
		\item Registry modifications.
		\item Process memory alteration.
		\item Crafting network messages.
		\item Automated debug actions \& plugins.
	\end{itemize}
	\item The tool fiw was implemented in \emph{C}, runs on the Linux operating system, and has 6409 lines of code.
\end{itemize}
\end{frame}

\subsection{Architecture \& design}
\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{center}
	\includegraphics[scale=0.28]{./graphs/arch.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}

\begin{center}
Observing function calls \& Memory Access.
\includegraphics[scale=0.26]{./graphs/frelay.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{figure}
\begin{center}
Discovering memory layout.

\includegraphics[scale=0.23]{./graphs/memlayout.pdf}
\end{center}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{center}
Which function calls belong to the unknown binary?
\includegraphics[scale=0.26]{./graphs/vexec.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{center}
Handling disk and registry access.
\includegraphics[scale=0.26]{./graphs/diskio.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{center}
Virtual networking.

\includegraphics[scale=0.26]{./graphs/nethow.pdf}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\begin{itemize}
	\item Replay of a debugging session:
	\begin{itemize}
		\item Pseudo break points: function calls that do not match the break point are automatically acknowledged.
		\item Stop auto acknowledgment if a given \alert{return address} emerges.
		\item Stop auto acknowledgment if a given \alert{function call} emerges.
		\item Replay with fiw script interface.
	\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Fiw - the debugger}
\framesubtitle{Architecture \& design}
\begin{itemize}
	\item Automated debug actions:
	\begin{itemize}
		\item Sometimes it is a burden to step through malware code.
		\item E.g.\ acknowledging long sequences of {\tt GetProcAddress, AllocHeap} function calls.
	\end{itemize}
	\item Plugins
	\begin{itemize}
		\item The tool fiw puts debugging information in environment variables.
		\item This information can be accessed via scripts or programs that can be launched from the tool fiw.
		\item Results can be carried back via the exit code of the script or program.
	\end{itemize}
\end{itemize}
\end{frame}
\subsection{Malware analysis scenarios}
\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering anti - debugging techniques}
\begin{itemize}
	\item Dealing with packing techniques
		\begin{itemize}
		\item Let the piece of malware unpack itself and dump code from memory.
%		\item In worst case you get  after the first function a valid code address.
		\end{itemize}
		\item Anti - debugging techniques
		\begin{itemize}
			\item Detection of \emph{soft-ice}, \emph{regmon}, \emph{filemon}, ... via {\tt CreateFile, EnumDeviceDrivers, IsDebuggerPresent}\footnote{Windows API functions}
			\item Code integrity checks.
			\item Debugger traps (int 3 or 0xCC).
			\item Registry key look-ups to related debuggers.
		\end{itemize}
	\item $\to$ these techniques do not work with the debugger fiw.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering anti - debugging techniques}
\begin{example}[SdBot Analysis]
\begin{table}\rowcolors[]{1}{structure!25!averagebackgroundcolor}{structure!10!averagebackgroundcolor}
\begin{tabular}{l}
	WORM/SdBot.506880.3, antivirname\\
	First seen: 08.10.2007 02:58:46\\
	Collected by nepenthes.csrrt.org\\
	Note: Does not like debuggers :-)\\
\end{tabular}
\end{table}
\end{example}
\end{frame}
\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering anti - debugging or anti - analysis techniques}
\shell{

fiw$>$start sdbot\_506880\_3.exe

fiw$>$CreateFileA(00568cff, "$ \backslash \backslash \backslash .\backslash \backslash$SICE", c0000000,3,0,3,80) pid: 23325 tid: 0009 ret: 00568f1f

fiw$>$IsDebuggerPresent() pid: 23325 tid: 0009 ret: 0059b1d9

fiw$>$RegOpenKeyA(80000002, 0059f306, "SOFTWARE$\backslash \backslash$NuMega$\backslash \backslash$
 DriverStudio") pid: 23325 tid: 0009 ret: 0059f49e



fiw$>$FindWindowA(0059be9f, "FilemonClass") pid: 23325 tid: 0009 ret: 0059bf6f

}
\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Malware code obfuscation}
\begin{itemize}
	\item Goal: make reverse engineering as difficult as possible.
	\item Techniques:
	\begin{itemize}
		\item Use of dead code.
		\item Use of unnecessary unconditional jumps.
		\item Use of garbage instructions $\dots$
		\item Frameworks for this purpose exist!
	\end{itemize}
\end{itemize}
Analysis of \emph{Sdbot 506880 3}
\shell{
fiw$>$OutputDebugStringA(00577f31 "Themida Professional (c) pid: 23721 tid: 0009 ret: 0057803b

fiw$>$
}
\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering termination cause}
\begin{itemize}
	\item Study cause of termination of execution.
	\item Let the tool fiw step through $\dots$
\end{itemize}
Analysis of \emph{Sdbot 506880 3}
\shell{
fiw$>$start sdbot\_506880\_3.exe

fiw$>$auto ack

fiw$>$cont

[A] GetEnvironmentVariableA(00542fbd, LNumDLLsProt", 00542fcd) pid: 26887 tid: 0009 ret: 005437de

[A] TerminateProcess(ffffffff, 00000000) pid: 26887 tid: 0009 ret: 0054542f
}
2nd round: adjust conditions!
\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering termination cause}
\begin{itemize}
	\item Set pseudo break point on an address or function name.
	\item Launch the analysis again.
\end{itemize}

Analysis of \emph{Sdbot 506880 3}
\shell{
fiw>start sdbot\_506880\_3.exe

fiw>break ret 005437de

fiw>GetEnvironmentVariableA(00542fbd, LNumDLLsProt", 00542fcd) pid: 27206 tid: 0009 ret: 005437de

fiw>
}
\end{frame}

%\subsection{Discovering packing techniques}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Discovering packing techniques}
\begin{itemize}
	\item Have a look at the memory layout.
	\item Observe return address of function calls.
\end{itemize}
Analysis of \emph{Sdbot 506880 3}
\shell{
fiw$>$!./info\_process\_all

00400000-00401000 r-xp 00000000 62:00 456973    SdBot\_506880\_3.exe

00401000-0041e000 rwxp 00001000 62:00 456973   SdBot\_506880\_3.exe

fiw$>$cont

fiw$>$GlobalAddAtomW(6084c926 L"ux\_theme") pid: 28009 tid: 0009 ret: 6083b9ad

}

{\tt 0x6083b9ad} $\notin $[{\tt 0x00400000} - {\tt 0x0041e000}].

\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Communicating with the malware sample}

\begin{itemize}
	\item  A piece of malware sometimes communicates with other entities.
	\item A piece of malware that includes a backdoor accepts commands.
	\item It would be nice to discover the commands.
	\item The tool fiw has a command {\tt vnet}
	\item In that case the messages sent by a malware sample are sent to the tool fiw.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Communicating with the malware sample}

Worm/Rbot.102912.13 analysis:

\shell{
fiw$>$vnet on

fiw$>$break name connect

fiw$>$start rbot\_102912\_13.exe

fiw$>$connect(58, 33f478, 10) pid: 28309 tid: 0009 ret: 0040e3a4

fiw$>$cont

fiw$>$send(00000058, 0033f220, 0000002e, 00000000) pid: 28309 tid: 0009 ret: 0040e4d1

fiw$>$cont

fiw>vnet tcp client list

*** Client socket list ***

Socket: 6

}
\end{frame}

\begin{frame}
\frametitle{Malware analysis scenarios}
\framesubtitle{Communicating with the malware sample}
\shell{
fiw>vnet tcp recv 6 46

*** Reveived 46 bytes ***

0x4e 0x49 0x43 0x4b 0x20 0x55 0x53 0x41 0x7c 0x31 0x33 0x36 0x35 0x38 0x31 0xd 0xa 0x55 0x53 0x45 0x52 0x20 0x77 0x65 0x68 0x65 0x79 0x63 0x20 0x30 0x20 0x30 0x20 0x3a 0x55 0x53 0x41 0x7c 0x31 0x33 0x36 0x35 0x38 0x31 0xd 0xa

fiw>cont
}
\begin{itemize}
\item Malware often sends binary data.
\item This time the string above is ASCII text: \emph{NICK USA\textbar 136581 USER weheyc 0 0 :USA\textbar 136581}.
\end{itemize}
\end{frame}

%TODO check pids
\begin{frame}
\frametitle{Discovering malware communication protocol}
Disassembling the memory in order to discover conditions.
\shell{

fiw>recv(00000058, 0033d6a0, 00001000, 00000000) pid: 28309 tid: 0009 ret: 0040e519

fiw>dasm  0040e519 128

*** Disassemble address 40e519 size 128

\begin{tabular}{lll}
0040E519& 85C0&test eax,eax\\
0040E51B&7ECD&jng 0x40e4ea\\
0040E51D&8D85E0F3FFFF&lea eax,[ebp-0xc20]\\
0040E523&50&push eax\\
0040E524&8D85E0E3FFFF&lea eax,[ebp-0x1c20]\\
0040E52A&50&push eax\\
0040E52B&E81CD7FFFF&call dword 0xffffd71c\\
\end{tabular}

fiw$>$
}

\end{frame}
%\section{Automated malware analysis}

%Do not loose orientation in code analysis
%\subsection{Function call observations}

\section{Experimental results}
\begin{frame}
\frametitle{Experimental results}

\begin{table}\rowcolors[]{1}{structure!25!averagebackgroundcolor}{structure!10!averagebackgroundcolor}
\caption{General information about the malware set}
\label{ginfo}

\begin{tabular}{ll}
Number of malware&104\\
Observation period&2005-2007\\
%Malware from 2005&10\\
%\hline
%Malware from 2006&91\\
%\hline
%Malware from 2007&3\\
%\hline
Average file size&135KB\\
%Smallest file&8KB\\
%\hline
%Biggest file&665KB\\
%\hline
Worms&34\%\\
Mean detection rate&57.21\%\\
Antivir detection rate&69.23\%\\
Clamav detection rate&35.58\%\\
Fprot detection rate&57.69\%\\
Norman detection rate&66.35\%\\
\end{tabular}
\end{table}


\end{frame}

\begin{frame}
\frametitle{Experimental results}
\begin{itemize}
\item Explore main execution path.
\item Relaunch execution and check if another execution path is there.
\end{itemize}

\begin{table}\rowcolors[]{1}{structure!25!averagebackgroundcolor}{structure!10!averagebackgroundcolor}
\caption{Control Flow division}
\label{cfg-div}
\scalebox{0.9}{
\begin{tabular}{ll}
Function calls that influence the CFG&\% of malware\\
String compare functions&41\%\\
Query of a registry value&60\%\\
Query current date&41\%\\
Check for internet connectivity&43\%\\
Mutex&64\%\\
\end{tabular}
}

\end{table}

\end{frame}
 

\section{Future work \& Conclusion}

\begin{frame}
\frametitle{Conclusion}
\begin{itemize}
\item We presented a debugger for analyzing malware.
\item The tool fiw is tightly bound to a virtual operating system w32.
\item System function calls need to be acknowledged by the tool fiw before their execution.
\item The tool fiw can inspect the memory of a malware sample.
\item The tool fiw can change the file-system and the registry in order to make a function call successful.
\item The tool fiw can communicate with a malware sample via a virtual network.
\item The tool fiw can do some automated debug actions via plugins.
\item We showed various malware analysis scenarios.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Demo}
\begin{itemize}
\item Demo
\item Questions \& Answers
\item Acknowledgments: Hereby I want to thank the CSRRT-LU team for having given me access to 
their malware database for doing the experiments.
\end{itemize}
\end{frame}


\begin{frame}
\frametitle{Future work}
\begin{itemize}
\item Use a native windows OS as virtual operating system.
\item Make the tool fiw more user-friendly.
\item Add more debugging facilities.
\item Integrate the tool in the malware analysis framework A.N.N.E. (Automated Analysis and Network Emulation, \cite{anne}).
\begin{itemize}
\item Goal of the A.N.N.E. framework: Do malware analysis in an automated way.
\item A server accepts malware samples and analyzes them with custom defined plugins.
\end{itemize}
\item Improve replay debugging actions.
\item Correlate observations performed by the tool fiw.
\item Correlate observations performed by the tool fiw with network information.
\end{itemize}
\end{frame}


\begin{frame}
\huge
\begin{center}
Thank you for your attention!

Contact: haegardev@gmail.com
\end{center}
\normalsize
\end{frame}


\begin{frame}
\frametitle{References}

\frametitle{Bibliography}
\framesubtitle{\hspace{1cm}}

\begin{thebibliography}{10}
\small
\bibitem[Aycock, 2006]{aycock} John Aycock. \newblock Computer Viruses and Malware. Springer, 2006.
\bibitem[Wagener, 2007]{gerard} G{\'e}rard Wagener and Radu State and Alexandre Dulaunoy. \newblock \emph{Malware Behaviour Analysis}. In Proceedings of the 2nd International Workshop on the Theory of Computer Viruses 2007.

\bibitem [Moser, 2007]{moser} Andreas Moser and Christopher Kr{\"u}gel and Engin Kirda. \newblock \emph{Exploring Multiple Execution Paths for Malware Analysis}. IEEE Symposium on Security and Privacy, 231--245, 2007.

\bibitem[ANNE,2006]{anne} G{\'e}rard Wagener and Alexandre Dulaunoy and Thomas Engel. \newblock \emph{Automated aNalysis and Network Emulation}. http://www.csrrt.org.lu/wiki/index.php/Capturing\_and\_analyzing\_Malware, 2006.
\end{thebibliography}
\end{frame}

\end{document}
