├── pig ├── pig.pdf ├── .gitignore ├── Figures │ ├── opt1.pdf │ ├── opt2.pdf │ ├── opt3.pdf │ ├── cogroup.pdf │ ├── compiler.pdf │ ├── pig_perf.pdf │ ├── example2_mr.pdf │ ├── expressions.pdf │ ├── example2_plan.pdf │ └── pig_overview.pdf ├── pig.tex └── references.bib ├── disa ├── disa.pdf ├── .gitignore ├── Figures │ ├── pr.png │ ├── pairs.png │ ├── pbfs.png │ ├── pr_toy.png │ ├── stripes.png │ ├── combiners.png │ ├── functional.png │ ├── pr_sketch.png │ ├── simple_MR.png │ ├── word_count.png │ └── simple_MR_combiners.png ├── references.bib ├── disa.tex ├── principles.tex ├── graph_algorithms.tex ├── programming_model.tex └── design_patterns.tex ├── .gitignore ├── ccomp └── dscc-4.pdf ├── dstore ├── dscc-1.pdf └── dscc-2.pdf ├── hadoop ├── hadoop.pdf ├── Figures │ ├── hdfs.png │ ├── data2map.png │ ├── map_task.png │ ├── mapreduce.png │ ├── reduce_task.png │ ├── sequencefiles.png │ ├── split_block.png │ ├── hadoop_distance.png │ ├── spill_partition.png │ ├── chain_replication.png │ └── cluster_net_topology.png ├── introduction.tex ├── references.bib ├── hadoop.tex ├── io.tex ├── hdfs.tex ├── deployments.tex └── mapreduce.tex ├── intro ├── course.pdf ├── .gitignore ├── course.tex └── introduction.tex ├── relal ├── relal.pdf ├── .gitignore ├── relal.tex └── relational.tex ├── coordination └── dscc-3.pdf └── README.md /pig/pig.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/pig.pdf -------------------------------------------------------------------------------- /disa/disa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/disa.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .aux 2 | .bbl 3 | .blg 4 | .log 5 | .nav 6 | .out 7 | .snm 8 | .toc 9 | 10 | -------------------------------------------------------------------------------- /ccomp/dscc-4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/ccomp/dscc-4.pdf -------------------------------------------------------------------------------- /dstore/dscc-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/dstore/dscc-1.pdf -------------------------------------------------------------------------------- /dstore/dscc-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/dstore/dscc-2.pdf -------------------------------------------------------------------------------- /hadoop/hadoop.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/hadoop.pdf -------------------------------------------------------------------------------- /intro/course.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/intro/course.pdf -------------------------------------------------------------------------------- /relal/relal.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/relal/relal.pdf -------------------------------------------------------------------------------- /disa/.gitignore: -------------------------------------------------------------------------------- 1 | .aux 2 | .bbl 3 | .blg 4 | .log 5 | .nav 6 | .out 7 | .snm 8 | .toc 9 | 10 | -------------------------------------------------------------------------------- /disa/Figures/pr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/pr.png -------------------------------------------------------------------------------- /intro/.gitignore: -------------------------------------------------------------------------------- 1 | .aux 2 | .bbl 3 | .blg 4 | .log 5 | .nav 6 | .out 7 | .snm 8 | .toc 9 | 10 | -------------------------------------------------------------------------------- /pig/.gitignore: -------------------------------------------------------------------------------- 1 | .aux 2 | .bbl 3 | .blg 4 | .log 5 | .nav 6 | .out 7 | .snm 8 | .toc 9 | 10 | -------------------------------------------------------------------------------- /relal/.gitignore: -------------------------------------------------------------------------------- 1 | .aux 2 | .bbl 3 | .blg 4 | .log 5 | .nav 6 | .out 7 | .snm 8 | .toc 9 | 10 | -------------------------------------------------------------------------------- /disa/Figures/pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/pairs.png -------------------------------------------------------------------------------- /disa/Figures/pbfs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/pbfs.png -------------------------------------------------------------------------------- /pig/Figures/opt1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/opt1.pdf -------------------------------------------------------------------------------- /pig/Figures/opt2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/opt2.pdf -------------------------------------------------------------------------------- /pig/Figures/opt3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/opt3.pdf -------------------------------------------------------------------------------- /coordination/dscc-3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/coordination/dscc-3.pdf -------------------------------------------------------------------------------- /disa/Figures/pr_toy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/pr_toy.png -------------------------------------------------------------------------------- /disa/Figures/stripes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/stripes.png -------------------------------------------------------------------------------- /hadoop/Figures/hdfs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/hdfs.png -------------------------------------------------------------------------------- /pig/Figures/cogroup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/cogroup.pdf -------------------------------------------------------------------------------- /pig/Figures/compiler.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/compiler.pdf -------------------------------------------------------------------------------- /pig/Figures/pig_perf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/pig_perf.pdf -------------------------------------------------------------------------------- /disa/Figures/combiners.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/combiners.png -------------------------------------------------------------------------------- /disa/Figures/functional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/functional.png -------------------------------------------------------------------------------- /disa/Figures/pr_sketch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/pr_sketch.png -------------------------------------------------------------------------------- /disa/Figures/simple_MR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/simple_MR.png -------------------------------------------------------------------------------- /disa/Figures/word_count.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/word_count.png -------------------------------------------------------------------------------- /hadoop/Figures/data2map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/data2map.png -------------------------------------------------------------------------------- /hadoop/Figures/map_task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/map_task.png -------------------------------------------------------------------------------- /pig/Figures/example2_mr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/example2_mr.pdf 
-------------------------------------------------------------------------------- /pig/Figures/expressions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/expressions.pdf -------------------------------------------------------------------------------- /hadoop/Figures/mapreduce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/mapreduce.png -------------------------------------------------------------------------------- /pig/Figures/example2_plan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/example2_plan.pdf -------------------------------------------------------------------------------- /pig/Figures/pig_overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/pig/Figures/pig_overview.pdf -------------------------------------------------------------------------------- /hadoop/Figures/reduce_task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/reduce_task.png -------------------------------------------------------------------------------- /hadoop/Figures/sequencefiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/sequencefiles.png -------------------------------------------------------------------------------- /hadoop/Figures/split_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/split_block.png -------------------------------------------------------------------------------- /hadoop/Figures/hadoop_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/hadoop_distance.png -------------------------------------------------------------------------------- /hadoop/Figures/spill_partition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/spill_partition.png -------------------------------------------------------------------------------- /disa/Figures/simple_MR_combiners.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/disa/Figures/simple_MR_combiners.png -------------------------------------------------------------------------------- /hadoop/Figures/chain_replication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/chain_replication.png -------------------------------------------------------------------------------- /hadoop/Figures/cluster_net_topology.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fede1024/DISC-CLOUD-COURSE/master/hadoop/Figures/cluster_net_topology.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Data-intensive Scalable Computing and Clouds
2 | =================
3 |
4 | This repo contains the LaTeX sources of some of the lecture notes of the Distributed Systems and Cloud Computing course at Eurecom.
5 |
6 | ### DISC or Distributed Systems?
7 | Actually, a bit of both, but I couldn't figure out a short name for all of that.
8 |
9 | ### Who?
10 | This repo is maintained by [Pietro Michiardi](http://www.eurecom.fr/~michiard).
11 |
12 | I work at [Eurecom](http://www.eurecom.fr).
13 |
--------------------------------------------------------------------------------
/intro/course.tex:
--------------------------------------------------------------------------------
1 | \documentclass{beamer}
2 |
3 | \usepackage[lined,ruled]{algorithm2e}
4 | \usepackage{subfigure}
5 | \usepackage[english]{babel}
6 | \usepackage[latin1]{inputenc}
7 | \usepackage{times}
8 | \usepackage[T1]{fontenc}
9 | \usepackage{color}
10 |
11 | \usetheme[secheader]{Boadilla}
12 | \usefonttheme[onlylarge]{structurebold}
13 | \setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries}
14 | \setbeamertemplate{navigation symbols}{}
15 | \setbeamertemplate{mini frames}[box]
16 | \setbeamertemplate{sections/subsections in toc}[square]
17 | \setbeamertemplate{blocks}[rounded][shadow=true]
18 | \setbeamertemplate{bibliography item}[text]
19 |
20 | \setbeamercolor{lightorange}{fg=black,bg=orange!40}
21 | \setbeamercolor{lightblue}{fg=black,bg=blue!30}
22 |
23 | \newenvironment{colorblock}[2]
24 | {\setbeamercolor{item}{fg=#1,bg=#1}\begin{beamerboxesrounded}[upper=#1,lower=#2,shadow=true]}
25 | {\end{beamerboxesrounded}}
26 |
27 |
28 |
29 | % Setup TikZ
30 |
31 | \usepackage{tikz}
32 | \usetikzlibrary{arrows}
33 | \tikzstyle{block}=[draw opacity=0.7,line width=1.4cm]
34 |
35 |
36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 |
40 | \newtheorem{observation}[theorem]{Observation}
41 |
42 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
43 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
44 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
45 |
46 | \title{Data-intensive Scalable Computing}
47 | \subtitle{Introduction}
48 | \author{Pietro Michiardi}
49 | \institute{Eurecom}
50 | \date
51 |
52 |
53 | \begin{document}
54 |
55 | \begin{frame}
56 | \titlepage
57 | \end{frame}
58 |
59 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
60 | \section{Introduction and Motivations}
61 |
62 | \begin{frame}
63 | \begin{colorblock}{blue}{lightblue}{ }
64 | \begin{center}
65 | \Huge \textbf{\texttt{Introduction and Motivations}}
66 | \end{center}
67 | \end{colorblock}
68 | \end{frame}
69 |
70 | \input{./introduction}
71 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
72 |
73 | \end{document}
74 |
--------------------------------------------------------------------------------
/hadoop/introduction.tex:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 | \frame {\frametitle{From Theory to Practice}
3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4 | \begin{itemize}
5 | \item \textbf{The story so far}
6 | \begin{itemize}
7 | \item Principles behind the MapReduce Framework
8 | \item Programming model
9 | \item Algorithm design and patterns
10 | \end{itemize}
11 |
12 | \vspace{20pt}
13 |
14 | \item \textbf{Hadoop implementation of MapReduce}
15 | \begin{itemize}
16 | \item HDFS in detail
17 | \item Hadoop MapReduce
18 | \begin{itemize}
19 | \item Implementation details
20 | \item Types and Formats
21 | \end{itemize}
22 | \item Hadoop I/O
23 | \end{itemize}
24 |
25 | \vspace{20pt}
26 |
27 | \item \textbf{Hadoop Deployments}
28 | \begin{itemize}
29 | \item The BigFoot platform
30 | \end{itemize}
31 | \end{itemize}
32 | }
33 |
34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 | \frame {\frametitle{Terminology}
36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 | \begin{itemize}
38 | \item \textbf{MapReduce:}
39 | \begin{itemize}
40 | \item \texttt{Job}: an execution of a Mapper and Reducer across a
41 | data set
42 | \item \texttt{Task}: an execution of a Mapper or a Reducer on a slice of
43 | data
44 | \item \texttt{Task Attempt}: instance of an attempt to execute a
45 | task
46 | \item \textbf{Example:}
47 | \begin{itemize}
48 | \item Running ``Word Count'' across 20 files is one job
49 | \item 20 files to be mapped = 20 map tasks + some number of
50 | reduce tasks
51 | \item At least 20 attempts will be performed... more if a
52 | machine crashes
53 | \end{itemize}
54 | \end{itemize}
55 |
56 |
57 |
58 | \vspace{20pt}
59 |
60 | \item \textbf{Task Attempts}
61 | \begin{itemize}
62 | \item A task is attempted at least once, possibly more
63 | \item Multiple crashes on the same input imply discarding it
64 | \item Multiple attempts may occur in parallel (a.k.a. speculative execution)
65 | \item Task ID from TaskInProgress is not a unique identifier
66 | \end{itemize}
67 | \end{itemize}
68 | }
69 |
--------------------------------------------------------------------------------
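The ``Word Count'' job used in the Terminology frame above maps directly onto code. Below is a minimal, self-contained sketch against the stock org.apache.hadoop.mapreduce API (class names and I/O paths are illustrative, not taken from this repo): submitted over 20 input files, it forms one job, roughly one map task per file (assuming each file fits in a single split), plus however many task attempts failures and speculative execution add.

```java
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

  public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, ONE);          // emit (term, 1) for every token
      }
    }
  }

  public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable v : values) sum += v.get();  // sum partial counts per term
      context.write(key, new IntWritable(sum));
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);  // local aggregation before the shuffle
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));    // e.g. a dir with 20 files
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
```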
/intro/introduction.tex:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 | \frame {\frametitle{What is this Course About}
3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4 | \begin{itemize}
5 | \item \textbf{The MapReduce Programming Model}
6 | \begin{itemize}
7 | \item Principles of functional programming
8 | \item Scalable algorithm design
9 | \end{itemize}
10 |
11 | \vspace{20pt}
12 |
13 | \item \textbf{In-depth description of Hadoop MapReduce}
14 | \begin{itemize}
15 | \item Architecture internals
16 | \item Software components
17 | \item Cluster deployments
18 | \end{itemize}
19 |
20 | \vspace{20pt}
21 |
22 | \item \textbf{Relational Algebra and High-Level Languages}
23 | \begin{itemize}
24 | \item Basic operators and their equivalence in MapReduce
25 | \item Hadoop Pig and PigLatin
26 | \end{itemize}
27 |
28 | \end{itemize}
29 | }
30 |
31 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
32 | \frame {\frametitle{What is MapReduce?}
33 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
34 | \begin{itemize}
35 | \item \textbf{A programming model}:
36 | \begin{itemize}
37 | \item Inspired by functional programming
38 | \item Parallel computations on massive amounts of data
39 | \end{itemize}
40 |
41 | \vspace{20pt}
42 |
43 | \item \textbf{An execution framework}:
44 | \begin{itemize}
45 | \item Designed for large-scale data processing
46 | \item Designed to run on clusters of commodity hardware
47 | \end{itemize}
48 | \end{itemize}
49 | }
50 |
51 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
52 | \frame {\frametitle{What is Big Data?}
53 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
54 | \begin{itemize}
55 | \item \textbf{Vast repositories of data}
56 | \begin{itemize}
57 | \item The Web
58 | \item Physics
59 | \item Astronomy
60 | \item Finance
61 | \end{itemize}
62 |
63 | \vspace{20pt}
64 |
65 | \item \textbf{Volume, Velocity, Variety}
66 |
67 | \vspace{20pt}
68 |
69 | \item \textbf{It's not the algorithm, it's the data!} \cite{banko01}
70 | \begin{itemize}
71 | \item More data leads to better accuracy
72 | \item With more data, accuracy of different algorithms converges
73 | \end{itemize}
74 | \end{itemize}
75 | }
76 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
77 |
78 |
--------------------------------------------------------------------------------
/pig/pig.tex:
--------------------------------------------------------------------------------
1 | \documentclass{beamer}
2 |
3 | \usepackage{subfigure}
4 | \usepackage[english]{babel}
5 | \usepackage[latin1]{inputenc}
6 | \usepackage{times}
7 | \usepackage[T1]{fontenc}
8 | \usepackage{color}
9 |
10 | \usepackage{algorithm}
11 | \usepackage{algorithmicx}
12 | \usepackage[noend]{algpseudocode}
13 |
14 | \usetheme[secheader]{Boadilla}
15 | \usefonttheme[onlylarge]{structurebold}
16 | \setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries}
17 | \setbeamertemplate{navigation symbols}{}
18 | \setbeamertemplate{mini frames}[box]
19 | \setbeamertemplate{sections/subsections in toc}[square]
20 | \setbeamertemplate{blocks}[rounded][shadow=true]
21 | \setbeamertemplate{bibliography item}[text]
22 |
23 | \setbeamercolor{lightorange}{fg=black,bg=orange!40}
24 | \setbeamercolor{lightblue}{fg=black,bg=blue!30}
25 |
26 | \newenvironment{colorblock}[2]
27 | {\setbeamercolor{item}{fg=#1,bg=#1}\begin{beamerboxesrounded}[upper=#1,lower=#2,shadow=true]}
28 | {\end{beamerboxesrounded}}
29 |
30 |
31 |
32 | % Setup TikZ
33 |
34 | \usepackage{tikz}
35 | \usetikzlibrary{arrows}
36 | \tikzstyle{block}=[draw opacity=0.7,line width=1.4cm]
37 |
38 |
39 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
40 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
42 |
43 | \newtheorem{observation}[theorem]{Observation}
44 |
45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
47 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
48 |
49 | \title{High-level Programming Languages}
50 | \subtitle{Apache Pig and Pig Latin}
51 | \author{Pietro Michiardi}
52 | \institute{Eurecom}
53 | \date
54 |
55 |
56 | \begin{document}
57 |
58 | \begin{frame}
59 | \titlepage
60 | \end{frame}
61 |
62 |
63 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
64 | \section{Apache Pig}
65 |
66 | \begin{frame}
67 | \begin{colorblock}{blue}{lightblue}{ }
68 | \begin{center}
69 | \Huge \textbf{\texttt{Apache Pig}}
70 | \end{center}
71 | \end{colorblock}
72 |
73 | \begin{itemize}
74 | \item[] See also the 4 segments on Pig on Coursera:
75 | \item[] \url{https://www.coursera.org/course/datasci}
76 | \end{itemize}
77 |
78 | \end{frame}
79 |
80 | \input{./pig-overview}
81 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
82 |
83 |
84 |
85 |
86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 | \section{References}
88 |
89 | \begin{frame}
90 | \begin{colorblock}{blue}{lightblue}{ }
91 | \begin{center}
92 | \Huge \textbf{\texttt{References}}
93 | \end{center}
94 | \end{colorblock}
95 | \end{frame}
96 |
97 | \begin{frame}[allowframebreaks]{References}
98 | \bibliographystyle{plain}
99 | \bibliography{references}
100 | \end{frame}
101 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
102 |
103 | \end{document}
104 |
--------------------------------------------------------------------------------
/pig/references.bib:
--------------------------------------------------------------------------------
1 | % This file was created with JabRef 2.7.2.
2 | % Encoding: MacRoman
3 |
4 | @INPROCEEDINGS{Brewer01,
5 |   author = {Eric Brewer},
6 |   title = {Lessons from giant-scale services},
7 |   booktitle = {IEEE Internet Computing},
8 |   year = {2001}
9 | }
10 |
11 | @INPROCEEDINGS{Chang2006,
12 |   author = {Fay Chang and Jeffrey Dean and Sanjay Ghemawat and Wilson C. Hsieh
13 |     and Deborah A. Wallach and Mike Burrows and Tushar Chandra and Andrew
14 |     Fikes and Robert E. Gruber},
15 |   title = {Bigtable: A Distributed Storage System for Structured Data},
16 |   booktitle = {Proc. of USENIX OSDI},
17 |   year = {2006},
18 |   owner = {michiard},
19 |   timestamp = {2012.02.21}
20 | }
21 |
22 | @INPROCEEDINGS{Dean2004,
23 |   author = {Jeffrey Dean and Sanjay Ghemawat},
24 |   title = {MapReduce: Simplified Data Processing on Large Clusters},
25 |   booktitle = {Proc. of USENIX OSDI},
26 |   year = {2004},
27 |   owner = {michiard},
28 |   timestamp = {2012.02.21}
29 | }
30 |
31 | @BOOK{George2011,
32 |   title = {HBase, The Definitive Guide},
33 |   publisher = {O'Reilly},
34 |   year = {2011},
35 |   author = {Lars George},
36 |   owner = {michiard},
37 |   timestamp = {2012.02.21}
38 | }
39 |
40 | @INPROCEEDINGS{Ghemawat2003,
41 |   author = {Sanjay Ghemawat and Howard Gobioff and Shun-Tak Leung},
42 |   title = {The Google File System},
43 |   booktitle = {Proc. of ACM SOSP},
44 |   year = {2003},
45 |   owner = {michiard},
46 |   timestamp = {2012.02.21}
47 | }
48 |
49 | @UNPUBLISHED{ONeil1996,
50 |   author = {Patrick O'Neil and Edward Cheng and Dieter Gawlick and Elizabeth
51 |     O'Neil},
52 |   title = {The Log-Structured Merge-Tree (LSM-Tree)},
53 |   year = {1996},
54 |   owner = {michiard},
55 |   timestamp = {2012.02.21}
56 | }
57 |
58 | @INPROCEEDINGS{Olston2008,
59 |   author = {C. Olston and B. Reed and U. Srivastava and R. Kumar and A. Tomkins},
60 |   title = {Pig Latin: A Not-So-Foreign Language for Data Processing},
61 |   booktitle = {Proc. of ACM SIGMOD},
62 |   year = {2008},
63 |   owner = {michiard},
64 |   timestamp = {2012.03.01}
65 | }
66 |
67 | @MISC{Salmen09,
68 |   author = {D. Salmen},
69 |   title = {Cloud Data Structure Diagramming Techniques and Design Patterns},
70 |   howpublished = {\url{https://www.data-tactics-corp.com/index.php/component/jdownloads/finish/22-white-papers/68-cloud-data-structure-diagramming}},
71 |   year = {2009},
72 |   owner = {michiard},
73 |   timestamp = {2012.02.21}
74 | }
75 |
76 | @BOOK{White2010,
77 |   title = {Hadoop, The Definitive Guide},
78 |   publisher = {O'Reilly, Yahoo},
79 |   year = {2010},
80 |   author = {Tom White},
81 |   owner = {michiard},
82 |   timestamp = {2011.04.29}
83 | }
84 |
85 | @ELECTRONIC{b+tree,
86 |   title = {B+ Tree},
87 |   howpublished = {\url{http://en.wikipedia.org/wiki/B%2B_tree}},
88 |   owner = {michiard},
89 |   timestamp = {2012.02.21}
90 | }
91 |
92 | @MISC{pig,
93 |   title = {Pig Wiki},
94 |   howpublished = {\url{http://wiki.apache.org/pig/}},
95 |   owner = {michiard},
96 |   timestamp = {2012.03.01}
97 | }
98 |
99 |
--------------------------------------------------------------------------------
/disa/references.bib:
--------------------------------------------------------------------------------
1 | % This file was created with JabRef 2.6.
2 | % Encoding: MacRoman
3 |
4 | @INPROCEEDINGS{banko01,
5 |   author = {Michele Banko and Eric Brill},
6 |   title = {Scaling to very very large corpora for natural language disambiguation},
7 |   booktitle = {Proc. of the 39th Annual Meeting of the Association for Computational
8 |     Linguistics (ACL)},
9 |   year = {2001}
10 | }
11 |
12 | @INPROCEEDINGS{barroso09,
13 |   author = {Luiz Andre Barroso and Urs Holzle},
14 |   title = {The Datacenter as a Computer: An Introduction to the Design of Warehouse-Scale
15 |     Machines},
16 |   year = {2009},
17 |   publisher = {Morgan \& Claypool Publishers}
18 | }
19 |
20 | @INPROCEEDINGS{Bianchini2005,
21 |   author = {Monica Bianchini and Marco Gori and Franco Scarselli},
22 |   title = {Inside PageRank},
23 |   booktitle = {ACM Transactions on Internet Technology},
24 |   year = {2005},
25 |   owner = {michiard},
26 |   timestamp = {2011.05.11}
27 | }
28 |
29 | @INPROCEEDINGS{hamilton09,
30 |   author = {James Hamilton},
31 |   title = {Cooperative Expendable Micro-Slice Servers (CEMS): Low cost, low
32 |     power servers for Internet-scale services},
33 |   booktitle = {Proc. of the 4th Biennial Conference on Innovative Data Systems Research
34 |     (CIDR)},
35 |   year = {2009}
36 | }
37 |
38 | @INPROCEEDINGS{hey09,
39 |   author = {Tony Hey and Stewart Tansley and Kristin Tolle},
40 |   title = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
41 |   year = {2009},
42 |   publisher = {Microsoft Research}
43 | }
44 |
45 | @INPROCEEDINGS{Lattanzi2011,
46 |   author = {Silvio Lattanzi and Benjamin Moseley and Siddharth Suri and Sergei
47 |     Vassilvitskii},
48 |   title = {Filtering: a Method for Solving Graph Problems in MapReduce},
49 |   booktitle = {Proc. of SPAA},
50 |   year = {2011},
51 |   owner = {michiard},
52 |   timestamp = {2011.05.11}
53 | }
54 |
55 | @INPROCEEDINGS{Leskovec2005,
56 |   author = {Jure Leskovec and Jon Kleinberg and Christos Faloutsos},
57 |   title = {Graphs over time: Densification laws, shrinking diameters and possible
58 |     explanations},
59 |   booktitle = {Proc. of SIGKDD},
60 |   year = {2005},
61 |   owner = {michiard},
62 |   timestamp = {2011.05.11}
63 | }
64 |
65 | @INPROCEEDINGS{Page1999,
66 |   author = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
67 |   title = {The PageRank citation ranking: Bringing order to the Web},
68 |   booktitle = {Stanford Digital Library Working Paper},
69 |   year = {1999},
70 |   owner = {michiard},
71 |   timestamp = {2011.05.11}
72 | }
73 |
74 | @INPROCEEDINGS{shvachko10,
75 |   author = {Konstantin Shvachko and Hairong Kuang and Sanjay Radia and Robert
76 |     Chansler},
77 |   title = {The Hadoop Distributed File System},
78 |   booktitle = {Proc. of the 26th IEEE Symposium on Massive Storage Systems and Technologies
79 |     (MSST)},
80 |   year = {2010},
81 |   publisher = {IEEE}
82 | }
83 |
84 | @BOOK{hadoop_book,
85 |   title = {Hadoop, The Definitive Guide},
86 |   publisher = {O'Reilly, Yahoo},
87 |   year = {2010},
88 |   author = {Tom White},
89 |   owner = {michiard},
90 |   timestamp = {2011.04.29}
91 | }
92 |
93 | @MISC{AIRWeb,
94 |   title = {Adversarial Information Retrieval Workshop},
95 |   owner = {michiard},
96 |   timestamp = {2011.05.11}
97 | }
98 |
99 |
--------------------------------------------------------------------------------
/hadoop/references.bib:
--------------------------------------------------------------------------------
1 | % This file was created with JabRef 2.6.
2 | % Encoding: MacRoman
3 |
4 | @INPROCEEDINGS{banko01,
5 |   author = {Michele Banko and Eric Brill},
6 |   title = {Scaling to very very large corpora for natural language disambiguation},
7 |   booktitle = {Proc. of the 39th Annual Meeting of the Association for Computational
8 |     Linguistics (ACL)},
9 |   year = {2001}
10 | }
11 |
12 | @INPROCEEDINGS{barroso09,
13 |   author = {Luiz Andre Barroso and Urs Holzle},
14 |   title = {The Datacenter as a Computer: An Introduction to the Design of Warehouse-Scale
15 |     Machines},
16 |   year = {2009},
17 |   publisher = {Morgan \& Claypool Publishers}
18 | }
19 |
20 | @INPROCEEDINGS{Bianchini2005,
21 |   author = {Monica Bianchini and Marco Gori and Franco Scarselli},
22 |   title = {Inside PageRank},
23 |   booktitle = {ACM Transactions on Internet Technology},
24 |   year = {2005},
25 |   owner = {michiard},
26 |   timestamp = {2011.05.11}
27 | }
28 |
29 | @INPROCEEDINGS{hamilton09,
30 |   author = {James Hamilton},
31 |   title = {Cooperative Expendable Micro-Slice Servers (CEMS): Low cost, low
32 |     power servers for Internet-scale services},
33 |   booktitle = {Proc. of the 4th Biennial Conference on Innovative Data Systems Research
34 |     (CIDR)},
35 |   year = {2009}
36 | }
37 |
38 | @INPROCEEDINGS{hey09,
39 |   author = {Tony Hey and Stewart Tansley and Kristin Tolle},
40 |   title = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
41 |   year = {2009},
42 |   publisher = {Microsoft Research}
43 | }
44 |
45 | @INPROCEEDINGS{Lattanzi2011,
46 |   author = {Silvio Lattanzi and Benjamin Moseley and Siddharth Suri and Sergei
47 |     Vassilvitskii},
48 |   title = {Filtering: a Method for Solving Graph Problems in MapReduce},
49 |   booktitle = {Proc. of SPAA},
50 |   year = {2011},
51 |   owner = {michiard},
52 |   timestamp = {2011.05.11}
53 | }
54 |
55 | @INPROCEEDINGS{Leskovec2005,
56 |   author = {Jure Leskovec and Jon Kleinberg and Christos Faloutsos},
57 |   title = {Graphs over time: Densification laws, shrinking diameters and possible
58 |     explanations},
59 |   booktitle = {Proc. of SIGKDD},
60 |   year = {2005},
61 |   owner = {michiard},
62 |   timestamp = {2011.05.11}
63 | }
64 |
65 | @INPROCEEDINGS{Page1999,
66 |   author = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
67 |   title = {The PageRank citation ranking: Bringing order to the Web},
68 |   booktitle = {Stanford Digital Library Working Paper},
69 |   year = {1999},
70 |   owner = {michiard},
71 |   timestamp = {2011.05.11}
72 | }
73 |
74 | @INPROCEEDINGS{shvachko10,
75 |   author = {Konstantin Shvachko and Hairong Kuang and Sanjay Radia and Robert
76 |     Chansler},
77 |   title = {The Hadoop Distributed File System},
78 |   booktitle = {Proc.
of the 26th IEEE Symposium on Massive Storage Systems and Technologies 79 | (MSST)}, 80 | year = {2010}, 81 | publisher = {IEEE} 82 | } 83 | 84 | @BOOK{hadoop_book, 85 | title = {Hadoop, The Definitive Guide}, 86 | publisher = {O'Reilly, Yahoo}, 87 | year = {2010}, 88 | author = {Tom White}, 89 | owner = {michiard}, 90 | timestamp = {2011.04.29} 91 | } 92 | 93 | @MISC{AIRWeb, 94 | title = {Adversarial Information Retrieval Workshop}, 95 | owner = {michiard}, 96 | timestamp = {2011.05.11} 97 | } 98 | 99 | -------------------------------------------------------------------------------- /relal/relal.tex: -------------------------------------------------------------------------------- 1 | \documentclass{beamer} 2 | 3 | \usepackage{subfigure} 4 | \usepackage[english]{babel} 5 | \usepackage[latin1]{inputenc} 6 | \usepackage{times} 7 | \usepackage[T1]{fontenc} 8 | \usepackage{color} 9 | 10 | \usepackage{algorithm} 11 | \usepackage{algorithmicx} 12 | \usepackage[noend]{algpseudocode} 13 | 14 | \usetheme[secheader]{Boadilla} 15 | \usefonttheme[onlylarge]{structurebold} 16 | \setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries} 17 | \setbeamertemplate{navigation symbols}{} 18 | \setbeamertemplate{mini frames}[box] 19 | \setbeamertemplate{sections/subsections in toc}[square] 20 | \setbeamertemplate{blocks}[rounded][shadow=true] 21 | \setbeamertemplate{bibliography item}[text] 22 | 23 | \setbeamercolor{lightorange}{fg=black,bg=orange!40} 24 | \setbeamercolor{lightblue}{fg=black,bg=blue!30} 25 | 26 | \newenvironment{colorblock}[2] 27 | {\setbeamercolor{item}{fg=#1,bg=#1}\begin{beamerboxesrounded}[upper=#1,lower=#2,shadow=true]} 28 | {\end{beamerboxesrounded}} 29 | 30 | 31 | 32 | % Setup TikZ 33 | 34 | \usepackage{tikz} 35 | \usetikzlibrary{arrows} 36 | \tikzstyle{block}=[draw opacity=0.7,line width=1.4cm] 37 | 38 | 39 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | \newtheorem{observation}[theorem]{Observation} 44 | 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 47 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | 49 | \title{Relational Algebra and MapReduce} 50 | \subtitle{Towards High-level Programming Languages} 51 | \author{Pietro Michiardi} 52 | \institute{Eurecom} 53 | \date 54 | 55 | 56 | \begin{document} 57 | 58 | \begin{frame} 59 | \titlepage 60 | \end{frame} 61 | 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | \section{Sources and Acks} 64 | 65 | \begin{frame} 66 | \begin{itemize} 67 | \item Jimmy Lin and Chris Dyer, ``Data-Intensive Text Processing with MapReduce,'' Morgan \& Claypool Publishers, 2010. \url{http://lintool.github.io/MapReduceAlgorithms/} 68 | 69 | \item[] 70 | 71 | \item Tom White, ``Hadoop, The Definitive Guide,'' O'Reilly / Yahoo Press, 2012 72 | 73 | \item[] 74 | 75 | \item Anand Rajaraman, Jeffrey D. 
Ullman, Jure Leskovec, ``Mining of Massive Datasets'', Cambridge University Press, 2013 76 | \end{itemize} 77 | \end{frame} 78 | 79 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 80 | 81 | 82 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 83 | \section{Relational Algebra} 84 | 85 | \begin{frame} 86 | \begin{colorblock}{blue}{lightblue}{ } 87 | \begin{center} 88 | \Huge \textbf{\texttt{Relational Algebra and MapReduce}} 89 | \end{center} 90 | \end{colorblock} 91 | \end{frame} 92 | 93 | \input{./relational} 94 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 95 | 96 | 97 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 98 | % \section{References} 99 | 100 | % \begin{frame} 101 | % \begin{colorblock}{blue}{lightblue}{ } 102 | % \begin{center} 103 | % \Huge \textbf{\texttt{References}} 104 | % \end{center} 105 | % \end{colorblock} 106 | % \end{frame} 107 | 108 | % \begin{frame}[allowframebreaks]{References} 109 | % \bibliographystyle{plain} 110 | % \bibliography{references} 111 | % \end{frame} 112 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 113 | 114 | \end{document} 115 | -------------------------------------------------------------------------------- /hadoop/hadoop.tex: -------------------------------------------------------------------------------- 1 | \documentclass{beamer} 2 | 3 | \usepackage[lined,ruled]{algorithm2e} 4 | \usepackage{subfigure} 5 | \usepackage[english]{babel} 6 | \usepackage[latin1]{inputenc} 7 | \usepackage{times} 8 | \usepackage[T1]{fontenc} 9 | \usepackage{color} 10 | 11 | \usetheme[secheader]{Boadilla} 12 | \usefonttheme[onlylarge]{structurebold} 13 | \setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries} 14 | \setbeamertemplate{navigation symbols}{} 15 | \setbeamertemplate{mini frames}[box] 16 | \setbeamertemplate{sections/subsections in toc}[square] 17 | \setbeamertemplate{blocks}[rounded][shadow=true] 18 | \setbeamertemplate{bibliography item}[text] 19 | 20 | \setbeamercolor{lightorange}{fg=black,bg=orange!40} 21 | \setbeamercolor{lightblue}{fg=black,bg=blue!30} 22 | 23 | \newenvironment{colorblock}[2] 24 | {\setbeamercolor{item}{fg=#1,bg=#1}\begin{beamerboxesrounded}[upper=#1,lower=#2,shadow=true]} 25 | {\end{beamerboxesrounded}} 26 | 27 | 28 | 29 | % Setup TikZ 30 | 31 | \usepackage{tikz} 32 | \usetikzlibrary{arrows} 33 | \tikzstyle{block}=[draw opacity=0.7,line width=1.4cm] 34 | 35 | 36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | 40 | \newtheorem{observation}[theorem]{Observation} 41 | 42 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 43 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 44 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 45 | 46 | \title{Hadoop Internals} 47 | % \subtitle{Introduction} 48 | \author{Pietro Michiardi} 49 | \institute{Eurecom} 50 | \date 51 | 52 | 53 | \begin{document} 54 | 55 | \begin{frame} 56 | \titlepage 57 | \end{frame} 58 | 59 | 60 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | \section{Introduction and Recap} 62 | 63 | \begin{frame} 64 | \begin{colorblock}{blue}{lightblue}{ } 65 | \begin{center} 66 | \Huge \textbf{\texttt{Introduction and Recap}} 67 | \end{center} 68 | \end{colorblock} 69 | \end{frame} 70 | 71 | \input{./introduction} 72 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | 74 | 75 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | \section{HDFS} 77 | 78 | \begin{frame} 79 | \begin{colorblock}{blue}{lightblue}{ } 80 | 
\begin{center} 81 | \Huge \textbf{\texttt{Hadoop Distributed File-System}} 82 | \end{center} 83 | \end{colorblock} 84 | \end{frame} 85 | 86 | \input{./hdfs} 87 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 88 | 89 | 90 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 91 | \section{Hadoop MapReduce} 92 | 93 | \begin{frame} 94 | \begin{colorblock}{blue}{lightblue}{ } 95 | \begin{center} 96 | \Huge \textbf{\texttt{Hadoop MapReduce}} 97 | \end{center} 98 | \end{colorblock} 99 | \end{frame} 100 | 101 | \input{./mapreduce} 102 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 103 | 104 | 105 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 106 | \section{Hadoop I/O} 107 | 108 | \begin{frame} 109 | \begin{colorblock}{blue}{lightblue}{ } 110 | \begin{center} 111 | \Huge \textbf{\texttt{Hadoop I/O}} 112 | \end{center} 113 | \end{colorblock} 114 | \end{frame} 115 | 116 | \input{./io} 117 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 118 | 119 | 120 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 121 | \section{Hadoop Deployments} 122 | 123 | \begin{frame} 124 | \begin{colorblock}{blue}{lightblue}{ } 125 | \begin{center} 126 | \Huge \textbf{\texttt{Hadoop Deployments}} 127 | \end{center} 128 | \end{colorblock} 129 | \end{frame} 130 | 131 | \input{./deployments} 132 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 133 | 134 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 135 | \section{References} 136 | 137 | \begin{frame} 138 | \begin{colorblock}{blue}{lightblue}{ } 139 | \begin{center} 140 | \Huge \textbf{\texttt{References}} 141 | \end{center} 142 | \end{colorblock} 143 | \end{frame} 144 | 145 | \begin{frame}[allowframebreaks]{References} 146 | \bibliographystyle{plain} 147 | \bibliography{references} 148 | \end{frame} 149 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 150 | 151 | \end{document} 152 | -------------------------------------------------------------------------------- /disa/disa.tex: -------------------------------------------------------------------------------- 1 | \documentclass{beamer} 2 | 3 | \usepackage{subfigure} 4 | \usepackage[english]{babel} 5 | \usepackage[latin1]{inputenc} 6 | \usepackage{times} 7 | \usepackage[T1]{fontenc} 8 | \usepackage{color} 9 | 10 | \usepackage{algorithm} 11 | \usepackage{algorithmicx} 12 | \usepackage[noend]{algpseudocode} 13 | 14 | \usetheme[secheader]{Boadilla} 15 | \usefonttheme[onlylarge]{structurebold} 16 | \setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries} 17 | \setbeamertemplate{navigation symbols}{} 18 | \setbeamertemplate{mini frames}[box] 19 | \setbeamertemplate{sections/subsections in toc}[square] 20 | \setbeamertemplate{blocks}[rounded][shadow=true] 21 | \setbeamertemplate{bibliography item}[text] 22 | 23 | \setbeamercolor{lightorange}{fg=black,bg=orange!40} 24 | \setbeamercolor{lightblue}{fg=black,bg=blue!30} 25 | 26 | \newenvironment{colorblock}[2] 27 | {\setbeamercolor{item}{fg=#1,bg=#1}\begin{beamerboxesrounded}[upper=#1,lower=#2,shadow=true]} 28 | {\end{beamerboxesrounded}} 29 | 30 | 31 | 32 | % Setup TikZ 33 | 34 | \usepackage{tikz} 35 | \usetikzlibrary{arrows} 36 | \tikzstyle{block}=[draw opacity=0.7,line width=1.4cm] 37 | 38 | 39 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | \newtheorem{observation}[theorem]{Observation} 44 | 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 47 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | 49 | \title{Scalable Algorithm Design} 50 | \subtitle{The MapReduce Programming Model} 51 | \author{Pietro Michiardi} 52 | \institute{Eurecom} 53 | \date 54 | 55 | 56 | \begin{document} 57 | 58 | \begin{frame} 59 | \titlepage 60 | \end{frame} 61 | 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | \section{Sources and Acks} 64 | 65 | \begin{frame} 66 | \begin{itemize} 67 | \item Jimmy Lin and Chris Dyer, ``Data-Intensive Text Processing with MapReduce,'' Morgan \& Claypool Publishers, 2010. \url{http://lintool.github.io/MapReduceAlgorithms/} 68 | 69 | \item[] 70 | 71 | \item Tom White, ``Hadoop, The Definitive Guide,'' O'Reilly / Yahoo Press, 2012 72 | 73 | \item[] 74 | 75 | \item Anand Rajaraman, Jeffrey D. Ullman, Jure Leskovec, ``Mining of Massive Datasets'', Cambridge University Press, 2013 76 | \end{itemize} 77 | \end{frame} 78 | 79 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 80 | 81 | 82 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 83 | \section{Key Principles} 84 | 85 | \begin{frame} 86 | \begin{colorblock}{blue}{lightblue}{ } 87 | \begin{center} 88 | \Huge \textbf{\texttt{Key Principles}} 89 | \end{center} 90 | \end{colorblock} 91 | \end{frame} 92 | 93 | \input{./principles} 94 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 95 | 96 | 97 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 98 | \section{The Programming Model} 99 | 100 | \begin{frame} 101 | \begin{colorblock}{blue}{lightblue}{ } 102 | \begin{center} 103 | \Huge \textbf{\texttt{The Programming Model}} 104 | \end{center} 105 | \end{colorblock} 106 | \end{frame} 107 | 108 | \input{./programming_model} 109 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 110 | 111 | 112 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 113 | \section{Basic Design Patterns} 114 | 115 | \begin{frame} 116 | \begin{colorblock}{blue}{lightblue}{ } 117 | \begin{center} 118 | \Huge \textbf{\texttt{Basic Design Patterns}} 119 | \end{center} 120 | \end{colorblock} 121 | \end{frame} 122 | 123 | \input{./design_patterns.tex} 124 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 125 | 126 | 127 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 128 | % \section{Graph Algorithms} 129 | 130 | % \begin{frame} 131 | % \begin{colorblock}{blue}{lightblue}{ } 132 | % \begin{center} 133 | % \Huge \textbf{\texttt{Graph Algorithms [Optional]}} 134 | % \end{center} 135 | % \end{colorblock} 136 | % \end{frame} 137 | 138 | % \input{./graph_algorithms} 139 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 140 | 141 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 142 | \section{References} 143 | 144 | \begin{frame} 145 | \begin{colorblock}{blue}{lightblue}{ } 146 | \begin{center} 147 | \Huge \textbf{\texttt{References}} 148 | \end{center} 149 | \end{colorblock} 150 | \end{frame} 151 | 152 | \begin{frame}[allowframebreaks]{References} 153 | \bibliographystyle{plain} 154 | \bibliography{references} 155 | \end{frame} 156 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 157 | 158 | \end{document} 159 | -------------------------------------------------------------------------------- /hadoop/io.tex: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | \subsection{Technical details} 4 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6 |
7 |
8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
9 | \frame {\frametitle{I/O operations in Hadoop}
10 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
11 | \begin{itemize}
12 | \item \textbf{Reading and writing data}
13 | \begin{itemize}
14 | \item From/to HDFS
15 | \item From/to local disk drives
16 | \item Across machines (inter-process communication)
17 | \end{itemize}
18 |
19 | \vspace{20pt}
20 |
21 | \item \textbf{Customized tools for large amounts of data}
22 | \begin{itemize}
23 | \item Hadoop does not use Java native classes
24 | \item Allows flexibility for dealing with custom data (e.g. binary)
25 | \end{itemize}
26 |
27 | \vspace{20pt}
28 |
29 | \item \textbf{What's next}
30 | \begin{itemize}
31 | \item Overview of what Hadoop offers
32 | \item For in-depth coverage, see \cite{hadoop_book}
33 | \end{itemize}
34 |
35 | \end{itemize}
36 |
37 | }
38 |
39 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
40 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
41 | \subsection{Data Integrity}
42 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
43 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
44 |
45 |
46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
47 | \frame {\frametitle{Data Integrity}
48 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
49 | \begin{itemize}
50 | \item \textbf{Every I/O operation on disks or the network may
51 | corrupt data}
52 | \begin{itemize}
53 | \item Users expect data not to be corrupted during storage or
54 | processing
55 | \item Data integrity usually achieved with a simple \textbf{checksum} mechanism
56 | \end{itemize}
57 |
58 | \vspace{40pt}
59 |
60 | \item \textbf{HDFS transparently checksums all data during I/O}
61 | \begin{itemize}
62 | \item HDFS makes sure that storage overhead is roughly 1\%
63 | \item \texttt{DataNodes} are in charge of checksumming
64 | \begin{itemize}
65 | \item With replication, the last replica performs the check
66 | \item Checksums are timestamped and logged for
67 | {\color{red}statistics on disks}
68 | \end{itemize}
69 | \item Checksumming is also run periodically in a separate thread
70 | \begin{itemize}
71 | \item Note that thanks to replication, {\color{red}error
72 | correction} is possible in addition to detection
73 | \end{itemize}
74 | \end{itemize}
75 |
76 |
77 | \end{itemize}
78 | }
79 |
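A back-of-the-envelope illustration of the chunk-level checksumming described in the Data Integrity frame above: a 4-byte checksum per 512-byte chunk is what keeps the storage overhead under 1%. The 512-byte chunk size matches the usual dfs.bytes-per-checksum default, but plain CRC32 from the JDK stands in here for the CRC32C that HDFS actually uses, so this is a sketch of the idea rather than HDFS code:

```java
import java.util.zip.CRC32;

public class ChunkChecksum {
  static final int BYTES_PER_CHECKSUM = 512;   // assumed default, as in HDFS

  // Compute one CRC per fixed-size chunk of a data block.
  static long[] checksums(byte[] block) {
    int chunks = (block.length + BYTES_PER_CHECKSUM - 1) / BYTES_PER_CHECKSUM;
    long[] sums = new long[chunks];
    CRC32 crc = new CRC32();
    for (int i = 0; i < chunks; i++) {
      crc.reset();
      int off = i * BYTES_PER_CHECKSUM;
      int len = Math.min(BYTES_PER_CHECKSUM, block.length - off);
      crc.update(block, off, len);
      sums[i] = crc.getValue();   // stored alongside the data, re-verified on read
    }
    return sums;
  }

  public static void main(String[] args) {
    byte[] block = new byte[64 * 1024];   // stand-in for (part of) an HDFS block
    long[] onWrite = checksums(block);
    block[1234] ^= 0x1;                   // simulate a single-bit corruption
    long[] onRead = checksums(block);
    for (int i = 0; i < onWrite.length; i++)
      if (onWrite[i] != onRead[i])        // detection; with replication, the
        System.out.println("corrupt chunk " + i);  // good copy enables correction
  }
}
```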
80 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
81 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
82 | \subsection{Data Compression}
83 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
85 |
86 |
87 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
88 | \frame {\frametitle{Compression}
89 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
90 | \begin{itemize}
91 | \item \textbf{Why use compression}
92 | \begin{itemize}
93 | \item Reduce storage requirements
94 | \item Speed up data transfers (across the network or from disks)
95 | \end{itemize}
96 |
97 | \vspace{20pt}
98 |
99 | \item \textbf{Compression and Input Splits}
100 | \begin{itemize}
101 | \item IMPORTANT: use compression that supports
102 | {\color{red}splitting} (e.g. bzip2)
103 | \end{itemize}
104 |
105 | \vspace{20pt}
106 |
107 | \item \textbf{Splittable files, Example 1}
108 | \begin{itemize}
109 | \item Consider an uncompressed file of 1GB
110 | \item HDFS will split it into 16 blocks, 64MB each, to be
111 | processed by separate Mappers
112 | \end{itemize}
113 | \end{itemize}
114 | }
115 |
116 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117 | \frame {\frametitle{Compression}
118 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
119 | \begin{itemize}
120 | \item \textbf{Unsplittable files, Example 2 (gzip)}
121 | \begin{itemize}
122 | \item Consider a compressed file of 1GB
123 | \item HDFS will split it into 16 blocks of 64MB each
124 | \item Creating an \texttt{InputSplit} for each block will not
125 | work, since it is not possible to read at an arbitrary point
126 | \end{itemize}
127 |
128 | \vspace{20pt}
129 |
130 | \item \textbf{What's the problem?}
131 | \begin{itemize}
132 | \item This forces MapReduce to treat the file as a
133 | {\color{red}single split}
134 | \item Then, a single Mapper is launched by the framework
135 | \item For this Mapper, only 1/16th of the data is local, the rest comes from
136 | the network
137 | \end{itemize}
138 |
139 | \vspace{20pt}
140 |
141 | \item \textbf{Which compression format to use?}
142 | \begin{itemize}
143 | \item Use bzip2
144 | \item Otherwise, use \texttt{SequenceFiles}
145 | \item See Chapter 4 of \cite{hadoop_book}
146 | \end{itemize}
147 |
148 | \end{itemize}
149 | }
150 |
151 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
152 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
153 | \subsection{Serialization}
154 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
156 |
157 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
158 | \frame {\frametitle{Serialization}
159 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
160 | \begin{itemize}
161 | \item \textbf{Transforms structured objects into a byte stream}
162 | \begin{itemize}
163 | \item For transmission over the network: {\color{red}Hadoop uses RPC}
164 | \item For persistent storage on disks
165 | \end{itemize}
166 |
167 | \vspace{20pt}
168 |
169 | \item \textbf{Hadoop uses its own serialization format,
170 | \texttt{Writable}}
171 | \begin{itemize}
172 | \item Comparison of types is crucial (Shuffle and Sort phase):
173 | Hadoop provides a custom \texttt{RawComparator}, which avoids
174 | deserialization
175 | \item Custom \texttt{Writable} for having full control on the
176 | binary representation of data
177 | \item Also ``external'' frameworks are allowed: enter \textbf{Avro}
178 | \end{itemize}
179 |
180 | \vspace{20pt}
181 |
182 | \item \textbf{Fixed-length or variable-length encoding?}
183 | \begin{itemize}
184 | \item Fixed-length: when the distribution of values is uniform
185 | \item Variable-length: when the distribution of values is not uniform
186 | \end{itemize}
187 |
188 | \end{itemize}
189 | }
190 |
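A hypothetical custom Writable along the lines sketched in the Serialization frame above: write() and readFields() give full control over the binary layout, and implementing WritableComparable makes the type usable as a MapReduce key in the Shuffle and Sort phase. The PageVisit class and its fields are invented for illustration:

```java
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class PageVisit implements WritableComparable<PageVisit> {
  private long pageId;    // fixed-length encoding: value distribution assumed uniform
  private int visits;

  public void set(long pageId, int visits) {
    this.pageId = pageId;
    this.visits = visits;
  }

  @Override public void write(DataOutput out) throws IOException {
    out.writeLong(pageId);   // serialize fields in a fixed, documented order
    out.writeInt(visits);
  }

  @Override public void readFields(DataInput in) throws IOException {
    pageId = in.readLong();  // deserialize in exactly the same order
    visits = in.readInt();
  }

  @Override public int compareTo(PageVisit o) {
    return Long.compare(pageId, o.pageId);   // defines the sort order of keys
  }

  @Override public int hashCode() {
    return Long.hashCode(pageId);   // used by the default HashPartitioner
  }
}
```

For the fastest possible sorting one would additionally register a RawComparator that compares the first eight serialized bytes directly, avoiding deserialization, as the frame above suggests.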
191 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
192 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
193 | \subsection{Sequence Files}
194 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
195 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
196 |
197 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
198 | \frame {\frametitle{Sequence Files}
199 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
200 | \begin{itemize}
201 | \item \textbf{Specialized data structure to hold custom input data}
202 | \begin{itemize}
203 | \item Using blobs of binaries is not efficient
204 | \end{itemize}
205 |
206 | \vspace{10pt}
207 |
208 | \item \textbf{\texttt{SequenceFiles}}
209 | \begin{itemize}
210 | \item Provide a persistent data structure for binary key-value
211 | pairs
212 | \item Also work well as containers for smaller files, so that the
213 | framework is happier (remember: few large files are better than
214 | lots of small files)
215 | \item They come with the \texttt{sync()} method to introduce sync
216 | points that help manage \texttt{InputSplits} for MapReduce
217 | \end{itemize}
218 |
219 | \begin{center}
220 | \framebox{\includegraphics[scale=0.2]{./Figures/sequencefiles}}
221 | \end{center}
222 |
223 |
224 | \end{itemize}
225 | }
226 |
227 |
--------------------------------------------------------------------------------
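A small sketch of writing and re-reading binary key-value pairs with the classic SequenceFile.createWriter(...) / SequenceFile.Reader(...) API covered in Chapter 4 of [hadoop_book]; the file name pairs.seq is made up for the example:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("pairs.seq");   // hypothetical output file

    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class);
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      for (int i = 0; i < 100; i++) {
        key.set(i);
        value.set("record-" + i);
        writer.append(key, value);   // binary key-value pairs; the writer also
      }                              // embeds sync markers in the stream periodically
    } finally {
      writer.close();
    }

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value))          // sync points let a reader (and thus
        System.out.println(key + "\t" + value);  // an InputSplit) resynchronize at
    } finally {                                  // an arbitrary offset in the file
      reader.close();
    }
  }
}
```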
--------------------------------------------------------------------------------
/disa/principles.tex:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Scale out, not up!
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{For data-intensive workloads, a large number of commodity servers is preferred over a small number of high-end servers}
\begin{itemize}
\item The cost of super-computers does not grow linearly with their capacity
\item But datacenter efficiency is a difficult problem to solve \cite{barroso09, hamilton09}
\end{itemize}

\vspace{20pt}

\item \textbf{Some numbers ($\sim$ 2012):}
\begin{itemize}
\item Data processed by Google every day: 100+ PB
\item Data processed by Facebook every day: 10+ PB
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Implications of Scaling Out}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Processing data is quick, I/O is very slow}
\begin{itemize}
\item 1 HDD = 75 MB/sec
\item 1000 HDDs = 75 GB/sec
\end{itemize}

\vspace{20pt}

\item \textbf{Sharing vs. Shared nothing}:
\begin{itemize}
\item Sharing: manage a common/global state
\item Shared nothing: {\color{red}independent} entities, no common state
\end{itemize}

\vspace{20pt}

\item \textbf{Sharing is difficult}:
\begin{itemize}
\item Synchronization, deadlocks
\item Finite bandwidth to access data from a SAN
\item Temporal dependencies are complicated (restarts)
\end{itemize}
\end{itemize}



}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Failures are the norm, not the exception
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{itemize}
\item LANL data [DSN 2006]
\begin{itemize}
\item Data for $\sim$5000 machines, over 9 years
\item Hardware: 60\%, Software: 20\%, Network: 5\%
\end{itemize}

\vspace{20pt}

\item DRAM error analysis [Sigmetrics 2009]
\begin{itemize}
\item Data for 2.5 years
\item 8\% of DIMMs affected by errors
\end{itemize}

\vspace{20pt}

\item Disk drive failure analysis [FAST 2007]
\begin{itemize}
\item Utilization and temperature are major causes of failures
\end{itemize}

\vspace{20pt}

\item Amazon Web Service(s) failures [Several!]
\begin{itemize}
\item Cascading effect
\end{itemize}

\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Implications of Failures}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Failures are part of everyday life}
\begin{itemize}
\item Mostly due to the scale and the shared environment
\end{itemize}

\vspace{20pt}

\item \textbf{Sources of Failures}
\begin{itemize}
\item Hardware / Software
\item Electrical, Cooling, ...
\item Unavailability of a resource due to overload
\end{itemize}

\vspace{20pt}

\item \textbf{Failure Types}
\begin{itemize}
\item Permanent
\item Transient
\end{itemize}
\end{itemize}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Move Processing to the Data
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{itemize}
\item \textbf{Drastic departure from the high-performance computing model}
\begin{itemize}
\item HPC: distinction between processing nodes and storage nodes
\item HPC: CPU-intensive tasks
\end{itemize}

\vspace{20pt}

\item \textbf{Data-intensive workloads}
\begin{itemize}
\item Generally not processor demanding
\item The network becomes the bottleneck
\item MapReduce assumes processing and storage nodes to be collocated
\item[$\to$] {\color{red}\textbf{Data Locality Principle}}
\end{itemize}

\vspace{20pt}

\item \textbf{Distributed filesystems are necessary}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Process Data Sequentially and Avoid Random Access
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{itemize}
\item \textbf{Data-intensive workloads}
\begin{itemize}
\item Relevant datasets are too large to fit in memory
\item Such data resides on disks
\end{itemize}

\vspace{20pt}

\item \textbf{Disk performance is a bottleneck}
\begin{itemize}
\item \textbf{Seek times} for random disk access are \textbf{the problem}
\begin{itemize}
\item Example: a 1 TB database with $10^{10}$ 100-byte records. Updating 1\% of the records with random accesses requires about 1 month; reading and rewriting the whole database sequentially would take about 1 day\footnote{From a post by Ted Dunning on the Hadoop mailing list}
\end{itemize}
\item Organize computation for sequential reads
\end{itemize}

\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Implications of Data Access Patterns}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{MapReduce is designed for:}
\begin{itemize}
\item {\color{red}\textbf{Batch processing}}
\item involving (mostly) {\color{red}\textbf{full scans}} of the data
\end{itemize}

\vspace{20pt}

\item \textbf{Typically, data is collected ``elsewhere'' and copied to the distributed filesystem}
\begin{itemize}
\item E.g.: Apache Flume, Apache Sqoop, $\cdots$
\end{itemize}

\vspace{20pt}

\item \textbf{Data-intensive applications}
\begin{itemize}
\item Read and process the whole Web (e.g. PageRank)
\item Read and process the whole Social Graph (e.g. LinkPrediction, a.k.a. ``friend suggest'')
\item Log analysis (e.g.
Network traces, Smart-meter data, $\cdots$)
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Hide System-level Details
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{itemize}
\item \textbf{Separate the \textit{what} from the \textit{how}}
\begin{itemize}
\item MapReduce abstracts away the ``distributed'' part of the system
\item Such details are handled by the framework
\end{itemize}

\vspace{20pt}

\item {\color{red}\textbf{BUT: }}\textbf{In-depth knowledge of the framework is key}
\begin{itemize}
\item Custom data readers/writers
\item Custom {\color{red}data partitioning}
\item Memory utilization
\end{itemize}

\vspace{20pt}

\item \textbf{Auxiliary components}
\begin{itemize}
\item Hadoop Pig
\item Hadoop Hive
\item Cascading/Scalding
\item ... and many many more!
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{
\begin{beamerboxesrounded}[shadow=true]{}
\begin{center}
Seamless Scalability
\end{center}
\end{beamerboxesrounded}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{itemize}
\item \textbf{We can define scalability along two dimensions}
\begin{itemize}
\item In terms of data: given twice the amount of data, the same algorithm should take no more than twice as long to run
\item In terms of resources: given a cluster twice the size, the same algorithm should take no more than half as long to run
\end{itemize}

\vspace{20pt}

\item \textbf{Embarrassingly parallel problems}
\begin{itemize}
\item Simple definition: independent ({\color{red}shared nothing}) computations on fragments of the dataset
\item How to decide whether a problem is embarrassingly parallel or not?
\end{itemize}

\vspace{20pt}

\item \textbf{MapReduce is a first attempt, not the final answer}
\end{itemize}
}
--------------------------------------------------------------------------------
/hadoop/hdfs.tex:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Motivations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Collocate data and computation!}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{As dataset sizes increase, more computing capacity is required for processing}

\vspace{20pt}

\item \textbf{As compute capacity grows, the link between the compute nodes and the storage nodes becomes a bottleneck}
\begin{itemize}
\item One could think of special-purpose interconnects for high-performance networking
\item This is often a costly solution, as cost does not increase linearly with performance
\end{itemize}

\vspace{20pt}

\item \textbf{{\color{red}Key idea}: abandon the separation between compute and storage nodes}
\begin{itemize}
\item This is exactly what happens in current implementations of the MapReduce framework
\item A distributed filesystem is not mandatory, but highly desirable
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{The Hadoop Distributed Filesystem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Large dataset(s) outgrowing the storage capacity of a single physical machine}
\begin{itemize}
\item Need to partition the data across a number of separate machines
\item Network-based system, with all its complications
\item Tolerate failures of machines
\end{itemize}

\vspace{20pt}

\item \textbf{Distributed filesystems are not new!}
\begin{itemize}
\item HDFS builds upon previous results, tailored to the specific requirements of MapReduce
\item {\color{red}Write once, read many workloads}
\item Does not handle concurrent writes, but allows replication
\item Optimized for throughput, not latency
\end{itemize}

\vspace{20pt}

\item \textbf{Hadoop Distributed Filesystem \cite{shvachko10, hadoop_book}}
\begin{itemize}
\item Very large files
\item Streaming data access
\item Commodity hardware
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Blocks}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{HDFS Blocks}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{(Big) files are broken into block-sized chunks}
\begin{itemize}
\item Blocks are big!
[64, 128] MB
\item Avoids problems related to metadata management
\item \texttt{NOTE}: A file that is smaller than a single block {\color{red}does not} occupy a full block's worth of underlying storage
\end{itemize}

\vspace{20pt}

\item \textbf{Blocks are stored on independent machines}
\begin{itemize}
\item Replicated across the local disks of nodes in the cluster
\item Reliability and parallel access
\item Replication is handled by the storage nodes themselves (similar to \textbf{chain replication})
\end{itemize}

\vspace{20pt}

\item \textbf{Why is a block so large?}
\begin{itemize}
\item Make transfer times larger than seek latency
\item E.g.: assume the seek time is 10ms and the transfer rate is 100 MB/s; if you want the seek time to be 1\% of the transfer time, then the block size should be 100MB
\end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Architecture}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{NameNodes and DataNodes}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{NameNode}}
\begin{itemize}
\item Keeps metadata {\color{red}in RAM}
\item Each block's metadata occupies roughly 150 bytes of memory
\item Without the \texttt{NameNode}, the filesystem cannot be used
\begin{itemize}
\item Persistence of metadata: synchronous and atomic writes to NFS
\end{itemize}
\item Maintains the overall {\color{red}health} of the filesystem
\end{itemize}

\vspace{20pt}

\item \textbf{\texttt{Secondary NameNode}}
\begin{itemize}
\item Merges the namespace image with the edit log
\item A useful trick to recover from a failure of the \texttt{NameNode} is to use the NFS copy of the metadata and switch the secondary to primary
\end{itemize}

\vspace{20pt}

\item \textbf{\texttt{DataNode}}
\begin{itemize}
\item They store data and talk to clients
\item They report periodically to the \texttt{NameNode} the list of blocks they hold
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Architecture Illustration}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure}[h]
\centering
\includegraphics[scale=0.36]{./Figures/hdfs}
\caption{Architecture sketch of HDFS operations.}
\label{fig:hdfs}
\end{figure}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Anatomy of a File Read}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The \texttt{NameNode} is only used to get block locations}
\begin{itemize}
\item Unresponsive \texttt{DataNodes} are discarded by clients
\item Batch reading of blocks is allowed
\end{itemize}

\vspace{20pt}

\item \textbf{``External'' clients}
\begin{itemize}
\item For each block, the \texttt{NameNode} returns
{\color{red}a set} of \texttt{DataNodes} holding a copy thereof
\item \texttt{DataNodes} are sorted according to their proximity to the client
\end{itemize}

\vspace{20pt}

\item \textbf{``MapReduce'' clients}
\begin{itemize}
\item \texttt{TaskTrackers} and \texttt{DataNodes} are {\color{red}collocated}
\item For each block, the \texttt{NameNode} usually\footnote{Exceptions exist due to stragglers.} returns the local \texttt{DataNode}
\end{itemize}

\vspace{20pt}

\item[$\to$] A client-side code sketch closes this section

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Anatomy of a File Write}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Details on replication}
\begin{itemize}
\item Clients ask the \texttt{NameNode} for a list of suitable \texttt{DataNodes}
\item This list forms a \texttt{pipeline}: the first \texttt{DataNode} stores a copy of a block, then forwards it to the second, and so on
\end{itemize}

\vspace{40pt}

\item \textbf{Replica Placement}
\begin{itemize}
\item {\color{red}Tradeoff} between reliability and bandwidth
\item Default placement:
\begin{itemize}
\item First copy on the ``same'' node as the client, second replica is {\color{red}off-rack}, third replica is on the same rack as the second but on a different node
\item Since Hadoop 0.21, replica placement can be customized
\end{itemize}
\end{itemize}

\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Chain Replication and Distance Metrics}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{columns}[c]
\column{5cm}
\framebox{\includegraphics[width=4cm]{./Figures/chain_replication}}
\column{5cm}
\framebox{\includegraphics[width=4cm]{./Figures/hadoop_distance}}
\end{columns}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{HDFS Coherency Model}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{``Read your writes'' is not guaranteed}
\begin{itemize}
\item The namespace is updated
\item Block contents may not be visible after a write is finished
\item Application design (other than MapReduce) should use \texttt{sync()} to force synchronization
\item \texttt{sync()} involves some overhead: tradeoff between robustness/consistency and throughput
\end{itemize}

\vspace{40pt}

\item \textbf{Multiple writers (for the {\color{red}same} block) are not supported}
\begin{itemize}
\item Instead, different blocks can be written in parallel (using MapReduce)
\end{itemize}

\end{itemize}
}
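%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{HDFS Client API: Reading a File (Sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A minimal client-side sketch of the read path; the file path is
illustrative, and imports from \texttt{org.apache.hadoop.\{conf,fs,io\}}
are assumed:
\begin{verbatim}
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);  // entry point to HDFS

// open() asks the NameNode for block locations; the returned
// stream then reads the bytes from the closest DataNodes
FSDataInputStream in = fs.open(new Path("/user/data/sample.txt"));
IOUtils.copyBytes(in, System.out, 4096, false);
in.close();
\end{verbatim}
\end{frame}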
--------------------------------------------------------------------------------
/hadoop/deployments.tex:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Setting up a Hadoop Cluster}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Cluster deployment}
\begin{itemize}
\item Private cluster
\item Cloud-based cluster
\item AWS Elastic MapReduce
\end{itemize}

\vspace{20pt}

\item \textbf{Outlook:}
\begin{itemize}
\item Cluster specification
\begin{itemize}
\item Hardware
\item Network Topology
\end{itemize}
\item Hadoop Configuration
\begin{itemize}
\item Memory considerations
\end{itemize}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Specification}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Cluster Specification}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Commodity Hardware}
\begin{itemize}
\item Commodity $\neq$ Low-end
\begin{itemize}
\item False economy due to failure rate and maintenance costs
\end{itemize}
\item Commodity $\neq$ High-end
\begin{itemize}
\item High-end machines perform better, which would imply a smaller cluster
\item A single machine failure would compromise a large fraction of the cluster
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{A 2012 specification}:
\begin{itemize}
\item Dual socket, two hexa-core CPUs
\item 128 GB {\color{red}ECC} RAM
\item 8 $\times$ 1 TB disks\footnote{\color{red}Why not use RAID instead of JBOD?}
\item \{1,10\} Gigabit Ethernet
\end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Cluster Specification}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Example:}
\begin{itemize}
\item Assume your data grows by 1 TB per week
\item Assume you have three-way replication in HDFS
\item[$\to$] You need an additional 3 TB of raw storage per week
\item Allow for some overhead (temporary files, logs)
\item[$\to$] {\color{red}This is a new machine per week}
\end{itemize}

\vspace{20pt}

\item \textbf{How to dimension a cluster?}
\begin{itemize}
\item Obviously, you won't buy a machine per week!
\item Instead, project the back-of-the-envelope calculation above over the expected lifetime of your system, e.g., two years
\item[$\to$] You would need a 100-machine cluster
\end{itemize}

\vspace{20pt}

\item \textbf{Where should you put the various components?}
\begin{itemize}
\item Small cluster: the NameNode and the JobTracker can be {\color{red}collocated}
\item Large cluster: requires more RAM at the NameNode
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Cluster Specification}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Should we use 64-bit or 32-bit machines?}
\begin{itemize}
\item The NameNode should run on a 64-bit machine: this avoids the 3GB Java heap size limit on 32-bit machines
\end{itemize}

\vspace{40pt}

\item \textbf{What's the role of Java?}
\begin{itemize}
\item Recent releases (Java 6) implement optimizations to eliminate the large pointer overhead
\item[$\to$] A cluster of 64-bit machines has no downside
\end{itemize}
\end{itemize}

}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Network Topology}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Network Topology}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\framebox{\includegraphics[scale=0.3]{./Figures/cluster_net_topology}}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Network Topology}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Two-level network topology}
\begin{itemize}
\item Switch redundancy is not shown in the figure
\end{itemize}

\vspace{20pt}

\item \textbf{Typical configuration}
\begin{itemize}
\item 30-40 servers per rack
\item 10 Gb/s top-of-rack (TOR) switch
\item Core switch or router at 10 Gb/s or better
\end{itemize}

\vspace{20pt}

\item \textbf{Features}
\begin{itemize}
\item Aggregate bandwidth between nodes on the same rack is much larger than for nodes on different racks
\item {\color{red}Rack awareness}
\begin{itemize}
\item Hadoop should know the cluster topology
\item Benefits both HDFS (data placement) and MapReduce (locality)
\end{itemize}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Hadoop Configuration}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Hadoop Configuration}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{There are a handful of files for controlling the operation of a Hadoop cluster}
\begin{itemize}
\item Hundreds of parameters!!
\item See the next slide for a summary table
\end{itemize}

\vspace{20pt}

\item \textbf{Managing the configuration across several machines}
\begin{itemize}
\item All machines of a Hadoop cluster must be in sync!
\item What happens if you dispatch an update and some machines are down?
\item What happens when you add (new) machines to your cluster?
\item What if you need to patch MapReduce?
\end{itemize}

\vspace{20pt}

\item \textbf{Common practice: use configuration management tools}
\begin{itemize}
\item Chef, Puppet, ...
\item Declarative language to specify configurations
\item They can also install software
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Hadoop Configuration}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tiny}
\begin{table}[h]
\centering
\begin{tabular}{||c|c|l||}
\hline
\hline
{\textbf{Filename}} & {\textbf{Format}} & {\textbf{Description}} \\
\hline
\hline
hadoop-env.sh & Bash script & {Environment variables that are used in the scripts to run Hadoop.} \\
core-site.xml & Hadoop configuration XML & I/O settings that are common to HDFS and MapReduce.\\
hdfs-site.xml & Hadoop configuration XML & Namenode, the secondary namenode, and the datanodes. \\
mapred-site.xml & Hadoop configuration XML & Jobtracker, and the tasktrackers.\\
masters & Plain text & A list of machines that each run a secondary namenode.\\
slaves & Plain text & A list of machines that each run a datanode and a tasktracker.
\\
\hline
\hline
\end{tabular}
\caption{Hadoop Configuration Files}
\label{tab:conf}
\end{table}
\end{tiny}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Hadoop Configuration: memory utilization}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Hadoop uses a lot of memory}
\begin{itemize}
\item Default values, for a typical cluster configuration
\begin{itemize}
\item DataNode: 1 GB
\item TaskTracker: 1 GB
\item Child JVM map task: 2 $\times$ 200MB
\item Child JVM reduce task: 2 $\times$ 200MB
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{All the moving parts of Hadoop (HDFS and MapReduce) can be individually configured}
\begin{itemize}
\item This is true for the cluster configuration, but also for {\color{red}job specific} configurations
\end{itemize}

\vspace{20pt}

\item \textbf{Hadoop is fast when using RAM}
\begin{itemize}
\item Generally, MapReduce Jobs {\color{red}are not} CPU-bound
\item Avoid I/O on disk as much as you can
\item Minimize network traffic
\begin{itemize}
\item Customize the partitioner
\item Use compression ($\to$ decompression is in RAM)
\end{itemize}
\end{itemize}

\end{itemize}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Cloud Deployments}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Elephants in the cloud!}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Many organizations run Hadoop in private clusters}
\begin{itemize}
\item Pros and cons
\end{itemize}

\vspace{40pt}

\item \textbf{Cloud-based Hadoop installations (Amazon biased)}
\begin{itemize}
\item Use Cloudera + \{Whirr, boto, ...\}
\item Use Elastic MapReduce
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Hadoop on EC2}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Launch instances of a cluster on demand, paying by the hour}
\begin{itemize}
\item You pay for CPU time; in general, bandwidth used within a datacenter is free
\end{itemize}

\vspace{20pt}

\item \textbf{Apache Whirr project}
\begin{itemize}
\item Launch, terminate, modify a running cluster
\item Requires AWS credentials
\end{itemize}

\vspace{20pt}

\item \textbf{Example}
\begin{itemize}
\item Launch a cluster \texttt{test-hadoop-cluster}, with one master node (\texttt{JobTracker} and \texttt{NameNode}) and 5 worker nodes (\texttt{DataNodes} and \texttt{TaskTrackers})
\item[$\to$] \texttt{hadoop-ec2 launch-cluster test-hadoop-cluster 5}
\item See Chapter 9 \cite{hadoop_book}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{AWS Elastic MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Hadoop as a service}
\begin{itemize}
\item Amazon handles everything, which becomes transparent
\item How this is done remains a mystery
\end{itemize}

\vspace{40pt}

\item \textbf{Focus on What, not How}
\begin{itemize}
\item All you need to do is package a MapReduce Job in a JAR and upload it using a Web Interface
\item Other job types are available: Python (streaming), Pig, Hive, ...
\item {\color{red}Test your jobs locally!!!}
\end{itemize}
\end{itemize}

}
--------------------------------------------------------------------------------
/disa/graph_algorithms.tex:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Preliminaries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Motivations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Examples of graph problems}
\begin{itemize}
\item Clustering
\item Matching problems
\item Element analysis: node and edge centralities
\end{itemize}

\vspace{20pt}

\item \textbf{The problem: big graphs}

\vspace{20pt}

\item \textbf{Why MapReduce?}
\begin{itemize}
\item Algorithms for the above problems on a single machine are not scalable
\item Recently, Google designed a new system, Pregel, for large-scale ({\color{red}incremental}) graph processing
\item Even more recently, \cite{Lattanzi2011} indicates a fundamentally new design pattern to analyze graphs in MapReduce
\item New trend: graph databases, graph processing systems\footnote{If you're interested, we'll discuss this off-line.}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Graph Representations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Basic data structures}
\begin{itemize}
\item Adjacency matrix
\item Adjacency list
\end{itemize}

\vspace{20pt}

\item \textbf{Are graphs sparse or dense?}
\begin{itemize}
\item Determines which data structure to use
\begin{itemize}
\item Adjacency matrix: operations on incoming links are easy (column scan)
\item Adjacency list: operations on outgoing links are easy
\item The shuffle and sort phase can help, by grouping edges by their destination reducer
\end{itemize}
\item \cite{Leskovec2005} dispelled the notion of sparseness of real-world graphs
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel
Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Single-source shortest path}
\begin{itemize}
\item Dijkstra's algorithm uses a {\color{red}global priority queue}
\begin{itemize}
\item Maintains a globally sorted list of nodes by current distance
\end{itemize}
\item How to solve this problem in parallel?
\begin{itemize}
\item ``Brute-force'' approach: breadth-first search
\end{itemize}
\end{itemize}

\vspace{40pt}

\item \textbf{Parallel BFS: intuition}
\begin{itemize}
\item Flooding
\item {\color{red}Iterative algorithm} in MapReduce
\item Shoehorn message-passing style algorithms
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.4]{./Figures/pbfs}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Assumptions}
\begin{itemize}
\item Connected, directed graph
\item Data structure: adjacency list
\item The distance to each node is stored alongside the adjacency list of that node
\end{itemize}

\vspace{20pt}

\item \textbf{The pseudo-code}
\begin{itemize}
\item We use $n$ to denote the node id (an integer)
\item We use $N$ to denote the node adjacency list and current distance
\item The algorithm works by mapping over all nodes
\item Mappers emit a key-value pair for each neighbor on the node's adjacency list
\begin{itemize}
\item The key: node id of the neighbor
\item The value: the current distance to the node plus one
\item If we can reach node $n$ with distance $d$, then we must be able to reach all the nodes connected to $n$ with distance $d+1$
\end{itemize}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The pseudo-code (continued)}
\begin{itemize}
\item After shuffle and sort, reducers receive keys corresponding to the destination node ids and distances corresponding to all paths leading to that node
\item The reducer selects the shortest of these distances and updates the distance in the node data structure
\end{itemize}

\vspace{20pt}

\item \textbf{Passing the graph along}
\begin{itemize}
\item The mapper: emits the node adjacency list, with the node id as the key
\item The reducer: must distinguish between the node data structure and the distance values
\item[$\to$] A full pseudo-code sketch follows on the next slide
\end{itemize}
\end{itemize}
}
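%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search: Pseudo-Code}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A simplified sketch in the style of our other listings (unit edge
weights; distances initialized to $\infty$, except the source, set to 0):

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}
\begin{tiny}
\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{nid $n$, node $N$}
\State $d \gets N.\textit{Distance}$
\State $\textsc{Emit}(\textrm{nid }n, \textrm{node }N)$ \Comment{Pass along the graph structure}
\ForAll{$\textrm{nid } m \in N.\textit{AdjacencyList}$}
\State $\textsc{Emit}(\textrm{nid }m, d + 1)$ \Comment{Distances to reachable nodes}
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{nid $m$, $[ d_1, d_2, \ldots ]$}
\State $d_{min} \gets \infty$
\State $M \gets \emptyset$
\ForAll{$d \in [ d_1, d_2, \ldots ]$}
\If{\textsc{IsNode}($d$)}
\State $M \gets d$ \Comment{Recover the graph structure}
\ElsIf{$d < d_{min}$}
\State $d_{min} \gets d$ \Comment{Keep the shortest distance seen}
\EndIf
\EndFor
\If{$d_{min} < M.\textit{Distance}$}
\State $M.\textit{Distance} \gets d_{min}$
\EndIf
\State $\textsc{Emit}(\textrm{nid }m, \textrm{node }M)$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{tiny}
\end{algorithm}
}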
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{MapReduce iterations}
\begin{itemize}
\item The first time we run the algorithm, we ``discover'' all nodes connected to the source
\item In the second iteration, we discover all nodes connected to those
\item[$\to$] Each iteration expands the ``search frontier'' by one hop
\item {\color{red}How many iterations are needed before convergence?}
\end{itemize}

\vspace{40pt}

\item \textbf{This approach is suitable for small-world graphs}
\begin{itemize}
\item The diameter of the network is small
\item See \cite{Lattanzi2011} for advanced topics on the subject
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Parallel Breadth-First Search}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Checking the termination of the algorithm}
\begin{itemize}
\item Requires a ``driver'' program which submits a job, checks the termination condition, and iterates if necessary
\item In practice:
\begin{itemize}
\item Hadoop counters
\item Side-data to be passed to the job configuration
\end{itemize}
\end{itemize}

\vspace{40pt}

\item \textbf{Extensions}
\begin{itemize}
\item Storing the actual shortest-path
\item Weighted edges (as opposed to unit distance)
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{The story so far}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The graph structure is stored in adjacency lists}
\begin{itemize}
\item This data structure can be augmented with additional information
\end{itemize}

\vspace{20pt}

\item \textbf{The MapReduce framework}
\begin{itemize}
\item Maps over the node data structures, involving only the node's internal state and its {\color{red}local} graph structure
\item Map results are ``passed'' along outgoing edges
\item The graph itself is passed from the mapper to the reducer
\begin{itemize}
\item This is a very costly operation for large graphs!
\end{itemize}
\item Reducers aggregate over ``same destination'' nodes
\end{itemize}

\vspace{20pt}

\item \textbf{Graph algorithms are generally iterative}
\begin{itemize}
\item They require a driver program to check for termination
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{PageRank}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Introduction}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{What is PageRank}
\begin{itemize}
\item It's a measure of the relevance of a Web page, based on the structure of the hyperlink graph
\item Based on the concept of a random Web surfer
\end{itemize}

\vspace{20pt}

\item \textbf{Formally we have: }
$$P(n) = \alpha \Big( \frac{1}{|G|}\Big) + (1-\alpha) \sum_{m \in L(n)}\frac{P(m)}{C(m)}$$
\begin{itemize}
\item $|G|$ is the number of nodes in the graph
\item $\alpha$ is a random jump factor
\item $L(n)$ is the set of pages that link to page $n$
\item $C(m)$ is the out-degree of node $m$
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in Details}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{PageRank is defined recursively, hence we need an iterative algorithm}
\begin{itemize}
\item A node receives ``contributions'' from all pages that link to it
\end{itemize}

\vspace{20pt}

\item \textbf{Consider the set of nodes $L(n)$}
\begin{itemize}
\item A random surfer at $m$ arrives at $n$ with probability $1/C(m)$
\item Since the PageRank value of $m$ is the probability that the random surfer is at $m$, the probability of arriving at $n$ from $m$ is $P(m)/C(m)$
\end{itemize}

\vspace{20pt}

\item \textbf{To compute the PageRank of $n$ we need to:}
\begin{itemize}
\item Sum the contributions from all pages that link to $n$
\item Take into account the random jump, which is uniform over all nodes in the graph
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.4]{./Figures/pr}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.4]{./Figures/pr_toy}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.4]{./Figures/pr_sketch}
\end{center}
}

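%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce: Pseudo-Code}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A simplified sketch (dangling nodes and the random jump are left to a
separate job; a prose walkthrough follows on the next slide):

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}
\begin{tiny}
\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{nid $n$, node $N$}
\State $p \gets N.\textit{PageRank} / |N.\textit{AdjacencyList}|$
\State $\textsc{Emit}(\textrm{nid }n, \textrm{node }N)$ \Comment{Pass along the graph structure}
\ForAll{$\textrm{nid } m \in N.\textit{AdjacencyList}$}
\State $\textsc{Emit}(\textrm{nid }m, p)$ \Comment{Distribute the PageRank mass}
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{nid $m$, $[ p_1, p_2, \ldots ]$}
\State $M \gets \emptyset$
\State $s \gets 0$
\ForAll{$p \in [ p_1, p_2, \ldots ]$}
\If{\textsc{IsNode}($p$)}
\State $M \gets p$ \Comment{Recover the graph structure}
\Else
\State $s \gets s + p$ \Comment{Sum the incoming PageRank mass}
\EndIf
\EndFor
\State $M.\textit{PageRank} \gets s$
\State $\textsc{Emit}(\textrm{nid }m, \textrm{node }M)$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{tiny}
\end{algorithm}
}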
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Sketch of the MapReduce algorithm}
\begin{itemize}
\item The algorithm maps over the nodes
\item For each node, it computes the PageRank mass that needs to be distributed to the neighbors
\item Each fraction of the PageRank mass is emitted as the value, keyed by the node ids of the neighbors
\item In the shuffle and sort phase, values are grouped by node id
\begin{itemize}
\item Also, we pass the graph structure from mappers to reducers (for subsequent iterations to take place over the updated graph)
\end{itemize}
\item The reducer updates the PageRank value of every single node
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{PageRank in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Implementation details}
\begin{itemize}
\item Loss of PageRank mass for sink nodes
\item Auxiliary state information
\item One iteration of the algorithm
\begin{itemize}
\item Two MapReduce jobs: one to distribute the PageRank mass, the other for dangling nodes and random jumps
\end{itemize}
\item Checking for convergence
\begin{itemize}
\item Requires a driver program
\item When the updates of PageRank are ``stable'', the algorithm stops
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{Further reading on {\color{red}convergence} and {\color{red}attacks}}
\begin{itemize}
\item Convergence: \cite{Page1999, Bianchini2005}
\end{itemize}
\end{itemize}
}
--------------------------------------------------------------------------------
/disa/programming_model.tex:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Functional Programming Roots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Key feature: higher-order functions}
\begin{itemize}
\item Functions that accept other functions as arguments
\item \textbf{Map} and \textbf{Fold}
\end{itemize}
\end{itemize}

\begin{figure}[h]
\centering
\includegraphics[scale=0.4]{./Figures/functional}
\caption{Illustration of \emph{map} and \emph{fold}.}
\label{fig:functional}
\end{figure}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Functional Programming Roots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{map phase:}
\begin{itemize}
\item Given a list, \emph{map} takes as an argument a function $f$ (that takes a single argument) and applies it to all elements in the list
\end{itemize}

\vspace{20pt}

\item \textbf{fold phase:}
\begin{itemize}
\item Given a list, \emph{fold} takes as arguments a function $g$ (that takes two arguments) and an initial value (an accumulator)
\item $g$ is first applied to the initial value and the first item in the list
\item The result is stored in an
intermediate variable, which is used as an input, together with the next item, to a second application of $g$
\item The process is repeated until all items in the list have been consumed
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Functional Programming Roots}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{We can view map as a transformation over a dataset}
\begin{itemize}
\item This transformation is specified by the function $f$
\item Each functional application happens in {\color{red} \textbf{isolation}}
\item The application of $f$ to each element of a dataset can be parallelized in a straightforward manner
\end{itemize}

\vspace{20pt}

\item \textbf{We can view fold as an aggregation operation}
\begin{itemize}
\item The aggregation is defined by the function $g$
\item Data locality: elements in the list must be ``brought together''
\item If we can {\color{red} \textbf{group}} elements of the list, the fold phase can also proceed in parallel
\end{itemize}

\vspace{20pt}

\item \textbf{Associative and commutative operations}
\begin{itemize}
\item Allow performance gains through local aggregation and reordering
\end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Functional Programming and MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Equivalence of MapReduce and Functional Programming:}
\begin{itemize}
\item The map of MapReduce corresponds to the map operation
\item The reduce of MapReduce corresponds to the fold operation
\end{itemize}

\vspace{20pt}

\item \textbf{The framework coordinates the map and reduce phases:}
\begin{itemize}
\item Grouping intermediate results happens in parallel
\end{itemize}

\vspace{20pt}

\item \textbf{In practice:}
\begin{itemize}
\item A user-specified computation is applied (in parallel) to all input records of a dataset
\item Intermediate results are aggregated by another user-specified computation
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{What can we do with MapReduce?}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{MapReduce ``implements'' a subset of functional programming}
\begin{itemize}
\item The programming model appears quite limited and strict
\item A concrete map/fold example follows on the next slide
\end{itemize}

\vspace{20pt}

\item \textbf{There are several important problems that can be adapted to MapReduce}
\begin{itemize}
\item We will focus on illustrative cases
\item We will see in detail ``design patterns''
\begin{itemize}
\item How to transform a problem and its input
\item How to save memory and bandwidth in the system
\end{itemize}
\end{itemize}

\end{itemize}

}
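%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Map and Fold: a Concrete Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A small illustration of the two higher-order functions (here in Java~8
streams syntax, purely as an example; any functional language works):
\begin{verbatim}
List<Integer> xs = Arrays.asList(1, 2, 3, 4);

// map: apply f (here, squaring) to each element in isolation
// fold/reduce: combine elements with g (here, +), starting
// from the initial accumulator value 0
int sumOfSquares = xs.stream()
                     .map(x -> x * x)
                     .reduce(0, (a, b) -> a + b);  // = 30
\end{verbatim}
Since $+$ is associative and commutative, the fold can proceed by
local partial sums; this is exactly the property that combiners
exploit in MapReduce.
\end{frame}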
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Data Structures}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Key-value pairs are the basic data structure in MapReduce}
\begin{itemize}
\item Keys and values can be integers, floats, strings, or raw bytes
\item They can also be \textbf{arbitrary data structures}
\end{itemize}

\vspace{20pt}

\item \textbf{The design of MapReduce algorithms involves:}
\begin{itemize}
\item Imposing the key-value structure on arbitrary datasets\footnote{There's more to it: here we only look at the input to the map function.}
\begin{itemize}
\item E.g.: for a collection of Web pages, input keys may be URLs and values may be the HTML content
\end{itemize}
\item In some algorithms input keys are not used, in others they uniquely identify a record
\item Keys can be combined in complex ways to design various algorithms
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{A Generic MapReduce Algorithm}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The programmer defines a mapper and a reducer as follows}\footnote{We use the convention $[ \cdots ]$ to denote a list.}\footnote{Subscripts indicate different data types.}:
\begin{itemize}
\item map: $(k_1,v_1) \to [(k_2,v_2)]$
\item reduce: $(k_2,[v_2]) \to [(k_3,v_3)]$
\end{itemize}

\vspace{20pt}

\item \textbf{In words}:
\begin{itemize}
\item A dataset is stored on an underlying \textbf{distributed} filesystem, split in a number of \textbf{blocks} across machines
\item The mapper is applied to every input key-value pair to generate intermediate key-value pairs
\item The reducer is applied to all values associated with the same intermediate key to generate output key-value pairs
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Where the magic happens}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Implicit between the map and reduce phases is a {\color{red}parallel ``\textbf{group by}''} operation on intermediate keys}
\begin{itemize}
\item Intermediate data arrive at each reducer in order, sorted by the key
\item No ordering is guaranteed across reducers
\end{itemize}

\vspace{20pt}

\item \textbf{Output keys from reducers are written back to the distributed filesystem}
\begin{itemize}
\item The output may consist of $r$ distinct files, where $r$ is the number of reducers
\item Such output may be the input to a subsequent MapReduce phase\footnote{Think of \textbf{iterative algorithms}.}
\end{itemize}

\vspace{20pt}

\item \textbf{Intermediate keys are transient}:
\begin{itemize}
\item They are not stored on the distributed filesystem
\item They are ``spilled'' to the local disk of each machine in the cluster
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{``Hello World'' in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $t \in$ line $l$}
\State $\textsc{Emit}(\textrm{term }t, \textrm{count }1)$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{term $t$, counts $[ c_1, c_2, \ldots ]$}
\State $sum \gets 0$
\ForAll{$ \textrm{count }c \in \textrm{counts }[ c_1, c_2, \ldots ]$}
\State $sum \gets sum + c$
\EndFor
\State $\textsc{Emit}(\textrm{term }t, \textrm{count }sum)$
\EndProcedure
\EndFunction
\end{algorithmic}

\end{algorithm}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure}[h]
\centering
\includegraphics[scale=0.23]{./Figures/simple_MR}
\label{fig:simple_MR}
\end{figure}
}

\frame {\frametitle{``Hello World'' in MapReduce}
\begin{itemize}

\item \textbf{Input:}
\begin{itemize}
\item Key-value pairs: (offset, line) of a file stored on the distributed filesystem
\item $a$: unique identifier of a line (its offset)
\item $l$: the text of the line itself
\end{itemize}

\item \textbf{Mapper:}
\begin{itemize}
\item Takes an input key-value pair and tokenizes the line
\item Emits intermediate key-value pairs: the word is the key and the integer is the value
\end{itemize}

\item \textbf{The framework:}
\begin{itemize}
\item Guarantees all values associated with the same key (the word) are brought to the same reducer
\end{itemize}

\item \textbf{The reducer:}
\begin{itemize}
\item Receives all values associated to some keys
\item Sums the values and writes output key-value pairs: the key is the word and the value is the number of occurrences
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Combiners are a general mechanism to reduce the amount of intermediate data}
\begin{itemize}
\item They could be thought of as ``mini-reducers'' (see the driver sketch on the next slide)
\end{itemize}

\vspace{20pt}

\item \textbf{Back to our running example: word count}
\begin{itemize}
\item Combiners aggregate term counts across documents processed by each map task
\item If combiners take advantage of all opportunities for local aggregation, we have at most $m \times V$ intermediate key-value pairs
\begin{itemize}
\item $m$: number of mappers
\item $V$: number of unique terms in the collection
\end{itemize}
\item Note: due to the Zipfian nature of term distributions, not all mappers will see all terms
\end{itemize}

\end{itemize}
}
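%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Enabling a Combiner: Driver Sketch}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A minimal job-driver sketch; the \texttt{TokenizerMapper} and
\texttt{IntSumReducer} class names follow the classic WordCount
example and are assumptions here:
\begin{verbatim}
Job job = new Job(new Configuration(), "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
// Sum is commutative and associative, so the reducer
// can be reused, unchanged, as the combiner
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
\end{verbatim}
Remember: the framework may run the combiner zero, one, or several
times per map task.
\end{frame}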
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{A word of caution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The use of combiners must be thought through carefully}
\begin{itemize}
\item In Hadoop, they are optional: the correctness of the algorithm cannot depend on the computation (or even the execution) of the combiners
\end{itemize}

\vspace{20pt}

\item \textbf{Combiners I/O types}
\begin{itemize}
\item Input: $(k_2, [v_2])$ [Same input as for Reducers]
\item Output: $[(k_2, v_2)]$ [Same output as for Mappers]
\end{itemize}

\vspace{20pt}

\item \textbf{Commutative and Associative computations}
\begin{itemize}
\item Reducer and Combiner code may be interchangeable (e.g. Word Count)
\item This is not true in the general case
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.23]{./Figures/simple_MR_combiners}
\end{center}
}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Algorithmic Correctness: an Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Problem statement}
\begin{itemize}
\item We have a large dataset where input keys are strings and input values are integers
\item We wish to compute the mean of all integers associated with the same key
\begin{itemize}
\item In practice: the dataset can be a log from a website, where the keys are user IDs and the values are some measure of activity
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{Next, a baseline approach}
\begin{itemize}
\item We use an \textbf{identity mapper}, which groups and sorts appropriately the input key-value pairs
\item Reducers keep track of the running sum and the number of integers encountered
\item The mean is emitted as the output of the reducer, with the input string as the key
\end{itemize}

\vspace{20pt}

\item \textbf{Inefficiency problems in the shuffle phase}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Example: Computing the mean}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}
\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{string $t$, integer $r$}
\State $\textsc{Emit}(\textrm{string }t, \textrm{integer }r)$
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{string $t$, integers $[ r_1, r_2, \ldots ]$}
\State $sum \gets 0$
\State $cnt \gets 0$
\ForAll{$ \textrm{integer }r \in \textrm{integers }[ r_1, r_2, \ldots ]$}
\State $sum \gets sum + r$
\State $cnt \gets cnt + 1$
\EndFor
\State $r_{avg} \gets sum/cnt$
\State $\textsc{Emit}(\textrm{string }t, \textrm{integer } r_{avg})$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Algorithmic Correctness}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item
\textbf{Note: the mean is not a distributive operation}
  \begin{itemize}
  \item \texttt{Mean}(1,2,3,4,5) $= 3$, whereas
    \texttt{Mean}(\texttt{Mean}(1,2), \texttt{Mean}(3,4,5)) $=
    \texttt{Mean}(1.5, 4) = 2.75$
  \item Hence: a combiner cannot output partial means and hope that
    the reducer will compute the correct final mean
  \end{itemize}

\vspace{20pt}

\item \textbf{Rule of thumb:}
  \begin{itemize}
  \item Combiners are optimizations; the algorithm should still work
    when they are ``removed''
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Example: Computing the mean with combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}
\begin{tiny}
\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{string $t$, integer $r$}
\State $\textsc{Emit}(\textrm{string }t, \textrm{pair }(r, 1))$
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Combiner}{}
\Procedure{Combine}{string $t$, pairs $[ (s_1, c_1), (s_2, c_2) \ldots ]$}
\State $sum \gets 0$
\State $cnt \gets 0$
\ForAll{$ \textrm{pair }(s, c) \in \textrm{pairs }[ (s_1, c_1), (s_2, c_2) \ldots ]$}
\State $sum \gets sum + s$
\State $cnt \gets cnt + c$
\EndFor
\State $\textsc{Emit}(\textrm{string }t, \textrm{pair }(sum, cnt))$
\EndProcedure
\EndFunction
\end{algorithmic}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{string $t$, pairs $[ (s_1, c_1), (s_2, c_2) \ldots ]$}
\State $sum \gets 0$
\State $cnt \gets 0$
\ForAll{$ \textrm{pair }(s, c) \in \textrm{pairs }[ (s_1, c_1), (s_2, c_2) \ldots ]$}
\State $sum \gets sum + s$
\State $cnt \gets cnt + c$
\EndFor
\State $r_{avg} \gets sum/cnt$
\State $\textsc{Emit}(\textrm{string }t, \textrm{integer } r_{avg})$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{tiny}
\end{algorithm}

}

-------------------------------------------------------------------------------- /relal/relational.tex: --------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Introduction}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Disclaimer}
  \begin{itemize}
  \item This is not a full course on Relational Algebra
  \item Nor is this a course on SQL
  \end{itemize}

\vspace{20pt}

\item \textbf{Introduction to Relational Algebra, RDBMS and SQL}
  \begin{itemize}
  \item Follow the video lectures of the Stanford class on RDBMS
  \item[] \url{https://www.coursera.org/course/db}
  \item[$\to$] Note that you have to sign up for an account
  \end{itemize}

\vspace{20pt}

\item \textbf{Overview of this part}
  \begin{itemize}
  \item Brief
introduction to a simplified relational algebra
  \item Useful to understand Pig, Hive and HBase
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Relational Algebra Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{There are a number of operations on data that fit
    the relational algebra model well}
  \begin{itemize}
  \item In traditional RDBMS, queries involve the retrieval of
    {\color{red}small amounts of data}
  \item In this course, and in particular in this class, we should
    keep in mind the particular workload underlying MapReduce
  \item[$\to$] Full scans of large amounts of data
  \item[$\to$] Queries are not selective\footnote{This is true in general. However, most ETL jobs involve selection and projection to do data preparation.}, they process all the data
  \end{itemize}

\vspace{20pt}

\item \textbf{A review of some terminology}
  \begin{itemize}
  \item A {\color{red}\textit{relation}} is a table
  \item {\color{red}\textit{Attributes}} are the column headers of
    the table
  \item The set of attributes of a relation is called a
    {\color{red}\textit{schema}}
  \item[] Example: $R(A_1,A_2,...,A_n)$ indicates a relation called
    $R$ whose attributes are $A_1,A_2,...,A_n$
  \end{itemize}
\end{itemize}
}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Let's start with an example}
  \begin{itemize}
  \item Below, we have part of a relation called \textit{Links}
    describing the structure of the Web
  \item There are two \textit{attributes}: \textit{From} and
    \textit{To}
  \item A row, or {\color{red}\textit{tuple}}, of the relation is a
    pair of URLs, indicating the existence of a link between them
  \item[$\to$] The number of tuples in a real dataset is on the
    order of billions ($10^9$)
  \end{itemize}
\end{itemize}

\begin{center}
  \begin{tabular}[h]{|c|c|}
    \hline
    From & To \\
    \hline
    \hline
    \texttt{url1} & \texttt{url2} \\
    \texttt{url1} & \texttt{url3} \\
    \texttt{url2} & \texttt{url3} \\
    \texttt{url2} & \texttt{url4} \\
    $\cdots$ & $\cdots$\\
    \hline
  \end{tabular}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Relations (however big) can be stored in a distributed
    filesystem}
  \begin{itemize}
  \item If they don't fit on a single machine, they're broken into
    pieces (think HDFS)
  \end{itemize}

\vspace{20pt}

\item \textbf{Next, we review and describe a set of relational
    algebra operators}
  \begin{itemize}
  \item Intuitive explanation of what they do
  \item ``Pseudo-code'' of their implementation in/by MapReduce
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Selection: $\sigma_C(R)$}
  \begin{itemize}
  \item Apply condition $C$ to each tuple of relation $R$
  \item Output a relation containing only the tuples that
    satisfy $C$
  \end{itemize}

\vspace{10pt}

\item \textbf{Projection: $\pi_S(R)$}
  \begin{itemize}
  \item Given a \textit{subset} $S$ of the attributes of relation $R$
  \item Output a relation whose tuples keep only the components
    for the attributes in $S$
  \end{itemize}

\vspace{10pt}

\item \textbf{Union, Intersection and Difference}
  \begin{itemize}
  \item Well-known operators on sets
  \item Apply to the sets of tuples of two relations that have the
    {\color{red}same schema}
  \item Variations on the theme: work on \textit{bags}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Natural join $R \Join S$}
  \begin{itemize}
  \item Given two relations, \textit{compare each pair of tuples},
    one from each relation
  \item If the tuples agree on all the attributes common to both
    schemas $\to$ produce an output tuple that has a component for
    each attribute
  \item Otherwise, produce nothing
  \item The {\color{red}\textit{join condition}} can be on a subset of attributes
  \end{itemize}

\vspace{20pt}

\item \textbf{Let's work with an example}
  \begin{itemize}
  \item Recall the \textit{Links} relation from the previous slides
  \item \texttt{Query} (or data processing job): \texttt{find the paths of length two
      in the Web}
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Join Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Informally, to satisfy the query we must:}
  \begin{itemize}
  \item find the triples of URLs of the form $(u,v,w)$ such that
    there is a link from $u$ to $v$ and a link from $v$ to $w$
  \end{itemize}

\vspace{20pt}

\item \textbf{Using the join operator}
  \begin{itemize}
  \item Imagine we have two relations (with different schemas), and
    let's try to apply the natural join operator
  \item There are two copies of \textit{Links}: $L_1(U_1,U_2)$ and
    $L_2(U_2,U_3)$
  \item Let's compute $L_1 \Join L_2$
    \begin{itemize}
    \item For each tuple $t_1$ of $L_1$ and each tuple $t_2$ of
      $L_2$, see if their $U_2$ components are the same
    \item If so, produce an output tuple with the schema $(U_1,U_2,U_3)$
    \end{itemize}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Join Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{What we have seen is called (to be precise) a
    {\color{red}self-join}}
  \begin{itemize}
  \item {\color{red}Question}: How
would you implement a self-join in your favorite programming language?
  \item {\color{red}Question}: What is the time complexity of your
    algorithm?
  \item {\color{red}Question}: What is the space complexity of your
    algorithm?
  \end{itemize}

\vspace{20pt}

\item \textbf{To continue the example}
  \begin{itemize}
  \item Say you are not interested in the entire two-hop path but
    just the start and end nodes
  \item Then you do a projection, and the notation would be:
    $\pi_{U_1,U_3}(L_1 \Join L_2)$
  \end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Grouping and Aggregation: $\gamma_X(R)$}
  \begin{itemize}
  \item Given a relation $R$, partition its tuples according to
    their values in one set of attributes $G$
    \begin{itemize}
    \item The set $G$ is called the {\color{red}grouping attributes}
    \end{itemize}
  \item Then, for each group, aggregate the values in certain other
    attributes
    \begin{itemize}
    \item Aggregation functions: \texttt{SUM}, \texttt{COUNT}, \texttt{AVG}, \texttt{MIN}, \texttt{MAX}, ...
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{In the notation, $X$ is a list of elements that can be:}
  \begin{itemize}
  \item A grouping attribute
  \item An expression $\theta(A)$, where $\theta$ is one of the
    (five) aggregation functions and $A$ is an attribute
    {\color{red}NOT} among the grouping attributes
  \end{itemize}

\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Operators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Grouping and Aggregation: $\gamma_X(R)$}
  \begin{itemize}
  \item The result of this operation is a relation with one tuple
    for each group
  \item That tuple has a component for each of the grouping
    attributes, with the value common to the tuples of that group
  \item That tuple has another component for each aggregation, with
    the aggregate value for that group
  \end{itemize}

\vspace{20pt}

\item \textbf{Let's work with an example}
  \begin{itemize}
  \item Imagine that a social-networking site has a relation
  \item[] \texttt{Friends(User, Friend)}
  \item The tuples are pairs $(a,b)$ such that $b$ is a friend of
    $a$
  \item \texttt{Query: compute the number of friends each member has}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Grouping and Aggregation Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{How to satisfy the query}
  \begin{itemize}
  \item[] $\gamma_{User,\, \mathtt{COUNT}(Friend)}(Friends)$
  \item This operation groups all the tuples by the value in their
    first component
  \item[$\to$] There is one group for each user
  \item Then, for each group, it counts the number of friends
  \end{itemize}

\vspace{20pt}

\item \textbf{Some details}
  \begin{itemize}
  \item The \texttt{COUNT}
operation applied to an attribute does
    not consider the values of that attribute
  \item In fact, it counts the number of tuples in the group
  \item In SQL, there is a ``count distinct'' operator that counts
    the number of different values
  \end{itemize}
\end{itemize}
}





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Operators and MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing Selection}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{In practice, selections do not need a full-blown
    MapReduce implementation}
  \begin{itemize}
  \item They can be implemented in the {\color{red}map phase alone}
  \item Actually, they could also be implemented in the reduce phase
  \end{itemize}

\vspace{20pt}

\item \textbf{A MapReduce implementation of $\sigma_C(R)$}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For each tuple $t$ in $R$, check if $t$ satisfies $C$
    \item If so, emit a key/value pair $(t,t)$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item Identity reducer
    \item {\color{red}Question}: single or multiple reducers?
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{NOTE: the output is not exactly a relation}
  \begin{itemize}
  \item {\color{red}WHY?}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing Projections}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Similar process to selection}
  \begin{itemize}
  \item But projection may cause the same tuple to appear several times
  \end{itemize}

\vspace{20pt}

\item \textbf{A MapReduce implementation of $\pi_S(R)$}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For each tuple $t$ in $R$, construct a tuple $t'$ by
      eliminating those components whose attributes are not in $S$
    \item Emit a key/value pair $(t',t')$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item For each key $t'$ produced by any of the Map tasks, fetch
      $t', [t', \cdots, t']$
    \item Emit a key/value pair $(t',t')$
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{NOTE: the reduce operation is {\color{red}duplicate elimination}}
  \begin{itemize}
  \item This operation is associative and commutative, so it is
    possible to optimize MapReduce by using a \texttt{Combiner} in
    each mapper
  \end{itemize}
\end{itemize}

}

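%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Computing Selection: a Java sketch}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A map-only rendering of $\sigma_C(R)$, per the remark that selections
can be implemented in the map phase alone; the tab-separated tuple
layout and the predicate $C$ are assumptions:
{\tiny
\begin{verbatim}
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SelectionMapper
    extends Mapper<LongWritable, Text, Text, NullWritable> {
  @Override
  protected void map(LongWritable offset, Text tuple, Context ctx)
      throws IOException, InterruptedException {
    // Condition C (hypothetical): keep tuples whose first
    // component equals "url1"
    if (tuple.toString().split("\t")[0].equals("url1")) {
      ctx.write(tuple, NullWritable.get());  // emit the tuple
    }
  }
}
// Driver side: job.setNumReduceTasks(0) makes the job map-only,
// so map outputs are written straight to HDFS.
\end{verbatim}
}
\end{frame}
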
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing Unions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Suppose relations $R$ and $S$ have the same schema}
  \begin{itemize}
  \item Map tasks will be assigned chunks from either $R$ or $S$
  \item Mappers don't do much, they just pass tuples through to the
    reducers
  \item Reducers do duplicate elimination
  \end{itemize}

\vspace{20pt}

\item \textbf{A MapReduce implementation of union}
  \begin{itemize}
  \item[\texttt{Map}:]\footnote{Hadoop MapReduce supports reading multiple inputs.}
    \begin{itemize}
    \item For each tuple $t$ in $R$ or $S$, emit a key/value pair $(t,t)$
    \end{itemize}

  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item For each key $t$ there will be either one or two values
    \item Emit $(t,t)$ in either case
    \end{itemize}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing Intersections}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Very similar to computing unions}
  \begin{itemize}
  \item Suppose relations $R$ and $S$ have the same schema
  \item The map function is the same (an identity mapper) as for union
  \item The reduce function must produce a tuple only if both
    relations have that tuple
  \end{itemize}

\vspace{20pt}

\item \textbf{A MapReduce implementation of intersection}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For each tuple $t$ in $R$ or $S$, emit a key/value pair
      $(t,t)$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item If key $t$ has value list $[t,t]$ then emit the key/value
      pair $(t,t)$
    \item Otherwise, emit the key/value pair $(t, \mathtt{NULL})$
    \end{itemize}
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing Difference}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Assume we have two relations $R$ and $S$ with the same schema}
  \begin{itemize}
  \item The only way a tuple $t$ can appear in the output is if it
    is in $R$ but not in $S$
  \item The map function passes tuples from $R$ and $S$ to the reducer
  \item NOTE: it must inform the reducer whether the tuple came from $R$ or $S$
  \end{itemize}

\vspace{20pt}

\item \textbf{A MapReduce implementation of difference}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For a tuple $t$ in $R$ emit a key/value pair
      $(t,\mathtt{'R'})$ and for a tuple $t$ in $S$, emit a
      key/value pair $(t,\mathtt{'S'})$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item For each key $t$, do the following:
    \item If it is associated with $[\mathtt{'R'}]$ only, then emit $(t,t)$
    \item If it is associated with $[\mathtt{'R'},\mathtt{'S'}]$,
      $[\mathtt{'S'},\mathtt{'R'}]$, or $[\mathtt{'S'}]$, emit the
      key/value pair $(t, \mathtt{NULL})$
    \end{itemize}
  \end{itemize}
\end{itemize}
}

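%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Computing Difference: a Java sketch}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A possible reducer for $R-S$, assuming mappers tag each tuple with the
name of its relation; unlike the pseudo-code above, this sketch simply
emits nothing (rather than a \texttt{NULL} marker) when the tuple
appears in $S$:
{\tiny
\begin{verbatim}
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class DifferenceReducer
    extends Reducer<Text, Text, Text, NullWritable> {
  @Override
  protected void reduce(Text tuple, Iterable<Text> tags, Context ctx)
      throws IOException, InterruptedException {
    boolean inS = false;
    for (Text tag : tags) {            // tags is ["R"], ["S"], or both
      if (tag.toString().equals("S")) inS = true;
    }
    if (!inS) {                        // tuple appears only in R
      ctx.write(tuple, NullWritable.get());
    }
  }
}
\end{verbatim}
}
\end{frame}
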
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing the Natural Join}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{This topic is subject to continuous refinements}
  \begin{itemize}
  \item There are many \texttt{JOIN} operators and many different
    implementations
  \item We've seen some of them in the laboratory sessions
  \end{itemize}

\vspace{20pt}

\item \textbf{Let's look at two relations $R(A,B)$ and $S(B,C)$}
  \begin{itemize}
  \item We must find tuples that agree on their $B$ components
  \item We shall use the $B$-value of tuples from either relation as
    the key
  \item The value will be the other component and the name of the
    relation
  \item That way the reducer knows which relation each tuple
    comes from
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing the Natural Join}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A MapReduce implementation of Natural Join}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For each tuple $(a,b)$ of $R$ emit the key/value pair $(b,
      (\mathtt{'R'}, a))$
    \item For each tuple $(b,c)$ of $S$ emit the key/value pair $(b,
      (\mathtt{'S'}, c))$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item Each key $b$ will be associated with a list of pairs that
      are either $(\mathtt{'R'}, a)$ or $(\mathtt{'S'}, c)$
    \item Emit key/value pairs of the form $(b, [(a_1,b,c_1),(a_2,b,c_2),\cdots,(a_n,b,c_n)])$
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{NOTES}
  \begin{itemize}
  \item {\color{red}Question}: what if the MapReduce framework
    did not implement the distributed (and sorted) group by?
  \item In general, if $n$ tuples in relation $R$ and $m$ tuples
    in relation $S$ share a common $B$-value, we end up
    with $nm$ tuples in the result
  \item If all tuples of both relations have the same $B$-value,
    then we're computing the \textbf{Cartesian product}
  \end{itemize}

\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Grouping and Aggregation in MapReduce}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}

\item \textbf{Let $R(A,B,C)$ be a relation to which we apply
    $\gamma_{A,\theta(B)}(R)$}
  \begin{itemize}
  \item The map operation prepares the grouping
  \item The grouping is done by the framework
  \item The reducer computes the aggregation
  \item Simplifying assumptions: one grouping attribute and
    one aggregation function
  \end{itemize}

\vspace{20pt}

\item \textbf{MapReduce implementation of $\gamma_{A,\theta(B)}(R)$}\footnote{Note here that we are also projecting.}
  \begin{itemize}
  \item[\texttt{Map}:]
    \begin{itemize}
    \item For each tuple $(a,b,c)$ emit the key/value pair $(a,b)$
    \end{itemize}
  \item[\texttt{Reduce}:]
    \begin{itemize}
    \item Each key $a$ represents a group
    \item Apply $\theta$ to the list $[b_1,b_2,\cdots,b_n]$
    \item Emit the key/value pair $(a,x)$ where $x=\theta([b_1,b_2,\cdots,b_n])$
    \end{itemize}
  \end{itemize}

\end{itemize}
}
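%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Computing the Natural Join: a Java sketch}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For completeness, a reduce-side join of $R(A,B)$ and $S(B,C)$ along the
lines above (a sketch; the tab-separated layout is an assumption, and
\texttt{MultipleInputs} can assign one mapper class per relation).
Buffering both sides in memory is exactly what makes skewed $B$-values
expensive:
{\tiny
\begin{verbatim}
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class JoinExample {
  public static class RMapper extends Mapper<LongWritable, Text, Text, Text> {
    protected void map(LongWritable off, Text line, Context ctx)
        throws IOException, InterruptedException {
      String[] t = line.toString().split("\t");          // t = (a, b)
      ctx.write(new Text(t[1]), new Text("R\t" + t[0]));  // key: b
    }
  }
  public static class SMapper extends Mapper<LongWritable, Text, Text, Text> {
    protected void map(LongWritable off, Text line, Context ctx)
        throws IOException, InterruptedException {
      String[] t = line.toString().split("\t");          // t = (b, c)
      ctx.write(new Text(t[0]), new Text("S\t" + t[1]));  // key: b
    }
  }
  public static class JoinReducer extends Reducer<Text, Text, Text, Text> {
    protected void reduce(Text b, Iterable<Text> vals, Context ctx)
        throws IOException, InterruptedException {
      List<String> as = new ArrayList<String>();  // buffered R-side
      List<String> cs = new ArrayList<String>();  // buffered S-side
      for (Text v : vals) {                       // copy: Hadoop reuses objects
        String[] p = v.toString().split("\t");
        if (p[0].equals("R")) as.add(p[1]); else cs.add(p[1]);
      }
      for (String a : as)            // n x m output tuples per B-value
        for (String c : cs)
          ctx.write(b, new Text(a + "\t" + c));
    }
  }
}
\end{verbatim}
}
\end{frame}
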
-------------------------------------------------------------------------------- /hadoop/mapreduce.tex: --------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Overview}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Disclaimer}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{MapReduce APIs}
  \begin{itemize}
  \item Fast evolving
  \item Sometimes confusing
  \end{itemize}

\vspace{40pt}

\item \textbf{Do {\color{red}NOT} rely on this slide deck as a reference}
  \begin{itemize}
  \item Use the appropriate API docs
  \item Use Eclipse
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Anatomy of a MapReduce Job Run}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \framebox{\includegraphics[scale=0.36]{./Figures/mapreduce}}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Job Submission}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{JobClient} class}
  \begin{itemize}
  \item The \texttt{runJob()} method creates a new instance of a
    \textbf{JobClient}
  \item Then it calls \texttt{submitJob()} on this instance
  \end{itemize}

\vspace{20pt}

\item \textbf{Simple verifications on the Job}
  \begin{itemize}
  \item Is there an output directory?
  \item Are there any input splits?
  \item Can I copy the JAR of the job to HDFS?
  \end{itemize}

\vspace{20pt}

\item \textbf{NOTE: the JAR of the job is replicated 10 times}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Job Initialization}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The \texttt{JobTracker} is responsible for:}
  \begin{itemize}
  \item Creating an object for the job
  \item Encapsulating its tasks
  \item {\color{red}Bookkeeping} of the tasks' status and progress
  \end{itemize}

\vspace{20pt}

\item \textbf{This is where the scheduling happens}
  \begin{itemize}
  \item The \texttt{JobTracker} performs scheduling by maintaining a
    queue
  \item Queuing disciplines are pluggable
  \end{itemize}

\vspace{20pt}

\item \textbf{Computing mappers and reducers}
  \begin{itemize}
  \item The \texttt{JobTracker} retrieves the input splits (computed by
    the \texttt{JobClient})
  \item Determines the number of Mappers based on the number of
    input splits
  \item Reads the configuration file to set the number of Reducers
  \end{itemize}

\end{itemize}
}

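%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Job Submission in code (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The submission path just described, as a minimal driver using the old
\texttt{org.apache.hadoop.mapred} API; the built-in identity mapper
and reducer keep the example self-contained, and the input/output
paths come from the command line:
{\tiny
\begin{verbatim}
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class IdentityJob {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IdentityJob.class);
    conf.setJobName("identity");
    conf.setInputFormat(KeyValueTextInputFormat.class); // Text keys/values
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    // runJob() creates a JobClient, calls submitJob(), then polls
    // for progress until the job completes
    JobClient.runJob(conf);
  }
}
\end{verbatim}
}
\end{frame}
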
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Scheduling}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\begin{colorblock}{blue}{lightblue}{ }
  \begin{center}
    \textbf{Scheduling}
  \end{center}
\end{colorblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Task Assignment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item\textbf{Heartbeat-based mechanism}
  \begin{itemize}
  \item \texttt{TaskTrackers} periodically send heartbeats to the
    \texttt{JobTracker}
  \item A heartbeat signals that the \texttt{TaskTracker} is alive
  \item The heartbeat also contains information on the availability of
    the \texttt{TaskTracker} to execute a task
  \item The \texttt{JobTracker} piggybacks a task on the heartbeat
    response if the \texttt{TaskTracker} is available
  \end{itemize}

\vspace{20pt}

\item \textbf{Selecting a task}
  \begin{itemize}
  \item The \texttt{JobTracker} first needs to select a job
    (\textit{i.e.} Job scheduling)
  \item \texttt{TaskTrackers} have a fixed number of slots for map
    and reduce tasks
  \item The \texttt{JobTracker} gives priority to map tasks ({\color{red}WHY?})
  \end{itemize}

\vspace{20pt}

\item \textbf{Data locality}
  \begin{itemize}
  \item The \texttt{JobTracker} is topology aware
    \begin{itemize}
    \item Useful for map tasks
    \item Unused for reduce tasks ({\color{red}WHY?})
    \end{itemize}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Task Execution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Task Assignment is done, now \texttt{TaskTrackers} can
    execute}
  \begin{itemize}
  \item Copy the JAR from HDFS
  \item Create a local working directory
  \item Create an instance of \texttt{TaskRunner}
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{TaskRunner} launches a {\color{red}child} JVM}
  \begin{itemize}
  \item This prevents bugs from stalling the \texttt{TaskTracker}
  \item A new child JVM is created per \texttt{InputSplit}
    \begin{itemize}
    \item Can be overridden by specifying the JVM Reuse option, which is
      very useful for {\color{red}custom, in-memory, combiners}
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{Streaming and Pipes}
  \begin{itemize}
  \item User-defined map and reduce methods need not be in Java
  \item Streaming and Pipes allow C++ or Python mappers and reducers
  \item NOTE: this feature is heavily used in industry, with some tricky downsides
  \end{itemize}
\end{itemize}
}

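%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{JVM Reuse (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A driver-side sketch of the JVM Reuse option mentioned above (classic
Hadoop 1.x API; check the docs of your release):
{\tiny
\begin{verbatim}
import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
// -1 means: reuse the child JVM for an unlimited number of tasks
// of the same job; equivalent to setting the classic property
// "mapred.job.reuse.jvm.num.tasks" to -1
conf.setNumTasksToExecutePerJvm(-1);
\end{verbatim}
}
\end{frame}
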
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Scheduling in detail}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{FIFO Scheduler (default in vanilla Hadoop)}
  \begin{itemize}
  \item First-come-first-served
    \begin{itemize}
    \item Long jobs monopolize the cluster
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{Fair Scheduler (default in Cloudera)}
  \begin{itemize}
  \item Every user gets a fair share of the cluster capacity over time
  \item Jobs are placed into pools, one for each user
    \begin{itemize}
    \item Users that submit more jobs have no more resources than others
    \item Can guarantee a minimum capacity per pool
    \end{itemize}
  \end{itemize}

\vspace{20pt}

\item \textbf{Capacity Scheduler (heavily used at Yahoo)}
  \begin{itemize}
  \item Hierarchical queues (mimic an organization)
  \item FIFO scheduling in each queue
  \item Supports priorities
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Failures}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\begin{colorblock}{blue}{lightblue}{ }
  \begin{center}
    \textbf{Failures}
  \end{center}
\end{colorblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Handling Failures}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{beamerboxesrounded}[shadow=true]{}
  In the real world, code is buggy, processes crash and machines fail
\end{beamerboxesrounded}

\begin{itemize}
\item \textbf{Task Failure}
  \begin{itemize}
  \item Case 1: a map or reduce task throws a runtime exception
    \begin{itemize}
    \item The child JVM reports back to the parent
      \texttt{TaskTracker}
    \item The \texttt{TaskTracker} logs the error and marks the
      TaskAttempt as failed
    \item The \texttt{TaskTracker} frees up a slot to run another task
    \end{itemize}
  \item Case 2: Hanging tasks
    \begin{itemize}
    \item The \texttt{TaskTracker} notices no progress updates (timeout
      = 10 minutes)
    \item The \texttt{TaskTracker} kills the child JVM\footnote{With
        streaming, you need to take care of the orphaned process.}
    \end{itemize}
  \end{itemize}
\item The \texttt{JobTracker} is notified of a failed task
  \begin{itemize}
  \item It avoids rescheduling the task on the same
    \texttt{TaskTracker}
  \item If a task fails 4 times, it is not
    re-scheduled\footnote{An exception is made for speculative execution.}
  \item {\color{red}Default behavior}: if any task fails 4 times,
    the job fails
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Handling Failures}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{TaskTracker} Failure}
  \begin{itemize}
  \item Types: crash, running very slowly
  \item Heartbeats will not be sent to the \texttt{JobTracker}
  \item The \texttt{JobTracker} waits for a timeout (10 minutes), then
    it removes the \texttt{TaskTracker} from its scheduling pool
  \item The \texttt{JobTracker} needs to reschedule even
    \textit{completed} tasks ({\color{red}WHY?})
  \item The \texttt{JobTracker} needs to reschedule tasks in progress
  \item The \texttt{JobTracker} may even blacklist a
    \texttt{TaskTracker} if too many tasks failed
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{JobTracker} Failure}
  \begin{itemize}
  \item Currently, Hadoop has no mechanism for this kind of failure
  \item In future (and commercial) releases:
    \begin{itemize}
    \item Multiple \texttt{JobTrackers}
    \item Use ZooKeeper as a coordination mechanism
    \item[$\to$] {\color{red}High Availability}
    \end{itemize}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Internals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\begin{colorblock}{blue}{lightblue}{ }
  \begin{center}
    \textbf{Internals}
  \end{center}
\end{colorblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The MapReduce framework guarantees the input to every reducer
    to be sorted by key}
  \begin{itemize}
  \item The process by which the system sorts and transfers map
    outputs to reducers is known as the {\color{red}shuffle}
  \end{itemize}

\vspace{20pt}

\item \textbf{The shuffle is the most important part of the framework, where the
    ``magic'' happens}
  \begin{itemize}
  \item A good understanding allows optimizing both the framework and
    the execution time of MapReduce jobs
  \end{itemize}

\vspace{20pt}

\item \textbf{Subject to continuous refinements}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: Map Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \includegraphics[scale=0.4]{./Figures/map_task}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: the Map Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The output of a map task is not simply written to disk}
  \begin{itemize}
  \item In-memory buffering
  \item Pre-sorting
  \end{itemize}

\vspace{20pt}

\item \textbf{Circular memory buffer}
  \begin{itemize}
  \item 100 MB by default
  \item Threshold-based mechanism to {\color{red}spill} buffer content to disk
  \item Map output is written to the buffer {\color{red}while} spilling to disk
  \item If the buffer fills up while spilling, the map task is \textbf{blocked}
  \end{itemize}

\vspace{20pt}

\item \textbf{Disk spills}
  \begin{itemize}
  \item Written in round-robin fashion to a local directory
  \item Output data is partitioned according to the reducer it will be sent to
  \item Within each partition, data is sorted ({\color{red}in-memory})
  \item Optionally, if there is a combiner, it is executed just after the sort phase ({\color{red}WHY?})
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: the Map Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{More on spills and the memory buffer}
  \begin{itemize}
  \item Each time the buffer is full, a {\color{red}new} spill is created
  \item Once the map task finishes, there are many spills
  \item Such spills are merged into a single partitioned and sorted output file
  \end{itemize}

\vspace{40pt}

\item \textbf{The output file partitions are made available to reducers over HTTP}
  \begin{itemize}
  \item There are 40 (default) threads dedicated to serving the file partitions to reducers
  \end{itemize}
\end{itemize}
}

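%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Tuning the map-side buffers (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The behavior above is governed by a few classic Hadoop 1.x properties;
a driver-side sketch, with the defaults noted in comments (property
names may differ in later releases):
{\tiny
\begin{verbatim}
import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
conf.setInt("io.sort.mb", 200);                // buffer size; default 100 MB
conf.setFloat("io.sort.spill.percent", 0.80f); // spill threshold; default 0.80
conf.setInt("io.sort.factor", 10);             // streams merged at once; default 10
\end{verbatim}
}
\end{frame}
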

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Details on local spill files}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \includegraphics[scale=0.4]{./Figures/spill_partition}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: Reduce Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \includegraphics[scale=0.4]{./Figures/reduce_task}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: the Reduce Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The map output file is located on the local disk of the
    TaskTracker}

\item \textbf{Another TaskTracker (in charge of a reduce task)
    requires input from many other TaskTrackers (that have finished their
    map tasks)}
  \begin{itemize}
  \item How do reducers know which \texttt{TaskTrackers} to fetch map output
    from?
    \begin{itemize}
    \item When a map task finishes, it notifies the parent
      TaskTracker
    \item The TaskTracker notifies (with the heartbeat mechanism)
      the JobTracker
    \item A thread in the reducer {\color{red}periodically polls}
      the \texttt{JobTracker}
    \item \texttt{TaskTrackers} do not delete local map outputs as soon as a
      reduce task has fetched them ({\color{red}WHY?})
    \end{itemize}
  \end{itemize}

\item \textbf{Copy phase: a pull approach}
  \begin{itemize}
  \item There is a small number (5) of copy threads that can fetch
    map outputs in parallel
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Shuffle and Sort: the Reduce Side}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The map outputs are copied to the {\color{red}memory} of the
    TaskTracker running the reducer (if they fit)}
  \begin{itemize}
  \item Otherwise, they are copied to disk
  \end{itemize}

\vspace{20pt}

\item \textbf{Input consolidation}
  \begin{itemize}
  \item A background thread merges all partial inputs into larger,
    {\color{red}sorted} files
  \item Note that if compression was used (for map outputs, to save
    bandwidth), decompression will take place in memory
  \end{itemize}

\vspace{20pt}

\item \textbf{Sorting the input}
  \begin{itemize}
  \item When all map outputs have been copied, a merge phase starts
  \item All map outputs are merged, maintaining their sort ordering,
    in rounds
  \end{itemize}
\end{itemize}
}

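%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Tuning the copy phase (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The reduce-side counterparts of the map-side knobs, again with the
classic Hadoop 1.x property names (a sketch; verify against your
release's docs):
{\tiny
\begin{verbatim}
import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
conf.setInt("mapred.reduce.parallel.copies", 10); // copy threads; default 5
conf.setInt("io.sort.factor", 10);                // width of each merge round
\end{verbatim}
}
\end{frame}
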
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Types and Formats}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\begin{colorblock}{blue}{lightblue}{ }
  \begin{center}
    \textbf{Types and Formats}
  \end{center}
\end{colorblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{MapReduce Types}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Recall: Input / output to mappers and reducers}
  \begin{itemize}
  \item map: $(k1, v1)$ $\to$ $[(k2, v2)]$
  \item reduce: $(k2, [v2])$ $\to$ $[(k3, v3)]$
  \end{itemize}

\vspace{20pt}

{\color{red}
\item \textbf{In Hadoop, a mapper is created as follows:}
  \begin{itemize}
  \item \texttt{void map(K1 key, V1 value, Context context)}
  \end{itemize}
}

\vspace{20pt}

\item \textbf{Types:}
  \begin{itemize}
  \item $K$ types implement \texttt{WritableComparable}
  \item $V$ types implement \texttt{Writable}
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{What is a \texttt{Writable}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Hadoop defines its own classes for strings (\texttt{Text}),
    integers (\texttt{IntWritable}), etc.}

\vspace{20pt}

\item \textbf{All keys are instances of \texttt{WritableComparable}}
  \begin{itemize}
  \item {\color{red}Why comparable?}
  \end{itemize}

\vspace{20pt}

\item \textbf{All values are instances of \texttt{Writable}}

\end{itemize}
}

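%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{A custom \texttt{Writable} (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As an illustration (not part of the original deck), a custom
\texttt{Writable} for the $(sum, cnt)$ pairs of the earlier
mean-with-combiners example:
{\tiny
\begin{verbatim}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class SumCountWritable implements Writable {
  private long sum;
  private long count;

  public SumCountWritable() { }                 // required no-arg constructor
  public SumCountWritable(long s, long c) { sum = s; count = c; }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeLong(sum);                         // serialize fields in order
    out.writeLong(count);
  }
  @Override
  public void readFields(DataInput in) throws IOException {
    sum = in.readLong();                        // deserialize in the same order
    count = in.readLong();
  }
  public double mean() { return (double) sum / count; }
}
\end{verbatim}
}
\end{frame}
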
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Getting Data to the Mapper}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \includegraphics[scale=0.4]{./Figures/data2map}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Reading Data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Datasets are specified by \texttt{InputFormats}}
  \begin{itemize}
  \item \texttt{InputFormats} define the input data (e.g. a file, a
    directory)
  \item An \texttt{InputFormat} is a factory for \texttt{RecordReader}
    objects that extract key-value records from the input source
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{InputFormats} identify the partitions of the data
    that form an \texttt{InputSplit}}
  \begin{itemize}
  \item An \texttt{InputSplit} is a (\textbf{reference to a}) chunk of
    the input processed by a {\color{red}single} map
    \begin{itemize}
    \item The largest split is processed first
    \end{itemize}
  \item Each split is divided into records, and the map processes each
    record (a key-value pair) in turn
  \item Splits and records are {\color{red}logical}, they are not
    physically bound to a file
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{\texttt{InputFormat}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{TextInputFormat}}
  \begin{itemize}
  \item Treats each \texttt{newline}-terminated line of a file as a value
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{KeyValueTextInputFormat}}
  \begin{itemize}
  \item Maps \texttt{newline}-terminated text lines of ``key'' SEPARATOR ``value''
  \end{itemize}

\vspace{20pt}

\item\textbf{\texttt{SequenceFileInputFormat}}
  \begin{itemize}
  \item Binary file of key-value pairs with some additional metadata
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{SequenceFileAsTextInputFormat}}
  \begin{itemize}
  \item Same as before, but maps \texttt{(k.toString(), v.toString())}
  \end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{\texttt{InputSplit}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{FileInputFormat} divides large files into
    chunks}
  \begin{itemize}
  \item Exact size controlled by \texttt{mapred.min.split.size}
  \end{itemize}

\vspace{20pt}

\item \textbf{Record readers receive the file, offset, and length of the chunk}
  \begin{itemize}
  \item Example
  \end{itemize}
  \begin{footnotesize}
    \begin{columns}[c]
      \column{6cm}
      On the top of the Crumpetty Tree$\to$\\
      The Quangle Wangle sat,$\to$\\
      But his face you could not see,$\to$\\
      On account of his Beaver Hat.$\to$\\

      \column{6cm}

      (0, On the top of the Crumpetty Tree)\\
      (33, The Quangle Wangle sat,)\\
      (57, But his face you could not see,)\\
      (89, On account of his Beaver Hat.)\\
    \end{columns}
  \end{footnotesize}

\vspace{20pt}

\item \textbf{Custom \texttt{InputFormat} implementations may
    override the split size}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{The relationship between an \texttt{InputSplit} and an HDFS block}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
  \includegraphics[scale=0.4]{./Figures/split_block}
\end{center}
}

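%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Choosing an \texttt{InputFormat} (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
A driver-side sketch with the new API (\texttt{TextInputFormat} is
also the default); the input path is a placeholder, and the split-size
property is the one named on the previous slide:
{\tiny
\begin{verbatim}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Inside main(String[] args) throws Exception:
Job job = new Job(new Configuration());
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/data/links")); // placeholder
// Ask for splits of at least one 64 MB block
job.getConfiguration().setLong("mapred.min.split.size",
                               64L * 1024 * 1024);
\end{verbatim}
}
\end{frame}
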
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Record Readers}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Each \texttt{InputFormat} provides its own \texttt{RecordReader} implementation}

\vspace{20pt}

\item \textbf{\texttt{LineRecordReader}}
  \begin{itemize}
  \item Reads a line from a text file
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{KeyValueRecordReader}}
  \begin{itemize}
  \item Used by \texttt{KeyValueTextInputFormat}
  \end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Sending Data to Reducers}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
{\color{red}
\begin{itemize}
\item \textbf{The map function receives a \texttt{Context} object}
  \begin{itemize}
  \item \texttt{Context.write()} receives key-value elements
  \end{itemize}

\vspace{20pt}

\item \textbf{Any (\texttt{WritableComparable}, \texttt{Writable}) pair can be
    used}

\vspace{20pt}

\item \textbf{By default, the mapper output type is assumed to be the same
    as the reducer output type}

\end{itemize}
}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{WritableComparator}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Compares \texttt{WritableComparable} data}
  \begin{itemize}
  \item Will call the \texttt{WritableComparable.compare()} method
  \item Can provide a fast path for serialized data
  \end{itemize}

\vspace{40pt}

\item \textbf{Configured through:
    \texttt{JobConf.setOutputValueGroupingComparator()}}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Partitioner}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{\texttt{int getPartition(key, value, numPartitions)}}
  \begin{itemize}
  \item Outputs the partition number for a given key
  \item One partition == all values sent to a single reduce task
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{HashPartitioner} used by default}
  \begin{itemize}
  \item Uses \texttt{key.hashCode()} to return the partition number
  \end{itemize}

\vspace{20pt}

\item \textbf{\texttt{JobConf} used to set the \texttt{Partitioner} implementation}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{The Reducer}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
{\color{red}\item \textbf{\texttt{void reduce(K2 key, Iterable values, Context context)}}}

\vspace{20pt}

\item \textbf{Keys and values sent to one partition all go to the
    same reduce task}

\vspace{20pt}

\item \textbf{Calls are sorted by key}
  \begin{itemize}
  \item ``Early'' keys are reduced and output before ``late'' keys
  \end{itemize}
\end{itemize}
}

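%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{A custom \texttt{Partitioner} (sketch)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
An illustrative \texttt{Partitioner} (new API) that sends all URLs of
the same host to the same reduce task; \texttt{HashPartitioner}
remains the default:
{\tiny
\begin{verbatim}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class HostPartitioner extends Partitioner<Text, IntWritable> {
  @Override
  public int getPartition(Text url, IntWritable value, int numPartitions) {
    String host = url.toString().replaceFirst("https?://", "");
    int slash = host.indexOf('/');
    if (slash >= 0) host = host.substring(0, slash);
    // Mask the sign bit so the modulus is non-negative
    return (host.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}
// Driver side: job.setPartitionerClass(HostPartitioner.class);
\end{verbatim}
}
\end{frame}
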
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Writing the Output}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Analogous to \texttt{InputFormat}}

\vspace{20pt}

\item \textbf{\texttt{TextOutputFormat} writes ``key value <\texttt{newline}>'' strings to the output file}

\vspace{20pt}

\item \textbf{\texttt{SequenceFileOutputFormat} uses a binary format
    to pack key-value pairs}

\vspace{20pt}

\item \textbf{\texttt{NullOutputFormat} discards output}

\end{itemize}

}



-------------------------------------------------------------------------------- /disa/design_patterns.tex: --------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Algorithm Design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Developing algorithms involves:}
  \begin{itemize}
  \item Preparing the input data
  \item Implementing the mapper and the reducer
  \item Optionally, designing the combiner and the partitioner
  \end{itemize}

\vspace{20pt}

\item \textbf{How to recast existing algorithms in MapReduce?}
  \begin{itemize}
  \item It is not always obvious how to express algorithms
  \item Data structures play an important role
  \item Optimization is hard
  \item[$\to$] The designer needs to ``bend'' the framework
  \end{itemize}

\vspace{20pt}

\item \textbf{Learn by examples}
  \begin{itemize}
  \item ``Design patterns''
  \item The ``shuffle'' is perhaps the trickiest aspect
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Algorithm Design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Aspects that are {\color{red}not} under the control of the
    designer}
  \begin{itemize}
  \item \textit{Where} a mapper or reducer will run
  \item \textit{When} a mapper or reducer begins or finishes
  \item \textit{Which} input key-value pairs are processed by a
    specific mapper
  \item \textit{Which} intermediate key-value pairs are processed by a
    specific reducer
  \end{itemize}

\vspace{20pt}

\item \textbf{Aspects that can be controlled}
  \begin{itemize}
  \item Construct {\color{red}data structures as keys and values}
  \item Execute user-specified initialization and termination code
    for mappers and reducers
  \item Preserve state across multiple input and intermediate keys
    in mappers and reducers
  \item {\color{red}Control the sort order} of intermediate keys, and therefore
    the order in which a reducer will encounter particular keys
  \item {\color{red}Control the partitioning of the key space}, and therefore the
    set of keys that will be encountered by a particular reducer
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Algorithm Design}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{MapReduce algorithms can be complex}
  \begin{itemize}
  \item Many algorithms cannot be easily expressed as a single
    MapReduce job
  \item Decompose complex algorithms into a sequence of jobs
    \begin{itemize}
    \item Requires orchestrating data so that the output of one job
      becomes the input to the next
    \end{itemize}
  \item Iterative
algorithms require an {\color{red}external driver}
    to check for convergence
  \end{itemize}

\vspace{20pt}

\item \textbf{Basic design patterns\footnote{You will see them in action during the laboratory sessions.}}
  \begin{itemize}
  \item Local Aggregation
  \item Pairs and Stripes
  \item Order inversion
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Local Aggregation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Local Aggregation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{In the context of data-intensive distributed processing, the
    most important aspect of synchronization is the {\color{red}exchange of
      intermediate results}}
  \begin{itemize}
  \item This involves copying intermediate results from the
    processes that produced them to those that consume them
  \item In general, this involves \textbf{data transfers over the network}
  \item In Hadoop, disk I/O is also involved, as intermediate
    results are written to disk
  \end{itemize}

\vspace{20pt}

\item \textbf{Network and disk latencies are expensive}
  \begin{itemize}
  \item Reducing the amount of intermediate data translates into
    algorithmic efficiency
  \end{itemize}

\vspace{20pt}

\item \textbf{Combiners and preserving state across inputs}
  \begin{itemize}
  \item Reduce the number and size of key-value pairs to be shuffled
  \end{itemize}

\end{itemize}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Mapper Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{In-Mapper Combiners, a possible improvement over vanilla Combiners}
  \begin{itemize}
  \item Hadoop does not\footnote{Actually, combiners are not called if the number of map output records is less than a small threshold, {\it i.e.}, 4} guarantee that combiners will be executed
  \end{itemize}

\vspace{20pt}

\item \textbf{Use an associative array to accumulate intermediate
    results}
  \begin{itemize}
  \item The array is used to tally up term counts within a single ``document''
  \item The \texttt{Emit} method is called only after all \texttt{InputRecords} have been processed
  \end{itemize}

\vspace{20pt}

\item \textbf{Example (see next slide)}
  \begin{itemize}
  \item The code emits a key-value pair for each {\color{red}unique}
    term in the document
  \end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\State $H \gets$ new AssociativeArray
\ForAll{term $t \in$ line $l$}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Mapper Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{In-Mapper Combiners, a possible improvement over vanilla Combiners}
\begin{itemize}
\item Hadoop does not\footnote{Actually, combiners are not invoked if the number of map output records is below a small threshold ({\it i.e.}, 4).} guarantee that combiners will be executed
\end{itemize}

\vspace{20pt}

\item \textbf{Use an associative array to accumulate intermediate results}
\begin{itemize}
\item The array is used to tally up term counts within a single ``document''
\item The \texttt{Emit} method is called only after all the terms of the current input record have been processed
\end{itemize}

\vspace{20pt}

\item \textbf{Example (see next slide)}
\begin{itemize}
\item The code emits a key-value pair for each {\color{red}unique} term in the document
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\State $H \gets$ new AssociativeArray
\ForAll{term $t \in$ line $l$}
\State $H\{t\} \gets H\{t\} + 1$
\EndFor
\ForAll{term $t \in$ $H$}
\State $\textsc{Emit}(\textrm{term }t, \textrm{count }H\{t\})$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Taking the idea one step further}
\begin{itemize}
\item Exploit implementation details in Hadoop
\item A Java mapper object is created for each map task
\item JVM reuse must be enabled to preserve state across tasks
\end{itemize}

\vspace{40pt}

\item \textbf{Preserve state within and across calls to the \texttt{Map} method}
\begin{itemize}
\item \texttt{Initialize} method, used to create a persistent data structure shared across calls to \texttt{Map}
\item \texttt{Close} method, used to emit intermediate key-value pairs only when all the \texttt{Map} calls of the task are done
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Initialize}{}
\State $H \gets$ new AssociativeArray
\EndProcedure
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $t \in$ line $l$}
\State $H\{t\} \gets H\{t\} + 1$
\EndFor
\EndProcedure
\Procedure{Close}{}
\ForAll{term $t \in$ $H$}
\State $\textsc{Emit}(\textrm{term }t, \textrm{count }H\{t\})$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}

}
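%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A ``block and flush'' variant, sketched}
\begin{itemize}
\item The associative array cannot grow without bound; a possible refinement (illustrative, with an arbitrary size threshold) flushes it whenever it gets too large, a caveat motivated in the precautions ahead
\end{itemize}
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Initialize}{}
\State $H \gets$ new AssociativeArray
\EndProcedure
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $t \in$ line $l$}
\State $H\{t\} \gets H\{t\} + 1$
\EndFor
\If{$\textsc{Size}(H) \geq threshold$}
\State $\textsc{Flush}()$
\EndIf
\EndProcedure
\Procedure{Flush}{}
\ForAll{term $t \in$ $H$}
\State $\textsc{Emit}(\textrm{term }t, \textrm{count }H\{t\})$
\EndFor
\State $H \gets$ new AssociativeArray
\EndProcedure
\Procedure{Close}{}
\State $\textsc{Flush}()$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}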
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Summing up: a first ``design pattern'', \textit{in-memory combining}}
\begin{itemize}
\item Provides control over when local aggregation occurs
\item Designer can determine how exactly aggregation is done
\end{itemize}

\vspace{40pt}

\item \textbf{Efficiency vs. Combiners}
\begin{itemize}
\item There is no additional overhead due to the materialization of key-value pairs
\begin{itemize}
\item Unnecessary object creation and destruction (garbage collection)
\item Serialization and deserialization costs when memory is limited
\end{itemize}
\item With combiners, mappers still need to emit all key-value pairs; combiners ``only'' reduce network traffic
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{In-Memory Combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Precautions}
\begin{itemize}
\item In-memory combining breaks the functional programming paradigm due to {\bf state preservation}
\item Preserving state across multiple inputs implies that algorithm behavior might depend on execution order
\begin{itemize}
\item Works well with commutative / associative operations
\item Otherwise, order-dependent bugs are difficult to find
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{Memory capacity is limited}
\begin{itemize}
\item In-memory combining strictly depends on having sufficient memory to store intermediate results
\item A possible {\color{red}solution}: ``block'' and ``flush'', as in the sketch a few slides back
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Further Remarks}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The extent to which efficiency can be increased with local aggregation depends on the size of the intermediate key space}
\begin{itemize}
\item Opportunities for aggregation arise when multiple values are associated with the same keys
\end{itemize}

\vspace{40pt}

\item \textbf{Local aggregation is also effective in dealing with reduce stragglers}
\begin{itemize}
\item It reduces the number of values associated with frequently occurring keys
\end{itemize}
\end{itemize}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing the average, with in-mapper combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item Partial sums and counts are held in memory (across inputs)
\item Intermediate values are emitted only after the entire input split is processed
\item The output value is a pair; the matching reducer is sketched next
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Initialize}{}
\State $S \gets$ new AssociativeArray
\State $C \gets$ new AssociativeArray
\EndProcedure
\Procedure{Map}{term $t$, integer $r$}
\State $S\{t\} \gets S\{t\} + r$
\State $C\{t\} \gets C\{t\} + 1$
\EndProcedure
\Procedure{Close}{}
\ForAll{term $t \in$ $S$}
\State $\textsc{Emit}(\textrm{term }t, \textrm{pair }(S\{t\},C\{t\}))$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}

}
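%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing the average, with in-mapper combiners}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item For completeness, a sketch of the matching reducer: it sums the partial sums and counts, then divides
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Reduce}{term $t$, pairs $[(s_1,c_1),(s_2,c_2), \cdots ]$}
\State $s \gets 0$
\State $c \gets 0$
\ForAll{pair $(s_i,c_i) \in \textrm{pairs }[(s_1,c_1),(s_2,c_2), \cdots ]$}
\State $s \gets s + s_i$
\State $c \gets c + c_i$
\EndFor
\State $\textsc{Emit}(\textrm{term }t, \textrm{average }s/c)$
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}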
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Pairs and Stripes}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Pairs and Stripes}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A common approach in MapReduce: build {\color{red}complex} keys}
\begin{itemize}
\item Use the framework to group data together
\end{itemize}

\vspace{20pt}

\item \textbf{Two basic techniques:}
\begin{itemize}
\item \textit{Pairs}: similar to the example on the average
\item \textit{Stripes}: uses in-memory data structures in the mapper
\end{itemize}

\vspace{20pt}

\item \textbf{Next, we focus on a particular problem that benefits from these two methods}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Problem statement}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The problem: building word co-occurrence matrices for large corpora} (a toy example follows shortly)
\begin{itemize}
\item The co-occurrence matrix of a corpus is a square $n \times n$ matrix, $M$
\item $n$ is the number of unique words (\textit{i.e.}, the vocabulary size)
\item A cell $m_{ij}$ contains the number of times the word $w_i$ co-occurs with word $w_j$ \textit{within a specific context}
\item Context: a sentence, a paragraph, a document, or a window of $m$ words
\item NOTE: the matrix may be symmetric in some cases
\end{itemize}

\vspace{20pt}

\item \textbf{Motivation}
\begin{itemize}
\item This problem is a basic building block for more complex operations
\item {\color{red}Estimating the distribution of discrete joint events from a large number of observations}
\item Similar problem in other domains:
\begin{itemize}
\item Customers who buy \textit{this} tend to also buy \textit{that}
\end{itemize}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Observations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Space requirements}
\begin{itemize}
\item Clearly, the space requirement is $O(n^2)$, where $n$ is the size of the vocabulary
\item For real-world (English) corpora $n$ can be hundreds of thousands of words, or even billions of words in some specific cases
\end{itemize}

\vspace{20pt}

\item \textbf{So what's the problem?}
\begin{itemize}
\item If the matrix fits in the memory of a single machine, then any naive implementation will do
\item Instead, if the matrix is bigger than the available memory, then {\color{red}paging} kicks in, and any naive implementation breaks
\end{itemize}

\vspace{20pt}

\item \textbf{Compression}
\begin{itemize}
\item Such techniques can help in solving the problem on a single machine
\item However, there are scalability problems
\end{itemize}
\end{itemize}
}
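%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{A toy example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A worked micro-example} (illustrative; assume the context is a window of one word on each side)
\begin{itemize}
\item Corpus: the single line ``a b c b''
\item Neighbors: a $\to$ (b); b $\to$ (a, c); c $\to$ (b, b); b $\to$ (c)
\item Co-occurrence counts: $m_{ab} = m_{ba} = 1$, $m_{bc} = m_{cb} = 2$
\end{itemize}

\vspace{20pt}

\item \textbf{The same information, two encodings}
\begin{itemize}
\item As \textit{pairs}: $((a,b),1)$, $((b,a),1)$, $((b,c),2)$, $((c,b),2)$
\item As \textit{stripes}: $a \to \{b:1\}$, $b \to \{a:1, c:2\}$, $c \to \{b:2\}$
\end{itemize}
\end{itemize}
}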
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Pairs approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.36]{./Figures/pairs}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Pairs approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Input to the problem}
\begin{itemize}
\item Key-value pairs in the form of an \texttt{offset} and a \texttt{line}
\end{itemize}

\vspace{20pt}

\item \textbf{The mapper:} (a refinement with in-mapper combining is sketched after the base algorithm)
\begin{itemize}
\item Processes each input document
\item Emits key-value pairs with:
\begin{itemize}
\item Each co-occurring word {\color{red}pair} as the key
\item The integer one (the count) as the value
\end{itemize}
\item This is done with two nested loops:
\begin{itemize}
\item The outer loop iterates over all words
\item The inner loop iterates over all neighbors
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{The reducer:}
\begin{itemize}
\item Receives {\color{red}pairs} related to co-occurring words
\begin{itemize}
\item Identical pairs reach the same reducer with the default partitioner; {\color{red}\textbf{modifying the partitioner}} becomes necessary only later, when pairs sharing the same left word must meet at one reducer
\end{itemize}
\item Computes an absolute count of the joint event
\item Emits the pair and the count as the final key-value output
\begin{itemize}
\item Basically, reducers emit the cells of the output matrix
\end{itemize}
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Pairs approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $w \in$ line $l$}
\ForAll{term $u \in \textsc{Neighbors}(w)$}
\State $\textsc{Emit } (\textrm{pair }(w,u), \textrm{count }1)$
\EndFor
\EndFor
\EndProcedure
\EndFunction

\Function{Reducer}{}
\Procedure{Reduce}{pair $p$, counts $[c_1,c_2, \cdots ]$}
\State $s \gets 0$
\ForAll{count $c \in \textrm{counts }[c_1,c_2, \cdots ]$}
\State $s \gets s + c$
\EndFor
\State $\textsc{Emit } (\textrm{pair }p, \textrm{count }s)$
\EndProcedure
\EndFunction

\end{algorithmic}
\end{algorithm}
}
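%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Pairs approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A possible refinement: pairs with in-mapper combining} (a sketch; as the upcoming comparison notes, the gain is limited because the same pair rarely repeats within a single input split)
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Initialize}{}
\State $H \gets$ new AssociativeArray
\EndProcedure
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $w \in$ line $l$}
\ForAll{term $u \in \textsc{Neighbors}(w)$}
\State $H\{(w,u)\} \gets H\{(w,u)\} + 1$
\EndFor
\EndFor
\EndProcedure
\Procedure{Close}{}
\ForAll{pair $p \in$ $H$}
\State $\textsc{Emit}(\textrm{pair }p, \textrm{count }H\{p\})$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}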
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Stripes approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\includegraphics[scale=0.36]{./Figures/stripes}
\end{center}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Stripes approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Input to the problem}
\begin{itemize}
\item Key-value pairs in the form of an \texttt{offset} and a \texttt{line}
\end{itemize}

\vspace{20pt}

\item \textbf{The mapper:}
\begin{itemize}
\item The same two nested loops as before
\item Co-occurrence information is first stored in an associative array
\item Emits key-value pairs with {\color{red}words} as keys and the corresponding arrays as values
\end{itemize}

\vspace{20pt}

\item \textbf{The reducer:}
\begin{itemize}
\item Receives all associative arrays related to the same word
\item Performs an element-wise sum of all associative arrays with the same key (the \textsc{Sum} helper is spelled out right after the algorithm)
\item Emits key-value output in the form (word, associative array)
\begin{itemize}
\item Basically, reducers emit \textbf{rows} of the co-occurrence matrix
\end{itemize}
\end{itemize}
\end{itemize}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Stripes approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $w \in$ line $l$}
\State $H \gets$ new AssociativeArray
\ForAll{term $u \in \textsc{Neighbors}(w)$}
\State $H\{u\} \gets H\{u\}+1$
\EndFor
\State $\textsc{Emit } (\textrm{term }w, \textrm{Stripe }H)$
\EndFor
\EndProcedure
\EndFunction

\Function{Reducer}{}
\Procedure{Reduce}{term $w$, Stripes $[H_1,H_2,H_3 \cdots ]$}
\State $H_f \gets$ new AssociativeArray
\ForAll{Stripe $H \in \textrm{Stripes }[H_1,H_2,H_3 \cdots ]$}
\State $\textsc{Sum}(H_f,H)$
\EndFor
\State $\textsc{Emit } (\textrm{term }w, \textrm{Stripe }H_f)$
\EndProcedure
\EndFunction

\end{algorithmic}
\end{algorithm}

}
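%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Word co-occurrence: the Stripes approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The \textsc{Sum} helper, spelled out} (a minimal sketch: it merges stripe $H$ into the accumulator $H_f$, entry by entry)
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Reducer}{}
\Procedure{Sum}{Stripe $H_f$, Stripe $H$}
\ForAll{term $u \in$ $H$}
\State $H_f\{u\} \gets H_f\{u\} + H\{u\}$
\EndFor
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}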
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Pairs and Stripes, a comparison}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The pairs approach}
\begin{itemize}
\item Generates a large number of key-value pairs
\begin{itemize}
\item In particular, intermediate ones, that fly over the network
\end{itemize}
\item The benefit from combiners is limited, as it is less likely for a mapper to process multiple occurrences of the same word pair
\item Does not suffer from memory paging problems
\end{itemize}

\vspace{20pt}

\item \textbf{The stripes approach}
\begin{itemize}
\item More compact
\item Generates fewer and shorter intermediate keys
\begin{itemize}
\item The framework has less sorting to do
\end{itemize}
\item The values are more complex and have serialization / deserialization overhead
\item Greatly benefits from combiners, as the key space is the vocabulary
\item Suffers from memory paging problems, if not properly engineered
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Order Inversion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{``Relative'' co-occurrence matrix construction}
\begin{itemize}
\item Similar problem as before, same matrix
\item Instead of absolute counts, we take into consideration the fact that some words appear more frequently than others
\begin{itemize}
\item Word $w_i$ may co-occur frequently with word $w_j$ simply because one of the two is very common
\end{itemize}
\item We need to convert absolute counts to relative frequencies $f(w_j | w_i)$
\begin{itemize}
\item What proportion of the time does $w_j$ appear in the context of $w_i$?
\end{itemize}
\end{itemize}

\vspace{20pt}

\item \textbf{Formally, we compute:}
$$ f(w_j | w_i) = \frac{N(w_i,w_j)}{\sum_{w'} N(w_i, w')}$$
\begin{itemize}
\item $N(\cdot,\cdot)$ is the number of times a co-occurring word pair is observed
\item The denominator is called the marginal
\end{itemize}

\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The stripes approach} (a worked example follows)
\begin{itemize}
\item In the reducer, the counts of all words that co-occur with the conditioning variable ($w_i$) are available in the associative array
\item Hence, the sum of all those counts gives the marginal
\item Then we divide the joint counts by the marginal and we're done
\end{itemize}

\vspace{40pt}

\item \textbf{The pairs approach}
\begin{itemize}
\item The reducer receives the pair $(w_i,w_j)$ and the count
\item From this information alone \textbf{it is not possible} to compute $f(w_j | w_i)$
\item Fortunately, like the mapper, the reducer can also {\color{red}preserve state} across multiple keys
\begin{itemize}
\item We can buffer in memory all the words that co-occur with $w_i$ and their counts
\item This is basically building the associative array of the stripes method
\end{itemize}
\end{itemize}
\end{itemize}
}
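%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{A worked example} (with made-up counts)
\begin{itemize}
\item Suppose the only pairs with left word ``dog'' are $N(\textrm{dog}, \textrm{aardvark}) = 2$ and $N(\textrm{dog}, \textrm{zebra}) = 3$
\item The marginal is $\sum_{w'} N(\textrm{dog}, w') = 2 + 3 = 5$
\item Hence $f(\textrm{aardvark} | \textrm{dog}) = 2/5 = 0.4$ and $f(\textrm{zebra} | \textrm{dog}) = 3/5 = 0.6$
\item As expected for a conditional distribution, $f(\cdot | \textrm{dog})$ sums to one
\end{itemize}
\end{itemize}
}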
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies: a basic approach}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{We must define the sort order of the pair}
\begin{itemize}
\item In this way, the keys are first sorted by the left word, and then by the right word (in the pair)
\item Hence, we can detect when all pairs associated with the word we are conditioning on ($w_i$) have been seen
\item At this point, we can use the in-memory buffer, compute the relative frequencies and emit
\end{itemize}

\vspace{20pt}

\item \textbf{We must define an appropriate partitioner}
\begin{itemize}
\item The default partitioner is based on the hash value of the intermediate key, modulo the number of reducers
\item For a complex key, the {\bf raw byte representation} is used to compute the hash value
\begin{itemize}
\item Hence, there is no guarantee that the pair (dog, aardvark) and (dog, zebra) are sent to the same reducer
\end{itemize}
\item What we want is that all pairs with the same left word are sent to the same reducer
\end{itemize}

\vspace{20pt}

\item \textbf{Limitations of this approach}
\begin{itemize}
\item Essentially, we reproduce the stripes method in the reducer and we need to use a custom partitioner
\item This algorithm would work, but it presents the same memory-bottleneck problem as the stripes method
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies: order inversion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{The key is to properly sequence the data presented to reducers}
\begin{itemize}
\item If it were possible to compute the marginal in the reducer before processing the joint counts, the reducer could simply divide the joint counts received from mappers by the marginal
\item The notion of ``before'' and ``after'' can be captured in the {\color{red}ordering of key-value pairs}
\item The programmer can define the sort order of keys so that data needed earlier is presented to the reducer before data that is needed later
\end{itemize}
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies: order inversion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Recall that mappers emit pairs of co-occurring words as keys}

\vspace{20pt}

\item \textbf{The mapper:}
\begin{itemize}
\item Additionally emits a ``special'' key of the form $(w_i,*)$
\item The value associated with the special key is one, representing the contribution of the word pair to the marginal
\item Using combiners, these partial marginal counts will be aggregated before being sent to the reducers
\end{itemize}

\vspace{20pt}

\item \textbf{The reducer:} (a full sketch closes this section)
\begin{itemize}
\item We must make sure that the special key-value pairs are processed {\color{red}before} any other key-value pairs whose left word is $w_i$
\item We also need to modify the partitioner as before, \textit{i.e.}, so that it takes into account only the left word
\end{itemize}
\end{itemize}
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies: order inversion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Memory requirements:}
\begin{itemize}
\item Minimal, because only the marginal (an integer) needs to be stored
\item No buffering of the individual co-occurring words
\item No scalability bottleneck
\end{itemize}

\vspace{20pt}

\item \textbf{Key ingredients for order inversion}
\begin{itemize}
\item Emit a special key-value pair to capture the marginal
\item Control the sort order of the intermediate key, so that the special key-value pair is processed first
\item Define a custom partitioner for routing intermediate key-value pairs
\item Preserve state across multiple keys in the reducer
\end{itemize}
\end{itemize}
}
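%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {\frametitle{Computing relative frequencies: order inversion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{itemize}
\item \textbf{Putting it all together: a possible sketch}\footnote{Illustrative pseudo-code, not prescriptive: it assumes the custom sort order places $(w,*)$ before any $(w,u)$, and the custom partitioner routes keys on the left word only.}
\end{itemize}

\begin{algorithm}[H]
\algrenewcommand\algorithmicfunction{\textbf{class}}
\algrenewcommand\algorithmicprocedure{\textbf{method}}

\begin{algorithmic}[1]
\Function{Mapper}{}
\Procedure{Map}{offset $a$, line $l$}
\ForAll{term $w \in$ line $l$}
\ForAll{term $u \in \textsc{Neighbors}(w)$}
\State $\textsc{Emit } (\textrm{pair }(w,u), \textrm{count }1)$
\State $\textsc{Emit } (\textrm{pair }(w,*), \textrm{count }1)$
\EndFor
\EndFor
\EndProcedure
\EndFunction

\Function{Reducer}{}
\Procedure{Initialize}{}
\State $m \gets 0$
\EndProcedure
\Procedure{Reduce}{pair $(w,u)$, counts $[c_1,c_2, \cdots ]$}
\State $s \gets 0$
\ForAll{count $c \in \textrm{counts }[c_1,c_2, \cdots ]$}
\State $s \gets s + c$
\EndFor
\If{$u = *$}
\State $m \gets s$
\Else
\State $\textsc{Emit } (\textrm{pair }(w,u), \textrm{frequency }s/m)$
\EndIf
\EndProcedure
\EndFunction
\end{algorithmic}
\end{algorithm}
}
--------------------------------------------------------------------------------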