├── .gitignore ├── AEA.tex ├── Alias Analysis.tex ├── AnotherLCM.tex ├── Available Expressions Analysis.tex ├── C++11MemoryModel.tex ├── CPP.tex ├── Concurrent.tex ├── Constant Propagation.tex ├── DCO.tex ├── DSL.tex ├── DataPrefetching.tex ├── Dependence Analysis.tex ├── Dynamic Code Optimization.tex ├── EarlyOpts.tex ├── Foundations of Data Flow Analysis.tex ├── Foundations-of-Dataflow.tex ├── Instruction Scheduling.tex ├── IntroToDFA.tex ├── Introduction To SSA.tex ├── KnowCPP.tex ├── LCM.tex ├── LICM.tex ├── LLVM.tex ├── LLVMInst.tex ├── LLVMProj.tex ├── LS.tex ├── Live Variabl Analysis.tex ├── LocalOptimizations.tex ├── Loop Invariant Computation and Code Motion.tex ├── MHO.tex ├── Makefile ├── Makefile.work ├── More Examples of Data Flow Analysis.tex ├── PA.tex ├── PGO.tex ├── PRE.tex ├── Parallelism and Dependence Theory.tex ├── RA.tex ├── README.md ├── Reaching Definitions.tex ├── RegAlloc.tex ├── Region-Based Analysis.tex ├── SR.tex ├── SSA-Style Optimizations.tex ├── SSA.tex ├── SoftwarePipe.tex ├── TLS.tex ├── images ├── 172.png ├── CDp.png ├── DFST.png ├── GraphEdges.png ├── dag.png ├── dag2.png ├── fgex.png ├── flowgraph.png ├── liveex.png ├── p1.png ├── p10.png ├── p100.png ├── p101.png ├── p102.pdf ├── p103.pdf ├── p104.png ├── p105.png ├── p106.png ├── p107.png ├── p108.png ├── p109.png ├── p11.png ├── p110.png ├── p111.png ├── p112.png ├── p113.png ├── p114.png ├── p115.jpg ├── p116.png ├── p117.png ├── p118.png ├── p119.jpg ├── p12.png ├── p120.png ├── p121.png ├── p122.jpg ├── p123.jpg ├── p124.jpg ├── p125.jpg ├── p126.png ├── p127.jpg ├── p128.jpg ├── p129.png ├── p13.png ├── p130.png ├── p131.png ├── p132.png ├── p133.png ├── p134.png ├── p135.png ├── p136.png ├── p137.png ├── p138.png ├── p139.png ├── p14.png ├── p140.png ├── p141.png ├── p142.png ├── p143.png ├── p144.png ├── p145.png ├── p146.png ├── p147.png ├── p148.png ├── p149.png ├── p15.png ├── p150.png ├── p151.png ├── p152.png ├── p153.png ├── p154.png ├── p155.png ├── p156.png ├── 
p157.png ├── p158.png ├── p159.png ├── p16.png ├── p160.png ├── p161.png ├── p162.png ├── p163.png ├── p164.png ├── p165.png ├── p166.png ├── p167.png ├── p168.png ├── p169.png ├── p17.png ├── p170.png ├── p171.png ├── p172.png ├── p173.png ├── p174.png ├── p175.png ├── p176.png ├── p177.png ├── p178.png ├── p179.png ├── p18.png ├── p180.png ├── p181.png ├── p182.png ├── p183.png ├── p184.png ├── p185.png ├── p186.png ├── p187.png ├── p188.png ├── p189.png ├── p19.png ├── p190.png ├── p191.png ├── p192.png ├── p193.png ├── p194.png ├── p195.png ├── p196.png ├── p197.png ├── p198.png ├── p199.png ├── p2.png ├── p20.png ├── p200.png ├── p201.png ├── p202.png ├── p203.png ├── p204.png ├── p205.png ├── p206.png ├── p207.png ├── p208.png ├── p209.png ├── p21.png ├── p210.png ├── p211.png ├── p212.png ├── p213.png ├── p214.png ├── p215.pdf ├── p215.png ├── p216.png ├── p216.svg ├── p217.png ├── p218.png ├── p219.jpg ├── p22.png ├── p220.jpg ├── p221.png ├── p222.png ├── p223.png ├── p224.png ├── p225.png ├── p226.png ├── p227.png ├── p228.png ├── p229.png ├── p23.png ├── p230.png ├── p231.png ├── p232.png ├── p233.png ├── p234.png ├── p235.png ├── p236.png ├── p237.png ├── p238.png ├── p239.png ├── p24.png ├── p240.png ├── p241.png ├── p242.png ├── p243.png ├── p244.png ├── p245.png ├── p246.png ├── p247.png ├── p248.png ├── p249.png ├── p25.png ├── p250.png ├── p251.png ├── p252.png ├── p253.png ├── p254.png ├── p255.png ├── p256.png ├── p257.png ├── p258.png ├── p259.png ├── p26.png ├── p260.png ├── p261.png ├── p27.png ├── p28.png ├── p29.png ├── p3.png ├── p30.png ├── p31.png ├── p32.png ├── p33.png ├── p34.png ├── p35.png ├── p36.png ├── p37.png ├── p38.png ├── p39.png ├── p4.png ├── p40.png ├── p41.png ├── p42.png ├── p43.png ├── p44.png ├── p45.png ├── p46.png ├── p47.pdf ├── p48.pdf ├── p49.pdf ├── p5.png ├── p50.pdf ├── p51.pdf ├── p53.pdf ├── p54.png ├── p55.pdf ├── p55.png ├── p56.pdf ├── p57.pdf ├── p58.pdf ├── p59.png ├── p6.png ├── p60.png ├── p61.png ├── 
p62.png ├── p63.jpg ├── p64.png ├── p65.png ├── p66.png ├── p67.png ├── p68.png ├── p69.png ├── p7.png ├── p70.png ├── p71.png ├── p72.png ├── p73.png ├── p74.pdf ├── p75.pdf ├── p76.pdf ├── p77.png ├── p78.pdf ├── p79.pdf ├── p8.png ├── p80.png ├── p81.png ├── p82.png ├── p83.png ├── p84.pdf ├── p85.pdf ├── p86.pdf ├── p87.pdf ├── p88.pdf ├── p89.png ├── p9.png ├── p90.png ├── p91.png ├── p92.jpg ├── p93.png ├── p94.png ├── p95.png ├── p96.png ├── p97.png ├── p98.png ├── p99.pdf ├── postorder.pdf ├── rdex1.jpg ├── rdex2.jpg ├── rdex3.jpg ├── ssaexm1.drawio ├── ssaexm1.drawio.pdf ├── ssaexm1.png ├── ssaexm2.drawio ├── ssaexm2.drawio.pdf ├── t2.png ├── t3.png ├── t4.png ├── test1.png ├── test2.png ├── test3.png ├── test4.png └── vne.jpg ├── main.bib ├── main.pdf ├── main.tex ├── template.tex └── test ├── link1.cc ├── link2.cc └── log.cc /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | 3 | 4 | ## Tex gitignore from https://github.com/github/gitignore/blob/master/TeX.gitignore 5 | 6 | ## Core latex/pdflatex auxiliary files: 7 | *.aux 8 | *.lof 9 | *.log 10 | *.lot 11 | *.fls 12 | *.out 13 | *.toc 14 | *.fmt 15 | *.fot 16 | *.cb 17 | *.cb2 18 | .*.lb 19 | 20 | ## Intermediate documents: 21 | *.dvi 22 | *.xdv 23 | *-converted-to.* 24 | # these rules might exclude image files for figures etc. 
25 | # *.ps 26 | # *.eps 27 | # *.pdf 28 | 29 | ## Generated if empty string is given at "Please type another file name for output:" 30 | .pdf 31 | 32 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 33 | *.bbl 34 | *.bcf 35 | *.blg 36 | *-blx.aux 37 | *-blx.bib 38 | *.run.xml 39 | 40 | ## Build tool auxiliary files: 41 | *.fdb_latexmk 42 | *.synctex 43 | *.synctex(busy) 44 | *.synctex.gz 45 | *.synctex.gz(busy) 46 | *.pdfsync 47 | 48 | ## Build tool directories for auxiliary files 49 | # latexrun 50 | latex.out/ 51 | 52 | ## Auxiliary and intermediate files from other packages: 53 | # algorithms 54 | *.alg 55 | *.loa 56 | 57 | # achemso 58 | acs-*.bib 59 | 60 | # amsthm 61 | *.thm 62 | 63 | # beamer 64 | *.nav 65 | *.pre 66 | *.snm 67 | *.vrb 68 | 69 | # changes 70 | *.soc 71 | 72 | # comment 73 | *.cut 74 | 75 | # cprotect 76 | *.cpt 77 | 78 | # elsarticle (documentclass of Elsevier journals) 79 | *.spl 80 | 81 | # endnotes 82 | *.ent 83 | 84 | # fixme 85 | *.lox 86 | 87 | # feynmf/feynmp 88 | *.mf 89 | *.mp 90 | *.t[1-9] 91 | *.t[1-9][0-9] 92 | *.tfm 93 | 94 | #(r)(e)ledmac/(r)(e)ledpar 95 | *.end 96 | *.?end 97 | *.[1-9] 98 | *.[1-9][0-9] 99 | *.[1-9][0-9][0-9] 100 | *.[1-9]R 101 | *.[1-9][0-9]R 102 | *.[1-9][0-9][0-9]R 103 | *.eledsec[1-9] 104 | *.eledsec[1-9]R 105 | *.eledsec[1-9][0-9] 106 | *.eledsec[1-9][0-9]R 107 | *.eledsec[1-9][0-9][0-9] 108 | *.eledsec[1-9][0-9][0-9]R 109 | 110 | # glossaries 111 | *.acn 112 | *.acr 113 | *.glg 114 | *.glo 115 | *.gls 116 | *.glsdefs 117 | *.lzo 118 | *.lzs 119 | 120 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 
121 | # *.ist 122 | 123 | # gnuplottex 124 | *-gnuplottex-* 125 | 126 | # gregoriotex 127 | *.gaux 128 | *.glog 129 | *.gtex 130 | 131 | # htlatex 132 | *.4ct 133 | *.4tc 134 | *.idv 135 | *.lg 136 | *.trc 137 | *.xref 138 | 139 | # hyperref 140 | *.brf 141 | 142 | # knitr 143 | *-concordance.tex 144 | # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files 145 | # *.tikz 146 | *-tikzDictionary 147 | 148 | # listings 149 | *.lol 150 | 151 | # luatexja-ruby 152 | *.ltjruby 153 | 154 | # makeidx 155 | *.idx 156 | *.ilg 157 | *.ind 158 | 159 | # minitoc 160 | *.maf 161 | *.mlf 162 | *.mlt 163 | *.mtc[0-9]* 164 | *.slf[0-9]* 165 | *.slt[0-9]* 166 | *.stc[0-9]* 167 | 168 | # minted 169 | _minted* 170 | *.pyg 171 | 172 | # morewrites 173 | *.mw 174 | 175 | # newpax 176 | *.newpax 177 | 178 | # nomencl 179 | *.nlg 180 | *.nlo 181 | *.nls 182 | 183 | # pax 184 | *.pax 185 | 186 | # pdfpcnotes 187 | *.pdfpc 188 | 189 | # sagetex 190 | *.sagetex.sage 191 | *.sagetex.py 192 | *.sagetex.scmd 193 | 194 | # scrwfile 195 | *.wrt 196 | 197 | # sympy 198 | *.sout 199 | *.sympy 200 | sympy-plots-for-*.tex/ 201 | 202 | # pdfcomment 203 | *.upa 204 | *.upb 205 | 206 | # pythontex 207 | *.pytxcode 208 | pythontex-files-*/ 209 | 210 | # tcolorbox 211 | *.listing 212 | 213 | # thmtools 214 | *.loe 215 | 216 | # TikZ & PGF 217 | *.dpth 218 | *.md5 219 | *.auxlock 220 | 221 | # todonotes 222 | *.tdo 223 | 224 | # vhistory 225 | *.hst 226 | *.ver 227 | 228 | # easy-todo 229 | *.lod 230 | 231 | # xcolor 232 | *.xcp 233 | 234 | # xmpincl 235 | *.xmpi 236 | 237 | # xindy 238 | *.xdy 239 | 240 | # xypic precompiled matrices and outlines 241 | *.xyc 242 | *.xyd 243 | 244 | # endfloat 245 | *.ttt 246 | *.fff 247 | 248 | # Latexian 249 | TSWLatexianTemp* 250 | 251 | ## Editors: 252 | # WinEdt 253 | *.bak 254 | *.sav 255 | 256 | # Texpad 257 | .texpadtmp 258 | 259 | # LyX 260 | *.lyx~ 261 | 262 | # Kile 263 | *.backup 264 | 265 | # gummi 266 | .*.swp 267 | 268 | 
# KBibTeX 269 | *~[0-9]* 270 | 271 | # TeXnicCenter 272 | *.tps 273 | 274 | # auto folder when using emacs and auctex 275 | ./auto/* 276 | *.el 277 | 278 | # expex forward references with \gathertags 279 | *-tags.tex 280 | 281 | # standalone packages 282 | *.sta 283 | 284 | # Makeindex log files 285 | *.lpz 286 | 287 | # xwatermark package 288 | *.xwm 289 | 290 | # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib 291 | # option is specified. Footnotes are the stored in a file with suffix Notes.bib. 292 | # Uncomment the next line to have this generated file ignored. 293 | #*Notes.bib 294 | slides/** 295 | *.pdf 296 | **.txt 297 | !images/** 298 | !main.pdf 299 | cmu15745/** 300 | !words.txt 301 | 302 | hw** 303 | **.zip 304 | **.html 305 | 306 | 307 | **.idea/** 308 | -------------------------------------------------------------------------------- /AEA.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/AEA.tex -------------------------------------------------------------------------------- /AnotherLCM.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{A Variation of Knoop, Ruthing, and Steffen’s Lazy Code Motion} 4 | \subsection{Where to Insert? } 5 | 6 | We want to insert the new computation where it is not partially available there. 7 | 8 | 9 | \begin{definition}{Anticipable(Very Busy) Expression} 10 | An expression e is anticipable at a program point p 11 | if e will be computed along every path from 12 | p to p$_{\mathrm{end}}$, and no variable in e is 13 | redefined until its computation. It is safe to move 14 | an expression to a basic block where 15 | that expression is anticipable. By "safe" we mean 16 | "performance safe", i.e., no extra computation 17 | will be performed. 
Notice that if an expression 18 | e is computed at a basic block where it is both available 19 | and anticipable, then that 20 | computation is clearly redundant. 21 | 22 | \begin{figure}[H] 23 | \centering 24 | \includegraphics[width=0.5\textwidth]{p89.png} 25 | \caption{For \texttt{b+c}, the {\color{green}green} blocks are anticipable points. } 26 | \label{fig:p89} 27 | \end{figure} 28 | \end{definition} 29 | 30 | 31 | 32 | The key to partial redundancy 33 | elimination is deciding where to add 34 | computations of an expression to 35 | change partial redundancies into full 36 | redundancies (which may then be 37 | optimized away). There are now two steps that we must 38 | perform: 39 | 40 | \begin{itemize} 41 | \item First, we find the earliest places in which 42 | we can move the computation of an 43 | expression without adding unnecessary 44 | computations to the CFG. This step is like 45 | pushing the computation of the 46 | expressions up. 47 | \item Second, we try to move these 48 | computations down, closer to the places 49 | where they are necessary, without adding 50 | redundancies to the CFG. This phase is like 51 | pulling these computations down the CFG, so that we can, 52 | for instance, reduce register 53 | pressure. 54 | \end{itemize} 55 | 56 | \begin{figure}[H] 57 | \centering 58 | \includegraphics[width=0.3\textwidth]{p90.png} 59 | \caption{Pushing up, Pulling down.} 60 | \label{fig:p90} 61 | \end{figure} 62 | 63 | \subsubsection{Earliest Placement} 64 | 65 | We must now find the earliest possible places where we 66 | can compute the target expressions. Earliest in the sense that p1 comes before p2 if p1 precedes 67 | p2 in any topological ordering of the CFG. 68 | 69 | \begin{figure}[H] 70 | \centering 71 | \includegraphics[width=0.6\textwidth]{p91.png} 72 | 73 | \label{fig:p91} 74 | \end{figure} 75 | 76 | 77 | For the {\color{red} First} part, we can move an expression e to 78 | an edge ij only if e is anticipable at the entrance 79 | of j. 
If the expression is available at the beginning of the edge, 80 | then we should not move it there. 81 | For the {\color{blue} Second} part, if an expression is anticipable at i, 82 | then we should not move it to ij, because we can move it to before i. 83 | On the other hand, if i kills the expression, then it cannot 84 | be computed before i. 85 | 86 | 87 | \begin{figure}[H] 88 | \centering 89 | \includegraphics[width=0.8\textwidth]{p92.jpg} 90 | \caption{An example for calculating EARLIEST.} 91 | \label{fig:p92} 92 | \end{figure} 93 | 94 | \subsubsection{Latest Placement} 95 | 96 | 97 | $$ 98 | \begin{aligned} 99 | &\operatorname{IN}_{\text {LATER }}(j)=\bigcap_{i \in \operatorname{pred}(j)} \operatorname{LATER}(i, j) \\ 100 | &\operatorname{LATER}(i, j)=\operatorname{EARLIEST}(i, j) \cup\left(\operatorname{IN}_{\text {LATER }}(i) \cap \overline{\operatorname{EXPR}(i)}\right). \\ 101 | & 102 | \end{aligned} 103 | $$ 104 | 105 | 106 | LATER(i,j) is true if we can move the computation of the 107 | expression down the edge ij. An expression e is in 108 | EXPR(i) if e is computed at i. This predicate is also 109 | computed for edges, although we 110 | have IN$_\mathrm{LATER}$ being computed for nodes. 111 | 112 | 113 | 114 | % $$ 115 | % \operatorname{LATER}(i, j)=\operatorname{EARLIEST}(i, j) \cup\left(\operatorname{IN}_{\text {LATER }}(i) \cap \overline{\operatorname{EXPR}(i)}\right). 116 | % $$ 117 | 118 | For \( \mathrm{LATER}(i,j) \): If EARLIEST(i, j) is true, 119 | then \( \mathrm{LATER}(i,j) \) is also true, as we 120 | can move the computation of e to edge ij without 121 | causing redundant computations. If \( \mathrm{IN}_{\mathrm{LATER}}(i) \) is true, 122 | and the expression is not used at i, 123 | then LATER(i,j) is true. If the expression is used at i, then there is no point in 124 | computing it at ij, because it will be recomputed at i 125 | anyway. 126 | 127 | 128 | For \( \mathrm{IN}_{\mathrm{LATER}}(j) \), it is a condition that 129 | we propagate down. 
If all the predecessors of a 130 | node j accept the 131 | expression as nonredundant, then we can 132 | compute the expression 133 | down on j. 134 | 135 | \begin{figure}[H] 136 | \centering 137 | \begin{subfigure}{0.3\textwidth} 138 | \centering 139 | \includegraphics[width=\textwidth]{p94.png} 140 | \caption{For \texttt{b+c}, two 141 | earliest placement 142 | points are colored in red.} 143 | \label{fig:p94} 144 | \end{subfigure} 145 | \begin{subfigure}{0.4\textwidth} 146 | \centering 147 | \includegraphics[width=\textwidth]{p95.png} 148 | \caption{For \texttt{b+c}, latest placement edges and blocks.} 149 | \label{fig:p95} 150 | \end{subfigure} 151 | 152 | \caption{Earliest and latest placement points for \texttt{b+c}.} 153 | \label{fig:p74-76} 154 | \end{figure} 155 | 156 | 157 | 158 | \subsubsection{Where to Insert Computations?} 159 | 160 | We insert the new computations at the latest possible 161 | place. That is: 162 | 163 | \begin{figure}[H] 164 | \centering 165 | \includegraphics[width=0.6\textwidth]{p96.png} 166 | 167 | \label{fig:p96} 168 | \end{figure} 169 | 170 | 171 | There are different insertion points, depending on the 172 | structure of the CFG, if x $\in$ INSERT(i, j): 173 | 174 | \begin{figure}[H] 175 | \centering 176 | \includegraphics[width=0.6\textwidth]{p97.png} 177 | \caption{Different insertion points} 178 | \label{fig:p97} 179 | \end{figure} 180 | \subsection{Modify CFG} 181 | 182 | Rename all computations of the expression. 183 | 184 | \begin{figure}[H] 185 | \centering 186 | \includegraphics[width=0.4\textwidth]{p100.png} 187 | \caption{For \texttt{b+c}, the result of modifying the CFG.} 188 | \label{fig:p100} 189 | \end{figure} 190 | 191 | 192 | \subsection{Which Computations to Remove? } 193 | We remove computations that are already covered by 194 | the latest points, and that we cannot use later on. 
195 | 196 | \begin{figure}[H] 197 | \centering 198 | \includegraphics[width=0.6\textwidth]{p98.png} 199 | 200 | \label{fig:p98} 201 | \end{figure} 202 | 203 | 204 | 205 | 206 | For the {\color{red} First} part, of course, the expression 207 | must be used in the block, 208 | otherwise we would have 209 | nothing to delete. For the {\color{blue} Second} part, the expression must not be a 210 | computation that is necessary 211 | later on. 212 | 213 | 214 | \begin{figure}[H] 215 | \centering 216 | \includegraphics[width=0.4\textwidth]{p101.png} 217 | \caption{For \texttt{b+c}, the result of deleting the redundant computations of \texttt{b+c}.} 218 | \label{fig:p101} 219 | \end{figure} 220 | 221 | 222 | 223 | \subsection{A fully explained example} 224 | \includepdf[pages={1-}]{p99.pdf} 225 | -------------------------------------------------------------------------------- /Available Expressions Analysis.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Available Expressions Analysis} 4 | 5 | \subsection{Motivation} 6 | 7 | Programs may contain code whose result is needed, but in which some computation is simply a redundant 8 | repetition of earlier computation within the same program. The concept of expression availability is useful in dealing with this situation. 9 | 10 | 11 | \subsection{Background Knowledge} 12 | 13 | Any given program contains a finite number of expressions (i.e. computations which potentially 14 | produce values), so we may talk about the set of all expressions of a program. Consider the program in 15 | Listing \ref{lst:expression1}. 16 | 17 | 18 | 19 | 20 | \begin{lstlisting}[language=C,frame=single, caption=A simple example containing some expressions ,label = lst:expression1] 21 | int z = x * y; 22 | print s + t; 23 | int w = u / v; 24 | \end{lstlisting} 25 | 26 | 27 | This program contains the expressions \texttt{x*y,s+t,u/v}. 
28 | 29 | 30 | 31 | \subsection{Problem Formulation} 32 | 33 | 34 | Availability is a data-flow property of expressions: “Has the value of this expression already been computed?” 35 | At each instruction, each expression in the program is either available or unavailable. So each instruction (or node of the flowgraph) has 36 | an associated set of available expressions. 37 | 38 | 39 | 40 | \subsection{Semantic vs. Syntactic} 41 | 42 | An expression is \textit{semantically} available at a node n if its value gets computed 43 | (and not subsequently invalidated) along every execution sequence ending at n. 44 | 45 | \begin{figure}[!htb] 46 | \minipage{0.5\textwidth} 47 | \includegraphics[width=\linewidth]{p1.png} 48 | \caption{Available expression example}\label{fig:p1} 49 | \endminipage\hfill 50 | \minipage{0.5\textwidth} 51 | \includegraphics[width=\linewidth]{p2.png} 52 | \caption{Unavailable expression example}\label{fig:p2} 53 | \endminipage 54 | \end{figure} 55 | 56 | 57 | An expression is \textit{syntactically} available at a node n if its value gets computed 58 | (and not subsequently invalidated) along every path from the entry of the flowgraph to n. 59 | 60 | 61 | \begin{figure}[!htb] 62 | \minipage{0.5\textwidth} 63 | \includegraphics[width=\linewidth]{p4.png} 64 | \caption{x+y is semantically available}\label{fig:p4} 65 | \endminipage\hfill 66 | \minipage{0.4\textwidth} 67 | \includegraphics[width=\linewidth]{p3.png} 68 | \caption{x+y is syntactically unavailable}\label{fig:p3} 69 | \endminipage 70 | \end{figure} 71 | 72 | 73 | On the path in red from Figure \ref{fig:p3} through the flowgraph, \(x+y\) is only 74 | computed once, so \(x+y\) is syntactically unavailable at the last instruction. 75 | 76 | 77 | Whereas with live variable analysis we found safety in assuming that 78 | more variables were live, here we find safety in assuming that fewer 79 | expressions are available. 
This is because, if an expression is deemed to be available, we 80 | may do something dangerous (e.g. remove an instruction which recomputes its value). 81 | So sometimes safe means more, but sometimes it means less. 82 | 83 | \begin{figure}[H] 84 | \minipage{0.5\textwidth} 85 | \includegraphics[width=\linewidth]{p5.png} 86 | \caption{Semantic vs. syntactic}\label{fig:p5} 87 | \endminipage\hfill 88 | \minipage{0.5\textwidth} 89 | \includegraphics[width=\linewidth]{p6.png} 90 | \caption{Semantic vs. syntactic}\label{fig:p6} 91 | \endminipage 92 | \end{figure} 93 | 94 | 95 | \subsection{Summary} 96 | 97 | 98 | \begin{center} 99 | \begin{tabular}{|c|c|} 100 | \hline Direction & Forward \\ 101 | \hline Domain & Sets of expressions \\ 102 | \hline Meet operator & \( \cap \) \\ 103 | \hline Top(T) & Universal Set \\ 104 | \hline Bottom & $\emptyset$ \\ 105 | \hline Boundary condition & $\mathrm{OUT[ENTRY]} = \emptyset$ \\ 106 | \hline Initialization for internal nodes & $\mathrm{OUT[B]} = T$ \\ 107 | \hline Finite descending chain? & \checkmark \\ 108 | \hline Transfer function & $f_b(x) = \mathrm{Gen}_b \cup (x - \mathrm{Kill}_b)$ \\ 109 | \hline Monotone\&Distributive? & \checkmark \\ 110 | \hline $\mathrm{Kill}_b$ & all E such that block b defines a variable in E \\ 111 | \hline $\mathrm{Gen}_b$ & all E such that block b evaluates E and doesn’t later kill it \\ 112 | \hline 113 | \end{tabular} 114 | \end{center} -------------------------------------------------------------------------------- /CPP.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | \section{C++} 3 | 4 | 5 | \subsection{Why do we need C++?} 6 | 7 | \begin{question}{Why do we need C++?} 8 | \begin{itemize} 9 | \item Fast programs. 10 | \item Take control of hardware. 
11 | \end{itemize} 12 | \end{question} 13 | 14 | 15 | \subsection{How the C++ Compiler Works?} 16 | \begin{question}{How the C++ Compiler Works?} 17 | It takes .cpp files and converts them into an intermediate format 18 | called an object file. 19 | 20 | Compilation stages: 21 | \begin{itemize} 22 | \item 1. Preprocessing stage. (output file extension: .i) 23 | \item 2. Compiling the source code. (output file extension: .s) 24 | \item 3. Assembling the compiled file. (output file extension: .o) 25 | \end{itemize} 26 | 27 | 28 | \begin{figure}[H] 29 | \centering 30 | \includegraphics[width=0.5\textwidth]{p241.png} 31 | \caption{C++ compilation} 32 | \label{fig:p241} 33 | \end{figure} 34 | 35 | \end{question} 36 | 37 | 38 | 39 | 40 | \subsection{How the C++ Linker Works?} 41 | The code shown in Listing \ref{CPP:1} is \verb|main.cc|. When we enter the command \verb|clang++ main.cc|, we get the error {\color{red}\verb|undefined reference to `log(char const*)'|}. 42 | Why do we get a link error even if we don't use \verb|multiple| at all? This is because \verb|multiple| can be used in files other than this one, so the linker must still resolve the call to \verb|log| inside it. 43 | We can make the \verb|multiple| function visible only within this file by adding \verb|static| to its declaration. 44 | 45 | % becuase multiple can be used in other files other than this file. We can make `multiple` function only shown to this function by adding `static` to its declaration. 
46 | 47 | \begin{lstlisting}[label={CPP:1},caption={main.cc}] 48 | #include <iostream> 49 | using namespace std; 50 | void log(const char* msg); 51 | int multiple(int a, int b){ 52 | log("multiple"); 53 | return a*b; 54 | } 55 | int main(){ 56 | 57 | } 58 | \end{lstlisting} 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /Constant Propagation.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Constant Propagation/Folding\cite{Microsof98:online}} 4 | 5 | 6 | \subsection{Problem Definition } 7 | 8 | 9 | Given a program, we would like to know for every point in the program (after every 10 | statement), which variables have constant values, and which do not. We say that a 11 | variable has a constant value at a certain point if every execution that reaches that point 12 | gives that variable the same constant value. 13 | 14 | 15 | \subsection{Meet Operator} 16 | 17 | \begin{figure}[H] 18 | \centering 19 | \includegraphics[width=0.6\textwidth]{p219.jpg} 20 | \caption{Meet operator for Constant Propagation.} 21 | \label{fig:p219} 22 | \end{figure} 23 | 24 | 25 | \subsection{Transfer Function} 26 | 27 | 28 | Let an assignment be of the form $x_3 = x_1 + x_2$, where $+$ represents a generic operator. 29 | 30 | 31 | \begin{figure}[H] 32 | \centering 33 | \includegraphics[width=0.6\textwidth]{p220.jpg} 34 | \caption{Transfer Function.} 35 | \label{fig:p220} 36 | \end{figure} 37 | 38 | \subsection{Summary} 39 | Constant Propagation is not distributive\footnote{See Figure \ref{fig:p222}}. 40 | The semi-lattice is shown in Figure \ref{fig:p221}. 
41 | 42 | 43 | 44 | \begin{figure}[H] 45 | \centering 46 | \includegraphics[width=0.6\textwidth]{p222.png} 47 | \caption{Constant Propagation is not Distributive.} 48 | \label{fig:p222} 49 | \end{figure} 50 | 51 | 52 | \begin{figure}[H] 53 | \centering 54 | \includegraphics[width=0.6\textwidth]{p221.png} 55 | \caption{Constant Propagation's Semi-lattice Diagram.} 56 | \label{fig:p221} 57 | \end{figure} 58 | 59 | 60 | \begin{center} 61 | \begin{tabular}{|c|c|} 62 | \hline Direction & Forward \\ 63 | \hline Domain & Numbers \\ 64 | \hline Top(T) & $\mathrm{UNDEF}$ \\ 65 | \hline Bottom & $\mathrm{NAC}$ \\ 66 | \hline Boundary condition & $\mathrm{OUT[ENTRY]} = \mathrm{UNDEF}$ \\ 67 | \hline Initialization for internal nodes & $\mathrm{OUT[B]} = \mathrm{UNDEF}$ \\ 68 | \hline Finite descending chain? & \checkmark \\ 69 | \hline Monotone? & \checkmark \\ 70 | \hline Distributive? & \text{\sffamily X} \\ 71 | \hline 72 | \end{tabular} 73 | \end{center} 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /DCO.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Dynamic Code Optimization} 4 | 5 | -------------------------------------------------------------------------------- /DataPrefetching.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Data Prefetching} 4 | \subsection{Compiler-Based Prefetching for Recursive Data Structures\cite{luk1996compiler}} 5 | 6 | Recursive Data Structures (RDSs) include familiar objects 7 | such as linked lists, trees, graphs, etc., where individual nodes 8 | are dynamically allocated from the heap, and nodes are linked 9 | together through pointers to form the overall structure. 
For 10 | our purposes, ``recursive data structures'' can be broadly interpreted to include most pointer-linked data structures (e.g., 11 | mutually-recursive data structures, or even a graph of heterogeneous objects). From a memory performance perspective, these 12 | pointer-based data structures are expected to be an important 13 | concern for the following reasons. For an application to suffer a large memory penalty due to data replacement misses, it 14 | typically must have a large data set relative to the cache size. 15 | Aside from multi-dimensional arrays, recursive data structures 16 | are one of the most common and convenient methods of building 17 | large data structures (e.g., B-trees in database applications, octrees in graphics applications, etc.). As we traverse a large RDS, 18 | we may potentially visit enough intervening nodes to displace a 19 | given node from the cache before it is revisited; hence temporal 20 | locality may be poor. Finally, in contrast with arrays, where 21 | consecutive elements are at contiguous addresses and therefore 22 | stride-one accesses can exploit long cache lines, there is little inherent spatial locality between consecutively-accessed nodes in 23 | an RDS since they are dynamically allocated from the heap and 24 | can have arbitrary addresses. Therefore, techniques for coping 25 | with the latency of accessing these pointer-based data structures 26 | are essential. 27 | 28 | 29 | 30 | \subsubsection{Challenges in Prefetching RDSs } 31 | 32 | 33 | Any software-controlled prefetching scheme can be viewed as having two major phases. First, an analysis phase predicts which dynamic memory references are likely to suffer cache misses, and 34 | hence should be prefetched. Second, a scheduling phase attempts 35 | to insert prefetches sufficiently far in advance such that latency is 36 | effectively hidden, while introducing minimal runtime overhead. 
37 | For array-based applications, the compiler can use locality analysis to predict which dynamic references to prefetch, and loop 38 | splitting and software pipelining to schedule prefetches. 39 | 40 | A fundamental difference between array references and pointer 41 | dereferences is the way addresses are generated. The address of 42 | an array reference \texttt{A[i]} can always be computed once a value of 43 | i is chosen. In contrast, the address of a pointer dereference \texttt{*p} 44 | is unknown unless the value stored in \texttt{p} is read. This difference 45 | makes both the analysis and scheduling phases significantly more 46 | challenging for RDSs than for arrays. 47 | 48 | 49 | \subsubsection{Analysis} 50 | To illustrate the difficulty of analyzing data locality in RDSs, 51 | consider the code in Figure \ref{fig:p247}(a), where we are traversing n different linked lists. In one extreme, the nodes may be entirely 52 | disjoint (as illustrated in Figure \ref{fig:p247}(b)), in which case we would 53 | want to prefetch every list node. Another possibility might be 54 | that each list shares a long common ``tail'' starting with the second list node (as illustrated in Figure \ref{fig:p247}(c)). In this latter case, 55 | there would be significant temporal locality (assuming the cache 56 | is large enough to contain the common tail), and ideally we would 57 | only want to prefetch the nodes in the common tail during the 58 | first list traversal (i.e. when \texttt{i=1}). Unfortunately, despite the 59 | significant progress that has been made recently in pointer analysis techniques for heap-allocated objects [6, 8, 10], compilers are 60 | still not sophisticated enough to differentiate these two cases automatically. In general, analyzing the addresses of heap-allocated 61 | objects is a very difficult problem for the compiler. 
62 | 63 | 64 | 65 | 66 | \begin{figure}[H] 67 | \centering 68 | \includegraphics[width=0.6\textwidth]{p247.png} 69 | \caption{Example of list traversals, both with and without temporal data locality. } 70 | \label{fig:p247} 71 | \end{figure} 72 | 73 | 74 | 75 | \subsubsection{Scheduling} 76 | 77 | Our ability to schedule prefetches for an RDS is also constrained 78 | by the fact that nodes are linked together through pointers. For 79 | example, consider the case shown in Figure \ref{fig:p248}(a), where assuming 80 | that three nodes' worth of computation is needed to hide the 81 | latency, we would like to initiate a prefetch for node $n_{i+3}$ while 82 | we are visiting node $n_i$. The problem is that to compute the 83 | address of node $n_{i+3}$, we must first dereference a pointer in node 84 | $n_{i+2}$, and to do that, we must first dereference a pointer in node 85 | $n_{i+1}$, etc. As a result, one cannot prefetch (or fetch) a future 86 | node until all nodes between it and the current node have been 87 | fetched. However, the very act of touching these intermediate 88 | nodes means that we cannot tolerate the latency of fetching more 89 | than one node ahead. For example, the prefetching code shown 90 | in Figure \ref{fig:p248}(b) will not hide any more latency than the code in 91 | Figure \ref{fig:p248}(c). In fact, the code in Figure \ref{fig:p248}(c) is likely to run faster 92 | since it has less instruction overhead. This example illustrates 93 | what we refer to as the pointer-chasing problem. 94 | 95 | Since scheduling RDS prefetches is such a difficult problem, we 96 | make it the primary focus of this paper. Improvements in analysis tend to reduce prefetching overhead by eliminating unnecessary prefetches. However, without sufficient scheduling techniques, there will be no upside to prefetching and hence reducing 97 | overhead will be irrelevant. 
Fortunately, as we discuss in the 98 | next subsection, there are techniques for scheduling prefetches 99 | that avoid the pointer-chasing problem. 100 | 101 | 102 | 103 | \begin{figure}[H] 104 | \centering 105 | \includegraphics[width=0.6\textwidth]{p248.png} 106 | \caption{Illustration of the pointer-chasing problem. } 107 | \label{fig:p248} 108 | \end{figure} 109 | 110 | 111 | \subsubsection{Greedy Prefetching} 112 | 113 | In a $k$-ary RDS, each node contains $k$ pointers to other nodes. 114 | Greedy prefetching exploits the fact that when $k > 1$, only one 115 | of these $k$ pointers can be immediately followed by control flow 116 | as the next node in the traversal. Hence the remaining $k - 1$ 117 | pointers serve as natural jump-pointers, and can be prefetched 118 | immediately upon first visiting a node. Although none of these 119 | jump-pointers may actually point to $n_{i+d}$, hopefully each of them 120 | points to $n_{i+d^{\prime}}$, for some $d^{\prime} > 0$. If $d^{\prime} < d$, then the latency may 121 | be partially hidden; if $d^{\prime} \geq d$, then we expect the latency to be 122 | fully hidden, provided that the node is not displaced from the 123 | cache before it is referenced (which may occur if $d^{\prime} \gg d$). 124 | 125 | To illustrate how greedy prefetching works, consider the preorder 126 | traversal of a binary tree (i.e. $k = 2$), where Figure \ref{fig:p249}(a) 127 | shows the code with greedy prefetching added. Assuming that 128 | the computation in \texttt{process()} takes half as long as the cache 129 | miss latency, we would want to prefetch two nodes ahead (i.e. 130 | $d = 2$) to fully hide the latency. Figure \ref{fig:p249}(b) shows the caching 131 | behavior of each node. We obviously suffer a full cache miss at 132 | the root node (node 1), since there was no opportunity to fetch 133 | it ahead of time. 
However, we would only suffer half of the miss 134 | penalty ($\frac{L}{2}$) when we visit node 2, and no miss penalty when 135 | we eventually visit node 3 (since the time to visit the subtree 136 | rooted at node 2 is greater than $L$). In this example, the latency 137 | is fully hidden for roughly half of the nodes, and reduced by 138 | 50\% for the other half (minus the root node). If we generalize 139 | this example to a k-ary tree, we would expect the fraction of 140 | nodes where latency is fully hidden to be roughly $\frac{k-1}{k}$(assuming 141 | that prefetched nodes are generally not displaced from the cache 142 | before they are referenced). Hence a larger value of $k$ is likely 143 | to improve the performance of greedy prefetching, since more 144 | natural jump-pointers are available. 145 | 146 | 147 | \begin{figure}[H] 148 | \centering 149 | \includegraphics[width=0.6\textwidth]{p249.png} 150 | \caption{Illustration of greedy prefetching. } 151 | \label{fig:p249} 152 | \end{figure} 153 | 154 | 155 | Greedy prefetching offers the following advantages: 156 | (i) it has 157 | low runtime overhead, since no additional storage or computation is needed to construct the natural jump-pointers; 158 | (ii) it is 159 | applicable to a wide variety of RDSs, regardless of how they are 160 | accessed or whether their structure is modified frequently; and 161 | (iii) it is relatively straightforward to implement in a compiler. The main disadvantage of greedy prefetching is that it does not offer precise control over the prefetching 162 | distance, which is the motivation for our next algorithm. 
163 | 164 | 165 | -------------------------------------------------------------------------------- /Dependence Analysis.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Dependence Analysis} 4 | 5 | 6 | 7 | \subsection{Dependence in Loops} 8 | 9 | If there are no dependences in a loop, we can parallelize it because none of the 10 | iterations interfere with each other. Loop dependence is also useful in other loop optimizations, such 11 | as loop interchange, loop fusion, etc. 12 | 13 | In this section, we should answer the following two questions: 14 | 15 | \begin{itemize} 16 | \item How do we represent dependences in loops? 17 | \item How do we determine if there are dependences? 18 | \end{itemize} 19 | 20 | 21 | \subsubsection{Representing dependences} 22 | 23 | Iteration space graphs is a good start. Here we give steps for creating 24 | iteration space graphs: 25 | 26 | \begin{itemize} 27 | \item Step 1: Create nodes, 1 for each iteration 28 | \item Step 2: Determine which array elements are read and 29 | written in each iteration 30 | \item Step 3: Draw arrows to represent dependences 31 | \end{itemize} 32 | 33 | 34 | \begin{figure}[H] 35 | \centering 36 | \begin{subfigure}{0.6\textwidth} 37 | \centering 38 | \includegraphics[width=\textwidth]{p253.png} 39 | \caption{Source code.} 40 | \label{fig:p253} 41 | \end{subfigure} 42 | \begin{subfigure}{0.6\textwidth} 43 | \centering 44 | \includegraphics[width=\textwidth]{p250.png} 45 | \caption{Step 1: Create nodes, 1 for each iteration} 46 | \label{fig:p250} 47 | \end{subfigure} 48 | \begin{subfigure}{0.6\textwidth} 49 | \centering 50 | \includegraphics[width=\textwidth]{p251.png} 51 | \caption{Step 2: Determine which array elements are read and 52 | written in each iteration} 53 | \label{fig:p251} 54 | \end{subfigure} 55 | \begin{subfigure}{0.6\textwidth} 56 | \centering 57 | \includegraphics[width=\textwidth]{p252.png} 58 | \caption{Step 3: Draw 
arrows to represent dependences} 59 | \label{fig:p252} 60 | \end{subfigure} 61 | \caption{A 1-D example to illustrate iteration space graphs.} 62 | \label{fig:p253} 63 | \end{figure} 64 | 65 | 66 | 67 | \begin{figure}[H] 68 | \centering 69 | \begin{subfigure}{0.5\textwidth} 70 | \centering 71 | \includegraphics[width=\textwidth]{p255.png} 72 | \caption{Source code.} 73 | \label{fig:p255} 74 | \end{subfigure} 75 | \begin{subfigure}{0.5\textwidth} 76 | \centering 77 | \includegraphics[width=\textwidth]{p254.png} 78 | \caption{Iteration space graphs for the source code.} 79 | \label{fig:p254} 80 | \end{subfigure} 81 | 82 | \caption{A 2-D example to illustrate iteration space graphs.} 83 | \label{fig:p254-5} 84 | \end{figure} 85 | 86 | 87 | But there is a crucial problem with iteration space graphs: 88 | they are potentially infinite representations! 89 | 90 | Compiler researchers have devised compressed representations of dependences, such 91 | as distance vectors and direction vectors. 92 | 93 | \subsubsection{Distance vector} 94 | 95 | Distance vectors capture the “shape” of dependences, 96 | but not the particular source and sink. Each dependence arrow in an iteration space 97 | graph is represented as a vector. 98 | 99 | The distance vector for Figure \ref{fig:p252} is $ (2) $. 100 | The distance vector for Figure \ref{fig:p254} is $ (1,-2) $. 101 | 102 | 103 | A more complex example is shown in Figure \ref{fig:p256-7}. 104 | There are 2 distance vectors: $(1,-2), (2,0)$. 
105 | 106 | 107 | 108 | 109 | \begin{figure}[H] 110 | \centering 111 | \begin{subfigure}{0.5\textwidth} 112 | \centering 113 | \includegraphics[width=\textwidth]{p256.png} 114 | \caption{Source code.} 115 | \label{fig:p256} 116 | \end{subfigure} 117 | \begin{subfigure}{0.5\textwidth} 118 | \centering 119 | \includegraphics[width=\textwidth]{p257.png} 120 | \caption{Iteration space graphs for the source code.} 121 | \label{fig:p257} 122 | \end{subfigure} 123 | 124 | \caption{A 2-D example to illustrate iteration space graphs.} 125 | \label{fig:p256-7} 126 | \end{figure} 127 | 128 | 129 | But dependences cannot always be summarized this easily. Consider the example in 130 | Figure \ref{fig:p258}: the distance vectors for this code are $(1), (2), (3), (4), \dots$ 131 | 132 | 133 | 134 | \begin{figure}[H] 135 | \centering 136 | \begin{subfigure}{0.5\textwidth} 137 | \centering 138 | \includegraphics[width=\textwidth]{p258.png} 139 | \caption{Source code.} 140 | \label{fig:p258} 141 | \end{subfigure} 142 | \begin{subfigure}{0.7\textwidth} 143 | \centering 144 | \includegraphics[width=\textwidth]{p259.png} 145 | \caption{Iteration space graphs for the source code.} 146 | \label{fig:p259} 147 | \end{subfigure} 148 | 149 | \caption{A 2-D example to illustrate iteration space graphs.} 150 | \label{fig:p258-9} 151 | \end{figure} 152 | 153 | From the distance vectors, we have information about the length of each vector, 154 | but not about the source of each vector. What happens if we try to reconstruct the iteration space graph? 
155 | 156 | 157 | 158 | \begin{figure}[H] 159 | \centering 160 | \includegraphics[width=0.8\textwidth]{p260.png} 161 | \caption{Reconstructing the iteration 162 | space graph from the distance vectors for Figure \ref{fig:p258}} 163 | \label{fig:p260} 164 | \end{figure} 165 | 166 | 167 | \subsection{Direction vectors} 168 | 169 | The whole point of distance vectors is that we want to be able to 170 | succinctly capture the dependences in a loop nest. Direction vectors go one step further: they summarize distance vectors, and save only the direction the 171 | dependence was in. 172 | 173 | 174 | For example, for distance vector $(2,-1)$, the corresponding direction vector is $(+,-)$. 175 | Likewise, for distance vector $(0,1)$, the corresponding direction vector is $(0,+)$. 176 | 177 | Direction vectors lose a lot of information, but do capture 178 | some useful information, such as whether there is a dependence (anything other than a 179 | $0$ means there is a dependence). 180 | Many times, the only information we need to determine if 181 | an optimization (e.g. loop parallelization, loop interchange) is legal is captured by direction vectors. 182 | 183 | 184 | 185 | If there is a non-zero entry for a loop dimension, that 186 | means that there is a loop-carried dependence over that 187 | dimension. If an entry is zero, then that loop can be parallelized! 188 | 189 | 190 | 191 | 192 | 193 | \subsubsection{Loop-carried dependence} 194 | 195 | A loop-carried dependence is a dependence that crosses loop iterations. 196 | 197 | If there is a loop-carried dependence, then that loop cannot 198 | be parallelized. 
199 | 200 | 201 | \subsection{Data Dependence Tests} 202 | 203 | 204 | \subsubsection{GCD} 205 | 206 | 207 | 208 | \begin{figure}[H] 209 | \centering 210 | \includegraphics[width=0.6\textwidth]{p261.png} 211 | \caption{} 212 | \label{fig:p261} 213 | \end{figure} 214 | 215 | Consider the loop nest in Figure \ref{fig:p261}. A dependence exists if there exist integers $i$ and $i^\prime$ such 216 | that: 217 | \begin{itemize} 218 | \item $f(i) = g(i^\prime)$ 219 | \item $0 \leq i, i^\prime < N$ 220 | \item If $i < i^\prime$, write happens before read (flow dependence) 221 | \item If $i > i^\prime$, write happens after read (anti dependence) 222 | \end{itemize} 223 | 224 | 225 | 226 | 227 | An equation $a_1 \times i + a_2 \times i^\prime = a_3$ has an integer solution iff $\gcd(a_1, a_2)$ 228 | evenly divides $a_3$. 229 | 230 | For example, 231 | \begin{itemize} 232 | \item $15 \times i + 6 \times j - 9 \times k = 12$ has a solution (gcd = 3) 233 | \item $2 \times i + 7 \times j = 3$ has a solution (gcd = 1) 234 | \item $9 \times i + 6 \times j = 10$ has no solution (gcd = 3) 235 | \end{itemize} 236 | 237 | 238 | 239 | Unfortunately, most loops have $\gcd(a, b) = 1$, which divides 240 | everything. Also, even if $f(i) = g(i^\prime)$ has an integer solution, there might be a dependence, 241 | but there might not, because the GCD test ignores the loop bounds. 242 | -------------------------------------------------------------------------------- /EarlyOpts.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | 4 | \section{Early Optimizations} 5 | 6 | "Optimization" is a misnomer—only very rarely 7 | does applying optimizations to a program result in object code whose performance 8 | is optimal, by any measure. Rather, optimizations generally improve performance, 9 | sometimes substantially, although it is entirely possible that they may decrease it or 10 | make no difference for some (or even all) possible inputs to a given program. 
11 | 12 | 13 | 14 | \subsection{Constant-Expression Evaluation(Constant Folding)} 15 | 16 | 17 | 18 | 19 | ConstantFoldInstruction -> PHI? 20 | -> 21 | 22 | 23 | 24 | 25 | 26 | \subsection{Scalar Replacement of Aggregates} 27 | 28 | 29 | The normal goal of this pass is to replace small, fixed-size aggregate objects (e.g., structures or small 30 | constant-size arrays) with separate variables corresponding to the fields of the original object. The primary benefit 31 | of this pass is that it allows global dataflow optimizations to be applied to fields of aggregate objects. 32 | 33 | 34 | 35 | \subsubsection{Scalar Replacement of Aggregates in LLVM \footnote{Copied from \url{https://misailo.web.engr.illinois.edu/courses/526-sp20/mp1.pdf}}} 36 | 37 | 38 | 39 | The top-level function for your pass should iterate the following two steps until no more changes happen: 40 | 41 | \begin{itemize} 42 | 43 | \item Promote some scalar allocas to virtual registers (equivalent to one pass of mem2reg). 44 | \item Replace some allocas with allocas of the individual fields (i.e., scalar-expand the original allocas). 45 | 46 | 47 | \end{itemize} 48 | 49 | 50 | Here are the major requirements: 51 | 52 | \begin{itemize} 53 | % \lstinline|int main() { return 0; }| 54 | 55 | 56 | \item The function \lstinline|PromoteMemToReg(const std::vector &Allocas, | 57 | \lstinline|DominatorTree &DT,AliasSetTracker *AST = 0)| invokes the mem2reg pass functionality directly. 
You can call it within your 58 | pass to satisfy the first of the two steps, above 59 | 60 | \item An object allocated using an \lstinline|alloca| instruction is promotable to live in a register if the alloca satisfies all 61 | these requirements: 62 | 63 | \begin{itemize} 64 | \item (P1) The alloca is a “first-class” type, which you can approximate conservatively with 65 | \item (P2) The alloca is only used in a load or store instruction and the instruction satisfies \lstinline|!isVolatile().| 66 | 67 | Technically, the use kind (U2) below is also permissible, but the LLVM version does not allow this, so the 68 | assertion of \lstinline|isAllocaPromotable(const AllocaInst)| will fail if you try to permit it. 69 | 70 | 71 | \end{itemize} 72 | 73 | \end{itemize} 74 | 75 | The rest of this section describes the algorithm for the second step above, i.e., the scalar-replacement-of aggregates step. 76 | 77 | \begin{itemize} 78 | \item only need to consider alloca instructions that allocate an object of a structure type 79 | \item An alloca instruction can be eliminated if the resulting pointer ptr is used only in these two ways: 80 | \begin{itemize} 81 | \item (U1) In a getelementptr instruction that satisfies both these conditions: 82 | \begin{itemize} 83 | \item It is of the form: \lstinline|getelementptr ptr, 0, constant[, ... constant]| 84 | \item The result of the getelementptr is only used in instructions of type U1 or U2, or as the pointer 85 | argument of a load or store instruction, i.e., the pointer stored into (not the value being stored). 86 | 87 | \end{itemize} 88 | \item (U2) In a \lstinline|eq| or \lstinline|ne| comparison instruction, where the other operand is the \lstinline|NULL| pointer value. 89 | \end{itemize} 90 | 91 | \item In order to eliminate an instruction M, it should be replaced with separate \lstinline|alloca| instructions, one for each 92 | field of the original object. 
These alloca operations should be placed at the entry to the current function. 93 | 94 | \item Each use of the pointer returned by M must be replaced appropriately. You have to figure out how each of 95 | the two kinds of uses listed above should be replaced. 96 | 97 | \item Because there can be structures nested inside structures, a single scalar-replacement step of your algorithm 98 | must iterate until no more structure allocations can be eliminated (in addition to the outer iteration with 99 | mem2reg). 100 | 101 | \item For efficiency, don’t simply repeat your entire algorithm until nothing changes. Instead, use a 102 | worklist containing suitable items and repeat until the worklist is empty. 103 | 104 | 105 | \end{itemize} 106 | 107 | 108 | 109 | % \begin{algorithm}[hbt!] 110 | % \caption{runOnAlloca}\label{alg:runOnAlloca} 111 | % \KwData{AI: AllocaInst} 112 | % \State{Skip when 1. AI.use_empty() 2. arrayAllocation 3. vector type 4. } 113 | % \State{1. build AllocaSlices} 114 | % \State{2. splitAlloca} 115 | % \State{3. } 116 | % \end{algorithm} 117 | 118 | 119 | 120 | % \begin{algorithm}[hbt!] 
121 | % \caption{SROA}\label{alg:SROA} 122 | % \KwData{AS: slices of an alloca which are formed by its various uses} 123 | 124 | % \While{$N \neq 0$}{ 125 | % \eIf{$N$ is even}{ 126 | % $X \gets X \times X$\; 127 | % $N \gets \frac{N}{2} $ \Comment*[r]{This is a comment} 128 | % }{\If{$N$ is odd}{ 129 | % $y \gets y \times X$\; 130 | % $N \gets N - 1$\; 131 | % } 132 | % } 133 | % } 134 | 135 | 136 | 137 | \begin{algorithm}[H] 138 | \caption{SROA}\label{alg:SROA} 139 | \begin{algorithmic} 140 | \While{!Worklist.empty()} 141 | \State{$I \gets Worklist.pop\_back()$} 142 | \State{runOnAlloca(I)} 143 | \State{deleteDeadInstructions} 144 | \State{promoteAllocas()} 145 | \EndWhile 146 | \end{algorithmic} 147 | \end{algorithm} 148 | 149 | 150 | \begin{algorithm}[H] 151 | \caption{runOnAlloca}\label{alg:runOnAlloca} 152 | \begin{algorithmic} 153 | \State{Build the slices of the allocInst} 154 | \State{splitAlloca (normalize the slice)} 155 | \end{algorithmic} 156 | \end{algorithm} 157 | 158 | 159 | 160 | \begin{algorithm}[H] 161 | \caption{splitAlloca}\label{alg:splitAlloca} 162 | \begin{algorithmic} 163 | \State{presplitLoadsAndStores: change[4,12) to [4,8)+[8,12)...} 164 | \State{rewritePartition} 165 | \end{algorithmic} 166 | \end{algorithm} 167 | 168 | 169 | \begin{algorithm}[H] 170 | \caption{rewritePartition}\label{alg:rewritePartition} 171 | \begin{algorithmic} 172 | \State{Create a new allocInst} 173 | \State{add old allocInst to DeadInsts} 174 | \end{algorithmic} 175 | \end{algorithm} 176 | 177 | 178 | \subsection{Algebraic Simplifications and Reassociation} 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /Foundations-of-Dataflow.tex: -------------------------------------------------------------------------------- 1 | 2 | \section{Foundations of 
Data Flow Analysis} 3 | 4 | 5 | $\leq$ means ``more conservative''; it does not mean ``subset''. 6 | 7 | 8 | \subsection{Transfer Functions} 9 | 10 | 11 | 12 | 13 | 14 | \subsubsection{monotonicity} 15 | 16 | % \begin{tcolorbox} 17 | % efjer 18 | % \end{tcolorbox} 19 | 20 | \begin{note}{Monotone framework doesn't mean $ f(x) \leq x$} 21 | For example, consider reaching definitions with just one definition \texttt{a=1} in a basic block (\texttt{BB1}). \[IN(BB1) = \{\} = x = \top , OUT(BB1) = \{a\} = f(x) \] 22 | However, $x = \top \leq f(x)$ 23 | \end{note} 24 | 25 | \subsubsection{Distributivity} 26 | 27 | Not a requirement. 28 | 29 | \begin{definition}{Distributivity} 30 | 31 | A framework $(F, V, \wedge)$ is \emph{distributive} if and only if 32 | 33 | \[f(x \wedge y)=f(x) \wedge f(y)\] 34 | 35 | which means that applying $f$ to the merged input is equal to applying $f$ to each input individually and then merging the results. 36 | \end{definition} 37 | 38 | Reaching definitions is distributive. 39 | 40 | 41 | Constant Propagation is not distributive. 42 | \begin{figure}[h] 43 | \centering 44 | \includegraphics[width=0.2\textwidth]{CDp.png} 45 | \caption{} 46 | \label{fig:p15} 47 | \end{figure} 48 | 49 | \subsection{Data Flow Analysis} 50 | 51 | \begin{definition}{Definition} 52 | Let $f_1, \dots , f_m \in F$, where $f_i$ is the transfer function for node $i$. $f_p=f_{n_k} \cdot \ldots \cdot f_{n_1}$, where $p$ is a path through nodes $n_1, \ldots, n_k$. $f_p =$ identity function, if $p$ is an empty path. 53 | \end{definition} 54 | 55 | \subsubsection{Precision} 56 | 57 | Ideally, for each node $n$, the IN should be $ \wedge f_{p_i}(\top)$ over all possibly executed paths $p_i$ reaching $n$. But determining all possibly executed paths is undecidable. Look at the example shown in Figure \ref{fig:p20}. 
58 | 59 | 60 | \begin{figure}[h] 61 | \centering 62 | \includegraphics[width=0.2\textwidth]{p20.png} 63 | \caption{} 64 | \label{fig:p20} 65 | \end{figure} 66 | 67 | So in reality, we will conservatively include some paths that will never be executed. From a correctness standpoint, this is fine because we will just get a more conservative answer. 68 | 69 | \subsubsection{Meet-Over-Paths (MOP)} 70 | 71 | \begin{definition}{MOP} 72 | For each node $n$, MOP($n$) = $ \wedge f_{p_i}(\top)$ over all paths $p_i$ in the flow graph reaching $n$, whether or not they can actually be executed. 73 | 74 | Strictly speaking, MOP considers more paths than necessary, which means 75 | 76 | \[ \textit{MOP = Perfect-Solution} \wedge \textit{Solution-to-Unexecuted-Paths.}\] 77 | 78 | So 79 | 80 | \[ \textit{MOP} \leq \textit{ Perfect-Solution} \] 81 | 82 | MOP is more conservative. 83 | 84 | \end{definition} 85 | 86 | 87 | 88 | \subsection{Solving Data Flow Equations} 89 | 90 | Any solution that satisfies the equations is a Fixed Point Solution (FP). 91 | 92 | 93 | \subsubsection{Iterative algorithm } 94 | 95 | If the framework is monotone and the algorithm converges, then it computes the Maximum Fixed Point (MFP). 
96 | 97 | 98 | FP $\leq$ MFP $\leq$ MOP $\leq$ Perfect-solution 99 | 100 | 101 | Reaching Definition example: 102 | \begin{figure}[h] 103 | \centering 104 | \includegraphics[width=0.2\textwidth]{p21.png} 105 | \caption{} 106 | \label{fig:p21} 107 | \end{figure} 108 | 109 | 110 | 111 | 112 | \subsection{Precision} 113 | 114 | If the data flow framework is distributive, then if the algorithm converges, $IN[b] = MOP[b]$. 115 | 116 | A monotone but not distributive example: Constant Propagation (it behaves as if there are additional paths). 117 | 118 | 119 | 120 | \subsection{Convergence} 121 | Two properties are needed to guarantee convergence: 122 | 123 | \begin{itemize} 124 | \item monotone 125 | \item finite descending chain 126 | \end{itemize} 127 | 128 | 129 | 130 | \subsection{Speed of Convergence} 131 | 132 | \subsubsection{Reverse Postorder} 133 | 134 | \begin{figure}[h] 135 | \centering 136 | \includegraphics[width=0.2\textwidth]{p22.png} 137 | \caption{} 138 | \label{fig:p22} 139 | \end{figure} 140 | 141 | 142 | \subsubsection{Depth-First Iterative Algorithm(forward) 143 | } 144 | 145 | 146 | 147 | \begin{figure}[h] 148 | \centering 149 | \includegraphics[width=0.2\textwidth]{p23.png} 150 | \caption{} 151 | \label{fig:p23} 152 | \end{figure} 153 | 154 | 155 | \subsubsection{Cost} 156 | 157 | Number of iterations = (maximum number of back edges in any acyclic path) + 2 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /Instruction Scheduling.tex: -------------------------------------------------------------------------------- 1 | \section{Instruction Scheduling} 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | \subsection{Introduction} 10 | In previous chapters, we talked about what we called 11 | machine-independent optimizations: for example, things like 12 | partial redundancy elimination, dead-code elimination, and constant propagation and 13 | folding. 
These are things that are good for improving code, no matter 14 | what machine you're running on. It's always good to eliminate work 15 | from the code. But now we are talking about machine-dependent optimizations. 16 | These are optimizations where you need to know a little bit of information 17 | about the machine you're targeting. The goal of the machine-independent 18 | optimizations is simply to eliminate work. For register allocation, the point 19 | is to make it less expensive to access data. It's much cheaper to use data in 20 | registers than having to go to memory on the stack all the time. 21 | 22 | What's the point of instruction scheduling? With instruction scheduling, 23 | we aren't getting rid of work. We assume that the earlier optimization passes have 24 | already eliminated as much work as possible; instead, our goal is to take that 25 | fixed amount of work that we have and to perform it faster. And how is that possible? 26 | The basic answer is that we want to execute things in parallel. 27 | If we want to scrunch the same amount of work into less time, that means essentially 28 | overlapping things. Let's say there's a sequence of instructions in which we're assigning 29 | values to a, b, and c. With instruction scheduling, we still have to do 30 | all of this work, but if we can do all the work simultaneously, then that would allow 31 | us to get our answer faster. 32 | 33 | Parallelism occurs in different forms in modern machines, and I'm going to talk a little 34 | bit about each of these things. The first two items are pipelining and superscalar 35 | processing. For historical reasons, exploiting those types of parallelism is called 36 | instruction scheduling. Taking full advantage of parallel processing on multiple cores 37 | with multiple threads is called automatic parallelization, which we will talk about 38 | later. 
39 | 40 | At a high level, if you think about the time that it takes to execute an instruction, 41 | the idea behind pipelining is to break up that time into different stages. 42 | Some processors, for example, break up execution into five 43 | stages. So you fetch an instruction, i.e., you go grab it from memory; you 44 | decode it and figure out what it is, and you read any registers that it's going to read 45 | from; then you execute it; then, if it's a memory access, you may need to load or store 46 | from memory; and finally you may have a result that you need to put into a register file. 47 | These may be five different steps that instructions typically go through, and the key thing is 48 | that they use independent pieces of hardware as they flow through these different stages of execution. 49 | 50 | And since they're using different pieces of hardware, rather than executing them 51 | one after another, we can overlap them in time so that at any moment in time, we have 52 | several different instructions in flight, where we're using different pieces of hardware for each 53 | of the different instructions. So notice that at one moment in time, we're using all the hardware 54 | associated with all five different stages, because those are independent pieces of hardware, but 55 | they happen to be running different instructions. So this is one way to create parallelism. 56 | 57 | Should we simply make pipelines deeper and deeper? This becomes less and less attractive, 58 | because there is overhead between pipeline stages: you have registers that capture the 59 | state between pipe stages. So you don't see hundred-stage 60 | pipelines. And in fact, if you make the pipeline stage shorter than the time it takes to do just 61 | a basic arithmetic operation, it's not particularly clear what the point is in trying to pipeline 62 | at such a tiny granularity. 
The length of a pipeline stage can affect the clock rate. And there 63 | was a lot of focus on increasing clock rates. The Pentium 4 processor had a 20-stage pipeline. 64 | 65 | Pipelining is a form of parallelism, and another way to make processors run faster is not just to break execution up into independent stages within the execution of one instruction but to actually allow multiple instructions to proceed side by side through the pipeline stages. 66 | And in order to do this, you need to create more hardware; in particular, you need more ALUs. 67 | Let's just focus on the execution stage, where we do an arithmetic operation. 68 | In reality, this involves using a piece of hardware called an ALU. The idea is: 69 | what if we created multiple ALUs, say two of them, or it could be more 70 | than that? So the idea is that instead of just executing one instruction through the ALU, 71 | I can potentially be executing two instructions side by side. 72 | The requirement, though, is that they have to be independent instructions. They can't depend 73 | on each other. 74 | 75 | In the original pipeline, there is only one instruction in a given pipe stage at a given time. 76 | But in a superscalar pipeline, there can be multiple instructions in the same pipe 77 | stage at the same time. The idea is that we create even more parallelism by allowing more 78 | instructions to proceed simultaneously, although there has to be even more independence 79 | between these instructions, at least among the ones that are going side by side. 80 | 81 | Because of pipelining and superscalar processing, we need to find instructions that don't have 82 | tight data dependencies, so we can execute them in parallel. Think about the 83 | instruction stream as the dynamic path through the code. The ideal thing for instruction scheduling 84 | is that, through some magic, we take all these instructions and figure out a way to cram them together 85 | into as little time as possible. 
In the absolute extreme, instead of doing $n$ instructions in $n$ cycles, 86 | what if we could cram all of them into one cycle? So is this possible? 87 | 88 | There are three major things that can constrain our ability to achieve that ideal. They are 89 | hardware resources, data dependencies and control dependencies. 90 | 91 | First, hardware resources. Modern processors have finite resources, and there are often constraints 92 | on how resources can be used. There are three kinds of constraints that affect scheduling. First of all, 93 | a processor can only issue a certain number of instructions every clock cycle, and so that might 94 | be, for example, four instructions or something like that. The next type of constraint is that there's 95 | a certain mixture of functional units, so there might be only a certain number of ALUs available. So 96 | even if you can issue four instructions, it's not that you can have an arbitrary mix of instructions. 97 | There can also be constraints based on the type of instruction that you're trying to execute. Finally, 98 | another type of limitation is that there are some functional units where, once you start an instruction, 99 | you can't necessarily start a new one down the same piece of hardware immediately on the next clock cycle. 100 | Integer divide is one example: some operations keep a piece of hardware busy for a while, so 101 | there might be a restriction on how soon you can start the next instruction. 102 | 103 | -------------------------------------------------------------------------------- /IntroToDFA.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | \section{Introduction to Data Flow Analysis} 3 | 4 | \subsection{Motivation for Dataflow Analysis} 5 | 6 | Some optimizations\footnote{Based on \url{https://pages.cs.wisc.edu/~horwitz/CS704-NOTES/2.DATAFLOW.html}}, however, require more "global" information. 
7 | For example, consider the code \ref{lst:expr1} 8 | 9 | \begin{lstlisting}[language=C,frame=single, caption=An example to illustrate some optimizations require more global information,label = lst:expr1] 10 | a = 1; 11 | b = 2; 12 | c = 3; 13 | if (...) x = a + 5; 14 | else x = b + 4; 15 | c = x + 1; 16 | \end{lstlisting} 17 | 18 | 19 | In this example, the initial assignment to \texttt{c} (at line 3) is useless, and the expression 20 | \texttt{x + 1} can be simplified to 7, but it is less obvious how a compiler can discover these facts 21 | since they cannot be discovered by looking only at one or two consecutive statements. 22 | A more global analysis is needed so that the compiler knows at each point in the program: 23 | \begin{itemize} 24 | \item which variables are guaranteed to have constant values, and 25 | \item which variables will be used before being redefined. 26 | \end{itemize} 27 | 28 | To discover these kinds of properties, we use dataflow analysis. 29 | 30 | 31 | 32 | \subsubsection{What is Data Flow Analysis?} 33 | 34 | Local Optimizations only consider optimizations within a node in CFG. 35 | Data flow analysis will take edges into account, which means composing 36 | effects of basic blocks to derive information at basic block boundaries. 37 | Data-flow analysis is a technique for gathering information about the possible 38 | set of values calculated at various points in a computer program. A program's 39 | control-flow graph (CFG) is used to determine those parts of a program to which 40 | a particular value assigned to a variable might propagate. The information gathered 41 | is often used by compilers when optimizing a program. 42 | 43 | 44 | Typically, we will do local optimization for the first step to know what happens in a 45 | basic block, step 2 is to do data flow analysis. In the third step, we will go back and 46 | revisit the individual instructions inside of the blocks. 
47 | 48 | 49 | Data flow analysis is \textbf{flow-sensitive}, which means we take into account 50 | the effect of control flow. It is also an \textbf{intraprocedural analysis}, which means 51 | the analysis is within a procedure.\footnote{\textbf{Interprocedural analysis} uses calling relationships among 52 | procedures} Data-flow analysis computes its solutions over the paths in 53 | a control-flow graph. The well-known, meet-over-all-paths 54 | formulation produces safe, precise solutions for general dataflow problems. All paths---whether feasible or infeasible, 55 | heavily or rarely executed---contribute equally to a solution. 56 | 57 | Here are some examples of intraprocedural optimizations: 58 | 59 | \begin{itemize} 60 | \item \textbf{constant propagation}. Constant propagation is a well-known global flow analysis 61 | problem. The goal of constant propagation is to discover values that are constant on all possible 62 | executions of a program and to propagate these constant values as far forward through the program 63 | as possible. Expressions whose operands are all constants can be evaluated at compile time and the 64 | results propagated further. 65 | 66 | \item \textbf{common subexpression elimination}. CSE is a compiler 67 | optimization that searches for instances of identical expressions 68 | (i.e., they all evaluate to the same value), 69 | and analyzes whether it is worthwhile replacing them with a 70 | single variable holding the computed value. 71 | 72 | \item \textbf{dead code elimination}. Actually, source code written by programmers doesn't contain 73 | a lot of dead code; dead code arises partly because of how the front end translates code into 74 | the IR. Other optimizations can also turn live code into dead code. 75 | 76 | \end{itemize} 77 | 78 | % \subsection{Static Program vs. Dynamic Execution } 79 | 80 | % Static program 81 | 82 | 83 | 84 | 85 | \subsubsection{Static Program vs.
Dynamic Execution} 86 | 87 | 88 | A program is statically finite, but there can be infinitely many dynamic execution paths. On one hand, the analysis 89 | needs to be precise, so we will take into account as much dynamic execution as possible. On the other hand, the analysis 90 | needs to run quickly. As a compromise, the analysis result is \textbf{conservative}: for each 91 | point in the program, it combines the information of all the dynamic instances of the same program point. 92 | 93 | 94 | 95 | 96 | 97 | \subsubsection{Data Flow Analysis Schema} 98 | Before thinking about how to define a dataflow problem, note that there are two kinds of problems: 99 | \begin{itemize} 100 | \item Forward problems (like constant propagation) where the information at a node n summarizes what can happen on paths from "enter" to n. \textbf{So if we care about what happened in the past, it's a forward problem.} 101 | \item Backward problems (like live-variable analysis), where the information at a node n summarizes what can happen on paths from n to "exit". \textbf{So if we care about what will happen in the future, it's a backward problem.} 102 | \end{itemize} 103 | 104 | In what follows, we will assume that we're thinking about a forward problem unless otherwise specified. 105 | 106 | Another way that many common dataflow problems can be categorized is as may problems or must problems. 107 | The solution to a "may" problem provides information about what may be true at each program point (e.g., 108 | for live-variables analysis, a variable is considered live after node n if its value may be used before 109 | being overwritten). The solution to a "must" problem provides information about what must be true at each program point (e.g., for constant propagation, the pair (x, v) holds before node n if x must have the value v at that point). 110 | 111 | Now let's think about how to define a dataflow problem so that it's clear what the (best) solution should be.
112 | When we do dataflow analysis "by hand", we look at the CFG and think about: 113 | 114 | \begin{itemize} 115 | \item What information holds at the start of the program. 116 | \item When a node n has more than one incoming edge in the CFG, how to combine the incoming 117 | information (i.e., given the information that holds after each predecessor of n, how to 118 | combine that information to determine what holds before n). 119 | \item How the execution of each node changes the information. 120 | \end{itemize} 121 | 122 | This intuition leads to the following definition. An instance of a dataflow problem includes: 123 | \begin{itemize} 124 | \item a \(CFG\), 125 | \item a domain \(D\) of "dataflow facts", 126 | \item a dataflow fact "init" (the information true at the start of the program for forward problems, 127 | or at the end of the program for backward problems), 128 | \item an operator \(\wedge\) (used to combine incoming information from multiple predecessors), 129 | \item for each CFG node n, a dataflow function \(f_n\) :\( D \rightarrow D\) (that defines the effect of 130 | executing n). 131 | \end{itemize} 132 | 133 | For constant propagation, an individual dataflow fact is a set of pairs of the form (var, val), 134 | so the domain of dataflow facts is the set of all such sets of pairs (the power set). 135 | For live-variable analysis, it is the power set of the set of variables in the program. 136 | 137 | For both constant propagation and live-variable analysis, the "init" fact is the empty set 138 | (no variable starts with a constant value, and no variables are live at the end of the program). 139 | 140 | 141 | 142 | For constant propagation, the combining operation \(\wedge\) is set intersection. 143 | This is because if a node n has two predecessors, p1 and p2, then variable x has value v before 144 | node n iff it has value v after both p1 and p2. 
For live-variable analysis, 145 | \(\wedge\) is set union: if a node n has two successors, s1 and s2, then the value of x after n may be 146 | used before being overwritten iff that holds either before s1 or before s2. In general, 147 | for "may" dataflow problems, \(\wedge\) will be some union-like operator, while it will be an intersection-like 148 | operator for "must" problems. 149 | 150 | For constant propagation, the dataflow function associated with a CFG node that does not assign 151 | to any variable (e.g., a predicate) is the identity function. For a node n that assigns to 152 | a variable x, there are two possibilities: 153 | 154 | \begin{itemize} 155 | \item 1. The right-hand side has a variable that is not constant. In this case, the function 156 | result is the same as its input except that if variable x was constant before n, 157 | it is not constant after n. 158 | \item 2. All right-hand-side variables have constant values. In this case, the right-hand side of 159 | the assignment is evaluated producing constant value c, and the dataflow-function result is the 160 | same as its input except that it includes the pair (x, c) for variable x (and excludes the pair 161 | for x, if any, that was in the input). 162 | \end{itemize} 163 | 164 | 165 | For live-variable analysis, the dataflow function for each node n has the form: 166 | \(f_n(S) = GEN_n \cup (S - KILL_n)\), where \(KILL_n\) is the set of variables defined at node n, 167 | and \(GEN_n\) is the set of variables used at node n. In other words, for a node that does not 168 | assign to any variable, the variables that are live before n are those that are live after 169 | n plus those that are used at n; for a node that assigns to variable x, the variables that are 170 | live before n are those that are live after n except x, plus those that are used at n 171 | (including x if it is used at n as well as being defined there).
172 | 173 | An equivalent way of formulating the dataflow functions for live-variable analysis is: 174 | \(f_n(S) = (S \cap NOT-KILL_n) \cup GEN_n\), where \(NOT-KILL_n\) is the set of variables not defined 175 | at node n. The advantage of this formulation is that it permits the dataflow facts to be 176 | represented using bit vectors, and the dataflow functions to be implemented using simple 177 | bit-vector operations (and or). 178 | 179 | It turns out that a number of interesting dataflow problems have dataflow functions of this 180 | same form, where \(GEN_n\) and \(KILL_n\) are sets whose definition depends only on n, and the combining 181 | operator \(\wedge\) is either union or intersection. These problems are called GEN/KILL problems, 182 | or bit-vector problems. 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /KnowCPP.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | \section{Deep into C++ \cite{HowCWork92:online}} 3 | -------------------------------------------------------------------------------- /LLVM.tex: -------------------------------------------------------------------------------- 1 | \section{LLVM} 2 | 3 | 4 | 5 | 6 | \subsubsection{PGO and LLVM} 7 | 8 | Profile Guided Optimizations. 9 | 10 | \subsection{Profile Guided Transforms} 11 | 12 | \subsubsection{Spill Placement} 13 | registers 14 | 15 | \subsubsection{Code Layout} 16 | 17 | Code layout \cite{raman2022learning} is the process of ordering the blocks of the CFG. This order dictates the 18 | placement of instructions within those blocks in memory. By inserting branch instructions at the end of the basic blocks, 19 | the compiler can layout the blocks in any order. Consider the 20 | Control Flow Graph (CFG) in Figure \ref{fig:p7}. Figures \ref{fig:p8} and \ref{fig:p9} 21 | show two possible layouts of the CFG. 
In both these layouts, 22 | a block is followed by one of its successor blocks that is 23 | not yet laid out. Consider block A, for example. It has two 24 | successors in the CFG. Only one of the successors B or C 25 | can be placed immediately following A and is known as the 26 | \textbf{fall-through block}. In Figure \ref{fig:p8}, B is the fall-through block 27 | and in the layout in Figure \ref{fig:p9}, C is the fall-through block. 28 | The choice of which block to place as the fall-through block 29 | has performance implications. If control is often transferred 30 | to C from A during program execution, then placing 31 | C next to A has the following advantages: 32 | 33 | 34 | \begin{figure}[!htb] 35 | \minipage{0.5\textwidth} 36 | \includegraphics[width=\linewidth]{p7.png} 37 | \caption{CFG}\label{fig:p7} 38 | \endminipage\hfill 39 | \minipage{0.25\textwidth} 40 | \includegraphics[width=\linewidth]{p8.png} 41 | \caption{Layout 1}\label{fig:p8} 42 | \endminipage\hfill 43 | \minipage{0.25\textwidth} 44 | \includegraphics[width=\linewidth]{p9.png} 45 | \caption{Layout 2}\label{fig:p9} 46 | \endminipage\hfill 47 | \caption{Code Layout} 48 | \end{figure} 49 | 50 | 51 | 52 | 53 | \begin{itemize} 54 | \item Since the branch at the end of A is mostly not-taken, 55 | the frontend of the processor's pipeline is less likely to 56 | be stalled if it is an out-of-order superscalar processor. 57 | \item As the cacheline containing the last instruction of A 58 | also contains instructions that are more likely to execute (from block C), instruction cache utilization is 59 | likely to be better. 60 | \end{itemize} 61 | 62 | In the LLVM compiler, the \texttt{MachineBlockPlacement} pass 63 | performs code layout optimization. This pass relies on the 64 | branch probability analysis which provides, for each branch 65 | instruction, the probability of the branch being taken.
66 | \texttt{MachineBlockPlacement} is just one of the many transformation passes that make use of branch probability analysis. 67 | Branch probability analysis is used in another analysis called 68 | block frequency analysis that provides relative frequencies of 69 | basic blocks within a function. Block frequency analysis is 70 | used by optimizations such as inlining, spill-code placement 71 | in register allocation among others. 72 | 73 | \subsubsection{Hot/Cold Partitioning} 74 | 75 | \begin{figure}[!htb] 76 | \minipage{0.33\textwidth} 77 | \includegraphics[width=\linewidth]{p10.png} 78 | \caption{CFG}\label{fig:p10} 79 | \endminipage\hfill 80 | \minipage{0.33\textwidth} 81 | \includegraphics[width=\linewidth]{p11.png} 82 | \caption{Layout 1}\label{fig:p11} 83 | \endminipage\hfill 84 | \minipage{0.33\textwidth} 85 | \includegraphics[width=\linewidth]{p12.png} 86 | \caption{Layout 2}\label{fig:p12} 87 | \endminipage\hfill 88 | \caption{Code Layout} 89 | \end{figure} 90 | 91 | 92 | Hot Cold splitting is an optimization to improve instruction locality. It is used to outline basic blocks which execute less frequently. The hot/cold splitting pass identifies cold basic blocks and moves them into separate functions. The linker can then put newly-created cold functions away from the rest of the program . The idea here is to have these cold pages faulted in relatively infrequently, and to improve the memory locality of code outside of the cold area. 93 | 94 | The algorithm is novel in the sense it is based on region and implemented at the IR level. Because it is implemented at the IR level, all the backend targets benefit from this implementation. Other implementations of hot-cold splitting outline each basic block separately and are implemented at the RTL level. 95 | 96 | 97 | What applications will benefit from Hot/Cold Spliting? 
98 | 99 | \begin{itemize} 100 | 101 | \item High cache misses(A giant app on a small device) 102 | \item High start-up time 103 | 104 | \end{itemize} 105 | 106 | 107 | \subsubsection{Inliner} 108 | 109 | 110 | 111 | \subsubsection{Outlining \& Merging} 112 | 113 | With PGO information, we can do more aggressive outlining of cold regions in the inline candidate function. This contrasts with the scheme of keeping only the 'early return' portion of the inline candidate and outlining the rest of the function as a single function call. 114 | 115 | Support for outlining multiple regions of each function is added, as well as some basic heuristics to determine which regions are good to outline. Outline candidates limited to regions that are single-entry \& single-exit. Also we don't account for live-ranges we may be killing across the region with a function. These are enhancements we can consider in another patch. 116 | 117 | Fallback to the regular partial inlining scheme is retained when either i) no regions are identified for outlining in the function, or ii) the outlined function could not be inlined in any of its callers. 118 | 119 | \subsubsection{Control height reduction} 120 | 121 | Control height reduction merges conditional blocks of code and reduces the 122 | number of conditional branches in the hot path based on profiles. 123 | 124 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 125 | if (hot_cond1) { // Likely true. 126 | 127 | do_stg_hot1(); 128 | } 129 | if (hot_cond2) { // Likely true. 130 | 131 | do_stg_hot2(); 132 | } 133 | \end{lstlisting} 134 | 135 | 136 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 137 | if (hot_cond1 && hot_cond2) { // Hot path. 138 | 139 | do_stg_hot1(); 140 | do_stg_hot2(); 141 | } else { // Cold path. 
142 | 143 | if (hot_cond1) { 144 | do_stg_hot1(); 145 | } 146 | if (hot_cond2) { 147 | do_stg_hot2(); 148 | } 149 | } 150 | \end{lstlisting} 151 | 152 | This speeds up some internal benchmarks up to ~30\%. 153 | 154 | \subsection{Loop Unrolling \& Loop Vectorization} 155 | 156 | 157 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 158 | for (int i=0; i<16; ++i) 159 | C[i] = A[i] + B[i]; 160 | \end{lstlisting} 161 | 162 | 163 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 164 | for (int i=0; i<16; i+=4) { 165 | C[i] = A[i] + B[i]; 166 | C[i+1] = A[i+1] + B[i+1]; 167 | C[i+2] = A[i+2] + B[i+2]; 168 | C[i+3] = A[i+3] + B[i+3]; 169 | } 170 | \end{lstlisting} 171 | 172 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 173 | for (int i=0; i<16; i+=4) 174 | addFourThingsAtOnceAndStoreResult( 175 | &C[i], &A[i], &B[i]); 176 | \end{lstlisting} 177 | \subsection{Partial Inline} 178 | 179 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 180 | for (int i=0; i<16; i+=4) 181 | addFourThingsAtOnceAndStoreResult( 182 | &C[i], &A[i], &B[i]); 183 | \end{lstlisting} 184 | 185 | \subsection{Partial Inline } 186 | 187 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 188 | void foo() { 189 | bar(); 190 | // rest of the code in foo 191 | } 192 | void bar() { 193 | if (X) 194 | return; 195 | // rest of code (to be outlined) 196 | } 197 | \end{lstlisting} 198 | 199 | 200 | \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 201 | void foo() { 202 | if (!X) 203 | bar.outlined(); 204 | // rest of the code in foo 205 | } 206 | void bar.outlined() { 207 | // rest of the code in bar 208 | } 209 | \end{lstlisting} -------------------------------------------------------------------------------- /LLVMInst.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | 4 | \section{LLVM 
Instructions} 5 | 6 | \subsection{\texttt{InsertValueInst}} 7 | 8 | 9 | \subsection{po\_iterator} 10 | 11 | 12 | This class is used in many places, but the code of po\_iterator 13 | is a little difficult to understand, especially the VisitedStack. 14 | 15 | I will use a slide to illustrate the algorithm. 16 | 17 | \begin{figure}[H] 18 | \centering 19 | \includegraphics[width=\textwidth]{postorder.pdf} 20 | \caption{postorder} 21 | \label{fig:postorder} 22 | \end{figure} 23 | 24 | -------------------------------------------------------------------------------- /LLVMProj.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{The LLVM project} 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Live Variabl Analysis.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | \section{ Live Variable Analysis } 3 | 4 | In compilers, live variable analysis (or simply liveness analysis) 5 | is a classic data-flow analysis to calculate the variables that 6 | are live at each point in the program. A variable is live at 7 | some point if it holds a value that may be needed in the future, 8 | or equivalently if its value may be read before the next time 9 | the variable is written to.\footnote{based on Wikipedia} 10 | 11 | \subsection{Motivation} 12 | 13 | Programs may contain 14 | 15 | \begin{itemize} 16 | \item code which gets executed but which has no useful 17 | effect on the program's overall result; 18 | \item occurrences of variables being used before they 19 | are defined;\footnote{we can use liveness information to find undefined variables.} 20 | \item many variables which need to be allocated 21 | registers and/or memory locations for compilation.\footnote{Two variables can use the same register if they are never in use at the 22 | same time (i.e., never simultaneously live).
Register allocation 23 | uses liveness information.} 24 | 25 | \end{itemize} 26 | 27 | The concept of variable liveness is useful in dealing 28 | with all three of these situations. 29 | 30 | 31 | Liveness analysis is highly used for \textbf{register allocation}(If variable \texttt{x} is live in a basic block b, it is a potential candidate for 32 | register allocation) and \textbf{dead code elimination}(If variable \texttt{x} is not live after an assignment \texttt{x =...}, then the assignment is 33 | redundant and can be deleted as dead code). 34 | 35 | 36 | \subsection{Problem formulation} 37 | Liveness is a data-flow property of variables: 38 | “Is the value of this variable needed?” We therefore 39 | usually consider liveness from an instruction's 40 | perspective: each instruction (or node of the 41 | flowgraph) has an associated set of live variables. 42 | 43 | 44 | \subsection{Semantic vs. syntactic} 45 | 46 | 47 | There are two kinds of variable liveness : Semantic liveness and Syntactic liveness. 48 | 49 | 50 | A variable x is \textbf{semantically} live at a node n if there is 51 | some execution sequence starting at n whose (externally 52 | observable) behaviour can be affected by changing the 53 | value of x. Semantic liveness is concerned with 54 | the execution behaviour of the program. 55 | 56 | A variable is \textbf{syntactically} live at a node if there is a 57 | path to the exit of the flow graph along which its 58 | value may be used before it is redefined. Syntactic liveness is concerned with properties of 59 | the syntactic structure of the program. 60 | 61 | 62 | So what is the difference between Semantic liveness and Syntactic liveness? syntactic liveness 63 | is a computable approximation of semantic liveness. 
64 | 65 | 66 | Consider the example \ref{lst:expr2} 67 | 68 | 69 | \begin{lstlisting}[language=C,frame=single, caption=An example to illustrate semantic vs. syntactic liveness,label = lst:expr2] 70 | int t = x * y; 71 | if ((x+1)*(x+1) == y) { 72 | t = 1; 73 | } 74 | if (x*x + 2*x + 1 != y) { 75 | t = 2; 76 | } 77 | return t; 78 | \end{lstlisting} 79 | 80 | In fact, t is dead in node \texttt{int t = x * y;} because exactly one of the two conditions will be true, 81 | so on every execution path t is redefined before it is returned. 82 | The value assigned by the first instruction is never used. 83 | 84 | 85 | But on the red path in Figure \ref{fig:liveex} through the 86 | flowgraph, t is not 87 | redefined before it's used, 88 | so t is syntactically live at 89 | the first instruction. Note that this path never 90 | actually occurs during 91 | execution. 92 | 93 | \begin{figure}[h] 94 | \centering 95 | \includegraphics[width=0.3\textwidth]{liveex.png} 96 | \caption{CFG for \ref{lst:expr2}} 97 | \label{fig:liveex} 98 | \end{figure} 99 | 100 | 101 | \subsection{Summary} 102 | 103 | 104 | \begin{center} 105 | \begin{tabular}{|c|c|} 106 | \hline Direction & Backward \\ 107 | \hline Domain & Sets of variables \\ 108 | \hline Meet operator & \( \cup \) \\ 109 | \hline Top(T) & $\phi$ \\ 110 | \hline Bottom & Universal Set \\ 111 | \hline Boundary condition & $\mathrm{IN[EXIT]} = \phi$ \\ 112 | \hline Initialization for internal nodes & $\mathrm{IN[B]} = \phi$ \\ 113 | \hline Finite descending chain? & \checkmark \\ 114 | \hline Transfer function & $f_b(x) = \mathrm{USE}_b \cup (x - \mathrm{DEF}_b)$ \\ 115 | \hline Monotone\&Distributive?
& \checkmark \\ 116 | \hline 117 | \end{tabular} 118 | \end{center} 119 | 120 | 121 | 122 | 123 | \subsection{Strongly Live Variables Analysis\cite{LiveVari29:online}} 124 | 125 | A variable is strongly live if 126 | \begin{itemize} 127 | 128 | \item it is used in a statement other than an assignment statement 129 | (same as simple liveness), or 130 | \item it is used in an assignment statement defining a variable that is 131 | strongly live 132 | \end{itemize} 133 | 134 | 135 | \begin{figure}[H] 136 | \centering 137 | \includegraphics[width=0.7\textwidth]{p217.png} 138 | \caption{Understanding Strong Liveness} 139 | \label{fig:p217} 140 | \end{figure} 141 | 142 | 143 | A variable is live at a program 144 | point if its current value is likely 145 | to be used later. We want to compute the smallest 146 | set of variables that are live. Simple liveness considers every 147 | use of a variable as useful. Strong liveness checks the liveness 148 | of the result before declaring the 149 | operands to be live. Strong liveness is more precise 150 | than simple liveness. The transfer function of Strongly Live Variables Analysis is shown 151 | below: 152 | 153 | 154 | $$ 155 | f_n(X)= \begin{cases}(X-\{y\}) \cup(Opd(e) \cap \mathbb{V}ar) & n \text { is } y=e, e \in \mathbb{E}xpr, y \in X \\ X-\{y\} & n \text { is input }(y) \\ X \cup\{y\} & n \text { is use }(y) \\ X & \text { otherwise }\end{cases} 156 | $$ 157 | 158 | 159 | The condition $y \in X$ in the first case means that if \texttt{y} is not strongly live, the 160 | assignment is skipped using 161 | the “otherwise” clause. 162 | 163 | \begin{figure}[H] 164 | \centering 165 | \includegraphics[width=0.4\textwidth]{p218.png} 166 | \caption{Simple Liveness VS.
Strong Liveness.} 167 | \label{fig:p218} 168 | \end{figure} 169 | -------------------------------------------------------------------------------- /Loop Invariant Computation and Code Motion.tex: -------------------------------------------------------------------------------- 1 | % \section{Loop Invariant Computation and Code Motion} 2 | 3 | \section{Loop} 4 | 5 | In a CFG, not every cycle is a loop from an optimization perspective. 6 | 7 | 8 | A loop is a set of CFG nodes S such that: 9 | \begin{itemize} 10 | \item there exists a header node h in S that dominates all nodes in S. 11 | \begin{itemize} 12 | \item there exists a path of directed edges from h to any node in S. 13 | \item h is the only node in S with predecessors not in S. 14 | \end{itemize} 15 | \item from any node in S, there exists a path of directed edges to h. 16 | \end{itemize} 17 | A loop is a single entry, multiple exit region. 18 | 19 | \subsection{Dominance} 20 | 21 | In a CFG, node a dominates b if every path from the start node to b passes through a. Node a is a dominator of b. The dominance relation is a partial order. 22 | 23 | 24 | Node a strictly dominates b if $a \neq b$ and a dominates b. 25 | 26 | 27 | 28 | 29 | \subsection{Natural Loops} 30 | 31 | The natural loop of the back edge is defined to be the smallest set of nodes that includes the back edge and has no predecessors outside the set except for the predecessor of the header. Natural loops are the loops for which we find optimizations. 32 | 33 | \subsection{Reducible Flow Graphs} 34 | A flow graph is reducible if every retreating edge in any DFST for that flow graph is a back edge. 35 | 36 | \paragraph{Testing reducibility} Take any DFST for the flow graph, remove the back edges, and check that the result is acyclic. 
37 | 38 | \paragraph{Example: Nonreducible Graph} 39 | 40 | 41 | 42 | \begin{figure}[h] 43 | \centering 44 | \includegraphics[width=0.5\textwidth]{images/DFST.png} 45 | \caption{Example: Nonreducible Graph } 46 | \label{fig:DFST} 47 | \end{figure} 48 | 49 | \subsection{Algorithm to Find Natural Loops} 50 | 51 | \subsubsection{Step 1. Finding Dominators} 52 | 53 | We can formulate this as a data flow analysis problem. 54 | \begin{center} 55 | \begin{tabular}{|c|c|} 56 | \hline Direction & Forward\\ 57 | \hline Values & Basic Blocks\\ 58 | \hline Meet operator & \( \cap \)\\ 59 | \hline Top(T) & Universal Set\\ 60 | \hline Bottom & $\phi$\\ 61 | \hline Boundary condition for entry node & $\phi$ \\ 62 | \hline Initialization for internal nodes & \(\mathrm{T}\) \\ 63 | \hline Finite descending chain? &\checkmark \\ 64 | \hline Transfer function & $\mathrm{OUT}[b]=\{b\} \cup\left(\bigcap_{p \in \mathrm{pred}(b)} \mathrm{OUT}[p]\right)$ \\ 65 | \hline Monotone\&Distributive? & \checkmark \\ 66 | \hline 67 | \end{tabular} 68 | \end{center} 69 | 70 | 71 | 72 | \subsubsection{Step 2. Finding Back Edges} 73 | 74 | \paragraph{Depth-first spanning tree} 75 | Edges traversed in a depth-first search of the flow graph form a depth-first spanning tree. We categorize edges in the CFG as follows: 76 | \begin{itemize} 77 | \item Forward edges (node to proper descendant). 78 | \item Retreating edges (node to ancestor). 79 | \item Cross edges (between two nodes, neither of which is an ancestor of the other.) 80 | \end{itemize} 81 | 82 | This is somewhat difficult to understand. Let's make it simpler. We can number each node when we visit it.
So each edge should satisfy the following property: 83 | 84 | 85 | \begin{center} 86 | \begin{tabular}{|c|c|} 87 | \hline Forward edges \( n_1 \rightarrow n_2\) & num($n_1$) $<$ num($n_2$) and $n_1$ is ancestor of $n_2$\\ 88 | \hline Cross edges \( n_1 \rightarrow n_2\)& num($n_1$) $>$ num($n_2$) and neither $n_1$ is ancestor of $n_2$ nor $n_2$ is ancestor of $n_1$ \\ 89 | \hline Retreating edges \( n_1 \rightarrow n_2\) & num($n_1$) $>$ num($n_2$) and $n_2$ is ancestor of $n_1$\\ 90 | \hline 91 | \end{tabular} 92 | \end{center} 93 | 94 | 95 | 96 | 97 | 98 | Of these edges, only retreating edges go from high to low in DF order. 99 | 100 | \paragraph{Back Edges} 101 | 102 | A back edge is an edge \( t \rightarrow h \) in which h dominates t. 103 | 104 | \paragraph{Algorithm} 105 | 106 | \begin{itemize} 107 | \item Perform a depth first search 108 | \item For each retreating edge \(t \rightarrow h\), check if h is in t’s dominator list 109 | \end{itemize} 110 | 111 | \subsubsection{Step 3. Constructing Natural Loops} 112 | 113 | \paragraph{Algorithm} For each back edge $t\rightarrow h$: 114 | 115 | \begin{itemize} 116 | \item delete h from the flow graph 117 | \item find those nodes that can reach t 118 | (those nodes plus h form the natural loop of \(t \rightarrow h\)) 119 | \end{itemize} 120 | 121 | 122 | 123 | \subsection{Inner Loops} 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PDFS = main.pdf 2 | 3 | all: $(PDFS) 4 | 5 | main.pdf: *.tex 6 | mkdir -p tmp 7 | ln -sf ${PWD}/images tmp 8 | TOPFILE=main.tex SECFILE=*.tex make -f Makefile.work 9 | 10 | #cv-rs.pdf: cv.pdf rs.pdf 11 | # pdftk cv.pdf rs.pdf cat output cv-rs.pdf 12 | #%.pdf: %.tex 13 | # TOPFILE=$(patsubst %.pdf,%.tex,$@) make -f Makefile.work 14 | 15 | .PHONY: clean clean-tmp preview print gzip gunzip tar ci 16 | 17 | clean: 18 | rm -f $(PDFS) *.ps 19 |
cd tmp ; rm -f * 20 | 21 | clean-tmp: 22 | cd tmp ; rm -f * 23 | -------------------------------------------------------------------------------- /More Examples of Data Flow Analysis.tex: -------------------------------------------------------------------------------- 1 | \section{More Examples of Data Flow Analysis: Global Common Sub-expression Elimination; Constant Propagation/Folding} 2 | 3 | If we care about the past, what happened before, then it is a forward problem (entry). 4 | 5 | 6 | \subsection{Available Expression Analysis} 7 | 8 | 9 | \begin{definition}{Availability of an Expression E at point P} 10 | E is available at P if every path to P in the flow graph 11 | \begin{itemize} 12 | \item E must be calculated at least once 13 | \item no variable in E redefined after the last evaluation 14 | 15 | \end{itemize} 16 | \end{definition} 17 | 18 | 19 | \begin{figure}[h] 20 | \centering 21 | \includegraphics[width=0.3\textwidth]{p24.png} 22 | \caption{} 23 | \label{fig:p24} 24 | \end{figure} 25 | 26 | 27 | \subsubsection{Examples} 28 | 29 | 30 | 31 | In \ref{fig:p24} $a-b$, $c+d$ is not available at the last BB. But $x+y$ is. 32 | 33 | 34 | \begin{figure}[h] 35 | \centering 36 | \includegraphics[width=0.3\textwidth]{p25.png} 37 | \caption{} 38 | \label{fig:p25} 39 | \end{figure} 40 | 41 | 42 | In \ref{fig:p25} , $4*i$ is available for both cases. 43 | 44 | 45 | 46 | \begin{figure}[h] 47 | \centering 48 | \includegraphics[width=0.3\textwidth]{p26.png} 49 | \caption{} 50 | \label{fig:p26} 51 | \end{figure} 52 | 53 | 54 | In \ref{fig:p26}, we show that calculate transfer functions for complete basic blocks by composing individual instruction transfer functions. 
55 | 56 | 57 | \begin{figure}[h] 58 | \centering 59 | \includegraphics[width=0.3\textwidth]{p27.png} 60 | \caption{} 61 | \label{fig:p27} 62 | \end{figure} 63 | 64 | 65 | \subsection{Eliminating CSEs} 66 | 67 | 68 | \begin{itemize} 69 | \item Step1: Value Numbering 70 | \item Step2: Available expression 71 | \item Step3: If CSE is an "available expression", then transform the code. 72 | 73 | \end{itemize} 74 | 75 | \begin{figure}[h] 76 | \centering 77 | \includegraphics[width=0.3\textwidth]{p28.png} 78 | \caption{} 79 | \label{fig:p28} 80 | \end{figure} 81 | 82 | If we only use value numbering to eliminate common expression in \ref{fig:p28}, we will see that this will just add a lot of new work and no income. But if we calculate Available expression in \ref{fig:p29}, we can find that $x+y$ is such one and can do some optimization. 83 | 84 | 85 | \begin{figure}[h] 86 | \centering 87 | \includegraphics[width=0.3\textwidth]{p29.png} 88 | \caption{} 89 | \label{fig:p29} 90 | \end{figure} 91 | 92 | \begin{note}{How to deal with Textually identical expression?\ref{fig:p30}} 93 | Just sort the operands. 94 | 95 | 96 | 97 | But for textually different expressions that may be equivalent \ref{fig:p31}, we had better do copy propagation first. 
98 | 99 | \end{note} 100 | \begin{figure}[h] 101 | \centering 102 | \includegraphics[width=0.3\textwidth]{p30.png} 103 | \caption{} 104 | \label{fig:p30} 105 | \end{figure} 106 | 107 | \begin{figure}[h] 108 | \centering 109 | \includegraphics[width=0.3\textwidth]{p31.png} 110 | \caption{} 111 | \label{fig:p31} 112 | \end{figure} 113 | 114 | 115 | \subsubsection{Summary} 116 | 117 | \begin{figure}[h] 118 | \centering 119 | \includegraphics[width=0.3\textwidth]{p32.png} 120 | \caption{} 121 | \label{fig:p32} 122 | \end{figure} 123 | 124 | 125 | \subsection{Constant Propagation/Folding} 126 | 127 | \begin{figure}[h] 128 | \centering 129 | \includegraphics[width=0.3\textwidth]{p33.png} 130 | \caption{} 131 | \label{fig:p33} 132 | \end{figure} 133 | 134 | \subsubsection{Meet Operator in Table Form} 135 | \begin{figure}[h] 136 | \centering 137 | \includegraphics[width=0.3\textwidth]{p34.png} 138 | \caption{} 139 | \label{fig:p34} 140 | \end{figure} 141 | 142 | 143 | \subsubsection{Example} 144 | 145 | \begin{figure}[h] 146 | \centering 147 | \includegraphics[width=0.3\textwidth]{p35.png} 148 | \caption{} 149 | \label{fig:p35} 150 | \end{figure} 151 | 152 | On the other path in \ref{fig:p35}, x is uninitialized. When we have undefined behavior, hopefully the front end of the compiler should complain about it, but if it doesn't, the optimizer is free to do whatever it wants to do. 153 | 154 | 155 | \subsubsection{Transfer Function} 156 | 157 | 158 | \begin{figure}[h] 159 | \centering 160 | \includegraphics[width=0.3\textwidth]{p36.png} 161 | \caption{} 162 | \label{fig:p36} 163 | \end{figure} 164 | 165 | 166 | 167 | It is not distributive in \ref{fig:p37}. 
168 | 169 | \begin{figure}[h] 170 | \centering 171 | \includegraphics[width=0.3\textwidth]{p37.png} 172 | \caption{} 173 | \label{fig:p37} 174 | \end{figure} 175 | 176 | 177 | 178 | \subsection{Copy Propagation} 179 | 180 | \subsection{Dead Code Elimination} 181 | 182 | -------------------------------------------------------------------------------- /PGO.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | 4 | \section{Profile Guided Optimizations} 5 | 6 | 7 | \subsection{Efficient Path Profiling} 8 | 9 | 10 | 11 | \subsection{Improved Basic Block Reordering} 12 | 13 | Improved Basic Block Reordering \cite{newell2020improved} is published by Andy Newell and Sergey Pupyrev 14 | from Facebook. 15 | 16 | 17 | Given a directed control flow graph comprising of basic blocks and frequencies of jumps between the blocks, find an ordering 18 | of the blocks such that the number of fall-through jumps 19 | is maximized. This is the maximum directed TRAVELING 20 | SALESMAN PROBLEM (TSP). Solving TSP alone is not sufficient for constructing a good ordering of basic blocks. It is easy to find 21 | examples of control flow graphs with multiple different 22 | orderings that are all optimal with respect to the TSP objective. Consider for example a control flow graph in Figure \ref{fig:p54} 23 | in which the maximum number of fall-through branches is 24 | achieved with two orderings that utilize a different number 25 | of I-cache lines in a typical execution. For these cases, an 26 | algorithm needs to take into consideration non-fall-through 27 | branches to choose the best ordering. However, maximizing the number of fall-through jumps is not always preferred 28 | from the performance point of view. Consider a control 29 | flow graph with seven basic blocks in Figure \ref{fig:p55}. 
It is not hard 30 | to verify that the ordering with the maximum number of 31 | fall-through branches is one containing two concatenated 32 | chains, B0$\rightarrow$B1$\rightarrow$B3$\rightarrow$B4 and B5$\rightarrow$B6$\rightarrow$B2 (upper-right in 33 | Figure \ref{fig:p55}). Observe that for this placement, the hot part of 34 | the function occupies three 64-byte cache lines. Arguably a 35 | better ordering is the lower-right in Figure \ref{fig:p55}, which uses only 36 | two cache lines for the five hot blocks, B0, B1, B2, B3, B4, at 37 | the cost of breaking the lightly weighted branch B6$\rightarrow$B2. 38 | 39 | 40 | 41 | \begin{figure}[H] 42 | \centering 43 | \includegraphics[width=0.5\textwidth]{p54.png} 44 | \caption{ Two orderings of basic blocks with the same TSP score (1995) 45 | resulting in different I-cache utilization. All blocks have the same size of 46 | 16 bytes and colored according to their hotness in the profile.} 47 | \label{fig:p54} 48 | \end{figure} 49 | 50 | 51 | \begin{figure}[H] 52 | \centering 53 | \includegraphics[width=0.5\textwidth]{p55.png} 54 | \caption{A control flow graph with jump frequencies (left) and two possible 55 | orderings of basic blocks (right). All blocks have the same size (in bytes) 56 | and colored according to their hotness in the profile. An optimal TSPbased layout (upper right) utilizes three cache lines for the hot code, 57 | while an arguably better layout (lower right) can be built with a new 58 | EXTTSP model.} 59 | \label{fig:p55} 60 | \end{figure} 61 | 62 | 63 | \subsubsection{Contribution} 64 | 65 | The contributions of the paper are the following. 66 | 67 | \begin{itemize} 68 | \item Identify an opportunity for improvement over the 69 | classical approach for basic block reordering, initiated 70 | by Pettis and Hansen \cite{pettis1990profile}. Then they extend the model and 71 | suggest a new optimization problem with the objective 72 | closely related to the performance of a binary. 
73 | 74 | \item Develop a new practical algorithm for basic 75 | block reordering. The algorithm relies on a greedy 76 | technique for solving the optimization problem. 77 | 78 | \item Propose a Mixed Integer Programming formulation 79 | for the aforementioned optimization problem, which is 80 | capable of finding optimal solutions on small functions 81 | \end{itemize} 82 | 83 | \subsubsection{New ideas} 84 | 85 | In their study, they consider the following features. 86 | 87 | \begin{itemize} 88 | \item The length of a jump impacts the performance of instruction caches. Longer jumps are more likely to result 89 | in a cache miss than shorter ones. In particular, a jump 90 | with the length shorter than 64 bytes has a chance to 91 | remain within the same cache line. 92 | 93 | \item The direction of a branch plays a role for branch predicting. A branch s$\rightarrow$t is called forward if s $<$ t, that is, 94 | block s precedes block t in the ordering; otherwise, the 95 | branch is called backward. 96 | 97 | \item The branches can be classified into unconditional (if the 98 | out-degree is one) and conditional (if the out-degree is 99 | two). A special kind of branches is between consecutive 100 | blocks in the ordering that are called fall-through; in this 101 | case, a jump instruction is not needed. 102 | 103 | \item They introduce a new score that estimates the quality 104 | of a basic block ordering taking into account the branch 105 | characteristics. In the most generic form, the new function, 106 | called EXTENDED TSP (EXTTSP), is expressed as follows: 107 | 108 | $$\operatorname{ExtTSP}=\sum_{(s, t)} w(s, t) \times K_{s, t} \times h_{s, t}(\operatorname{len}(s, t))$$ 109 | 110 | 111 | where the sum is taken over all branches in the control 112 | flow graph. Here $w(s, t)$ is the frequency of branch s$\rightarrow$t and 113 | $0 \leq K_{s,t} \leq 1$ is a weight coefficient modeling the relative 114 | importance of the branch for optimization. 
We distinguish 115 | six types of branches arising in code: conditional and unconditional versions of fall-through, forward, and backward 116 | branches. Thus, we introduce six coefficients for EXTTSP. 117 | The lengths of the jumps are accounted in the last term of the 118 | expression, which increases the importance of short jumps. 119 | A non-negative function $h_{s,t}(len(s, t))$ is defined by value 120 | of 1 for zero-length jumps, value of 0 for jumps exceeding a 121 | prescribed length, and it monotonically decreases between 122 | the two values. To be consistent with the objective of TSP, 123 | the EXTTSP score needs to be maximized for the best performance. Notice that EXTTSP is a generalization of TSP, as the 124 | latter can be modeled by setting $K_{s,t} = 1, h(len(s, t)) = 1$ 125 | for fall-through branches and $K_{s,t} = 0$ otherwise. 126 | 127 | 128 | \end{itemize} 129 | 130 | 131 | They use machine learning methods to find parameters for EXTTSP 132 | that have the highest correlation with the performance of 133 | a binary in the experiment. 
134 | 135 | 136 | 137 | $$ 138 | \operatorname{ExtTSP}=\sum_{(s, t)} w(s, t) \times \begin{cases}1 & \text { if } \operatorname{len}(s, t)=0, \\ 0.1 \cdot\left(1-\frac{\operatorname{len}(s, t)}{1024}\right) & \text { if } 0<\operatorname{len}(s, t) \leq 1024 \\ & \text { and } s<t, \\ 0.1 \cdot\left(1-\frac{\operatorname{len}(s, t)}{640}\right) & \text { if } 0<\operatorname{len}(s, t) \leq 640 \\ & \text { and } t<s, \\ 0 & \text { otherwise. }\end{cases} 139 | $$ 140 | 141 | 142 | The resulting layout algorithm greedily merges chains of basic blocks: 143 | 144 | \begin{algorithm}[H] 145 | \caption{Basic Block Reordering}\label{alg:bbreorder} 146 | \hspace*{\algorithmicindent} \textbf{Input:} control flow graph $G = (V, E, w)$, entry point $v^* \in V$ \\ 147 | \hspace*{\algorithmicindent} \textbf{Output:} ordering of basic blocks $(v^* = B_1, B_2, \dots, B_{|V|})$ 148 | \begin{algorithmic} 149 | 150 | \Function{ReorderBasicBlocks}{} 151 | \For{$v \in V$} \Comment{initial chain creation} 152 | \State{$Chains \gets Chains \cup (v);$} 153 | \EndFor 154 | 155 | 156 | 157 | 158 | 159 | 160 | \While{$|Chains| > 1$} \Comment{chain merging} 161 | \For{$c_i,c_j \in Chains$} 162 | \State{$gain[c_i,c_j] \gets$ ComputeMergeGain($c_i, c_j$ )} 163 | \EndFor 164 | \State{$\operatorname{src}, d s t \leftarrow \underset{i, j}{\arg \max } \operatorname{gain}\left[c_i, c_j\right]$} \Comment{find best pair of chains} 165 | \State {$Chains \gets Chains \cup Merge(src, dst) \backslash \{src, dst\};$} \Comment{merge the pair and update chains} 166 | \EndWhile{\\} 167 | \Return{ordering given by the remaining chain;} 168 | \EndFunction 169 | 170 | 171 | \Function{ComputeMergeGain}{src, dst} 172 | \For{$i=1$ \textbf{to} blocks(src)} \Comment{try all ways to split chain src} 173 | \State{$s_1 \gets src[1:i]$} \Comment{break the chain at index i} 174 | \State{$s_2 \gets src[i+1:blocks(src)]$} 175 | \State{score $_i \leftarrow \max \left\{\begin{array}{l}\operatorname{ExtTSP}\left(s_1, s_2, d s t\right) \text { if } v^* \notin d s t \\ \operatorname{ExtTSP}\left(s_1, d s t, s_2\right) \text { if } v^* \notin d s t \\ \operatorname{ExtTSP}\left(s_2, s_1, d s t\right) \text { if } v^* \notin s_1, d s t \\ \operatorname{ExtTSP}\left(s_2, d s t, s_1\right) \text { if } v^* \notin s_1, d s t \\ \operatorname{ExtTSP}\left(d s t, s_1, s_2\right) \text { if } v^* \notin s r c \\ \operatorname{ExtTSP}\left(d s t, s_2, s_1\right) \text { if } v^* \notin s r c\end{array}\right.$} \Comment{try all valid ways to concatenate} 176 | \EndFor{\\} 177 | \Return{$\max _i$ score $_i-\operatorname{ExtTSP}(s r c)-\operatorname{ExtTSP}(d s t)$} \Comment{ the gain of merging chains src and dst} 178 | \EndFunction 179 | \end{algorithmic} 180 | \end{algorithm} 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 
-------------------------------------------------------------------------------- /RA.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Region-Based Analysis} 4 | 5 | The iterative data-flow analysis algorithm we have discussed so far is just one approach to solving data-flow problems. 6 | Here we discuss another approach called region-based analysis~\cite{Microsof22:online}. Recall that in the iterative-analysis approach, 7 | we create transfer functions for basic blocks, then find the fixed-point solution by repeated passes over the blocks. 8 | Instead of creating transfer functions just for individual blocks, a region-based analysis finds transfer functions that 9 | summarize the execution of progressively larger regions of the program. Ultimately, transfer functions for entire procedures are constructed 10 | and then applied, to get the desired data-flow values directly. 11 | 12 | 13 | 14 | While a data-flow framework using an iterative algorithm is specified by a semilattice of data-flow values and a family of transfer 15 | functions closed under composition, region-based analysis requires more elements. A region-based framework includes both a semilattice 16 | of data-flow values and a semilattice of transfer functions that must possess a meet operator, a composition operator, 17 | and a closure operator. 18 | 19 | A region-based analysis is particularly useful for data-flow problems where paths that have cycles may change the data-flow values. 20 | The closure operator allows the effect of a loop to be summarized more effectively than does iterative analysis. 21 | The technique is also useful for interprocedural analysis, where transfer functions associated with a procedure call may be treated like 22 | the transfer functions associated with basic blocks. 
23 | 24 | \subsection{Motivating Example} 25 | 26 | Consider the example in \ref{fig:p115}: we want to know how many bits are needed to store the return value for an FPGA device. We can pessimistically solve for the worst case. 27 | But we can also try to be more precise and calculate the answer for each call site. It would be nice if we did not have to go back and iteratively solve the problem again for each different value of x. 28 | 29 | 30 | \begin{figure}[H] 31 | \centering 32 | \includegraphics[width=0.5\textwidth]{p115.jpg} 33 | \caption{Motivating Example for region-based analysis} 34 | \label{fig:p115} 35 | \end{figure} 36 | 37 | The idea behind region-based analysis is that we want to create a transfer function for the entire procedure. 38 | 39 | 40 | 41 | \subsection{Algorithm} 42 | 43 | 44 | \begin{definition}{Region} 45 | A region in a flow graph is 46 | a set of nodes with a 47 | header that dominates all 48 | other nodes in the region. 49 | \end{definition} 50 | 51 | In Iterative Analysis, the {\color{blue}transfer function} 52 | {\color{red}\(F_B\)} 53 | summarizes the effect from the {\color{blue}beginning to the end of basic block B}. 54 | 55 | In Region-Based Analysis, the {\color{blue}transfer function} 56 | {\color{red}\(F_{R,B}\)} 57 | summarizes the effect from the {\color{blue}beginning of region 58 | {\color{red}R} to the end of basic block B}. We recursively 59 | construct a larger region {\color{red}R} from smaller regions, and 60 | construct {\color{red}\(F_{R,B}\)} from the transfer functions for the smaller 61 | regions, 62 | until the program is one region. 63 | Let P be the region for the entire program, 64 | and v be the initial value at the entry node; then {\color{blue}out[B] = {\color{red}\(F_{P,B}\)} (v)} and 65 | {\color{blue}in[B] = {\color{red}\( \bigwedge_{B^\prime}out[B^\prime] \)}}, where $B^\prime$ ranges over the predecessors of B and $\bigwedge$ is the meet operator. 66 | 67 | 68 | We will use reaching definitions as our example data-flow problem to illustrate Region-Based Analysis. 
69 | 70 | 71 | \subsubsection{Operations on Transfer Functions} 72 | 73 | \begin{figure}[H] 74 | \centering 75 | \includegraphics[width=0.7\textwidth]{p116.png} 76 | \caption{Operations on Transfer Functions: Composition} 77 | \label{fig:p116} 78 | \end{figure} 79 | 80 | \begin{figure}[H] 81 | \centering 82 | \includegraphics[width=0.7\textwidth]{p117.png} 83 | \caption{Operations on Transfer Functions: Meet} 84 | \label{fig:p117} 85 | \end{figure} 86 | 87 | \begin{figure}[H] 88 | \centering 89 | \includegraphics[width=0.7\textwidth]{p118.png} 90 | \caption{Operations on Transfer Functions: Closure} 91 | \label{fig:p118} 92 | \end{figure} 93 | 94 | 95 | 96 | \subsubsection{Structure of Nested Regions (An Example)} 97 | 98 | \begin{figure}[H] 99 | \centering 100 | \includegraphics[width=0.7\textwidth]{p119.jpg} 101 | \caption{} 102 | \label{fig:p119} 103 | \end{figure} 104 | 105 | 106 | \subsubsection{Transfer Functions for T2 Rule} 107 | 108 | \begin{figure}[H] 109 | \centering 110 | \includegraphics[width=0.7\textwidth]{p121.png} 111 | \caption{} 112 | \label{fig:p121} 113 | \end{figure} 114 | 115 | 116 | 117 | \subsubsection{Transfer Functions for T1 Rule} 118 | 119 | 120 | \begin{figure}[H] 121 | \centering 122 | \includegraphics[width=0.7\textwidth]{p120.png} 123 | \caption{} 124 | \label{fig:p120} 125 | \end{figure} 126 | 127 | 128 | \subsubsection{Example: Reaching Definitions} 129 | 130 | \begin{figure}[H] 131 | \centering 132 | \includegraphics[width=0.7\textwidth]{p126.png} 133 | \caption{} 134 | \label{fig:p126} 135 | \end{figure} 136 | 137 | 138 | 139 | \begin{figure}[H] 140 | \centering 141 | \includegraphics[width=0.7\textwidth]{p122.jpg} 142 | \caption{} 143 | \label{fig:p122} 144 | \end{figure} 145 | 146 | \begin{figure}[H] 147 | \centering 148 | \includegraphics[width=0.7\textwidth]{p123.jpg} 149 | \caption{} 150 | \label{fig:p123} 151 | \end{figure} 152 | \begin{figure}[H] 153 | \centering 154 | \includegraphics[width=0.7\textwidth]{p124.jpg} 155 | 
\caption{} 156 | \label{fig:p124} 157 | \end{figure} 158 | \begin{figure}[H] 159 | \centering 160 | \includegraphics[width=0.7\textwidth]{p125.jpg} 161 | \caption{} 162 | \label{fig:p125} 163 | \end{figure} 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ### How to build 3 | 4 | ``` 5 | mkdir tmp && make clean && make BIBFILES=main.bib 6 | ``` 7 | 8 | 9 | Buy me a cup of coffee [☕️](https://user-images.githubusercontent.com/45984215/202376581-4837a283-4812-4063-82bc-cc9c3101d3a5.jpg) if you find this helpful! 10 | 11 | 12 | 13 | 14 | # TODO 15 | 16 | - [X] Local Optimizations 17 | - [X] LLVM 18 | - [X] Introduction to Dataflow analysis 19 | - [X] Foundations of Dataflow analysis 20 | - [X] GCSE/ConstProp 21 | - [X] SSA 22 | - [X] SSA Optimizations 23 | - [X] LICM 24 | - [X] Strength Reduction 25 | - [X] PRE 26 | - [X] Lazy Code Motion 27 | - [X] Region Analysis 28 | - [X] Pointer Analysis 29 | - [X] Register Allocation 30 | - [X] List Scheduling 31 | - [X] Dynamic Code Optimizations 32 | - [X] DSL 33 | - [ ] Global Scheduling SW Pipelining 34 | - [X] [Data Prefetching](./DataPrefetching.tex) 35 | - [X] Parallelism 36 | - [X] TLS 37 | -------------------------------------------------------------------------------- /Reaching Definitions.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Reaching Definitions} 4 | 5 | The Reaching Definitions Problem is a data-flow problem used to answer the 6 | following questions: Which definitions of a variable \textit{X} reach a given use of \textit{X} in 7 | an expression? Is \textit{X} used anywhere before it is defined? A definition \textit{d} reaches a point \textit{p} if there exists a path 8 | from the point immediately following \textit{d} to \textit{p} such that \textit{d} is not killed (overwritten) along that path. 
9 | 10 | 11 | 12 | \subsection{Iterative Algorithm} 13 | 14 | Here is the iterative algorithm. 15 | 16 | 17 | 18 | \begin{algorithm} 19 | \caption{Reaching Definitions: Iterative Algorithm}\label{alg:reachingdefiterative} 20 | \hspace*{\algorithmicindent} \textbf{Input: control flow graph CFG = (N, E, Entry, Exit) } \\ 21 | 22 | 23 | \begin{algorithmic} 24 | 25 | \State out[Entry] = $\emptyset$ \algorithmiccomment{Boundary condition} 26 | 27 | \For{\texttt{each basic block B other than Entry}} 28 | \State \texttt{out[B] = $\emptyset$} \algorithmiccomment{Initialization for iterative algorithm } 29 | \EndFor 30 | \While{Changes to any out[] occur} 31 | \For{\texttt{each basic block B other than Entry}} 32 | \State \texttt{$in[B] = \cup (out[p])$, for all predecessors p of B} 33 | \State \texttt{$out[B] = f_B(in[B])$} \algorithmiccomment{$out[B]=gen[B]\cup (in[B]-kill[B]) $ } 34 | \EndFor 35 | 36 | \EndWhile 37 | \end{algorithmic} 38 | \end{algorithm} 39 | 40 | 41 | 42 | 43 | \subsection{Worklist Algorithm} 44 | 45 | \begin{algorithm} 46 | \caption{Reaching Definitions: Worklist Algorithm}\label{alg:reachingdefworklist} 47 | \hspace*{\algorithmicindent} \textbf{Input: control flow graph CFG = (N, E, Entry, Exit) } \\ 48 | 49 | 50 | \begin{algorithmic} 51 | 52 | \State out[Entry] = $\emptyset$ \algorithmiccomment{Boundary condition} 53 | \State \textcolor{blue}{ChangedNodes = N} 54 | \For{\texttt{each basic block B other than Entry}} 55 | \State \texttt{out[B] = $\emptyset$} \algorithmiccomment{Initialization for worklist algorithm } 56 | \EndFor 57 | \While{ChangedNodes $\neq \emptyset$} 58 | \State \textcolor{blue}{Remove i from ChangedNodes} 59 | \State $in[i] = \cup (out[p])$, for all predecessors p of i 60 | \State \textcolor{blue}{$oldout = out[i]$} 61 | \State $out[i] = f_i(in[i])$ \algorithmiccomment{$out[i]=gen[i]\cup (in[i]-kill[i]) $ } 62 | \If {\textcolor{blue}{oldout} $\neq out[i]$} 63 | 64 | \For{\texttt{all \textcolor{blue}{successors s of i}}} 65 | 
\State \textcolor{blue}{add s to ChangedNodes} 66 | \EndFor 67 | \EndIf 68 | 69 | \EndWhile 70 | \end{algorithmic} 71 | \end{algorithm} 72 | 73 | 74 | 75 | \subsection{Example} 76 | Here is an example of reaching definitions. 77 | 78 | \begin{figure}[!htb] 79 | \minipage{0.32\textwidth} 80 | \includegraphics[width=\linewidth]{rdex1.jpg} 81 | \caption{Pass 1}\label{fig:awesome_image1} 82 | \endminipage\hfill 83 | \minipage{0.32\textwidth} 84 | \includegraphics[width=\linewidth]{rdex2.jpg} 85 | \caption{Pass 2}\label{fig:awesome_image2} 86 | \endminipage\hfill 87 | \minipage{0.32\textwidth}% 88 | \includegraphics[width=\linewidth]{rdex3.jpg} 89 | \caption{Pass 3}\label{fig:awesome_image3} 90 | \endminipage 91 | \end{figure} 92 | 93 | 94 | \subsection{Summary} 95 | 96 | \begin{center} 97 | \begin{tabular}{|c|c|} 98 | \hline Direction & Forward \\ 99 | \hline Domain & Sets of definitions \\ 100 | \hline Meet operator & \( \cup \) \\ 101 | \hline Top ($\top$) & $\emptyset$ \\ 102 | \hline Bottom & Universal Set \\ 103 | \hline Boundary condition & $\mathrm{OUT[ENTRY]} = \emptyset$ \\ 104 | \hline Initialization for internal nodes & $\mathrm{OUT[B]} = \emptyset$ \\ 105 | \hline Finite descending chain? & \checkmark \\ 106 | \hline Transfer function & $f_b(x) = \mathrm{Gen}_b \cup (x - \mathrm{Kill}_b)$ \\ 107 | \hline Monotone\&Distributive? & \checkmark \\ 108 | \hline 109 | \end{tabular} 110 | \end{center} 111 | -------------------------------------------------------------------------------- /Region-Based Analysis.tex: -------------------------------------------------------------------------------- 1 | \section{Region-Based Analysis} 2 | 3 | Region-based analysis is an alternative to iterative data-flow analysis. 
4 | 5 | \subsection{Basic Ideas} 6 | 7 | 8 | \subsubsection{Motivating Example} 9 | 10 | 11 | 12 | 13 | \subsection{Algorithm} 14 | 15 | 16 | \subsection{Optimization and Complexity} 17 | 18 | 19 | 20 | \subsection{Comparing region-based analysis with iterative algorithms} 21 | 22 | 23 | -------------------------------------------------------------------------------- /SR.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Induction Variables and Strength Reduction} 4 | Strength reduction is an optimization technique which replaces expensive operations with computationally cheaper ones. For example, a very weak strength reduction algorithm can replace the instruction 5 | \texttt{b = a * 4} with \texttt{b = a << 2}. 6 | 7 | \subsection{Motivation} 8 | 9 | Opportunities for strength reduction arise routinely from details that the compiler 10 | inserts to implement source-level abstractions. To see this, consider the simple 11 | code fragment shown in Figure \ref{fig:p74-76}. Figure \ref{fig:p74} shows the source code and the same loop in a low-level intermediate code. 12 | Notice the instruction sequence that begins at the label L2. The compiler inserted this code 13 | (with its multiply) as the expansion of A[i]. Figure \ref{fig:p75} shows the code that results from applying strength reduction, and 14 | Figure \ref{fig:p76} shows the result after subsequent dead-code elimination. The compiler created a new variable, t2$^\prime$, to 15 | hold the value of the expression i $*$ 4 + A. Its value is computed directly, by 16 | incrementing it with the constant 4, rather than recomputing it on each iteration 17 | as a function of i. Strength reduction automates this transformation. 
18 | 19 | \begin{figure}[H] 20 | \centering 21 | \begin{subfigure}{0.6\textwidth} 22 | \centering 23 | \includegraphics[width=\textwidth]{p74.pdf} 24 | \caption{Original code.} 25 | \label{fig:p74} 26 | \end{subfigure} 27 | \begin{subfigure}{0.6\textwidth} 28 | \centering 29 | \includegraphics[width=\textwidth]{p75.pdf} 30 | \caption{After induction variable substitution.} 31 | \label{fig:p75} 32 | \end{subfigure} 33 | \begin{subfigure}{0.6\textwidth} 34 | \centering 35 | \includegraphics[width=\textwidth]{p76.pdf} 36 | \caption{Final code.} 37 | \label{fig:p76} 38 | \end{subfigure} 39 | \caption{An example of strength reduction.} 40 | \label{fig:p74-76} 41 | \end{figure} 42 | 43 | 44 | 45 | \subsection{Definitions} 46 | 47 | \begin{definition}{Basic Induction Variable} 48 | A basic induction variable (e.g., i as shown in Figure \ref{fig:p74}) is a variable X whose only definitions within the loop 49 | are assignments of the form: X = X+c or X = X-c, where c is either a constant or 50 | a loop-invariant variable. 51 | \end{definition} 52 | 53 | \begin{definition}{Induction Variable} 54 | An induction variable is either a basic induction variable B, 55 | or a variable defined once within the loop (e.g., t1, t2 as shown in Figure \ref{fig:p74}), whose value is a linear function 56 | of some basic induction variable at the time of the definition: 57 | $A = c_1 * B + c_2$ 58 | \end{definition} 59 | 60 | The FAMILY of a basic induction variable B is the set of induction variables A such that each time A is assigned in the loop, 61 | the value of A is a linear function of B. 
(e.g., t1 and t2 are in the family of i, as shown in Figure \ref{fig:p74}) 62 | 63 | 64 | \subsection{Optimizations} 65 | 66 | \subsubsection{Strength Reduction} 67 | \begin{algorithm}[H] 68 | \caption{Strength Reduction Optimizations}\label{alg:Strength Reduction Optimizations} 69 | \begin{algorithmic} 70 | 71 | 72 | \State{A is an induction variable in the family of basic induction variable B (i.e., $A = c_1 * B + c_2$)} 73 | \State{\,\,\,\,\,\,\,\, Create new variable A$^\prime$} 74 | \State{\,\,\,\,\,\,\,\, Initialize in preheader A$^\prime$ = $c_1 * B + c_2$} 75 | \State{\,\,\,\,\,\,\,\, Track value of B: add after $B=B+x$: $A^\prime=A^\prime+x*c_1$} 76 | \State{\,\,\,\,\,\,\,\, Replace assignment to A: replace lone $A=\dots$ with $A=A^\prime$} 77 | \end{algorithmic} 78 | \end{algorithm} 79 | 80 | \subsubsection{Optimizing non-basic induction variables} 81 | 82 | 83 | \begin{itemize} 84 | \item copy propagation 85 | \item dead code elimination 86 | \end{itemize} 87 | 88 | 89 | \subsubsection{Optimizing basic induction variables} 90 | 91 | Eliminate basic induction variables used only for calculating other induction variables and loop tests. 92 | \begin{algorithm}[H] 93 | \caption{Optimizing basic induction variables}\label{alg:Optimizing basic induction variables} 94 | \begin{algorithmic} 95 | 96 | 97 | \State{Select an induction variable A in the family of B, preferably with simple constants ($A = c_1 * B + c_2$).} 98 | \State{Replace a comparison such as \texttt{if B > X goto L1} with \texttt{if (A$^\prime$ > $c_1 * X + c_2$) goto L1} (assuming $c_1$ is positive).} 99 | \State{If B is live at any exit from the loop, recompute it from A$^\prime$ after the exit: $B = (A^\prime - c_2) / c_1$.} 100 | \end{algorithmic} 101 | \end{algorithm} 102 | 103 | \subsection{Further Details} 104 | 105 | 106 | \begin{figure}[H] 107 | \centering 108 | \begin{subfigure}{0.7\textwidth} 109 | \centering 110 | \includegraphics[width=\textwidth]{p78.pdf} 111 | \caption{A more complex example. 
k and i are both basic induction variables. 112 | m is in the family of k.} 113 | \label{fig:p78} 114 | \end{subfigure} 115 | \begin{subfigure}{\textwidth} 116 | \centering 117 | \includegraphics[width=0.7\textwidth]{p79.pdf} 118 | \caption{After induction variable substitution.} 119 | \label{fig:p79} 120 | \end{subfigure} 121 | 122 | \caption{A more complex example of strength reduction.} 123 | \label{fig:p78-79} 124 | \end{figure} 125 | 126 | \subsection{Finding Induction Variable Families} 127 | 128 | Let B be a basic induction variable. A is in the family of B if it satisfies one of the following conditions: 129 | \begin{itemize} 130 | \item {\large{\textbf{Condition C1}}} A has a single assignment in the loop L of the form A = B*c, c*B, B+c, etc. 131 | \item {\large{\textbf{Condition C2}}} A is in the family of B if there is a variable D with $D = c_1 * B + c_2$ for basic induction variable B and: 132 | \begin{itemize} 133 | \item Rule 1: A has a single assignment in the loop L of the form A = D*c, D+c, etc. 134 | \item Rule 2: No definition of D outside L reaches the assignment to A. 135 | \item Rule 3: Every path between the lone point of assignment to D in L and the 136 | assignment to A has the same sequence (possibly empty) of definitions of B. 137 | \end{itemize} 138 | \end{itemize} 139 | 140 | 141 | 142 | \begin{figure}[H] 143 | \centering 144 | \includegraphics[width=0.3\textwidth]{p80.png} 145 | \caption{i is a basic induction variable and t1 is in the family of i, but t2 is not because it violates Condition C2, Rule 2.} 146 | \label{fig:p80} 147 | \end{figure} 148 | 149 | 150 | \begin{figure}[H] 151 | \centering 152 | \includegraphics[width=0.3\textwidth]{p81.png} 153 | \caption{i is a basic induction variable and t1 is in the family of i. t2 is not because it violates Condition C2, Rule 3 (some paths that reach t2 include \texttt{i = i+1} but others do not). 
} 154 | \label{fig:p81} 155 | \end{figure} -------------------------------------------------------------------------------- /SSA.tex: -------------------------------------------------------------------------------- 1 | \section{Static Single Assignment} 2 | 3 | In this lecture we will cover what SSA is and how to convert a program to SSA form. 4 | 5 | \subsection{Background} 6 | 7 | When we are trying to do some types of optimizations, it is often nice to know where a variable is defined. 8 | For loop invariant code motion, where we are trying to move computations outside of a loop, you will see it is very important to 9 | know where the definitions are. 10 | 11 | 12 | 13 | Take the code in \ref{fig:ssaexm1} for example: if \texttt{B,C,D} are defined outside the loop, we can move the computation outside the loop. 14 | 15 | % \begin{figure}[h] 16 | % \centering 17 | % \includegraphics[width=0.4\textwidth]{ssaexm1.drawio.pdf} 18 | % \caption{Example of Loop-Invariant Code Motion} 19 | % \label{fig:ssaexm1} 20 | % \end{figure} 21 | 22 | 23 | Copy propagation is also an important optimization. One way to do the analysis for copy propagation is to 24 | look at any use of a variable and check whether all of its reaching definitions are copies of the same 25 | variable. Take the code in \ref{fig:ssaexm2} for example. If all reaching definitions of \texttt{X} copy the same variable \texttt{Y}, and \texttt{Y} is not 26 | redefined since that copy, then we can substitute the use of \texttt{X} with a use of \texttt{Y}, and then hopefully the assignment \texttt{X = Y} will end up becoming dead code later. 27 | 28 | It's often nice to know the relationship between when variables are used and when they are defined. 
So the motivation for SSA is that it would be nice to directly traverse between definitions and uses 29 | for particular variables instead of having to look at everything that is going on inside a flow graph. Arguably this will not only be convenient but also might make things more efficient, because we 30 | can jump over all the instructions irrelevant to the analysis that we are doing. Another insight is that sometimes variables get reused, but if we are overwriting a variable the second time, the later 31 | uses may have nothing to do with earlier uses and definitions of the variable. Specifically, \texttt{X} appears twice in the CFG, but the definition and use of \texttt{X} in the first block actually have nothing to do with those in the second block. 32 | So considering them to be the same variable is potentially going to inhibit optimizations. It would be nice if the compiler could realize that they are effectively separate variables. 33 | 34 | 35 | One way that you could potentially deal with wanting to go directly between uses and definitions is to create a type of index or data structure called Definition-Use or Use-Definition chains. But this has a downside: the chains can be expensive. 
36 | 37 | 38 | 39 | % \begin{figure}[h] 40 | % \centering 41 | % \includegraphics[width=0.4\textwidth]{ssaexm2.drawio.pdf} 42 | % \caption{Example of Copy Propagation} 43 | % \label{fig:ssaexm2} 44 | % \end{figure} 45 | % \subsection{The Development of Static Single Assignment Form} 46 | 47 | 48 | 49 | 50 | \begin{figure}[h] 51 | \centering 52 | \begin{subfigure}[b]{0.4\textwidth} 53 | \centering 54 | \includegraphics[width=0.6\textwidth]{ssaexm1.drawio.pdf} 55 | \caption{Example of Loop-Invariant Code Motion} 56 | \label{fig:ssaexm1} 57 | \end{subfigure} 58 | \hfill 59 | \begin{subfigure}[b]{0.4\textwidth} 60 | \centering 61 | \includegraphics[width=0.6\textwidth]{ssaexm2.drawio.pdf} 62 | \caption{Example of Copy Propagation} 63 | \label{fig:ssaexm2} 64 | \end{subfigure} 65 | 66 | \caption{Two examples} 67 | \end{figure} 68 | 69 | 70 | Even if we just consider one definition and all its uses, or one use and all of its definitions, these data structures can be non-trivially large. In general, if you have N definitions and M uses, the time and memory complexity is \texttt{O(MN)}. 71 | So for that reason, def-use and use-def chains are not very popular because of their cost. But one thing we could do is to limit the number of definitions of a variable 72 | to just one place. For example, I have all these definitions of \texttt{x} and if I set one of them to be \texttt{x1}, I have at least simplified half of the problem. 73 | 74 | 75 | 76 | \subsection{SSA} 77 | 78 | Def-use and use-def chains are things you will find in the literature, but they are rarely used in practice because of their cost. An alternative that goes at least part of the way toward the same goal is to put the intermediate form into something called SSA style. 79 | So under SSA, every variable is assigned statically at most once in the program text.
It's not dynamic because we still have the problem of trying to figure out what's happening dynamically as we go through all these different paths, but we can at least simplify things a bit in terms of locations in the code: 80 | statically, each variable is assigned just once. It's easy to do this in a basic block. 81 | 82 | 83 | 84 | 85 | 86 | % This concept is based on \footnote{\url{https://compilers.cs.uni-saarland.de/ssasem/talks/Kenneth.Zadeck.pdf}} 87 | 88 | 89 | % In the very Beginning, there was dataflow analysis. Ultimately dataflow analysis turns out to be very expensive. 90 | 91 | % Viewing the program variable by variable exposes structure that is obscured by the dataflow model: 92 | % A kill allows the cfg to be clipped. Also, the dataflow for a single variable can be solved 93 | % without iteration. This turns out to be a dead end, but it set the 94 | % stage for the development of SSA. 95 | 96 | 97 | % Take constant propogation for example, Kildall and Wegbreit use a conventional 98 | % dataflow framework. The fact vector is very large: values not bits. Must use iteration. 99 | % The time to run these is between \(O(ElogEV)\) and 100 | % \(O(E^2 V)\) depending on the type of control flow 101 | % graph processing. 102 | 103 | 104 | 105 | % \subsubsection{The First Attack} 106 | 107 | 108 | % Use def-use chains. Sometimes this helps and sometimes it does not. This requires NMV 109 | % def-use chains. 110 | 111 | % \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 112 | % switch (...) { 113 | % case 1: x=...; y=...; break; 114 | % ... 115 | % case n: x=...; y=...; break; 116 | % } 117 | % switch (...) { 118 | % case 1: ...=x; ...=y; break; 119 | % ... 120 | % case m: ...=x; ...=y; break; 121 | % } 122 | % \end{lstlisting} 123 | 124 | 125 | % \subsubsection{The Second Attack } 126 | 127 | 128 | % Add a “join birthpoint” 129 | % for x and y between 130 | % the two switches.
131 | 132 | 133 | 134 | % \begin{lstlisting}[language=C,frame=single, caption=An ,label = lst:expr2] 135 | % switch (...) { 136 | % case 1: x=...; y=...; break; 137 | % ... 138 | % case n: x=...; y=...; break; 139 | % } 140 | % birthpoint x, y; 141 | % switch (...) { 142 | % case 1: ...=x; ...=y; break; 143 | % ... 144 | % case m: ...=x; ...=y; break; 145 | % } 146 | % \end{lstlisting} -------------------------------------------------------------------------------- /SoftwarePipe.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | \section{Software Pipeline} 3 | 4 | 5 | 6 | 7 | 8 | \begin{problem} 9 | 10 | Consider the following cyclic data-dependence graph: 11 | \begin{figure}[H] 12 | \centering 13 | \includegraphics[width=0.5\textwidth]{p243.png} 14 | \caption{} 15 | \label{fig:p243} 16 | \end{figure} 17 | 18 | Construct the table of longest simple paths between each pair of nodes, in terms of T, the initiation interval. Then, for T = 4, give the constraints on (the schedule of) each pair of nodes. For any node x, let S(x) be the clock, relative to the beginning of the iteration, at which the instruction represented by node x is executed. Identify below the constraints on S(a) in terms of S(b). 19 | 20 | 21 | \item a) S(b) - 3 $\leq$ S(a) $\leq$ S(b) - 1 22 | \item b) S(b) - 1 $\leq$ S(a) $\leq$ S(b) + 3 23 | \item c) S(b) + 1 $\leq$ S(a) $\leq$ S(b) - 3 24 | \item d) S(b) - 3 $\leq$ S(a) $\leq$ S(b) + 1 25 | 26 | 27 | {\color{red}Notice that there is only one simple path from any node to any other. Thus, we can compute the lengths of the longest simple paths by adding the second components of the labels along the path, and subtracting T times the sum of the first components.
Here is the table:} 28 | 29 | $$ 30 | \begin{array}{|l||c|c|c|c|c|c|} 31 | \hline & \mathbf{a} & \mathbf{b} & \mathbf{c} & \mathbf{d} & \mathbf{e} & \mathbf{f} \\ 32 | \hline \hline \mathbf{a} & - & 1 & 2 & 3-T & 4-T & 5-T \\ 33 | \hline \mathbf{b} & 5-2 T & - & 1 & 2-T & 3-T & 4-T \\ 34 | \hline \mathbf{c} & 4-2 T & 5-2 T & - & 1-T & 2-T & 3-T \\ 35 | \hline \mathbf{d} & 3-T & 4-T & 5-T & - & 1 & 2 \\ 36 | \hline \mathbf{e} & 2-T & 3-T & 4-T & 5-2 T & - & 1 \\ 37 | \hline \mathbf{f} & 1-T & 2-T & 3-T & 4-2 T & 5-2 T & - \\ 38 | \hline 39 | \end{array} 40 | $$ 41 | 42 | {\color{red}If T = 4, the values of these expressions are: $$ \begin{array}{|l||c|c|c|c|c|c|} \hline & \mathbf{a} & \mathbf{b} & \mathbf{c} & \mathbf{d} & \mathbf{e} & \mathbf{f} \\ \hline \hline \mathbf{a} & - & 1 & 2 & -1 & 0 & 1 \\ \hline \mathbf{b} & -3 & - & 1 & -2 & -1 & 0 \\ \hline \mathbf{c} & -4 & -3 & - & -3 & -2 & -1 \\ \hline \mathbf{d} & -1 & 0 & 1 & - & 1 & 2 \\ \hline \mathbf{e} & -2 & -1 & 0 & -3 & - & 1 \\ \hline \mathbf{f} & -3 & -2 & -1 & -4 & -3 & - \\ \hline \end{array} $$ 43 | 44 | To read the constraints off of this table, suppose that the entry in the row for x and column for y is v. Also, suppose that the entry in the row for y and the column for x is u. Then the constraint on S(x) is: 45 | S(y) + u $\leq$ S(x) $\leq$ S(y) - v 46 | 47 | For example, with x = a and y = b we have v = 1 and u = 5 - 2T = -3, so S(b) - 3 $\leq$ S(a) $\leq$ S(b) - 1. 48 | 49 | 50 | Solution : a 51 | } 52 | 53 | 54 | 55 | \end{problem} 56 | 57 | 58 | 59 | \begin{problem} 60 | Here is the sequence of instructions constituting the body of a loop that we wish to software-pipeline: 61 | 62 | \item LD 63 | \item ST 64 | \item LD 65 | \item ADD 66 | \item ST 67 | \item ADD 68 | \item ST 69 | 70 | The machine model allows each instruction to be performed in one clock tick. It also allows, in one clock tick, for the initiation of one load (LD), one store (ST), and one addition (ADD). 71 | 72 | To form an optimal pipeline, we need to pick the shortest possible period --- the interval between initiations of successive iterations of the loop. Then, in order to make the pipeline meet the constraints of the machine, we may need to introduce delay (nop instructions) into the sequence of instructions executed by each iteration. Especially, we must avoid having the machine try to execute two or more of the same type of instruction (load, store, or add) at the same clock tick.
73 | 74 | Your job is to find the shortest possible period, and for that period, to find the smallest number of nop's that need to be inserted. Find, in the list below, the one choice that has both the shortest possible period and the fewest nop's. 75 | 76 | 77 | \item a) Period = 4 with 1 delay: LD, nop, ST, LD, ADD, ST, ADD, ST 78 | \item b) Period = 2 with 1 delay: LD, ST, nop, LD, ADD, ST, ADD, ST 79 | \item c) Period = 3 with 2 delays: LD, ST, nop, nop, LD, ADD, ST, ADD, ST 80 | \item d) Period = 4 with no delays: LD, ST, LD, ADD, ST, ADD, ST 81 | 82 | 83 | {\color{red} 84 | 85 | There are 3 store's so the period has to be at least 3. In proof: if the period is 1, then stores from three different iterations will be executed at each clock. If the period is 2, then either the odd clock ticks or the even clock ticks must have stores from two different iterations. 86 | Now, let us see if we can find a schedule with period 3. Introducing no delay won't work. The ST in instruction 5 for iteration i will be executed in the same clock as the ST in instruction 2 for iteration i+1. Introducing only one delay won't work either. If the nop is before the first ST or after the second ST, then there is a conflict between the first two stores, of the same type as if there were no delays introduced. But if the nop is between the first and second ST's, then there is a conflict between the first and third ST's: instruction 7 from iteration i is executed at the same clock as instruction 2 from iteration i+2. 87 | 88 | However, there are several ways to introduce two nop's so that no two instructions of the same type from the same iteration are executed on clock ticks whose numbers differ by a multiple of the period --- 3. For example, we can add two nop's between instructions 3 and 4, to get the sequence: 89 | 90 | LD, ST, LD, nop, nop, ADD, ST, ADD, ST. 91 | 92 | The loads occur at clocks 1 and 3 --- a difference of 2, which is not divisible by 3. 
The ADD's occur at clocks 6 and 8, again a difference not divisible by 3. The stores occur at clocks 2, 7, and 9, and none of the differences 9-2, 9-7, or 7-2 are divisible by 3. 93 | 94 | Solution: c 95 | } 96 | \end{problem} 97 | 98 | 99 | -------------------------------------------------------------------------------- /TLS.tex: -------------------------------------------------------------------------------- 1 | \newpage 2 | 3 | \section{Compiler Optimizations for Thread-Level Speculation \cite{zhai2002compiler} } 4 | While using multithreaded hardware to improve the throughput of a 5 | workload is straightforward, using it to improve the performance 6 | of a single application requires parallelization. The ideal solution 7 | would be to convert sequential programs into parallel programs automatically, but unfortunately this is difficult (if not impossible) for 8 | many general-purpose programs due to their use of pointers, complex data and control structures, and run-time inputs. 9 | 10 | 11 | Thread-Level Speculation (TLS) is a potential solution to this problem since it allows the 12 | compiler to create parallel threads without having to prove that 13 | they are independent. The underlying hardware ensures that inter-thread dependences through memory are satisfied, and re-executes 14 | any thread for which they are not. 15 | 16 | The key to extracting parallelism from these programs and hence 17 | improving performance is in the efficiency of speculative execution. While recent research has investigated hardware optimization 18 | for TLS, there has been relatively little work 19 | on compiler optimization in this area.
One potential opportunity 20 | for optimization focuses on data dependences between speculative 21 | threads that occur frequently: if the compiler is able to identify the 22 | source and the destination of a frequent inter-thread data dependence, 23 | then it is beneficial to insert synchronization and forward 24 | that value explicitly to avoid failed speculation. Figure~\ref{fig:p207}(a) shows 25 | an example loop that the compiler has speculatively parallelized by 26 | partitioning the loop into speculative threads (aka epochs). Since 27 | the variable A is read and written in every iteration, 28 | the compiler decides to synchronize and forward A by inserting a \texttt{wait} operation 29 | before the first use of A, and a \texttt{signal} operation after the last definition of A---the paper describes, implements, and evaluates this algorithm 30 | in its Section 3. The synchronization results in the partially-parallel 31 | execution shown in Figure~\ref{fig:p207}(a), where each epoch stalls until the 32 | value of A is produced by the previous epoch. The flow of the value 33 | of A between epochs serializes the parallel execution, and so we refer to it as a critical forwarding path. 34 | The overall performance of speculation is limited by the size of 35 | this critical forwarding path. 36 | 37 | \begin{figure}[H] 38 | \centering 39 | \includegraphics[width=0.5\textwidth]{p207.png} 40 | \caption{ Impact of scheduling on the critical forwarding path.} 41 | \label{fig:p207} 42 | \end{figure} 43 | 44 | The compiler can improve the performance of speculatively parallelized code by 45 | using scheduling techniques to move the signal operations (and the code that these operations depend upon) 46 | upwards through the control flow graph to reduce the length of the 47 | critical forwarding path and expose more parallelism.
48 | 49 | \subsubsection{Scheduling Instructions Speculatively} 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /images/172.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/172.png -------------------------------------------------------------------------------- /images/CDp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/CDp.png -------------------------------------------------------------------------------- /images/DFST.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/DFST.png -------------------------------------------------------------------------------- /images/GraphEdges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/GraphEdges.png -------------------------------------------------------------------------------- /images/dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/dag.png -------------------------------------------------------------------------------- /images/dag2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/dag2.png -------------------------------------------------------------------------------- /images/fgex.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/fgex.png -------------------------------------------------------------------------------- /images/flowgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/flowgraph.png -------------------------------------------------------------------------------- /images/liveex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/liveex.png -------------------------------------------------------------------------------- /images/p1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p1.png -------------------------------------------------------------------------------- /images/p10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p10.png -------------------------------------------------------------------------------- /images/p100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p100.png -------------------------------------------------------------------------------- /images/p101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p101.png 
-------------------------------------------------------------------------------- /images/p102.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p102.pdf -------------------------------------------------------------------------------- /images/p103.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p103.pdf -------------------------------------------------------------------------------- /images/p104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p104.png -------------------------------------------------------------------------------- /images/p105.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p105.png -------------------------------------------------------------------------------- /images/p106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p106.png -------------------------------------------------------------------------------- /images/p107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p107.png -------------------------------------------------------------------------------- /images/p108.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p108.png -------------------------------------------------------------------------------- /images/p109.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p109.png -------------------------------------------------------------------------------- /images/p11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p11.png -------------------------------------------------------------------------------- /images/p110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p110.png -------------------------------------------------------------------------------- /images/p111.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p111.png -------------------------------------------------------------------------------- /images/p112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p112.png -------------------------------------------------------------------------------- /images/p113.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p113.png -------------------------------------------------------------------------------- /images/p114.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p114.png -------------------------------------------------------------------------------- /images/p115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p115.jpg -------------------------------------------------------------------------------- /images/p116.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p116.png -------------------------------------------------------------------------------- /images/p117.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p117.png -------------------------------------------------------------------------------- /images/p118.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p118.png -------------------------------------------------------------------------------- /images/p119.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p119.jpg -------------------------------------------------------------------------------- /images/p12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p12.png 
-------------------------------------------------------------------------------- /images/p120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p120.png -------------------------------------------------------------------------------- /images/p121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p121.png -------------------------------------------------------------------------------- /images/p122.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p122.jpg -------------------------------------------------------------------------------- /images/p123.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p123.jpg -------------------------------------------------------------------------------- /images/p124.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p124.jpg -------------------------------------------------------------------------------- /images/p125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p125.jpg -------------------------------------------------------------------------------- /images/p126.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p126.png -------------------------------------------------------------------------------- /images/p127.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p127.jpg -------------------------------------------------------------------------------- /images/p128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p128.jpg -------------------------------------------------------------------------------- /images/p129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p129.png -------------------------------------------------------------------------------- /images/p13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p13.png -------------------------------------------------------------------------------- /images/p130.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p130.png -------------------------------------------------------------------------------- /images/p131.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p131.png -------------------------------------------------------------------------------- /images/p132.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p132.png -------------------------------------------------------------------------------- /images/p133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p133.png -------------------------------------------------------------------------------- /images/p134.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p134.png -------------------------------------------------------------------------------- /images/p135.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p135.png -------------------------------------------------------------------------------- /images/p136.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p136.png -------------------------------------------------------------------------------- /images/p137.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p137.png -------------------------------------------------------------------------------- /images/p138.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p138.png 
-------------------------------------------------------------------------------- /images/p139.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p139.png -------------------------------------------------------------------------------- /images/p14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p14.png -------------------------------------------------------------------------------- /images/p140.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p140.png -------------------------------------------------------------------------------- /images/p141.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p141.png -------------------------------------------------------------------------------- /images/p142.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p142.png -------------------------------------------------------------------------------- /images/p143.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p143.png -------------------------------------------------------------------------------- /images/p144.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p144.png -------------------------------------------------------------------------------- /images/p145.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p145.png -------------------------------------------------------------------------------- /images/p146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p146.png -------------------------------------------------------------------------------- /images/p147.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p147.png -------------------------------------------------------------------------------- /images/p148.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p148.png -------------------------------------------------------------------------------- /images/p149.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p149.png -------------------------------------------------------------------------------- /images/p15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p15.png -------------------------------------------------------------------------------- /images/p150.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p150.png -------------------------------------------------------------------------------- /images/p151.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p151.png -------------------------------------------------------------------------------- /images/p152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p152.png -------------------------------------------------------------------------------- /images/p153.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p153.png -------------------------------------------------------------------------------- /images/p154.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p154.png -------------------------------------------------------------------------------- /images/p155.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p155.png -------------------------------------------------------------------------------- /images/p156.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p156.png 
-------------------------------------------------------------------------------- /images/p157.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p157.png -------------------------------------------------------------------------------- /images/p158.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p158.png -------------------------------------------------------------------------------- /images/p159.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p159.png -------------------------------------------------------------------------------- /images/p16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p16.png -------------------------------------------------------------------------------- /images/p160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p160.png -------------------------------------------------------------------------------- /images/p161.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p161.png -------------------------------------------------------------------------------- /images/p162.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p162.png -------------------------------------------------------------------------------- /images/p163.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p163.png -------------------------------------------------------------------------------- /images/p164.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p164.png -------------------------------------------------------------------------------- /images/p165.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p165.png -------------------------------------------------------------------------------- /images/p166.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p166.png -------------------------------------------------------------------------------- /images/p167.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p167.png -------------------------------------------------------------------------------- /images/p168.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p168.png -------------------------------------------------------------------------------- /images/p169.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p169.png -------------------------------------------------------------------------------- /images/p17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p17.png -------------------------------------------------------------------------------- /images/p170.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p170.png -------------------------------------------------------------------------------- /images/p171.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p171.png -------------------------------------------------------------------------------- /images/p172.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p172.png -------------------------------------------------------------------------------- /images/p173.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p173.png -------------------------------------------------------------------------------- /images/p174.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p174.png 
-------------------------------------------------------------------------------- /images/p175.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p175.png -------------------------------------------------------------------------------- /images/p176.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p176.png -------------------------------------------------------------------------------- /images/p177.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p177.png -------------------------------------------------------------------------------- /images/p178.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p178.png -------------------------------------------------------------------------------- /images/p179.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p179.png -------------------------------------------------------------------------------- /images/p18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p18.png -------------------------------------------------------------------------------- /images/p180.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p180.png -------------------------------------------------------------------------------- /images/p181.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p181.png -------------------------------------------------------------------------------- /images/p182.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p182.png -------------------------------------------------------------------------------- /images/p183.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p183.png -------------------------------------------------------------------------------- /images/p184.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p184.png -------------------------------------------------------------------------------- /images/p185.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p185.png -------------------------------------------------------------------------------- /images/p186.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p186.png -------------------------------------------------------------------------------- /images/p187.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p187.png -------------------------------------------------------------------------------- /images/p188.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p188.png -------------------------------------------------------------------------------- /images/p189.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p189.png -------------------------------------------------------------------------------- /images/p19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p19.png -------------------------------------------------------------------------------- /images/p190.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p190.png -------------------------------------------------------------------------------- /images/p191.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p191.png -------------------------------------------------------------------------------- /images/p192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p192.png 
-------------------------------------------------------------------------------- /images/p193.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p193.png -------------------------------------------------------------------------------- /images/p194.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p194.png -------------------------------------------------------------------------------- /images/p195.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p195.png -------------------------------------------------------------------------------- /images/p196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p196.png -------------------------------------------------------------------------------- /images/p197.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p197.png -------------------------------------------------------------------------------- /images/p198.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p198.png -------------------------------------------------------------------------------- /images/p199.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p199.png -------------------------------------------------------------------------------- /images/p2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p2.png -------------------------------------------------------------------------------- /images/p20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p20.png -------------------------------------------------------------------------------- /images/p200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p200.png -------------------------------------------------------------------------------- /images/p201.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p201.png -------------------------------------------------------------------------------- /images/p202.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p202.png -------------------------------------------------------------------------------- /images/p203.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p203.png -------------------------------------------------------------------------------- /images/p204.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p204.png -------------------------------------------------------------------------------- /images/p205.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p205.png -------------------------------------------------------------------------------- /images/p206.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p206.png -------------------------------------------------------------------------------- /images/p207.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p207.png -------------------------------------------------------------------------------- /images/p208.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p208.png -------------------------------------------------------------------------------- /images/p209.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p209.png -------------------------------------------------------------------------------- /images/p21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p21.png 
-------------------------------------------------------------------------------- /images/p210.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p210.png -------------------------------------------------------------------------------- /images/p211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p211.png -------------------------------------------------------------------------------- /images/p212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p212.png -------------------------------------------------------------------------------- /images/p213.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p213.png -------------------------------------------------------------------------------- /images/p214.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p214.png -------------------------------------------------------------------------------- /images/p215.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p215.pdf -------------------------------------------------------------------------------- /images/p215.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p215.png -------------------------------------------------------------------------------- /images/p216.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p216.png -------------------------------------------------------------------------------- /images/p217.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p217.png -------------------------------------------------------------------------------- /images/p218.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p218.png -------------------------------------------------------------------------------- /images/p219.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p219.jpg -------------------------------------------------------------------------------- /images/p22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p22.png -------------------------------------------------------------------------------- /images/p220.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p220.jpg -------------------------------------------------------------------------------- /images/p221.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p221.png -------------------------------------------------------------------------------- /images/p222.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p222.png -------------------------------------------------------------------------------- /images/p223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p223.png -------------------------------------------------------------------------------- /images/p224.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p224.png -------------------------------------------------------------------------------- /images/p225.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p225.png -------------------------------------------------------------------------------- /images/p226.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p226.png -------------------------------------------------------------------------------- /images/p227.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p227.png 
-------------------------------------------------------------------------------- /images/p228.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p228.png -------------------------------------------------------------------------------- /images/p229.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p229.png -------------------------------------------------------------------------------- /images/p23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p23.png -------------------------------------------------------------------------------- /images/p230.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p230.png -------------------------------------------------------------------------------- /images/p231.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p231.png -------------------------------------------------------------------------------- /images/p232.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p232.png -------------------------------------------------------------------------------- /images/p233.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p233.png -------------------------------------------------------------------------------- /images/p234.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p234.png -------------------------------------------------------------------------------- /images/p235.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p235.png -------------------------------------------------------------------------------- /images/p236.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p236.png -------------------------------------------------------------------------------- /images/p237.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p237.png -------------------------------------------------------------------------------- /images/p238.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p238.png -------------------------------------------------------------------------------- /images/p239.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p239.png -------------------------------------------------------------------------------- /images/p24.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p24.png -------------------------------------------------------------------------------- /images/p240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p240.png -------------------------------------------------------------------------------- /images/p241.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p241.png -------------------------------------------------------------------------------- /images/p242.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p242.png -------------------------------------------------------------------------------- /images/p243.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p243.png -------------------------------------------------------------------------------- /images/p244.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p244.png -------------------------------------------------------------------------------- /images/p245.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p245.png 
-------------------------------------------------------------------------------- /images/p246.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p246.png -------------------------------------------------------------------------------- /images/p247.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p247.png -------------------------------------------------------------------------------- /images/p248.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p248.png -------------------------------------------------------------------------------- /images/p249.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p249.png -------------------------------------------------------------------------------- /images/p25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p25.png -------------------------------------------------------------------------------- /images/p250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p250.png -------------------------------------------------------------------------------- /images/p251.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p251.png -------------------------------------------------------------------------------- /images/p252.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p252.png -------------------------------------------------------------------------------- /images/p253.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p253.png -------------------------------------------------------------------------------- /images/p254.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p254.png -------------------------------------------------------------------------------- /images/p255.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p255.png -------------------------------------------------------------------------------- /images/p256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p256.png -------------------------------------------------------------------------------- /images/p257.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p257.png -------------------------------------------------------------------------------- /images/p258.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p258.png -------------------------------------------------------------------------------- /images/p259.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p259.png -------------------------------------------------------------------------------- /images/p26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p26.png -------------------------------------------------------------------------------- /images/p260.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p260.png -------------------------------------------------------------------------------- /images/p261.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p261.png -------------------------------------------------------------------------------- /images/p27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p27.png -------------------------------------------------------------------------------- /images/p28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p28.png 
-------------------------------------------------------------------------------- /images/p29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p29.png -------------------------------------------------------------------------------- /images/p3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p3.png -------------------------------------------------------------------------------- /images/p30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p30.png -------------------------------------------------------------------------------- /images/p31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p31.png -------------------------------------------------------------------------------- /images/p32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p32.png -------------------------------------------------------------------------------- /images/p33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p33.png -------------------------------------------------------------------------------- /images/p34.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p34.png -------------------------------------------------------------------------------- /images/p35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p35.png -------------------------------------------------------------------------------- /images/p36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p36.png -------------------------------------------------------------------------------- /images/p37.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p37.png -------------------------------------------------------------------------------- /images/p38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p38.png -------------------------------------------------------------------------------- /images/p39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p39.png -------------------------------------------------------------------------------- /images/p4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p4.png -------------------------------------------------------------------------------- /images/p40.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p40.png -------------------------------------------------------------------------------- /images/p41.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p41.png -------------------------------------------------------------------------------- /images/p42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p42.png -------------------------------------------------------------------------------- /images/p43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p43.png -------------------------------------------------------------------------------- /images/p44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p44.png -------------------------------------------------------------------------------- /images/p45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p45.png -------------------------------------------------------------------------------- /images/p46.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p46.png 
-------------------------------------------------------------------------------- /images/p47.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p47.pdf -------------------------------------------------------------------------------- /images/p48.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p48.pdf -------------------------------------------------------------------------------- /images/p49.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p49.pdf -------------------------------------------------------------------------------- /images/p5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p5.png -------------------------------------------------------------------------------- /images/p50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p50.pdf -------------------------------------------------------------------------------- /images/p51.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p51.pdf -------------------------------------------------------------------------------- /images/p53.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p53.pdf -------------------------------------------------------------------------------- /images/p54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p54.png -------------------------------------------------------------------------------- /images/p55.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p55.pdf -------------------------------------------------------------------------------- /images/p55.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p55.png -------------------------------------------------------------------------------- /images/p56.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p56.pdf -------------------------------------------------------------------------------- /images/p57.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p57.pdf -------------------------------------------------------------------------------- /images/p58.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p58.pdf -------------------------------------------------------------------------------- /images/p59.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p59.png -------------------------------------------------------------------------------- /images/p6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p6.png -------------------------------------------------------------------------------- /images/p60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p60.png -------------------------------------------------------------------------------- /images/p61.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p61.png -------------------------------------------------------------------------------- /images/p62.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p62.png -------------------------------------------------------------------------------- /images/p63.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p63.jpg -------------------------------------------------------------------------------- /images/p64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p64.png 
-------------------------------------------------------------------------------- /images/p65.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p65.png -------------------------------------------------------------------------------- /images/p66.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p66.png -------------------------------------------------------------------------------- /images/p67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p67.png -------------------------------------------------------------------------------- /images/p68.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p68.png -------------------------------------------------------------------------------- /images/p69.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p69.png -------------------------------------------------------------------------------- /images/p7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p7.png -------------------------------------------------------------------------------- /images/p70.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p70.png -------------------------------------------------------------------------------- /images/p71.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p71.png -------------------------------------------------------------------------------- /images/p72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p72.png -------------------------------------------------------------------------------- /images/p73.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p73.png -------------------------------------------------------------------------------- /images/p74.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p74.pdf -------------------------------------------------------------------------------- /images/p75.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p75.pdf -------------------------------------------------------------------------------- /images/p76.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p76.pdf -------------------------------------------------------------------------------- /images/p77.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p77.png -------------------------------------------------------------------------------- /images/p78.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p78.pdf -------------------------------------------------------------------------------- /images/p79.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p79.pdf -------------------------------------------------------------------------------- /images/p8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p8.png -------------------------------------------------------------------------------- /images/p80.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p80.png -------------------------------------------------------------------------------- /images/p81.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p81.png -------------------------------------------------------------------------------- /images/p82.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p82.png 
-------------------------------------------------------------------------------- /images/p83.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p83.png -------------------------------------------------------------------------------- /images/p84.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p84.pdf -------------------------------------------------------------------------------- /images/p85.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p85.pdf -------------------------------------------------------------------------------- /images/p86.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p86.pdf -------------------------------------------------------------------------------- /images/p87.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p87.pdf -------------------------------------------------------------------------------- /images/p88.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p88.pdf -------------------------------------------------------------------------------- /images/p89.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p89.png -------------------------------------------------------------------------------- /images/p9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p9.png -------------------------------------------------------------------------------- /images/p90.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p90.png -------------------------------------------------------------------------------- /images/p91.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p91.png -------------------------------------------------------------------------------- /images/p92.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p92.jpg -------------------------------------------------------------------------------- /images/p93.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p93.png -------------------------------------------------------------------------------- /images/p94.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p94.png -------------------------------------------------------------------------------- /images/p95.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p95.png -------------------------------------------------------------------------------- /images/p96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p96.png -------------------------------------------------------------------------------- /images/p97.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p97.png -------------------------------------------------------------------------------- /images/p98.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p98.png -------------------------------------------------------------------------------- /images/p99.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/p99.pdf -------------------------------------------------------------------------------- /images/postorder.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/postorder.pdf -------------------------------------------------------------------------------- /images/rdex1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/rdex1.jpg 
-------------------------------------------------------------------------------- /images/rdex2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/rdex2.jpg -------------------------------------------------------------------------------- /images/rdex3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/rdex3.jpg -------------------------------------------------------------------------------- /images/ssaexm1.drawio: -------------------------------------------------------------------------------- 1 | 7VZNc5swFPw1HOsBycb46K84nTbTZnxIclRABs0AokLY0F9fyZIAGSe2M+lMD72A3mr1JO2+Z+PAZVZvGCqSBxrh1AFuVDtw5QAwDYB4SqBRgD+bKCBmJFKQ1wFb8htr0NVoRSJcWkROacpJYYMhzXMccgtDjNGDTdvR1N61QDEeANsQpUP0iUQ8UWgAph1+j0mcmJ09f6ZmMmTI+iZlgiJ66EFw7cAlo5SrUVYvcSq1M7qodXdvzLYHYzjn1yy49+Lv1bP/9JXs1o+Pm2/7IPjxRWfZo7TSF3aAn4p8ix0VaYVgKFQT/q9KnnSxpBkJxcQW5aV4PWy7KTGK5Xs0Gpksr8yAc0E+JnIXcgTkczmkrVvavKWthjS1BcoKMc5fy0LtnqpT3KmzK6YWnzfGUUarPMJSFLHR4pAQjreFuuVBlLDAEp6lIvLa1X2RjWKYcVz3IC36BtMMc9YIip6FE10AugOAr+NDV0+eKZKkV0uGh3QJx23qzmUx0EbfYDo4Y/qJSDiP5rJ7RBSmqCyF5ZYutohvqoQjq7uGGvU0mJyRwGAMp4iTvd2T53TRO/yk5Fi92oKxOxtNLBMgOBG3pBULsV7X76LLqYKTVByxGPNBqqNV7dU/7h787x5wP829QZf9ZffGl90LK7Y/miOtus1KXBP+LMfilip6MXnEeFX3aKvGBLm4UW+RDF/6c92yY9RcKhtlxuU/HaX0Zd4/UoZwav+Oj8cfLUJgJ4Kz60pQ1AFqerRCEsp3Dhyc7OO7754Lwtv4betovjez+GKgTnxl/4iw+xhS9O6LEq7/AA== -------------------------------------------------------------------------------- /images/ssaexm1.drawio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/ssaexm1.drawio.pdf -------------------------------------------------------------------------------- 
/images/ssaexm1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/ssaexm1.png -------------------------------------------------------------------------------- /images/ssaexm2.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /images/ssaexm2.drawio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/ssaexm2.drawio.pdf -------------------------------------------------------------------------------- /images/t2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/t2.png -------------------------------------------------------------------------------- /images/t3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/t3.png -------------------------------------------------------------------------------- /images/t4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/t4.png -------------------------------------------------------------------------------- /images/test1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/test1.png -------------------------------------------------------------------------------- /images/test2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/test2.png -------------------------------------------------------------------------------- /images/test3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/test3.png -------------------------------------------------------------------------------- /images/test4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/test4.png -------------------------------------------------------------------------------- /images/vne.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/images/vne.jpg -------------------------------------------------------------------------------- /main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/main.pdf -------------------------------------------------------------------------------- /template.tex: -------------------------------------------------------------------------------- 1 | \begin{itemize} 2 | \item 3 | \item 4 | \end{itemize} 5 | 6 | 7 | \begin{figure}[H] 8 | \centering 9 | \includegraphics[width=0.4\textwidth]{p210.png} 10 | \caption{Iterative Algorithms} 11 | \label{fig:p210} 12 | \end{figure} 13 | 14 | 
\includepdf[pages={1-}]{p99.pdf} 15 | 16 | 17 | \begin{algorithm} 18 | \caption{Iterated path-convergence criterion}\label{alg:Iterated path-convergence criterion} 19 | \begin{algorithmic} 20 | 21 | \While{there are nodes $x, y, z$ satisfying conditions 1–5 22 | and \\ $z$ does not contain a $\Phi$-function for a} 23 | \State insert a $\leftarrow$ $\Phi$(a, a, . . . , a) at node $z$ 24 | \EndWhile 25 | \end{algorithmic} 26 | \end{algorithm} 27 | 28 | 29 | \begin{figure}[H] 30 | \centering 31 | \begin{subfigure}{0.3\textwidth} 32 | \centering 33 | \includegraphics[width=\textwidth]{p94.png} 34 | \caption{For \texttt{b+c}, two 35 | earliest placement 36 | points are colored in red.} 37 | \label{fig:p94} 38 | \end{subfigure} 39 | \begin{subfigure}{0.4\textwidth} 40 | \centering 41 | \includegraphics[width=\textwidth]{p95.png} 42 | \caption{For \texttt{b+c}, latest placement edges and blocks.} 43 | \label{fig:p95} 44 | \end{subfigure} 45 | 46 | \caption{} 47 | \label{} 48 | \end{figure} 49 | 50 | 51 | \begin{lstlisting}[language=C,frame=single, caption=A simple example containing some expressions ,label = lst:expression1] 52 | 53 | \end{lstlisting} 54 | -------------------------------------------------------------------------------- /test/link1.cc: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | void log(const char* msg); 4 | int multiple(int a, int b){ 5 | log("multiple"); 6 | return a*b; 7 | } 8 | int main(){ 9 | 10 | } 11 | 12 | // clang++ link1.cc 13 | // link1.cc:(.text+0x19): undefined reference to `log(char const*)' Why do we get a link error even if we don't use `multiple` at all? 14 | 15 | // Because `multiple` can be used from files other than this one. We can make the `multiple` function visible only within this file by adding `static` to its declaration. 
-------------------------------------------------------------------------------- /test/link2.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liusy58/CompilerNotes/43c2fbd9d4cad8b9f417a62d0b56206f45d32130/test/link2.cc -------------------------------------------------------------------------------- /test/log.cc: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | void log(const char* msg){ 4 | std::cout << msg << std::endl; 5 | } --------------------------------------------------------------------------------