├── .gitignore ├── 22-05-presentation ├── Bibliography.bib ├── Wue.sty ├── imgs │ ├── standards.png │ ├── standards2.png │ └── standards3.png ├── main.pdf ├── main.tex ├── unilogo4c.jpg └── unilogo4c.png ├── Ideas └── NeuralTransformer.md ├── ReadMe.md └── attachments ├── logo.png └── standards3.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/obsidian,latex,visualstudiocode 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=obsidian,latex,visualstudiocode 3 | 4 | ### LaTeX ### 5 | ## Core latex/pdflatex auxiliary files: 6 | *.aux 7 | *.lof 8 | *.log 9 | *.lot 10 | *.fls 11 | *.out 12 | *.toc 13 | *.fmt 14 | *.fot 15 | *.cb 16 | *.cb2 17 | .*.lb 18 | 19 | ## Intermediate documents: 20 | *.dvi 21 | *.xdv 22 | *-converted-to.* 23 | # these rules might exclude image files for figures etc. 24 | # *.ps 25 | # *.eps 26 | # *.pdf 27 | 28 | ## Generated if empty string is given at "Please type another file name for output:" 29 | .pdf 30 | 31 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 32 | *.bbl 33 | *.bcf 34 | *.blg 35 | *-blx.aux 36 | *-blx.bib 37 | *.run.xml 38 | 39 | ## Build tool auxiliary files: 40 | *.fdb_latexmk 41 | *.synctex 42 | *.synctex(busy) 43 | *.synctex.gz 44 | *.synctex.gz(busy) 45 | *.pdfsync 46 | 47 | ## Build tool directories for auxiliary files 48 | # latexrun 49 | latex.out/ 50 | 51 | ## Auxiliary and intermediate files from other packages: 52 | # algorithms 53 | *.alg 54 | *.loa 55 | 56 | # achemso 57 | acs-*.bib 58 | 59 | # amsthm 60 | *.thm 61 | 62 | # beamer 63 | *.nav 64 | *.pre 65 | *.snm 66 | *.vrb 67 | 68 | # changes 69 | *.soc 70 | 71 | # comment 72 | *.cut 73 | 74 | # cprotect 75 | *.cpt 76 | 77 | # elsarticle (documentclass of Elsevier journals) 78 | *.spl 79 | 80 | # endnotes 81 | *.ent 82 | 83 | # fixme 84 | *.lox 85 | 86 | # feynmf/feynmp 87 | *.mf 88 | *.mp 89 | *.t[1-9] 90 | *.t[1-9][0-9] 91 
| *.tfm 92 | 93 | #(r)(e)ledmac/(r)(e)ledpar 94 | *.end 95 | *.?end 96 | *.[1-9] 97 | *.[1-9][0-9] 98 | *.[1-9][0-9][0-9] 99 | *.[1-9]R 100 | *.[1-9][0-9]R 101 | *.[1-9][0-9][0-9]R 102 | *.eledsec[1-9] 103 | *.eledsec[1-9]R 104 | *.eledsec[1-9][0-9] 105 | *.eledsec[1-9][0-9]R 106 | *.eledsec[1-9][0-9][0-9] 107 | *.eledsec[1-9][0-9][0-9]R 108 | 109 | # glossaries 110 | *.acn 111 | *.acr 112 | *.glg 113 | *.glo 114 | *.gls 115 | *.glsdefs 116 | *.lzo 117 | *.lzs 118 | *.slg 119 | *.slo 120 | *.sls 121 | 122 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 123 | # *.ist 124 | 125 | # gnuplot 126 | *.gnuplot 127 | *.table 128 | 129 | # gnuplottex 130 | *-gnuplottex-* 131 | 132 | # gregoriotex 133 | *.gaux 134 | *.glog 135 | *.gtex 136 | 137 | # htlatex 138 | *.4ct 139 | *.4tc 140 | *.idv 141 | *.lg 142 | *.trc 143 | *.xref 144 | 145 | # hyperref 146 | *.brf 147 | 148 | # knitr 149 | *-concordance.tex 150 | # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files 151 | # *.tikz 152 | *-tikzDictionary 153 | 154 | # listings 155 | *.lol 156 | 157 | # luatexja-ruby 158 | *.ltjruby 159 | 160 | # makeidx 161 | *.idx 162 | *.ilg 163 | *.ind 164 | 165 | # minitoc 166 | *.maf 167 | *.mlf 168 | *.mlt 169 | *.mtc[0-9]* 170 | *.slf[0-9]* 171 | *.slt[0-9]* 172 | *.stc[0-9]* 173 | 174 | # minted 175 | _minted* 176 | *.pyg 177 | 178 | # morewrites 179 | *.mw 180 | 181 | # newpax 182 | *.newpax 183 | 184 | # nomencl 185 | *.nlg 186 | *.nlo 187 | *.nls 188 | 189 | # pax 190 | *.pax 191 | 192 | # pdfpcnotes 193 | *.pdfpc 194 | 195 | # sagetex 196 | *.sagetex.sage 197 | *.sagetex.py 198 | *.sagetex.scmd 199 | 200 | # scrwfile 201 | *.wrt 202 | 203 | # svg 204 | svg-inkscape/ 205 | 206 | # sympy 207 | *.sout 208 | *.sympy 209 | sympy-plots-for-*.tex/ 210 | 211 | # pdfcomment 212 | *.upa 213 | *.upb 214 | 215 | # pythontex 216 | *.pytxcode 217 | pythontex-files-*/ 218 | 219 | # tcolorbox 220 | *.listing 221 | 222 | # thmtools 
223 | *.loe 224 | 225 | # TikZ & PGF 226 | *.dpth 227 | *.md5 228 | *.auxlock 229 | 230 | # titletoc 231 | *.ptc 232 | 233 | # todonotes 234 | *.tdo 235 | 236 | # vhistory 237 | *.hst 238 | *.ver 239 | 240 | # easy-todo 241 | *.lod 242 | 243 | # xcolor 244 | *.xcp 245 | 246 | # xmpincl 247 | *.xmpi 248 | 249 | # xindy 250 | *.xdy 251 | 252 | # xypic precompiled matrices and outlines 253 | *.xyc 254 | *.xyd 255 | 256 | # endfloat 257 | *.ttt 258 | *.fff 259 | 260 | # Latexian 261 | TSWLatexianTemp* 262 | 263 | ## Editors: 264 | # WinEdt 265 | *.bak 266 | *.sav 267 | 268 | # Texpad 269 | .texpadtmp 270 | 271 | # LyX 272 | *.lyx~ 273 | 274 | # Kile 275 | *.backup 276 | 277 | # gummi 278 | .*.swp 279 | 280 | # KBibTeX 281 | *~[0-9]* 282 | 283 | # TeXnicCenter 284 | *.tps 285 | 286 | # auto folder when using emacs and auctex 287 | ./auto/* 288 | *.el 289 | 290 | # expex forward references with \gathertags 291 | *-tags.tex 292 | 293 | # standalone packages 294 | *.sta 295 | 296 | # Makeindex log files 297 | *.lpz 298 | 299 | # xwatermark package 300 | *.xwm 301 | 302 | # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib 303 | # option is specified. Footnotes are the stored in a file with suffix Notes.bib. 304 | # Uncomment the next line to have this generated file ignored. 
305 | #*Notes.bib 306 | 307 | ### LaTeX Patch ### 308 | # LIPIcs / OASIcs 309 | *.vtc 310 | 311 | # glossaries 312 | *.glstex 313 | 314 | ### Obsidian ### 315 | # config dir 316 | .obsidian/ 317 | 318 | ### VisualStudioCode ### 319 | .vscode/* 320 | !.vscode/settings.json 321 | !.vscode/tasks.json 322 | !.vscode/launch.json 323 | !.vscode/extensions.json 324 | !.vscode/*.code-snippets 325 | 326 | # Local History for Visual Studio Code 327 | .history/ 328 | 329 | # Built Visual Studio Code Extensions 330 | *.vsix 331 | 332 | ### VisualStudioCode Patch ### 333 | # Ignore all local history of files 334 | .history 335 | .ionide 336 | 337 | # End of https://www.toptal.com/developers/gitignore/api/obsidian,latex,visualstudiocode 338 | 339 | 340 | main.pdf 341 | Icons 342 | -------------------------------------------------------------------------------- /22-05-presentation/Bibliography.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/Bibliography.bib -------------------------------------------------------------------------------- /22-05-presentation/Wue.sty: -------------------------------------------------------------------------------- 1 | \mode<presentation> 2 | 3 | \newif\ifbeamer@secheader 4 | \beamer@secheaderfalse 5 | 6 | %\DeclareOptionBeamer{secheader}{\beamer@secheadertrue} 7 | \ProcessOptionsBeamer 8 | 9 | \useoutertheme[footline=authorinstitutetitle,subsection=false]{smoothbars} 10 | \makeatletter % [add curpage/total page at the bottom](http://tex.stackexchange.com/questions/100838/beamer-dresden-theme-miniframes-appeareance-and-frame-number-insertion) 11 | \newcommand{\frameofframes}{/} 12 | \newcommand{\setframeofframes}[1]{\renewcommand{\frameofframes}{#1}} 13 | \setbeamertemplate{footline} 14 | {% 15 | \begin{beamercolorbox}[colsep=1.5pt]{upper separation line foot} 16 | \end{beamercolorbox} 17 | 
\begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,% 18 | leftskip=.3cm,rightskip=.3cm plus1fil]{author in head/foot}% 19 | \leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor}% 20 | \hfill% 21 | {\usebeamerfont{institute in head/foot}\usebeamercolor[fg]{institute in head/foot}\insertshortinstitute}% 22 | \end{beamercolorbox}% 23 | \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,% 24 | leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% 25 | {\usebeamerfont{title in head/foot}\insertshorttitle}% 26 | \hfill% 27 | {\usebeamerfont{frame number}\usebeamercolor[fg]{frame number}\insertframenumber~\frameofframes~\inserttotalframenumber} 28 | \end{beamercolorbox}% 29 | \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot} 30 | \end{beamercolorbox} 31 | } 32 | \makeatother 33 | 34 | \useinnertheme{circles} 35 | 36 | %\useoutertheme{default} 37 | %\useinnertheme[shadow=true]{rounded} 38 | 39 | \xdefinecolor{wue}{RGB}{37,86,149} 40 | \setbeamercolor{footline}{bg=wue} 41 | \setbeamercolor{frametitle}{bg=wue,fg=white} 42 | \setbeamercolor{title}{bg=wue} 43 | \setbeamerfont{frametitle}{size=\large} 44 | %\setbeamertemplate{navigation symbols}{} 45 | \setbeamertemplate{bibliography item}[text] 46 | \setbeamertemplate{caption}[numbered] 47 | 48 | \setbeamercolor{palette primary}{use=structure,fg=white,bg=structure.fg} 49 | \setbeamercolor{palette secondary}{use=structure,fg=white,bg=structure.fg!75!black} 50 | \setbeamercolor{palette tertiary}{use=structure,fg=white,bg=structure.fg!50!black} 51 | \setbeamercolor{palette quaternary}{fg=white,bg=structure.fg!50!black} 52 | %\setbeamercolor*{sidebar}{use=structure,bg=structure.fg} 53 | \setbeamercolor{titlelike}{parent=palette primary} 54 | 55 | \setbeamercolor{block title}{bg=wue,fg=white} 56 | \setbeamercolor*{block title example}{use={normal text,example text},bg=white,fg=wue} 57 | \setbeamercolor{fine separation line}{} 58 | \setbeamercolor{item projected}{fg=white} 59 | \setbeamercolor{palette sidebar 
primary}{use=normal text,fg=normal text.fg} 60 | \setbeamercolor{palette sidebar quaternary}{use=structure,fg=structure.fg} 61 | \setbeamercolor{palette sidebar secondary}{use=structure,fg=structure.fg} 62 | \setbeamercolor{palette sidebar tertiary}{use=normal text,fg=normal text.fg} 63 | %\setbeamercolor{palette sidebar quaternary}{fg=white} 64 | \setbeamercolor{section in sidebar}{fg=brown} 65 | \setbeamercolor{section in sidebar shaded}{fg=grey} 66 | \setbeamercolor{separation line}{} 67 | \setbeamercolor{sidebar}{bg=wue} 68 | \setbeamercolor{sidebar}{parent=palette primary} 69 | \setbeamercolor{structure}{fg=wue} 70 | \setbeamercolor{subsection in sidebar}{fg=brown} 71 | \setbeamercolor{subsection in sidebar shaded}{fg=grey} 72 | \AtBeginSection[]{ 73 | \begin{frame} 74 | \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide,subsubsectionstyle=show/shaded/hide] 75 | \end{frame} 76 | } 77 | \AtBeginSubsection[]{ 78 | \begin{frame} 79 | \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide,subsubsectionstyle=show/shaded/hide] 80 | \end{frame} 81 | } 82 | 83 | \mode<all> 84 | 85 | -------------------------------------------------------------------------------- /22-05-presentation/imgs/standards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards.png -------------------------------------------------------------------------------- /22-05-presentation/imgs/standards2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards2.png -------------------------------------------------------------------------------- /22-05-presentation/imgs/standards3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards3.png -------------------------------------------------------------------------------- /22-05-presentation/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/main.pdf -------------------------------------------------------------------------------- /22-05-presentation/main.tex: -------------------------------------------------------------------------------- 1 | %------------------------------------------------------------- 2 | \documentclass[aspectratio=169, handout]{beamer} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage{hyperref} 5 | \usepackage[T1]{fontenc} 6 | \usepackage{tikz} 7 | 8 | \usepackage{latexsym,xcolor,multicol,booktabs,calligra} 9 | \usepackage{amsmath,amssymb,BOONDOX-cal,bm} 10 | \usepackage{graphicx,pstricks,stackengine} 11 | \usepackage{tabularx} 12 | \usepackage{makecell} 13 | \usepackage{listings} 14 | \usepackage{etoolbox} 15 | \usepackage{tablefootnote} 16 | \usepackage{ bbold } 17 | \usepackage{tikz} 18 | \usepackage{xfrac} 19 | \usepackage{longtable} 20 | 21 | 22 | \author{Marcel Ullrich} 23 | %\titlegraphic{\includegraphics[width=\textwidth]{unilogo4cmittel.jpg}} 24 | \titlegraphic{ 25 | \begin{tikzpicture}[overlay,remember picture] 26 | \node[right=-0.15cm] at (current page.154){%150 vorher 27 | \includegraphics[width=1.2\textwidth]{unilogo4c.png} 28 | }; 29 | \end{tikzpicture} 30 | } 31 | \title{\textbf{U}niversal \textbf{L}anguage \textbf{Tra}nspiler} % Esperanto 32 | \institute{Saarland University } 33 | \date{02.05.2022} 34 | %\logo{\includegraphics[width=0.12\textwidth]{MessmapLogo.png}} 35 | 36 | \usepackage{Wue} 37 | 38 | 
\def\cmd#1{\texttt{\color{red}\footnotesize $\backslash$#1}} 39 | \def\env#1{\texttt{\color{blue}\footnotesize #1}} 40 | 41 | \newtheorem{thm}{Theorem}[theorem] 42 | 43 | 44 | %—------------------------------------------------------------- 45 | 46 | \begin{document} 47 | \begin{frame} 48 | \titlepage 49 | \end{frame} 50 | 51 | % \section{ABC} 52 | 53 | \begin{frame}{Problem} 54 | \begin{itemize} 55 | \item Migration from scripting to programming \pause %https://dl.acm.org/doi/abs/10.1145/1176617.1176755?casa_token=dFMn2xhiTeUAAAAA:EC0wkfDgt3clgpbnwQ1x2JasOiL9wOvZQ0HoxKdUNexgjpAznzLg4tpfJpfDyXp0MIYzBHeE58tS 56 | %https://dl.acm.org/doi/abs/10.1145/103135.103138 57 | \item Rewriting of code \pause 58 | \item Interface changes 59 | \end{itemize} 60 | \only<4->{ 61 | \begin{center} 62 | \alt<6>{ 63 | \includegraphics[width=.5\textwidth]{imgs/standards3.png}% 64 | }{ 65 | \alt<5>{ 66 | \includegraphics[width=.5\textwidth]{imgs/standards2.png}% 67 | }{ 68 | \includegraphics[width=.5\textwidth]{imgs/standards.png} 69 | } 70 | } 71 | %3145 esoterische PL 72 | 73 | { \tiny \url{https://xkcd.com/927/} \visible<5->{(modified)}} 74 | \end{center} 75 | } 76 | \end{frame} 77 | 78 | \begin{frame}{Goal} 79 | \begin{itemize} 80 | \item translation of code \pause 81 | \item language agnostic \pause 82 | \item minimal boilerplate \pause 83 | \item readable 84 | \end{itemize} 85 | \end{frame} 86 | 87 | \begin{frame}{Classification} 88 | \begin{itemize} 89 | \item {\color<2>{gray}Machine languages} 90 | \item {\color<2>{gray}Assembly languages} 91 | \item High-level languages 92 | \item Scripting languages 93 | \item System languages 94 | \item Domain-specific languages 95 | \item {\color<2>{gray}Visual languages} 96 | \item {\color<2>{gray}Esoteric languages} 97 | \item {\color<2>{gray}other} 98 | \end{itemize} 99 | \end{frame} 100 | 101 | \begin{frame}{High-level languages} 102 | \begin{itemize} 103 | \item imperative 104 | \item functional 105 | \item {\color<2>{gray}declarative} 
106 | \item {\color<2>{gray}array} 107 | \item constraint 108 | \item {\color<2>{gray}other} 109 | \end{itemize} 110 | 111 | \end{frame} 112 | 113 | \begin{frame}{Examples} 114 | %https://cs.lmu.edu/~ray/notes/pltypes/ 115 | \begin{itemize} 116 | \item \href{https://rosettacode.org/wiki/Mandelbrot_set}{Mandelbrot} 117 | \item \url{https://cs.lmu.edu/~ray/notes/pltypes/} 118 | \end{itemize} 119 | \end{frame} 120 | 121 | \begin{frame}{Issues} 122 | \begin{itemize} 123 | \item large difference between languages \pause % compiled interpreted, typed untypes 124 | \item boilerplate around differences\pause 125 | \item language specific features\pause %e.g. metaprogramming 126 | \item correct code generation\pause 127 | \item interaction with the user 128 | \item scaling 129 | \end{itemize} 130 | \end{frame} 131 | 132 | \begin{frame}{Ideas} 133 | \begin{itemize} 134 | \item concept: 135 | \begin{itemize} 136 | \item transpiler generator 137 | \item transpiler that generates 138 | \end{itemize} \pause 139 | \item approach: 140 | \begin{itemize} 141 | \item structure based 142 | \item partial evaluation 143 | \item translation 144 | \item synthesis 145 | \item + validation 146 | \item common language 147 | \end{itemize} 148 | \end{itemize} 149 | \end{frame} 150 | 151 | % TODO 152 | 153 | % https://emina.github.io/rosette/ 154 | % https://docs.racket-lang.org/rosette-guide/index.html 155 | % https://github.com/google-research/crossbeam 156 | % https://github.com/jarble/transpiler 157 | 158 | \begin{frame}{Concept: Transpiler generator} 159 | \begin{itemize} 160 | \item general language independent framework 161 | \item translation rules per language 162 | \item need synthesis of rules \pause 163 | \item advantages: 164 | \begin{itemize} 165 | \item modular 166 | \item well suited for very similar languages 167 | \end{itemize} \pause 168 | \item disadvantages: 169 | \begin{itemize} 170 | \item complicated rules 171 | \item difficult for different ASTs 172 | \item syntactical 
173 | \end{itemize} 174 | \end{itemize} 175 | \end{frame} 176 | 177 | \begin{frame}{Concept: Generating Transpiler} 178 | \begin{itemize} 179 | \item takes a fragment of code 180 | \item produces a fragment of code \pause 181 | \item advantages: 182 | \begin{itemize} 183 | \item general 184 | \item allows structure changes 185 | \end{itemize} \pause 186 | \item disadvantages: 187 | \begin{itemize} 188 | \item not modular 189 | \item not necessarily structure preserving 190 | \end{itemize} 191 | \end{itemize} 192 | \end{frame} 193 | 194 | % \begin{frame}{Concept: Correctness} 195 | % \Huge TODO 196 | % \end{frame} 197 | 198 | % Translation 199 | 200 | \begin{frame}{Idea: Structure based} 201 | \begin{itemize} 202 | \item synthesize translation rules 203 | \item compare examples in the two languages 204 | \item same context $\Rightarrow$ same semantics \pause 205 | \item advantages: 206 | \begin{itemize} 207 | \item modular 208 | \item well suited for very similar languages 209 | \end{itemize} \pause 210 | \item disadvantages: 211 | \begin{itemize} 212 | \item complicated rules 213 | \item difficult for different ASTs 214 | \item syntactical 215 | \end{itemize} 216 | \end{itemize} 217 | \end{frame} 218 | 219 | \begin{frame}{Idea: Partial evaluation} 220 | \begin{itemize} 221 | \item interpreter of A in B 222 | \item partial evaluation of B 223 | \item symbolic evaluation $\Rightarrow$ partial eval, interpreter \pause 224 | \item advantages: 225 | \begin{itemize} 226 | \item reuses written code 227 | \item correctness transfer 228 | \item one manual transpilation 229 | \end{itemize} \pause 230 | \item disadvantages: 231 | \begin{itemize} 232 | \item needs complex components 233 | \item work for each new language 234 | \end{itemize} 235 | \end{itemize} 236 | \end{frame} 237 | 238 | \begin{frame}{Idea: Translation} 239 | \begin{itemize} 240 | \item text to text translation \pause % or graph 241 | \item advantages: 242 | \begin{itemize} 243 | \item works for natural 
languages 244 | \item classical machine learning problem 245 | \end{itemize} \pause 246 | \item disadvantages: 247 | \begin{itemize} 248 | \item no correctness guarantees 249 | \item complicated structural constraints 250 | \item needs suitable representation 251 | \item no insight 252 | \end{itemize} 253 | \end{itemize} 254 | % like a neural network translating normal languages 255 | \end{frame} 256 | 257 | \begin{frame}{Idea: Synthesis} 258 | \Huge TODO 259 | \end{frame} 260 | 261 | \begin{frame}{Idea: Synthesis + Validation} 262 | \Huge TODO 263 | \end{frame} 264 | 265 | \begin{frame}{Idea: Common language} 266 | \Huge TODO 267 | \end{frame} 268 | 269 | % \begin{frame}{Idea: } 270 | % \Huge TODO 271 | % \end{frame} 272 | 273 | 274 | 275 | 276 | \begin{frame}{Applications} 277 | \begin{minipage}{.49\textwidth} 278 | \begin{itemize} 279 | \item Similar to Pandoc, Rosetta, LLVM \pause 280 | \item API changes \pause 281 | \item virtual API \pause 282 | \item Migration to new code (maintenance) \pause 283 | \begin{itemize} 284 | \item COBOL $\to$ C(++) \pause 285 | \item PHP $\to$ Javascript/Python \pause 286 | \item X $\to$ Typescript \pause 287 | \end{itemize} 288 | \end{itemize} 289 | \end{minipage}% 290 | \begin{minipage}{.49\textwidth} 291 | \begin{itemize} 292 | \item Language features \pause 293 | \begin{itemize} 294 | \item verification \pause 295 | \item libraries \pause 296 | \item speed \pause 297 | \item knowledge \pause 298 | \item safety guarantees \pause 299 | \item optimizations \pause 300 | \item tooling environment \pause 301 | \end{itemize} 302 | \item competing frameworks % angular, react, vue, fabric, forge 303 | \end{itemize} 304 | \end{minipage} 305 | \end{frame} 306 | 307 | 308 | \end{document} -------------------------------------------------------------------------------- /22-05-presentation/unilogo4c.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/unilogo4c.jpg -------------------------------------------------------------------------------- /22-05-presentation/unilogo4c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/unilogo4c.png -------------------------------------------------------------------------------- /Ideas/NeuralTransformer.md: -------------------------------------------------------------------------------- 1 | # Neural Transformer Transpilation 2 | 3 | The basic idea of this approach is to generate (parts) of the output using text(-based) transformer networks. 4 | Afterward, the trust (program equivalence) in the output has to be established. 5 | 6 | 7 | ## Advantage 8 | - [ ] Full usage of network to 9 | - [ ] synthesize optimal code 10 | - [ ] connect concepts (unsupervised learning) 11 | - [ ] conceptually simple 12 | - [ ] modular & configurable 13 | 14 | ## Difficulties & Limitations 15 | - [ ] no formal guarantees 16 | - [ ] output bound by network capabilities 17 | - [ ] needs to ensure trust afterward 18 | - [ ] can fail unexpectedly 19 | - [ ] needs a LLM 20 | 21 | ## Details 22 | 23 | We start with code in language A and convince a LLM to generate code in language B with the same semantics. 
24 | There are many choices of networks to use: 25 | * T5 inspired architectures specially trained for programming language tasks 26 | * AST network architectures (need knowledge of language A) 27 | * general purpose LLM 28 | * zero-shot: Carefully crafted prompt to transpile 29 | * one-shot/few-shot: give translation examples to guide transpilation (give specifics about the language) 30 | * finetuning: fine tune the model using a few hundred / thousand examples 31 | * attention based correlation (codex) 32 | 33 | Sometimes, LLMs run in a wrong direction. Therefore, a restart might be necessary to come up with another (hopefully correct) solution. 34 | 35 | The model might produce (depending on the architecture) syntactically and semantically wrong results. 36 | 37 | For syntactical issues, we can forward the compiler feedback to iterate until a valid program is returned. 38 | 39 | Semantical issues are more difficult to detect and correct. 40 | For the output to be trusted, we need to establish an equivalence with the input. 41 | This can happen in multiple ways: 42 | * correlation based mapping 43 | * automatic equivalence proofs 44 | * (bounded) translation validation proofs 45 | * property/fuzzing tests 46 | 47 | A testing approach is the easiest to implement and maintain for all languages. 48 | Tests cannot guarantee correctness. However, well-tested code is often enough in practice. (Note: Depending on the type system and reasoning, whitebox tests could establish correctness) 49 | Additionally, tests provide us with concrete examples of a mis-translation. 50 | We can feed back input-output pairs to refine the result until both programs show the same behaviour. 51 | A tool like quickcheck/hypothesis might help to generate the tests. 52 | 53 | 54 | ## Experiments & Results 55 | 56 | [see subfolder] 57 | 58 | 59 | ## Related Literature 60 | Some literature that applies specifically to this experiment. 
61 | This includes prompt engineering attempts especially for \*GPT\* models. 62 | - Prompting 63 | - GPT3 conditioning 64 | - https://news.ycombinator.com/item?id=34869960#34873669 65 | - [GPT3 to Chat](https://github.com/Kav-K/GPT3Discord) [Author Response](https://old.reddit.com/r/GPT3/comments/zxs18b/gpt3dalle2_discord_bot_with_mediumlong_term_memory/j22a0vk/) 66 | - ChatGPT conditioning 67 | - [Overview](https://www.jailbreakchat.com/) 68 | - [prompt collection](https://github.com/f/awesome-chatgpt-prompts) 69 | - [Dan 5.0](https://www.reddit.com/r/ChatGPT/comments/10tevu1/new_jailbreak_proudly_unveiling_the_tried_and/) 70 | - [Dan 6.0](https://www.reddit.com/r/ChatGPT/comments/10vinun/presenting_dan_60/) 71 | - [SDAN](https://www.reddit.com/r/ChatGPT/comments/10vlzbo/presenting_sdan_simple_dan/) 72 | - [SQL analyst in 26 recursive prompts](https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/) 73 | - [Hackernews](https://news.ycombinator.com/item?id=34521149) 74 | - [Midjourney Prompt Engine 3](https://www.reddit.com/r/midjourney/comments/11chf6s/version_3_of_my_chatgpt_prompting_machine_it_now/) 75 | - [stable diffusion prompter](https://www.reddit.com/r/StableDiffusion/comments/11cfe1i/is_there_a_chatgpt_prompt_to_create_sd_prompts/) 76 | - [GPT4 Jailbreak](https://news.ycombinator.com/item?id=35190383) 77 | - LLM Prompt Engineering 78 | - [Microsoft Prompt Engine](https://news.ycombinator.com/item?id=34811070) 79 | - [GPT Error Reduction](https://medium.com/@0xjfan/how-we-cut-the-rate-of-gpt-hallucinations-from-20-to-less-than-2-f3bfcc10e4ec) 80 | - [openai cookbook](https://github.com/openai/openai-cookbook) 81 | - [openai prompt engineering](https://learnprompting.org/docs/intro) 82 | - Data Processing 83 | - [Pinecone long term vector memory](https://www.pinecone.io/) 84 | - [Load Repo](https://github.com/mpoon/gpt-repository-loader) 85 | - API 86 | - [ChatGPT official API](https://openai.com/blog/introducing-chatgpt-and-whisper-apis) 87 | - 
[ChatBlade](https://github.com/npiv/chatblade) 88 | - [AI Chat](https://github.com/sigoden/aichat/) 89 | 90 | 91 | ### Concrete Prompts 92 | 93 |
94 | Table prompt GUI Interaction 95 | INSTRUCTIONS: Provide a {text) that you would like to make changes to. Generate a TABLE with 2 96 | columns, one with numbers and one with 5 different writing styles. After the TABLE is generated, ask 97 | the question "What writing style would you like to implement? Pick one from the table above" below 98 | the TABLE. Wait for the user to pick a number. 99 | 100 | 101 | text = 102 | 103 | Execute the INSTRUCTIONS in a TABLE format: 104 |
-------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # Transpilation 2 | 3 | In this repository, we investigate different aspects and solutions (ideas) to the problem of having too many programming languages. 4 | 5 | ![Logo](attachments/logo.png) 6 | 7 | - [Problem](#Problem) 8 | - [Examples](#Examples) 9 | - [Goal](#Goal) 10 | - [Related Work](#related-work) 11 | - [Program Equivalence](#program-equivalence) 12 | - [Translation Validation](#translation-validation) 13 | - [Program Translation](#program-translation) 14 | - [Synthesis](#Synthesis) 15 | - [Artificial Intelligence](#artificial-intelligence) 16 | - [Transpilers](#Transpilers) 17 | - [Related Concepts](#related-concepts) 18 | - [Ideas](#Ideas) 19 | 20 | 21 | ## Problem 22 | 23 | ``` 24 | TLDR: 25 | There are features bound to programming languages that are conceptually independent of the concrete language used. 26 | With over 9000 programming languages in use, porting these tools by hand to all popular languages is infeasible. 27 | In conclusion, this leads to unnecessary work and unavailability of tools. 28 | ``` 29 | 30 | 31 | There are a lot (current estimate >9000) of programming languages (at least 1000 with some hundreds in active use and possibly many more -- there are at least 3570 registered esoteric programming languages). 32 | 33 | Not every language is the same. 34 | We will focus on general-use programming languages (either by design or by use) and ignore special use programming languages. 35 | For these languages we can find multiple similarities and differences. 
36 | 37 | There are many aspects to languages (non-exclusive) (I only give one or two examples per class -- many languages are omitted): 38 | * dependent type systems (Gallina, Idris) 39 | * functional programming (Haskell, OCaml) 40 | * imperative (C++) 41 | * object oriented (Java) 42 | * dynamically typed (python, javascript) 43 | * array programming (APL, Fortran) 44 | * low-level (C, Rust) 45 | * Lisp 46 | * prototype (Lua) 47 | * scripting (python, lua) 48 | * concurrent (Rust) 49 | * logical (prolog) 50 | * .... 51 | And many more paradigms can be found on [Wikipedia](https://en.wikipedia.org/wiki/Programming_paradigm). 52 | 53 | 54 | There are good (my opinion) reasons to use different languages: 55 | * features suited for a special use case 56 | * low level memory control 57 | * fully managed memory 58 | * incompatible features that you like 59 | * imperative scripting 60 | * dependent type systems 61 | * pure functional programming 62 | * simplicity (dependent on special cases -> maybe consider staging) 63 | * complexity (dependent on special cases -> maybe consider staging) 64 | * design around special use case (not just primitives) 65 | **Note**: Many of these reasons could be summarized with the debate about static vs dynamic typing, functional vs imperative, dependent vs non-dependent types. And the complexity of specifying implicit assumptions. This is an area that warrants improvement. However, it will not be our focus here. 66 | 67 | There are also not so good (my opinion) reasons: 68 | * Tools available for this language 69 | * Analyzers 70 | * IDEs 71 | * Libraries 72 | * Frameworks 73 | * Standard library functions 74 | * Compiler 75 | * Speed 76 | * Optimizations 77 | * Userbase 78 | * Language Primitives 79 | 80 | I am not saying it is wrong to choose a language by any of the second set of points. 81 | However, I propose that these points do not have to / should not influence the choice of language. 
82 | On a more abstract level, these points can (for the most part) be isolated from the language itself. 83 | They are only linked by implementation to a language but not conceptually as they apply to programming itself. 84 | In the end, a programming language is only a tool to express semantics of a procedure manipulating data. How we express this can differ in syntax but is conceptually the same. 85 | 86 | Even though the syntax might look quite different between languages, 87 | an experienced programmer can pick up any language fast with only a few examples. 88 | They will tell you that the underlying principles between most languages are the same. 89 | A for loop in C is the same as in Java or Python. 90 | The `map` function of Haskell, the `.map` in Java, the for loop in C, and the list comprehension in Python all look different but express the same semantics. 91 | A while loop and a tail recursive function are quite different in appearance, but every undergrad student learns that they function the same and result in very similar assembly code. 92 | 93 | Despite these similarities or due to the freedom of expression, many languages developed and are in use. 94 | It is good that you can freely choose how you want to write code. 95 | However, it is a shame when tools are locked to a language or need time-intensive ports and re-implementations. 96 | 97 | ![Logo](attachments/standards3.png) 98 | 99 | ### Examples 100 | 101 | Only to name a few examples, we can look at common libraries: 102 | 103 | [**Tensorflow**](https://www.tensorflow.org/) and [**PyTorch**](https://pytorch.org/) are widely used and very successful machine-learning tools. 104 | There exist many (one for every popular language) implementations of the frameworks that effectively call the underlying C libraries. Some languages have more sophisticated libraries that embed the frameworks as DSL / reify parts of the language into the frameworks. 
105 | Additionally, some languages have libraries building on the basic frameworks. 106 | Python has some of the best support for these frameworks. 107 | 108 | It is tedious and schematic to write all the boilerplate to call the libraries. 109 | Some language pairs have semi-automation to generate ABI/FFI. 110 | But these tools only help for special cases and do not solve the problem in general. 111 | The additional problem of advanced interfaces and libraries is currently not solved. 112 | 113 | One could even argue that for many applications the framework itself could be an implementation detail: The programmer wants to express an ML model. The details could be hidden by a common interface. There are libraries available that do this. However, it is imaginable that such an interface could be inferred automatically and translate between both libraries without human interaction. 114 | 115 | The [**QuickCheck**](https://en.wikipedia.org/wiki/QuickCheck) framework is a popular combinator library that helps in generating test cases. 116 | It originates in Haskell and was successfully applied to other languages as well. 117 | There is QuickChick in Coq, Hypothesis in Python, based on Hypothesis there is PropTest in Rust, and there are 60 more **re**-implementations in other languages. 118 | To be more precise, there are at least six quickcheck re-implementations for python on Github with at least 15 stars. 119 | Some of these re-implementations "only" support random test generation while others are more fully featured, including features like test minimization. 120 | For more quickcheck re-implementations also see the website of [Hypothesis](https://hypothesis.works/articles/quickcheck-in-every-language/) (a python re-implementation of quickcheck/inspired-project). 121 | 122 | Before quickchick was re-implemented, the default way was to write the code, expose an FFI and use quickchick externally by hand.
123 | 124 | Other common examples are **SQL** frameworks that are re-implemented in every language. More sophisticated features like syntax checking, dummy data generation, compile time checking, ... are left by the wayside and are only available in very few languages. 125 | 126 | 127 | To give a last example, [**SOSML**](https://sosml.org/) is an online interpreter for Standard ML with a nice interface. 128 | The SML implementation is written from scratch by students. 129 | However, Saarland University switched from SML to OCaml. 130 | Theoretically, the differences between SML and OCaml are just some renamings. 131 | 132 | But the correct handling of these changes either requires a transpiler between OCaml and SML (including a parser for OCaml) or changes in the SML interpreter (requiring in-depth knowledge of the code). See [SOOCaml](https://github.com/NeuralCoder3/SOOCaml-frontend) for a discussion. 133 | 134 | This is tedious and could theoretically be automated. 135 | However, until somebody takes time to deeply understand the interpreter, the current solution is to abandon SOSML or to patch in a third-party OCaml interpreter (transpiled to Javascript). 136 | 137 | 138 | 139 | ## Goal 140 | 141 | There are multiple concrete goals. 142 | But abstractly, we want to bridge the gap between languages. 143 | To this end, it should be possible to use one language's tools, features, and libraries in another language. 144 | 145 | More concretely, we want to develop a universal transpiler (or parts thereof). 146 | 147 | 148 | There are multiple conceptual paths to attack the problem. 149 | Each path offers multiple ways to achieve the goal. 150 | 151 | Before we look closer at ideas, we focus on related work in the field. 152 | We first introduce the works shallowly and go in-depth in the corresponding idea files. 153 | 154 | ## Related Work 155 | 156 | We collect interesting papers in the related areas of this work.
157 | 158 | ### Language Overview 159 | An overview of languages and tools connecting languages (e.g. transpilers, compilers). 160 | - [Programming Language Database](https://pldb.com/) (4000 languages) [Github](https://github.com/breck7/pldb) 161 | - [Language Definition Tool Ott](https://github.com/ott-lang/ott) 162 | - https://rosettacode.org/wiki/Language_Comparison_Table 163 | - [Landscape of languages](https://docs.google.com/spreadsheets/d/1P738jVhd9-UkNneIRN6VbEH-stDiKbDbB0WWPROv6lk/edit?usp=sharing) (compiler, transpiler overview, incomplete) 164 | - [Visualization](https://github.com/NeuralCoder3/language-map) 165 | 166 | ### Program Equivalence 167 | An important part of the translation is the equivalence of the original and translated program. 168 | This equivalence either guides the translation/synthesis or has to be established alongside/after the translation. 169 | 170 | - [Generative Language Modeling for Automated Theorem Proving](https://arxiv.org/pdf/2009.03393.pdf) 171 | - [Program Equivalence](https://drops.dagstuhl.de/opus/volltexte/2018/9758/pdf/dagrep_v008_i004_p001_18151.pdf) 172 | - [Interaction Trees](https://arxiv.org/pdf/1906.00046.pdf) 173 | - [Self-Supervised Learning to Prove Equivalence Between Straight-Line Programs via Rewrite Rules](https://arxiv.org/pdf/2109.10476.pdf) 174 | - CompCert 175 | - [Generating Proof Certificates for a Language-Agnostic Deductive Program Verifier](https://xchen.page/assets/pdf/LCT+23-paper.pdf) 176 | - [ARDiff: scaling program equivalence checking via iterative abstraction and refinement of common code](https://dl.acm.org/doi/10.1145/3368089.3409757) 177 | - [FuzzDiff: A Program Equivalence Checker based on feedback-directed fuzz testing and semantic analysis](https://www.scss.tcd.ie/publications/theses/diss/2022/TCD-SCSS-DISSERTATION-2022-134.pdf) 178 | - [Self-Supervised Learning to Prove Equivalence Between Straight-Line Programs via Rewrite Rules](https://arxiv.org/pdf/2109.10476.pdf) 179 | 180 |
[add from folder] 181 | 182 | #### Translation Validation 183 | Translation Validation is a special subfield that focuses on automated equivalence checks of programs before and after optimizations. 184 | 185 | - [End-to-End Translation Validation for the Halide Language](https://hal.inria.fr/hal-03653857/document) 186 | - [Alive2: Bounded Translation Validation for LLVM](https://dl.acm.org/doi/pdf/10.1145/3453483.3454030) 187 | - [Counterexample-Guided Correlation Algorithm for Translation Validation](https://shubhani.compiler.ai/pubs/oopsla20.pdf) 188 | 189 | #### Program Translation 190 | Automated program translations like superoptimizers have to guarantee program equivalence (usually in one language). Superoptimizers are often restricted to loop-free short code segments. 191 | 192 | [add from folder] 193 | 194 | ### Synthesis 195 | The target program has to be synthesized from the original program. 196 | The programs need to be equivalent and can be quite complex involving complicated control flow. 197 | However, the synthesis has a clear guideline as the shape of the original program can be used and the semantics of the result are fully defined.
198 | 199 | - [CounterExample-Guided Inductive Synthesis (CEGIS)](https://www-cs.stanford.edu/~preiner/publications/2017/PreinerNiemetzBiere-TACAS17.pdf) 200 | - [Synthesis of Loop-free Programs](https://dl.acm.org/doi/pdf/10.1145/1993316.1993506) (tool: Brahma) 201 | - [Program Synthesis](https://www.nowpublishers.com/article/Details/PGL-010) 202 | - Cyclic Program Synthesis 203 | - [Bottom-Up Synthesis of Recursive Functional Programs using Angelic Execution](https://dl.acm.org/doi/pdf/10.1145/3498682) 204 | - [Evaluating Large Language Models Trained on Code](https://arxiv.org/pdf/2107.03374.pdf) 205 | - [APIfix: Output-Oriented Program Synthesis for Combating Breaking Changes in Libraries](https://gaoxiang9430.github.io/papers/APIFix.pdf) 206 | - [FlashFill++: Scaling Programming by Example by Cutting to the Chase](https://www.microsoft.com/en-us/research/uploads/prod/2022/12/flashfillpp-popl-23-camera-ready.pdf) 207 | - Inductive Synthesis of Structurally Recursive Functional Programs from Non-recursive Expressions 208 | - [Top-Down Synthesis For Library Learning](https://arxiv.org/pdf/2211.16605.pdf) 209 | - Bootstrapping Library-Based Synthesis 210 | - [Abstract Syntax Networks for Code Generation and Semantic Parsing](https://arxiv.org/pdf/1704.07535.pdf) 211 | - [Github](https://github.com/xiye17/torchASN) 212 | - [Synthesizing an Instruction Selection Rule Library from Semantic Specifications](https://pp.ipd.kit.edu/uploads/publikationen/buchwald18cgo.pdf) 213 | - [Souper: A Synthesizing Superoptimizer](https://arxiv.org/abs/1711.04422) 214 | - [Minotaur: A SIMD-Oriented Synthesizing Superoptimizer](https://arxiv.org/abs/2306.00229) (based on Alive) 215 | - [Supersonic](https://dl.acm.org/doi/10.1145/3497776.3517769) [Github](https://github.com/HuantWang/SUPERSONIC) (based on CompilerGym) 216 | - https://emina.github.io/rosette/ 217 | - [Untyped ->
Typed](https://dl.acm.org/doi/abs/10.1145/1176617.1176755?casa_token=dFMn2xhiTeUAAAAA:EC0wkfDgt3clgpbnwQ1x2JasOiL9wOvZQ0HoxKdUNexgjpAznzLg4tpfJpfDyXp0MIYzBHeE58tS) 218 | - LLM Forced Format 219 | - [Jsonformer](https://news.ycombinator.com/item?id=35790092) [Github](https://github.com/1rgs/jsonformer) 220 | - [clownfish](https://github.com/newhouseb/clownfish) 221 | - [Output Fixing Parser](https://python.langchain.com/en/latest/modules/prompts/output_parsers/examples/output_fixing_parser.html) 222 | - [Guardrails](https://github.com/ShreyaR/guardrails) 223 | - [Statistically certified approximate logic synthesis](https://ieeexplore.ieee.org/abstract/document/8203798?casa_token=x4a__CSWqaAAAAAA:Do9diuZVLhRFGh2v-tmwoaL4nEvNNqI3RkXBuoque2kGImJIb0WbMLve1bP2_jsIEURFES2QB5ZX) 224 | - [Certifying the Synthesis of Heap-Manipulating Programs](https://dl.acm.org/doi/pdf/10.1145/3473589) (SuSLik) 225 | - [A FRAMEWORK FOR CERTIFIED PROGRAM SYNTHESIS](https://ilyasergey.net/assets/pdf/papers/Watanabe-MComp.pdf) 226 | 227 | [add from folder] 228 | 229 | ### Artificial Intelligence 230 | A promising approach is neural-guided synthesis using artificial intelligence for translation. 231 | These tools have been proven to be capable of synthesizing complex code with acceptable accuracy. 232 | However, the current projects mainly look into natural language and do not establish tight guarantees like formal semantics of the output.
233 | 234 | - [Competition-Level Code Generation with AlphaCode](https://www.datascienceassn.org/sites/default/files/Competition-Level%20Code%20Generation%20with%20AlphaCode.pdf) 235 | - [CrossBeam: Learning To Search in Bottom-Up Program Synthesis](https://arxiv.org/pdf/2203.10452.pdf) 236 | - [Formal Mathematics Statement Curriculum Learning](https://arxiv.org/pdf/2202.01344.pdf) 237 | - [OpenAI Codex](https://beta.openai.com/docs/guides/code/best-practices) 238 | - [ChatGPT](https://chat.openai.com/chat) 239 | - [Salesforce CodeT5](https://arxiv.org/abs/2109.00859) ([Github](https://github.com/salesforce/CodeT5)) 240 | - [Github CoPilot](https://github.com/features/copilot) 241 | - [Transformer models: an introduction and catalog](https://arxiv.org/pdf/2302.07730.pdf) 242 | - [FlexGen: Running GPT on (relatively) low VRAM](https://news.ycombinator.com/item?id=34869960) [Github](https://github.com/FMInference/FlexGen) 243 | - [Jotte: Graph-based GPT Extension for large text corpus](https://news.ycombinator.com/item?id=34901481) 244 | - [Phind GPT Search](https://news.ycombinator.com/item?id=34884338) 245 | - [Open Source GPT Training](https://news.ycombinator.com/item?id=34858460) 246 | - [Microsoft Prompt Engine](https://news.ycombinator.com/item?id=34811070) 247 | - [The "an" neuron in GPT2](https://news.ycombinator.com/item?id=34821414) 248 | - [Run GPT3 fast](https://news.ycombinator.com/item?id=34702349) 249 | - [GPT Memory & Conversations](https://github.com/Kav-K/GPT3Discord) 250 | - [Show your work: Scratchpads for intermediate computation with language models](https://arxiv.org/pdf/2112.00114.pdf) 251 | - [GPT Error Reduction](https://medium.com/@0xjfan/how-we-cut-the-rate-of-gpt-hallucinations-from-20-to-less-than-2-f3bfcc10e4ec) 252 | - [openai cookbook](https://github.com/openai/openai-cookbook) 253 | - [openai prompt engineering](https://learnprompting.org/docs/intro) 254 | - [CompilerGym](https://arxiv.org/pdf/2109.08267.pdf)
[Github](https://github.com/facebookresearch/CompilerGym) 255 | - fast-ai 256 | - https://github.com/salesforce/CodeGen 257 | - [Auto-GPT](https://github.com/Significant-Gravitas/Auto-GPT) An experimental open-source attempt to make GPT-4 fully autonomous. 258 | - [Maieutic Prompting: Logically Consistent Reasoning with Recursive Explanations](https://arxiv.org/pdf/2205.11822.pdf) 259 | 260 | ### Transpilers 261 | Transpilers are mainly written by hand and are far from perfect. 262 | The produced code is not always readable and sometimes needs post-processing. 263 | However, there are often formal (or implicit) guarantees that the result agrees with the original program. 264 | 265 | - [A General Purpose Transpiler for Fully Homomorphic Encryption](https://arxiv.org/pdf/2106.07893.pdf) 266 | - [Interlanguage Migration: From Scripts to Programs](https://dl.acm.org/doi/pdf/10.1145/1176617.1176755) 267 | - [Improving mobile app development using transpilers with maintainable outputs](https://dl.acm.org/doi/pdf/10.1145/3422392.3422426) 268 | - [Automated Transpilation of Imperative to Functional Code using Neural-Guided Program Synthesis (Extended Version)](https://arxiv.org/pdf/2203.09452.pdf) 269 | - [A Simple Abstraction of Arrays and Maps by Program Translation](https://arxiv.org/pdf/1506.04161.pdf) 270 | - [Unsupervised Translation of Programming Languages](https://proceedings.neurips.cc/paper/2020/file/ed23fbf18c2cd35f8c7f8de44f85c08d-Paper.pdf) 271 | - [Pandoc](https://pandoc.org/) 272 | - Popular Media 273 | - [Legacy Code Conversion - Computerphile](https://www.youtube.com/watch?v=Xz06zYlQrck) 274 | - Protobuf -- the universal language 275 | - [Oblivious Decompiler](https://www.recon.cx/2012/schedule/attachments/40_Chernov-Troshina.pdf) 276 | - [How to write a Transpiler](https://tomassetti.me/how-to-write-a-transpiler/) 277 | - [universal transpiler](https://github.com/jarble/transpiler) 278 | - [CodeWorker](http://codeworker.free.fr/) 279 | -
[User-Customizable Transpilation of Scripting Languages](https://arxiv.org/pdf/2301.11220.pdf) 280 | 281 | ### Language Interoperability 282 | - [Melocoton: A Program Logic for Verified Interoperability Between OCaml and C](https://gallium.inria.fr/~agueneau/publis/melocoton.pdf) 283 | - [FFI Overhead Comparison](https://news.ycombinator.com/item?id=31376679) 284 | 285 | 286 | ### Related Concepts 287 | - Synthesis 288 | - Neural(-guided) synthesis 289 | - Top-Down synthesis 290 | - Search/Planning 291 | - Program Equivalence 292 | - Translation Validation 293 | - Separation Logic 294 | - symbolic abstraction 295 | - smt 296 | - Language Design 297 | - Program Paradigms 298 | - Compilation Transpilation 299 | - Decompilation 300 | - Staging 301 | - Metaprogramming 302 | - DSL 303 | - ABI/FFI, serialization, marshalling 304 | - Partial Evaluation 305 | - Tests 306 | - QA 307 | - logarithmic types 308 | - fuzzing 309 | - specification mining 310 | - Program Communication 311 | - Marshalling 312 | - JSON 313 | - Pickling 314 | - RPC 315 | - ABI/FFI 316 | 317 | 318 | ## Ideas 319 | 320 | We present some promising ideas in [./Ideas/](./Ideas/). 321 | The ideas are not exhaustive. 322 | 323 | ### Building Blocks 324 | 325 | **Transformers**: LLMs have recently been shown to be knowledgeable in complicated synthesis tasks. 326 | The models, trained without supervision, exhibit a grasp of related concepts like the relation between natural language and programming languages or between programming languages. 327 | Recent papers and projects have shown first successes in using text transformers to synthesize programs. A more refined and verified approach could build upon this preliminary success. 328 | 329 | **Counter Example Guided Synthesis**: Counterexamples contradicting the synthesis specification can be used to refine and guide the search to a solution. These counterexamples can be obtained using SMT solvers or fuzzers.
330 | 331 | **Tests**: We can employ automated tests/symbolic abstraction to find program equivalence contradictions. These can be used to refine the result and to ensure trust in an unverified result. This way, we can harness the power of neural networks without suffering from the unpredictable/unverified nature of neural networks. 332 | 333 | **Search**: Many parts of the approach involve/can be formulated as search problems. 334 | For instance, the construction (synthesis) of the resulting program can be seen as a search for a valid program with the same properties. 335 | The search has to be heavily pruned and possibly (neural-)guided either directly or via heuristics. A common approach in recent research is to use beam search. There are papers that use bottom-up search as well as other papers that use a top-down approach. 336 | 337 | **Latent Representation**: The neural guided approaches can attach to the approach at different levels. Either as guide, as main component, as checked assistant, ... . 338 | In any case, we need to find a suitable way to communicate and represent the data we present to the networks. 339 | This can happen as simple text (as shown to be effective by LLMs) or using more informative and sophisticated data structures like graph nets or AST network structures. 340 | Historically, more complicated structures did not provide better performance. But recent research showed a more focused network leading to better performance with less resource consumption. 341 | 342 | **Partial Eval**: One could build on refined and optimized Futamura projections to transpile programs in languages supporting partial evaluation (beta reduction and propagation) that implement corresponding interpreters/compilers. A lingua franca of programming (like in pandas or FFI communication) could make this approach feasible and also help in other approaches.
343 | 344 | **Rewrite Rules**: Classically, transpilers operate in declarative programming languages (prolog) using rewrite rules. One idea could be to semi-automatically synthesize these rules. 345 | The approach would be limited to match-rule-based rewriting but would allow for more interaction and control. It would especially be open to formal verification. 346 | 347 | **ABI/FFI**: An unrelated concept to increase interoperability is to make it easier to call functions from other languages. This can happen at different abstraction levels. 348 | The functions can be linked at the language level, at the C level, or at the assembly level. 349 | The link can happen statically as a foreign function using an FFI or by transpilation or 350 | at runtime using communication bridges. 351 | The function needs a common interface between both languages. This can be as simple 352 | as the standard FFI interface, a JSON bridge, or a universal communication language. 353 | _Related Projects_: APIFix, [DSL Project Github](https://github.com/NeuralCoder3/thorin-dsl), serialization, pickling, marshalling, isomorphism type system, [remote procedure call](https://en.wikipedia.org/wiki/Remote_procedure_call) 354 | 355 | ### Concrete Ideas 356 | 357 | We can group the ideas in the following categories: 358 | * Interoperability 359 | * Program Transpilation 360 | 361 | [Ideas/NeuralTransformer](Ideas/NeuralTransformer): A promising but simple approach is to use unsupervised trained neural text transformers. These LLMs (large language models) have shown knowledge about many languages and their (intuitive) semantics (connection to other languages including natural language). The idea in this approach is to synthesize the resulting program and refine it stepwise using (automated) feedback.
362 | 363 | [see presentation] 364 | 365 | ## Applications 366 | 367 | Here, we collect (more) concrete ideas for applications of our approaches: 368 | * Code -> Rust (safety guarantees, checker environment) 369 | * functional -> imperative (possible speedup) 370 | * python <-> language (library support) 371 | * php -> typescript (update old infrastructure) 372 | * java -> typescript (update old infrastructure) 373 | * imaginary markup -> latex (better syntax) 374 | * toy language -> real language (fast prototyping) 375 | * imaginary api -> real api (accessibility, better adoption, change resistant code) 376 | * pseudocode -> language 377 | * python -> C++ (speedup) 378 | * imperative -> functional (verification) 379 | * react <-> angular <-> vue (re-usage between apis) 380 | * use arbitrary [Language Gimmicks](https://buttondown.email/hillelwayne/archive/microfeatures-id-like-to-see-in-more-languages/) => language extension (similar to OCaml ppx) 381 | * synthesis by natural language transpilation 382 | * [Dependabot 2.0](https://github.com/vlts1/ruzz): Adapt code to changes automatically 383 | 384 | Further high-level advantages: 385 | * not all code needs to be able to be converted 386 | * wrappers can be written according to semantics 387 | 388 | Mottos: 389 | * LLVM (lingua franca) for high-level 390 | * write code you want and get code you need 391 | * -------------------------------------------------------------------------------- /attachments/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/attachments/logo.png -------------------------------------------------------------------------------- /attachments/standards3.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/attachments/standards3.png --------------------------------------------------------------------------------