├── .gitignore
├── 22-05-presentation
    ├── Bibliography.bib
    ├── Wue.sty
    ├── imgs
    │   ├── standards.png
    │   ├── standards2.png
    │   └── standards3.png
    ├── main.pdf
    ├── main.tex
    ├── unilogo4c.jpg
    └── unilogo4c.png
├── Ideas
    └── NeuralTransformer.md
├── ReadMe.md
└── attachments
    ├── logo.png
    └── standards3.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/obsidian,latex,visualstudiocode
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=obsidian,latex,visualstudiocode
  3 | 
  4 | ### LaTeX ###
  5 | ## Core latex/pdflatex auxiliary files:
  6 | *.aux
  7 | *.lof
  8 | *.log
  9 | *.lot
 10 | *.fls
 11 | *.out
 12 | *.toc
 13 | *.fmt
 14 | *.fot
 15 | *.cb
 16 | *.cb2
 17 | .*.lb
 18 | 
 19 | ## Intermediate documents:
 20 | *.dvi
 21 | *.xdv
 22 | *-converted-to.*
 23 | # these rules might exclude image files for figures etc.
 24 | # *.ps
 25 | # *.eps
 26 | # *.pdf
 27 | 
 28 | ## Generated if empty string is given at "Please type another file name for output:"
 29 | .pdf
 30 | 
 31 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
 32 | *.bbl
 33 | *.bcf
 34 | *.blg
 35 | *-blx.aux
 36 | *-blx.bib
 37 | *.run.xml
 38 | 
 39 | ## Build tool auxiliary files:
 40 | *.fdb_latexmk
 41 | *.synctex
 42 | *.synctex(busy)
 43 | *.synctex.gz
 44 | *.synctex.gz(busy)
 45 | *.pdfsync
 46 | 
 47 | ## Build tool directories for auxiliary files
 48 | # latexrun
 49 | latex.out/
 50 | 
 51 | ## Auxiliary and intermediate files from other packages:
 52 | # algorithms
 53 | *.alg
 54 | *.loa
 55 | 
 56 | # achemso
 57 | acs-*.bib
 58 | 
 59 | # amsthm
 60 | *.thm
 61 | 
 62 | # beamer
 63 | *.nav
 64 | *.pre
 65 | *.snm
 66 | *.vrb
 67 | 
 68 | # changes
 69 | *.soc
 70 | 
 71 | # comment
 72 | *.cut
 73 | 
 74 | # cprotect
 75 | *.cpt
 76 | 
 77 | # elsarticle (documentclass of Elsevier journals)
 78 | *.spl
 79 | 
 80 | # endnotes
 81 | *.ent
 82 | 
 83 | # fixme
 84 | *.lox
 85 | 
 86 | # feynmf/feynmp
 87 | *.mf
 88 | *.mp
 89 | *.t[1-9]
 90 | *.t[1-9][0-9]
 91 | *.tfm
 92 | 
 93 | #(r)(e)ledmac/(r)(e)ledpar
 94 | *.end
 95 | *.?end
 96 | *.[1-9]
 97 | *.[1-9][0-9]
 98 | *.[1-9][0-9][0-9]
 99 | *.[1-9]R
100 | *.[1-9][0-9]R
101 | *.[1-9][0-9][0-9]R
102 | *.eledsec[1-9]
103 | *.eledsec[1-9]R
104 | *.eledsec[1-9][0-9]
105 | *.eledsec[1-9][0-9]R
106 | *.eledsec[1-9][0-9][0-9]
107 | *.eledsec[1-9][0-9][0-9]R
108 | 
109 | # glossaries
110 | *.acn
111 | *.acr
112 | *.glg
113 | *.glo
114 | *.gls
115 | *.glsdefs
116 | *.lzo
117 | *.lzs
118 | *.slg
119 | *.slo
120 | *.sls
121 | 
122 | # uncomment this for glossaries-extra (will ignore makeindex's style files!)
123 | # *.ist
124 | 
125 | # gnuplot
126 | *.gnuplot
127 | *.table
128 | 
129 | # gnuplottex
130 | *-gnuplottex-*
131 | 
132 | # gregoriotex
133 | *.gaux
134 | *.glog
135 | *.gtex
136 | 
137 | # htlatex
138 | *.4ct
139 | *.4tc
140 | *.idv
141 | *.lg
142 | *.trc
143 | *.xref
144 | 
145 | # hyperref
146 | *.brf
147 | 
148 | # knitr
149 | *-concordance.tex
150 | # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
151 | # *.tikz
152 | *-tikzDictionary
153 | 
154 | # listings
155 | *.lol
156 | 
157 | # luatexja-ruby
158 | *.ltjruby
159 | 
160 | # makeidx
161 | *.idx
162 | *.ilg
163 | *.ind
164 | 
165 | # minitoc
166 | *.maf
167 | *.mlf
168 | *.mlt
169 | *.mtc[0-9]*
170 | *.slf[0-9]*
171 | *.slt[0-9]*
172 | *.stc[0-9]*
173 | 
174 | # minted
175 | _minted*
176 | *.pyg
177 | 
178 | # morewrites
179 | *.mw
180 | 
181 | # newpax
182 | *.newpax
183 | 
184 | # nomencl
185 | *.nlg
186 | *.nlo
187 | *.nls
188 | 
189 | # pax
190 | *.pax
191 | 
192 | # pdfpcnotes
193 | *.pdfpc
194 | 
195 | # sagetex
196 | *.sagetex.sage
197 | *.sagetex.py
198 | *.sagetex.scmd
199 | 
200 | # scrwfile
201 | *.wrt
202 | 
203 | # svg
204 | svg-inkscape/
205 | 
206 | # sympy
207 | *.sout
208 | *.sympy
209 | sympy-plots-for-*.tex/
210 | 
211 | # pdfcomment
212 | *.upa
213 | *.upb
214 | 
215 | # pythontex
216 | *.pytxcode
217 | pythontex-files-*/
218 | 
219 | # tcolorbox
220 | *.listing
221 | 
222 | # thmtools
223 | *.loe
224 | 
225 | # TikZ & PGF
226 | *.dpth
227 | *.md5
228 | *.auxlock
229 | 
230 | # titletoc
231 | *.ptc
232 | 
233 | # todonotes
234 | *.tdo
235 | 
236 | # vhistory
237 | *.hst
238 | *.ver
239 | 
240 | # easy-todo
241 | *.lod
242 | 
243 | # xcolor
244 | *.xcp
245 | 
246 | # xmpincl
247 | *.xmpi
248 | 
249 | # xindy
250 | *.xdy
251 | 
252 | # xypic precompiled matrices and outlines
253 | *.xyc
254 | *.xyd
255 | 
256 | # endfloat
257 | *.ttt
258 | *.fff
259 | 
260 | # Latexian
261 | TSWLatexianTemp*
262 | 
263 | ## Editors:
264 | # WinEdt
265 | *.bak
266 | *.sav
267 | 
268 | # Texpad
269 | .texpadtmp
270 | 
271 | # LyX
272 | *.lyx~
273 | 
274 | # Kile
275 | *.backup
276 | 
277 | # gummi
278 | .*.swp
279 | 
280 | # KBibTeX
281 | *~[0-9]*
282 | 
283 | # TeXnicCenter
284 | *.tps
285 | 
286 | # auto folder when using emacs and auctex (anchored to the repository root; a leading "./" never matches in gitignore)
287 | /auto/*
288 | *.el
289 | 
290 | # expex forward references with \gathertags
291 | *-tags.tex
292 | 
293 | # standalone packages
294 | *.sta
295 | 
296 | # Makeindex log files
297 | *.lpz
298 | 
299 | # xwatermark package
300 | *.xwm
301 | 
302 | # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
303 | # option is specified. Footnotes are then stored in a file with suffix Notes.bib.
304 | # Uncomment the next line to have this generated file ignored.
305 | #*Notes.bib
306 | 
307 | ### LaTeX Patch ###
308 | # LIPIcs / OASIcs
309 | *.vtc
310 | 
311 | # glossaries
312 | *.glstex
313 | 
314 | ### Obsidian ###
315 | # config dir
316 | .obsidian/
317 | 
318 | ### VisualStudioCode ###
319 | .vscode/*
320 | !.vscode/settings.json
321 | !.vscode/tasks.json
322 | !.vscode/launch.json
323 | !.vscode/extensions.json
324 | !.vscode/*.code-snippets
325 | 
326 | # Local History for Visual Studio Code
327 | .history/
328 | 
329 | # Built Visual Studio Code Extensions
330 | *.vsix
331 | 
332 | ### VisualStudioCode Patch ###
333 | # Ignore all local history of files
334 | .history
335 | .ionide
336 | 
337 | # End of https://www.toptal.com/developers/gitignore/api/obsidian,latex,visualstudiocode
338 | 
339 | 
340 | main.pdf
341 | Icons
342 | 


--------------------------------------------------------------------------------
/22-05-presentation/Bibliography.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/Bibliography.bib


--------------------------------------------------------------------------------
/22-05-presentation/Wue.sty:
--------------------------------------------------------------------------------
 1 | \mode<presentation>
 2 | 
 3 | \newif\ifbeamer@secheader
 4 | \beamer@secheaderfalse
 5 | 
 6 | %\DeclareOptionBeamer{secheader}{\beamer@secheadertrue}
 7 | \ProcessOptionsBeamer
 8 | 
 9 | \useoutertheme[footline=authorinstitutetitle,subsection=false]{smoothbars}
10 | \makeatletter % Custom footline showing "current <sep> total" frame numbers; adapted from http://tex.stackexchange.com/questions/100838/beamer-dresden-theme-miniframes-appeareance-and-frame-number-insertion
11 | \newcommand{\frameofframes}{/} % separator printed between the current and the total frame number
12 | \newcommand{\setframeofframes}[1]{\renewcommand{\frameofframes}{#1}} % user hook: replace that separator with #1
13 | \setbeamertemplate{footline} 
14 |   {% layout: upper separation line, author/institute row, title/frame-number row, lower separation line
15 |     \begin{beamercolorbox}[colsep=1.5pt]{upper separation line foot}
16 |     \end{beamercolorbox}
17 |     \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,% row 1: short author on the left, short institute on the right
18 |       leftskip=.3cm,rightskip=.3cm plus1fil]{author in head/foot}%
19 |       \leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor}%
20 |       \hfill% pushes the institute to the right edge
21 |       {\usebeamerfont{institute in head/foot}\usebeamercolor[fg]{institute in head/foot}\insertshortinstitute}%
22 |     \end{beamercolorbox}%
23 |     \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,% row 2: short title on the left, frame counter on the right
24 |       leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
25 |       {\usebeamerfont{title in head/foot}\insertshorttitle}%
26 |       \hfill% pushes the frame counter to the right edge
27 |       {\usebeamerfont{frame number}\usebeamercolor[fg]{frame number}\insertframenumber~\frameofframes~\inserttotalframenumber}
28 |     \end{beamercolorbox}%
29 |     \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot}
30 |     \end{beamercolorbox}
31 |   }
32 | \makeatother % NOTE(review): nothing between \makeatletter and here uses an @-name, so the pair looks redundant -- confirm before removing
33 | 
34 | \useinnertheme{circles}
35 | 
36 | %\useoutertheme{default}
37 | %\useinnertheme[shadow=true]{rounded}
38 | 
39 | \xdefinecolor{wue}{RGB}{37,86,149} 	
40 | \setbeamercolor{footline}{bg=wue}
41 | \setbeamercolor{frametitle}{bg=wue,fg=white}
42 | \setbeamercolor{title}{bg=wue}
43 | \setbeamerfont{frametitle}{size=\large}
44 | %\setbeamertemplate{navigation symbols}{}
45 | \setbeamertemplate{bibliography item}[text]
46 | \setbeamertemplate{caption}[numbered]
47 | 
48 | \setbeamercolor{palette primary}{use=structure,fg=white,bg=structure.fg}
49 | \setbeamercolor{palette secondary}{use=structure,fg=white,bg=structure.fg!75!black}
50 | \setbeamercolor{palette tertiary}{use=structure,fg=white,bg=structure.fg!50!black}
51 | \setbeamercolor{palette quaternary}{fg=white,bg=structure.fg!50!black}
52 | %\setbeamercolor*{sidebar}{use=structure,bg=structure.fg}
53 | \setbeamercolor{titlelike}{parent=palette primary}
54 | 
55 | \setbeamercolor{block title}{bg=wue,fg=white}
56 | \setbeamercolor*{block title example}{use={normal text,example text},bg=white,fg=wue}
57 | \setbeamercolor{fine separation line}{}
58 | \setbeamercolor{item projected}{fg=white}
59 | \setbeamercolor{palette sidebar primary}{use=normal text,fg=normal text.fg}
60 | \setbeamercolor{palette sidebar quaternary}{use=structure,fg=structure.fg}
61 | \setbeamercolor{palette sidebar secondary}{use=structure,fg=structure.fg}
62 | \setbeamercolor{palette sidebar tertiary}{use=normal text,fg=normal text.fg}
63 | %\setbeamercolor{palette sidebar quaternary}{fg=white}
64 | \setbeamercolor{section in sidebar}{fg=brown} % current section entry in the sidebar
65 | \setbeamercolor{section in sidebar shaded}{fg=gray} % xcolor's base colour is spelled "gray"; "grey" is undefined
66 | \setbeamercolor{separation line}{}
67 | \setbeamercolor{sidebar}{bg=wue}
68 | \setbeamercolor{sidebar}{parent=palette primary}
69 | \setbeamercolor{structure}{fg=wue} % structure colour = the "wue" blue defined in this package
70 | \setbeamercolor{subsection in sidebar}{fg=brown} % current subsection entry in the sidebar
71 | \setbeamercolor{subsection in sidebar shaded}{fg=gray} % xcolor's base colour is spelled "gray"; "grey" is undefined
72 | \AtBeginSection[]{ % at the start of each section: insert a frame containing the table of contents
73 | 	\begin{frame}
74 | 		\tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide,subsubsectionstyle=show/shaded/hide]
75 | 	\end{frame}
76 | }
77 | \AtBeginSubsection[]{ % same outline frame again at the start of each subsection (identical options)
78 | 	\begin{frame}
79 | 		\tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide,subsubsectionstyle=show/shaded/hide]
80 | 	\end{frame}
81 | }
82 | 
83 | \mode
84 | <all>
85 | 


--------------------------------------------------------------------------------
/22-05-presentation/imgs/standards.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards.png


--------------------------------------------------------------------------------
/22-05-presentation/imgs/standards2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards2.png


--------------------------------------------------------------------------------
/22-05-presentation/imgs/standards3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/imgs/standards3.png


--------------------------------------------------------------------------------
/22-05-presentation/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/main.pdf


--------------------------------------------------------------------------------
/22-05-presentation/main.tex:
--------------------------------------------------------------------------------
  1 | %-------------------------------------------------------------
  2 | \documentclass[aspectratio=169, handout]{beamer}
  3 | \usepackage[utf8]{inputenc}
  4 | \usepackage{hyperref}
  5 | \usepackage[T1]{fontenc}
  6 | \usepackage{tikz}
  7 | 
  8 | \usepackage{latexsym,xcolor,multicol,booktabs,calligra}
  9 | \usepackage{amsmath,amssymb,BOONDOX-cal,bm}	
 10 | \usepackage{graphicx,pstricks,stackengine}    
 11 | \usepackage{tabularx}
 12 | \usepackage{makecell}
 13 | \usepackage{listings}
 14 | \usepackage{etoolbox}
 15 | \usepackage{tablefootnote}
 16 | \usepackage{ bbold }
 17 | \usepackage{tikz}
 18 | \usepackage{xfrac}
 19 | \usepackage{longtable}  
 20 | 
 21 | 
 22 | \author{Marcel Ullrich}
 23 | %\titlegraphic{\includegraphics[width=\textwidth]{unilogo4cmittel.jpg}}
 24 | \titlegraphic{ 
 25 | \begin{tikzpicture}[overlay,remember picture]
 26 | \node[right=-0.15cm] at (current page.154){% logo placed relative to the page border anchor at angle 154 (was 150 previously)
 27 |     \includegraphics[width=1.2\textwidth]{unilogo4c.png}
 28 | };
 29 | \end{tikzpicture}
 30 | }
 31 | \title{\textbf{U}niversal \textbf{L}anguage \textbf{Tra}nspiler} % Esperanto
 32 | \institute{Saarland University } 
 33 | \date{02.05.2022}
 34 | %\logo{\includegraphics[width=0.12\textwidth]{MessmapLogo.png}}
 35 | 
 36 | \usepackage{Wue}
 37 | 
 38 | \def\cmd#1{\texttt{\color{red}\footnotesize $\backslash$#1}} % \cmd{name}: prints a backslash followed by "name" in red, footnotesize typewriter type
 39 | \def\env#1{\texttt{\color{blue}\footnotesize #1}} % \env{name}: prints "name" in blue, footnotesize typewriter type
 40 | 
 41 | \newtheorem{thm}{Theorem}[theorem]
 42 | 
 43 | 
 44 | %—-------------------------------------------------------------
 45 | 
 46 | \begin{document}
 47 | 	\begin{frame}
 48 |     \titlepage
 49 |     \end{frame}
 50 |     
 51 |     % \section{ABC}
 52 |     
 53 |     \begin{frame}{Problem}
 54 |     \begin{itemize}
 55 |         \item Migration from scripting to programming \pause %https://dl.acm.org/doi/abs/10.1145/1176617.1176755?casa_token=dFMn2xhiTeUAAAAA:EC0wkfDgt3clgpbnwQ1x2JasOiL9wOvZQ0HoxKdUNexgjpAznzLg4tpfJpfDyXp0MIYzBHeE58tS
 56 |         %https://dl.acm.org/doi/abs/10.1145/103135.103138
 57 |         \item Rewriting of code \pause
 58 |         \item Interface changes
 59 |     \end{itemize}
 60 |     \only<4->{
 61 |     \begin{center}
 62 |     \alt<6>{
 63 |     \includegraphics[width=.5\textwidth]{imgs/standards3.png}%
 64 |     }{
 65 |     \alt<5>{
 66 |     \includegraphics[width=.5\textwidth]{imgs/standards2.png}%
 67 |     }{
 68 |     \includegraphics[width=.5\textwidth]{imgs/standards.png}
 69 |     }
 70 |     }
 71 |     % 3145 esoteric programming languages
 72 |     
 73 |     { \tiny \url{https://xkcd.com/927/} \visible<5->{(modified)}}
 74 |     \end{center}
 75 |     }
 76 |     \end{frame}
 77 |     
 78 |     \begin{frame}{Goal}
 79 |     \begin{itemize}
 80 |         \item translation of code \pause
 81 |         \item language agnostic \pause
 82 |         \item minimal boilerplate \pause
 83 |         \item readable
 84 |     \end{itemize}
 85 |     \end{frame}
 86 |     
 87 |     \begin{frame}{Classification}
 88 |     \begin{itemize}
 89 |         \item {\color<2>{gray}Machine languages}
 90 |         \item {\color<2>{gray}Assembly languages}
 91 |         \item High-level languages
 92 |         \item Scripting languages
 93 |         \item System languages
 94 |         \item Domain-specific languages
 95 |         \item {\color<2>{gray}Visual languages}
 96 |         \item {\color<2>{gray}Esoteric languages}
 97 |         \item {\color<2>{gray}other}
 98 |     \end{itemize}
 99 |     \end{frame}
100 |     
101 |     \begin{frame}{High-level languages}
102 |     \begin{itemize}
103 |         \item imperative
104 |         \item functional
105 |         \item {\color<2>{gray}declarative}
106 |         \item {\color<2>{gray}array}
107 |         \item constraint
108 |         \item {\color<2>{gray}other}
109 |     \end{itemize}
110 |    
111 |     \end{frame}
112 |     
113 |     \begin{frame}{Examples}
114 |      %https://cs.lmu.edu/~ray/notes/pltypes/
115 |      \begin{itemize}
116 |          \item \href{https://rosettacode.org/wiki/Mandelbrot_set}{Mandelbrot}
117 |          \item \url{https://cs.lmu.edu/~ray/notes/pltypes/}
118 |      \end{itemize}
119 |     \end{frame}
120 |     
121 |     \begin{frame}{Issues}
122 |     \begin{itemize}
123 |         \item large difference between languages \pause % compiled interpreted, typed untypes
124 |         \item boilerplate around differences\pause 
125 |         \item language specific features\pause  %e.g. metaprogramming
126 |         \item correct code generation\pause 
127 |         \item interaction with the user
128 |         \item scaling
129 |     \end{itemize}
130 |     \end{frame}
131 |     
132 |     \begin{frame}{Ideas}
133 |     \begin{itemize}
134 |     \item concept:
135 |     \begin{itemize}
136 |         \item transpiler generator
137 |         \item transpiler that generates
138 |     \end{itemize} \pause
139 |     \item approach:
140 |     \begin{itemize}
141 |         \item structure based
142 |         \item partial evaluation
143 |         \item translation
144 |         \item synthesis
145 |         \item + validation
146 |         \item common language
147 |     \end{itemize}
148 |     \end{itemize}
149 |     \end{frame}
150 |     
151 |     % TODO
152 |     
153 |     % https://emina.github.io/rosette/
154 |     % https://docs.racket-lang.org/rosette-guide/index.html
155 |     % https://github.com/google-research/crossbeam
156 |     % https://github.com/jarble/transpiler
157 |     
158 |     \begin{frame}{Concept: Transpiler generator}
159 |     \begin{itemize}
160 |         \item general language independent framework
161 |         \item translation rules per language 
162 |         \item need synthesis of rules \pause
163 |         \item advantages:
164 |             \begin{itemize}
165 |                 \item modular
166 |                 \item well suited for very similar languages
167 |             \end{itemize} \pause
168 |         \item disadvantages: 
169 |             \begin{itemize}
170 |                 \item complicated rules
171 |                 \item difficult for different ASTs
172 |                 \item syntactical
173 |             \end{itemize}
174 |     \end{itemize}
175 |     \end{frame}
176 |     
177 |     \begin{frame}{Concept: Generating Transpiler}
178 |     \begin{itemize}
179 |         \item takes a fragment of code
180 |         \item produces a fragment of code \pause
181 |         \item advantages:
182 |             \begin{itemize}
183 |                 \item general
184 |                 \item allows structure changes
185 |             \end{itemize} \pause
186 |         \item disadvantages: 
187 |             \begin{itemize}
188 |                 \item not modular
189 |                 \item not necessarily structure preserving
190 |             \end{itemize}
191 |     \end{itemize}
192 |     \end{frame}
193 |     
194 |     % \begin{frame}{Concept: Correctness}
195 |     % \Huge TODO
196 |     % \end{frame}
197 |     
198 |     % Translation
199 |     
200 |     \begin{frame}{Idea: Structure based}
201 |     \begin{itemize}
202 |         \item synthesize translation rules
203 |         \item compare examples in the two languages
204 |         \item same context $\Rightarrow$ same semantics \pause
205 |         \item advantages:
206 |             \begin{itemize}
207 |                 \item modular
208 |                 \item well suited for very similar languages
209 |             \end{itemize} \pause
210 |         \item disadvantages: 
211 |             \begin{itemize}
212 |                 \item complicated rules
213 |                 \item difficult for different ASTs
214 |                 \item syntactical
215 |             \end{itemize}
216 |     \end{itemize}
217 |     \end{frame}
218 |     
219 |     \begin{frame}{Idea: Partial evaluation}
220 |     \begin{itemize}
221 |         \item interpreter of A in B
222 |         \item partial evaluation of B
223 |         \item symbolic evaluation $\Rightarrow$ partial eval, interpreter \pause
224 |         \item advantages:
225 |             \begin{itemize}
226 |                 \item reuses written code
227 |                 \item correctness transfer
228 |                 \item one manual transpilation
229 |             \end{itemize} \pause
230 |         \item disadvantages: 
231 |             \begin{itemize}
232 |                 \item needs complex components
233 |                 \item work for each new language
234 |             \end{itemize}
235 |     \end{itemize}
236 |     \end{frame}
237 |     
238 |     \begin{frame}{Idea: Translation}
239 |     \begin{itemize}
240 |         \item text to text translation \pause % or graph
241 |         \item advantages:
242 |             \begin{itemize}
243 |                 \item works for natural languages
244 |                 \item classical machine learning problem
245 |             \end{itemize} \pause
246 |         \item disadvantages: 
247 |             \begin{itemize}
248 |                 \item no correctness guarantees
249 |                 \item complicated structural constraints
250 |                 \item needs suitable representation
251 |                 \item no insight
252 |             \end{itemize}
253 |     \end{itemize}
254 |     % like a neural network translating normal languages
255 |     \end{frame}
256 |     
257 |     \begin{frame}{Idea: Synthesis}
258 |     \Huge TODO
259 |     \end{frame}
260 |     
261 |     \begin{frame}{Idea: Synthesis + Validation}
262 |     \Huge TODO
263 |     \end{frame}
264 |     
265 |     \begin{frame}{Idea: Common language}
266 |     \Huge TODO
267 |     \end{frame}
268 |     
269 |     % \begin{frame}{Idea: }
270 |     % \Huge TODO
271 |     % \end{frame}
272 |     
273 |     
274 |     
275 |     
276 |     \begin{frame}{Applications}
277 |     \begin{minipage}{.49\textwidth}
278 |     \begin{itemize}
279 |         \item Similar to Pandoc, Rosetta, LLVM \pause
280 |         \item API changes \pause
281 |         \item virtual API \pause
282 |         \item Migration to new code (maintenance) \pause
283 |             \begin{itemize}
284 |                 \item COBOL $\to$ C(++) \pause
285 |                 \item PHP $\to$ Javascript/Python \pause
286 |                 \item X $\to$ Typescript \pause
287 |             \end{itemize}
288 |     \end{itemize}
289 |     \end{minipage}%
290 |     \begin{minipage}{.49\textwidth}
291 |     \begin{itemize}
292 |         \item Language features \pause
293 |             \begin{itemize}
294 |                 \item verification \pause
295 |                 \item libraries \pause
296 |                 \item speed \pause
297 |                 \item knowledge \pause
298 |                 \item safety guarantees \pause
299 |                 \item optimizations \pause
300 |                 \item tooling environment \pause
301 |             \end{itemize}
302 |         \item competing frameworks % angular, react, vue,   fabric, forge
303 |     \end{itemize}
304 |     \end{minipage}
305 |     \end{frame}
306 |     
307 |     
308 | \end{document}


--------------------------------------------------------------------------------
/22-05-presentation/unilogo4c.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/unilogo4c.jpg


--------------------------------------------------------------------------------
/22-05-presentation/unilogo4c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/22-05-presentation/unilogo4c.png


--------------------------------------------------------------------------------
/Ideas/NeuralTransformer.md:
--------------------------------------------------------------------------------
  1 | # Neural Transformer Transpilation
  2 | 
  3 | The basic idea of this approach is to generate (parts of) the output using text(-based) transformer networks.
  4 | Afterward, the trust (program equivalence) in the output has to be established.
  5 | 
  6 | 
  7 | ## Advantage
  8 | - [ ] Full usage of network to 
  9 |     - [ ] synthesize optimal code
 10 |     - [ ] connect concepts (unsupervised learning)
 11 | - [ ] conceptually simple
 12 | - [ ] modular & configurable
 13 | 
 14 | ## Difficulties & Limitations
 15 | - [ ] no formal guarantees
 16 | - [ ] output bound by network capabilities
 17 | - [ ] needs to ensure trust afterward
 18 | - [ ] can fail unexpectedly
 19 | - [ ] needs an LLM
 20 | 
 21 | ## Details
 22 | 
 23 | We start with code in language A and convince an LLM to generate code in language B with the same semantics.
 24 | There are many choices of networks to use:
 25 | * T5-inspired architectures specially trained for programming language tasks
 26 | * AST network architectures (need knowledge of language A)
 27 | * general purpose LLM
 28 |     * zero-shot: Carefully crafted prompt to transpile
 29 |     * one-shot/few-shot: give translation examples to guide transpilation (give specifics about the language)
 30 |     * fine-tuning: fine-tune the model using a few hundred / thousand examples
 31 | * attention based correlation (codex)
 32 | 
 33 | Sometimes, an LLM heads in the wrong direction. Therefore, a restart might be necessary to come up with another (hopefully correct) solution.
 34 | 
 35 | The model might produce (depending on the architecture) syntactically and semantically wrong results.
 36 | 
 37 | For syntactical issues, we can forward the compiler feedback to iterate until a valid program is returned.
 38 | 
 39 | Semantical issues are more difficult to detect and correct.
 40 | For the output to be trusted, we need to establish an equivalence with the input.
 41 | This can happen in multiple ways:
 42 | * correlation based mapping
 43 | * automatic equivalence proofs
 44 |     * (bounded) translation validation proofs
 45 | * property/fuzzing tests
 46 | 
 47 | A testing approach is the easiest to implement and maintain for all languages.
 48 | Tests cannot guarantee correctness. However, well-tested code is enough in practice. (Note: Depending on the type system and reasoning, whitebox tests could establish correctness.)
 49 | Additionally, tests provide us with concrete examples of a mis-translation.
 50 | We can feed back input-output pairs to refine the result until both programs show the same behaviour.
 51 | A tool like quickcheck/hypothesis might help to generate the tests.
 52 | 
 53 | 
 54 | ## Experiments & Results
 55 | 
 56 | [see subfolder]
 57 | 
 58 | 
 59 | ## Related Literature
 60 | Some literature that applies specifically to this experiment.
 61 | This includes prompt engineering attempts, especially for \*GPT\* models.
 62 | - Prompting
 63 |     - GPT3 conditioning
 64 |         - https://news.ycombinator.com/item?id=34869960#34873669
 65 |         - [GPT3 to Chat](https://github.com/Kav-K/GPT3Discord) [Author Response](https://old.reddit.com/r/GPT3/comments/zxs18b/gpt3dalle2_discord_bot_with_mediumlong_term_memory/j22a0vk/) 
 66 |     - ChatGPT conditioning
 67 |         - [Overview](https://www.jailbreakchat.com/)
 68 |         - [prompt collection](https://github.com/f/awesome-chatgpt-prompts)
 69 |         - [Dan 5.0](https://www.reddit.com/r/ChatGPT/comments/10tevu1/new_jailbreak_proudly_unveiling_the_tried_and/)
 70 |             - [Dan 6.0](https://www.reddit.com/r/ChatGPT/comments/10vinun/presenting_dan_60/)
 71 |             - [SDAN](https://www.reddit.com/r/ChatGPT/comments/10vlzbo/presenting_sdan_simple_dan/)
 72 |         - [SQL analyst in 26 recursive prompts](https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/)
 73 |             - [Hackernews](https://news.ycombinator.com/item?id=34521149)
 74 |         - [Midjourney Prompt Engine 3](https://www.reddit.com/r/midjourney/comments/11chf6s/version_3_of_my_chatgpt_prompting_machine_it_now/)
 75 |         - [stable diffusion prompter](https://www.reddit.com/r/StableDiffusion/comments/11cfe1i/is_there_a_chatgpt_prompt_to_create_sd_prompts/)
 76 |         - [GPT4 Jailbreak](https://news.ycombinator.com/item?id=35190383)
 77 |     - LLM Prompt Engineering
 78 |         - [Microsoft Prompt Engine](https://news.ycombinator.com/item?id=34811070) 
 79 |     - [GPT Error Reduction](https://medium.com/@0xjfan/how-we-cut-the-rate-of-gpt-hallucinations-from-20-to-less-than-2-f3bfcc10e4ec) 
 80 |     - [openai cookbook](https://github.com/openai/openai-cookbook)
 81 |     - [openai prompt engineering](https://learnprompting.org/docs/intro)
 82 | - Data Processing
 83 |     - [Pinecone long term vector memory](https://www.pinecone.io/)
 84 |     - [Load Repo](https://github.com/mpoon/gpt-repository-loader)
 85 | - API
 86 |     - [ChatGPT official API](https://openai.com/blog/introducing-chatgpt-and-whisper-apis)
 87 |     - [ChatBlade](https://github.com/npiv/chatblade)
 88 |     - [AI Chat](https://github.com/sigoden/aichat/)
 89 | 
 90 | 
 91 | ### Concrete Prompts
 92 | 
 93 | <details>
 94 | <summary>Table prompt GUI Interaction</summary>
 95 | INSTRUCTIONS: Provide a {text) that you would like to make changes to. Generate a TABLE with 2
 96 | columns, one with numbers and one with 5 different writing styles. After the TABLE is generated, ask
 97 | the question "What writing style would you like to implement? Pick one from the table above" below
 98 | the TABLE. Wait for the user to pick a number.
 99 | 
100 | 
101 | text = 
102 | 
103 | Execute the INSTRUCTIONS in a TABLE format:
104 | </details>


--------------------------------------------------------------------------------
/ReadMe.md:
--------------------------------------------------------------------------------
  1 | # Transpilation
  2 | 
  3 | In this repository, we investigate different aspects of, and solution ideas for, the problem of having too many programming languages.
  4 | 
  5 | ![Logo](attachments/logo.png)
  6 | 
  7 | - [Problem](#Problem)
  8 | 	- [Examples](#Examples)
  9 | - [Goal](#Goal)
 10 | - [Related Work](#related-work)
 11 | 	- [Program Equivalence](#program-equivalence)
 12 | 		- [Translation Validation](#translation-validation)
 13 | 		- [Program Translation](#program-translation)
 14 | 	- [Synthesis](#Synthesis)
 15 | 	- [Artificial Intelligence](#artificial-intelligence)
 16 | 	- [Transpilers](#Transpilers)
 17 | 	- [Related Concepts](#related-concepts)
 18 | - [Ideas](#Ideas)
 19 | 
 20 | 
 21 | ## Problem
 22 | 
 23 | ```
 24 | TLDR: 
 25 | There are features bound to programming languages that are conceptually independent of the concrete language used.
 26 | With over 9000 programming languages in use, porting these tools by hand to all popular languages is infeasible.
 27 | In conclusion, this leads to unnecessary work and unavailability of tools.
 28 | ```
 29 | 
 30 | 
 31 | There are a lot of programming languages (current estimate: >9000; at least 1000, with some hundred in active use, and possibly many more -- there are at least 3570 registered esoteric programming languages).
 32 | 
 33 | Not every language is the same.
 34 | We will focus on general-use programming languages (either by design or by use) and ignore special use programming languages.
 35 | For these languages, we can find multiple similarities and differences.
 36 | 
 37 | There are many aspects to languages (non-exclusive) (I only give one or two examples per class -- many languages are omitted):
 38 | * dependent type systems (Gallina, Idris)
 39 | * functional programming (Haskell, OCaml)
 40 | * imperative (C++)
 41 | * object oriented (Java)
 42 | * dynamically typed (python, javascript)
 43 | * array programming (APL, Fortran)
 44 | * low-level (C, Rust)
 45 | * Lisp
 46 | * prototype (Lua)
 47 | * scripting (python, lua)
 48 | * concurrent (Rust)
 49 | * logical (prolog)
 50 | * ....
 51 | And many more paradigms can be found on [Wikipedia](https://en.wikipedia.org/wiki/Programming_paradigm).
 52 | 
 53 | 
 54 | There are good (my opinion) reasons to use different languages:
 55 | * features suited for a special use case
 56 |     * low level memory control
 57 |     * fully managed memory 
 58 | * incompatible features that you like
 59 |     * imperative scripting
 60 |     * dependent type systems
 61 |     * pure functional programming
 62 |     * simplicity (dependent on special cases -> maybe consider staging)
 63 |     * complexity (dependent on special cases -> maybe consider staging)
 64 | * design around special use case (not just primitives)
 65 | **Note**: Many of these reasons could be summarized with the debates about static vs dynamic typing, functional vs imperative, and dependent vs non-dependent types, together with the complexity of specifying implicit assumptions. This is an area that warrants improvement. However, it will not be our focus here.
 66 | 
 67 | There are also not so good (my opinion) reasons:
 68 | * Tools available for this language
 69 |     * Analyzers
 70 |     * IDEs
 71 |     * Libraries
 72 |     * Frameworks
 73 |     * Standard library functions
 74 | * Compiler
 75 |     * Speed
 76 |     * Optimizations
 77 | * Userbase
 78 | * Language Primitives
 79 | 
 80 | I am not saying, it is wrong to choose a language by any of the second set of points.
 81 | However, I propose that these points do not have to / should not influence the choice of language.
 82 | On a more abstract level, these points can (for the most part) be isolated from the language itself.
 83 | They are only linked by implementation to a language but not conceptually as they apply to programming itself.
 84 | In the end, a programming language is only a tool to express semantics of a procedure manipulating data. How we express this can differ in syntax but is conceptually the same.
 85 | 
 86 | Even though the syntax might look quite different between languages,
 87 | an experienced programmer can pick up any language fast with only a few examples.
 88 | They will tell you that the underlying principles between most languages are the same.
 89 | A for loop in C is the same as in Java or Python. 
 90 | The `map` function of Haskell, the `.map` in Java, the for loop in C, and the list comprehension in Python all look different but express the same semantics.
 91 | A while loop and a tail recursive function are quite different in appearance, but every undergrad student learns that they function the same and result in very similar assembly code. 
 92 | 
 93 | Despite these similarities or due to the freedom of expression, many languages developed and are in use.
 94 | It is good that you can freely choose how you want to write code.
 95 | However, it is a shame when tools are locked to a language or need time-intensive ports and re-implementations.
 96 | 
 97 | ![Logo](attachments/standards3.png)
 98 | 
 99 | ### Examples
100 | 
101 | Only to name a few examples, we can look at common libraries:
102 | 
103 | [**Tensorflow**](https://www.tensorflow.org/) and [**PyTorch**](https://pytorch.org/) are widely used and very successful machine-learning tools.
104 | There exist many (one for every popular language) implementations of the frameworks that effectively call the underlying C libraries. Some languages have more sophisticated libraries that embed the frameworks as DSL / reify parts of the language into the frameworks.
105 | Additionally, some languages have libraries building up on the basic frameworks.
106 | Python has one of the best supports for these frameworks.
107 | 
108 | It is tedious and schematic to write all the boilerplate to call the libraries.
109 | Some language pairs have semi-automation to generate ABI/FFI. 
110 | But these tools only help for special cases and do not solve the problem in general.
111 | The additional problem of advanced interfaces and libraries is currently not solved.
112 | 
113 | One could even argue that for many applications the framework itself could be an implementation detail: The programmer wants to express an ML model. The details could be hidden by a common interface. There are libraries available that do this. However, it is imaginable that such an interface could be inferred automatically and translate between both libraries without human interaction.
114 | 
115 | The [**QuickCheck**](https://en.wikipedia.org/wiki/QuickCheck) framework is a popular combinator library that helps in generating test cases.
116 | It originates in Haskell and was successfully applied to other languages as well.
117 | There is QuickChick in Coq, Hypothesis in Python, based on Hypothesis there is PropTest in Rust, and there are 60 more **re**-implementations in other languages.
118 | To be more precise, there are at least six quickcheck re-implementations for python on Github with at least 15 stars.
119 | Some of these re-implementations "only" support random test generation while others are more fully featured, including features like test minimization.
120 | For more quickcheck re-implementations also see the website of [Hypothesis](https://hypothesis.works/articles/quickcheck-in-every-language/) (a python re-implementation of quickcheck/inspired-project).
121 | 
122 | Before QuickCheck was re-implemented in a language, the default way was to write the code, expose an FFI, and use QuickCheck externally by hand.
123 | 
124 | Another common example is **SQL** frameworks, which are re-implemented in every language. More sophisticated features like syntax checking, dummy data generation, compile time checking, ... fall by the wayside and are only available in very few languages.
125 | 
126 | 
127 | To give a last example, [**SOSML**](https://sosml.org/) is an online interpreter for Standard ML with a nice interface.
128 | The SML implementation is written from scratch by students.
129 | However, Saarland University switched from SML to OCaml.
130 | Theoretically, the differences between SML and OCaml are just some renamings.
131 | 
132 | But the correct handling of these changes requires either a transpiler between OCaml and SML (including a parser for OCaml) or changes in the SML interpreter (requiring in-depth knowledge of the code). See [SOOCaml](https://github.com/NeuralCoder3/SOOCaml-frontend) for a discussion.
133 | 
134 | This is tedious and could theoretically be automated.
135 | However, until somebody takes time to deeply understand the interpreter, the current solution is to abandon SOSML or to patch in a third-party OCaml interpreter (transpiled to Javascript).
136 | 
137 | 
138 | 
139 | ## Goal 
140 | 
141 | There are multiple concrete goals.
142 | But abstractly, we want to bridge the gap between languages.
143 | To this end, it should be possible to use one language's tools, features, and libraries in another language.
144 | 
145 | More concretely, we want to develop a universal transpiler (or parts thereof).
146 | 
147 | 
148 | There are multiple conceptual paths to attack the problem.
149 | Each path offers multiple ways to achieve the goal.
150 | 
151 | Before we will look closer at ideas, we focus on related work in the field.
152 | We first introduce the works shallowly and go in-depth in the corresponding idea files.
153 | 
154 | ## Related Work
155 | 
156 | We collect interesting papers in the related areas of this work.
157 | 
158 | ### Language Overview
159 | An overview of languages and tools connecting languages (e.g. transpiler, compiler).
160 | - [Programming Language Database](https://pldb.com/) (4000 languages) [Github](https://github.com/breck7/pldb)
161 | - [Language Definition Tool Ott](https://github.com/ott-lang/ott)
162 | - https://rosettacode.org/wiki/Language_Comparison_Table
163 | - [Landscape of languages](https://docs.google.com/spreadsheets/d/1P738jVhd9-UkNneIRN6VbEH-stDiKbDbB0WWPROv6lk/edit?usp=sharing) (compiler, transpiler overview, incomplete)
164 |     - [Visualization](https://github.com/NeuralCoder3/language-map)
165 | 
166 | ### Program Equivalence
167 | An important part in the translation is the equivalence of the original and translated program.
168 | This equivalence either guides the translation/synthesis or has to be established alongside/after the translation.
169 | 
170 | - [Generative Language Modeling for Automated Theorem Proving](https://arxiv.org/pdf/2009.03393.pdf)
171 | - [Program Equivalence](https://drops.dagstuhl.de/opus/volltexte/2018/9758/pdf/dagrep_v008_i004_p001_18151.pdf)
172 | - [Interaction Trees](https://arxiv.org/pdf/1906.00046.pdf)
173 | - [Self-Supervised Learning to Prove Equivalence Between Straight-Line Programs via Rewrite Rules](https://arxiv.org/pdf/2109.10476.pdf)
174 | - CompCert
175 | - [Generating Proof Certificates for a Language-Agnostic Deductive Program Verifier](https://xchen.page/assets/pdf/LCT+23-paper.pdf)
176 | - [ARDiff: scaling program equivalence checking via iterative abstraction and refinement of common code](https://dl.acm.org/doi/10.1145/3368089.3409757)
177 | - [FuzzDiff: A Program Equivalence Checker based on feedback-directed fuzz testing and semantic analysis](https://www.scss.tcd.ie/publications/theses/diss/2022/TCD-SCSS-DISSERTATION-2022-134.pdf)
178 | - [Self-Supervised Learning to Prove Equivalence Between Straight-Line Programs via Rewrite Rules](https://arxiv.org/pdf/2109.10476.pdf)
179 | 
180 | [add from folder]
181 | 
182 | #### Translation Validation
183 | Translation Validation is a special subfield that focuses on automated equivalence checks of programs before and after optimizations.
184 | 
185 | - [End-to-End Translation Validation for the Halide Language](https://hal.inria.fr/hal-03653857/document)
186 | - [Alive2: Bounded Translation Validation for LLVM](https://dl.acm.org/doi/pdf/10.1145/3453483.3454030)
187 | - [Counterexample-Guided Correlation Algorithm for Translation Validation](https://shubhani.compiler.ai/pubs/oopsla20.pdf)
188 | 
189 | #### Program Translation
190 | Automated program translations like superoptimizers have to guarantee program equivalence (usually in one language). Superoptimizers are often restricted to loop-free short code segments.
191 | 
192 | [add from folder]
193 | 
194 | ### Synthesis
195 | The target program has to be synthesized from the original program.
196 | The programs need to be equivalent and can be quite complex involving complicated control flow.
197 | However, the synthesis has a clear guideline as the shape of the original program can be used and the semantics of the result is fully defined. 
198 | 
199 | - [CounterExample-Guided Inductive Synthesis (CEGIS)](https://www-cs.stanford.edu/~preiner/publications/2017/PreinerNiemetzBiere-TACAS17.pdf)
200 | - [Synthesis of Loop-free Programs](https://dl.acm.org/doi/pdf/10.1145/1993316.1993506) (tool: Brahma)
201 | - [Program Synthesis](https://www.nowpublishers.com/article/Details/PGL-010)
202 | - Cyclic Program Synthesis
203 | - [Bottom-Up Synthesis of Recursive Functional Programs using Angelic Execution](https://dl.acm.org/doi/pdf/10.1145/3498682)
204 | - [Evaluating Large Language Models Trained on Code](https://arxiv.org/pdf/2107.03374.pdf)
205 | - [APIfix: Output-Oriented Program Synthesis for Combating Breaking Changes in Libraries](https://gaoxiang9430.github.io/papers/APIFix.pdf)
206 | - [FlashFill++: Scaling Programming by Example by Cutting to the Chase](https://www.microsoft.com/en-us/research/uploads/prod/2022/12/flashfillpp-popl-23-camera-ready.pdf)
207 | - Inductive Synthesis of Structurally Recursive Functional Programs from Non-recursive Expressions
208 | - [Top-Down Synthesis For Library Learning](https://arxiv.org/pdf/2211.16605.pdf)
209 | - Bootstrapping Library-Based Synthesis
210 | - [Abstract Syntax Networks for Code Generation and Semantic Parsing](https://arxiv.org/pdf/1704.07535.pdf)
211 |     - [Github](https://github.com/xiye17/torchASN)
212 | - [Synthesizing an Instruction Selection Rule Library from Semantic Specifications](https://pp.ipd.kit.edu/uploads/publikationen/buchwald18cgo.pdf)
213 | - [Souper: A Synthesizing Superoptimizer](https://arxiv.org/abs/1711.04422)
214 | - [Minotaur: A SIMD-Oriented Synthesizing Superoptimizer](https://arxiv.org/abs/2306.00229) (based on Alive)
215 | - [Supersonic](https://dl.acm.org/doi/10.1145/3497776.3517769) [Github](https://github.com/HuantWang/SUPERSONIC) (based on CompilerGym)
216 | - https://emina.github.io/rosette/
217 | - [Untyped -> Typed](https://dl.acm.org/doi/abs/10.1145/1176617.1176755?casa_token=dFMn2xhiTeUAAAAA:EC0wkfDgt3clgpbnwQ1x2JasOiL9wOvZQ0HoxKdUNexgjpAznzLg4tpfJpfDyXp0MIYzBHeE58tS)
218 | - LLM Forced Format
219 |     - [Jsonformer](https://news.ycombinator.com/item?id=35790092) [Github](https://github.com/1rgs/jsonformer)
220 |     - [clownfish](https://github.com/newhouseb/clownfish)
221 |     - [Output Fixing Parser](https://python.langchain.com/en/latest/modules/prompts/output_parsers/examples/output_fixing_parser.html)
222 |     - [Guardrails](https://github.com/ShreyaR/guardrails)
223 | - [Statistically certified approximate logic synthesis](https://ieeexplore.ieee.org/abstract/document/8203798?casa_token=x4a__CSWqaAAAAAA:Do9diuZVLhRFGh2v-tmwoaL4nEvNNqI3RkXBuoque2kGImJIb0WbMLve1bP2_jsIEURFES2QB5ZX)
224 | - [Certifying the Synthesis of Heap-Manipulating Programs](https://dl.acm.org/doi/pdf/10.1145/3473589) (SuSLik)
225 | - [A FRAMEWORK FOR CERTIFIED PROGRAM SYNTHESIS](https://ilyasergey.net/assets/pdf/papers/Watanabe-MComp.pdf)
226 | 
227 | [add from folder]
228 | 
229 | ### Artificial Intelligence
230 | A promising approach is neural-guided synthesis using artificial intelligence for translation.
231 | These tools have been proven to be capable of synthesizing complex code with acceptable accuracy.
232 | However, the current projects mainly look into natural language and do not establish tight guarantees like formal semantics of the output.
233 | 
234 | - [Competition-Level Code Generation with AlphaCode](https://www.datascienceassn.org/sites/default/files/Competition-Level%20Code%20Generation%20with%20AlphaCode.pdf)
235 | - [CrossBeam: Learning To Search in Bottom-Up Program Synthesis](https://arxiv.org/pdf/2203.10452.pdf)
236 | - [Formal Mathematics Statement Curriculum Learning](https://arxiv.org/pdf/2202.01344.pdf)
237 | - [OpenAI Codex](https://beta.openai.com/docs/guides/code/best-practices)
238 | - [ChatGPT](https://chat.openai.com/chat)
239 | - [Salesforce CodeT5](https://arxiv.org/abs/2109.00859) ([Github](https://github.com/salesforce/CodeT5))
240 | - [Github CoPilot](https://github.com/features/copilot)
241 | - [Transformer models: an introduction and catalog](https://arxiv.org/pdf/2302.07730.pdf)
242 | - [FlexGen: Running GPT on (relatively) low VRAM](https://news.ycombinator.com/item?id=34869960) [Github](https://github.com/FMInference/FlexGen)
243 | - [Jotte: Graph-based GPT Extension for large text corpus](https://news.ycombinator.com/item?id=34901481)
244 | - [Phind GPT Search](https://news.ycombinator.com/item?id=34884338)
245 | - [Open Source GPT Training](https://news.ycombinator.com/item?id=34858460)
246 | - [Microsoft Prompt Engine](https://news.ycombinator.com/item?id=34811070)
247 | - [The "an" neuron in GPT2](https://news.ycombinator.com/item?id=34821414)
248 | - [Run GPT3 fast](https://news.ycombinator.com/item?id=34702349)
249 | - [GPT Memory & Conversations](https://github.com/Kav-K/GPT3Discord)
250 | - [Show your work: Scratchpads for intermediate computation with language models](https://arxiv.org/pdf/2112.00114.pdf)
251 | - [GPT Error Reduction](https://medium.com/@0xjfan/how-we-cut-the-rate-of-gpt-hallucinations-from-20-to-less-than-2-f3bfcc10e4ec) 
252 | - [openai cookbook](https://github.com/openai/openai-cookbook)
253 | - [openai prompt engineering](https://learnprompting.org/docs/intro)
254 | - [CompilerGym](https://arxiv.org/pdf/2109.08267.pdf) [Github](https://github.com/facebookresearch/CompilerGym)
255 | - fast-ai
256 | - https://github.com/salesforce/CodeGen
257 | - [Auto-GPT](https://github.com/Significant-Gravitas/Auto-GPT) An experimental open-source attempt to make GPT-4 fully autonomous.
258 | - [Maieutic Prompting: Logically Consistent Reasoning with Recursive Explanations](https://arxiv.org/pdf/2205.11822.pdf)
259 | 
260 | ### Transpilers
261 | Transpilers are mainly written by hand and are far from perfect.
262 | The produced code is not always readable and sometimes needs post-processing.
263 | However, there are often formal (or implicit) guarantees that the result agrees with the original program.
264 | 
265 | - [A General Purpose Transpiler for Fully Homomorphic Encryption](https://arxiv.org/pdf/2106.07893.pdf)
266 | - [Interlanguage Migration: From Scripts to Programs](https://dl.acm.org/doi/pdf/10.1145/1176617.1176755)
267 | - [Improving mobile app development using transpilers with maintainable outputs](https://dl.acm.org/doi/pdf/10.1145/3422392.3422426)
268 | - [Automated Transpilation of Imperative to Functional Code using Neural-Guided Program Synthesis (Extended Version)](https://arxiv.org/pdf/2203.09452.pdf)
269 | - [A Simple Abstraction of Arrays and Maps by Program Translation](https://arxiv.org/pdf/1506.04161.pdf)
270 | - [Unsupervised Translation of Programming Languages](https://proceedings.neurips.cc/paper/2020/file/ed23fbf18c2cd35f8c7f8de44f85c08d-Paper.pdf)
271 | - [Pandoc](https://pandoc.org/)
272 | - Popular Media
273 |     - [Legacy Code Conversion - Computerphile](https://www.youtube.com/watch?v=Xz06zYlQrck)
274 | - Protobuf -- the universal language
275 | - [Oblivious Decompiler](https://www.recon.cx/2012/schedule/attachments/40_Chernov-Troshina.pdf)
276 | - [How to write a Transpiler](https://tomassetti.me/how-to-write-a-transpiler/)
277 | - [universal transpiler](https://github.com/jarble/transpiler)
278 |     - [CodeWorker](http://codeworker.free.fr/)
279 | - [User-Customizable Transpilation of Scripting Languages](https://arxiv.org/pdf/2301.11220.pdf)
280 | 
281 | ### Language Interoperability
282 | - [Melocoton: A Program Logic for Verified Interoperability Between OCaml and C](https://gallium.inria.fr/~agueneau/publis/melocoton.pdf)
283 | - [FFI Overhead Comparison](https://news.ycombinator.com/item?id=31376679)
284 | 
285 | 
286 | ### Related Concepts
287 | - Synthesis
288 |     - Neural(-guided) synthesis
289 |     - Top-Down synthesis
290 |     - Search/Planning
291 | - Program Equivalence
292 |     - Translation Validation
293 |     - Separation Logic
294 |     - symbolic abstraction
295 |     - smt
296 | - Language Design
297 |     - Program Paradigms
298 |     - Compilation Transpilation
299 |     - Decompilation
300 |     - Staging
301 |     - Metaprogramming
302 |     - DSL
303 |     - ABI/FFI, serilization, marshalling
304 |     - Partial Evaluation
305 | - Tests
306 |     - QA
307 |     - logarithmic types
308 |     - fuzzing
309 |     - specification mining
310 | - Program Communication
311 |     - Marshalling
312 |     - JSON
313 |     - Pickling
314 |     - RPC
315 |     - ABI/FFI
316 | 
317 | 
318 | ## Ideas
319 | 
320 | We present some promising ideas in [./Ideas/](./Ideas/). 
321 | The ideas are not exhaustive.  
322 | 
323 | ### Building Blocks
324 | 
325 | **Transformers**: LLMs have recently been shown to be knowledgeable in complicated synthesis tasks.
326 | The unsupervised trained models present a grasp of related concepts like the relation between natural language and programming languages or between programming languages.
327 | Recent papers and projects have shown first successes in using text transformers to synthesize programs. A more refined and verified approach could build upon this preliminary success.
328 | 
329 | **Counter Example Guided Synthesis**: Counterexamples contradicting the synthesis specification can be used to refine and guide the search to a solution. These counterexamples can be obtained using SMT solvers or fuzzers.
330 | 
331 | **Tests**: We can employ automated tests/symbolic abstraction to find program equivalence contradictions. These can be used to refine the result and to ensure trust in an unverified result. This way, we can harvest the power of neural networks without suffering from the unpredictable/unverified nature of neural networks.
332 | 
333 | **Search**: Many parts of the approach involve/can be formulated as search problems.
334 | For instance the construction (synthesis) of the resulting program can be seen as a search for a valid program with the same properties.
335 | The search has to be heavily pruned and possibly (neural-)guided either directly or via heuristics. A common approach in recent research is to use beam search. There are papers that use bottom-up search as well as other papers that use a top-down approach.
336 | 
337 | **Latent Representation**: The neural guided approaches can attach to the approach at different levels. Either as guide, as main component, as checked assistant, ... .
338 | In any way, we need to find a suitable way to communicate and represent the data we present to the networks. 
339 | This can happen as simple text (as shown effective by LLM) or using more informative and sophisticated datastructures like graph nets or AST network structures.
340 | Historically more complicated structures did not provide better performance. But recent research showed a more focused network leading to better performance with less resource consumption.
341 | 
342 | **Partial Eval**: One could build on refined and optimized Futamura projections to transpile programs in languages supporting partial evaluation (beta reduction and propagation) that implement corresponding interpreters/compilers. A lingua franca of programming (like in pandas or FFI communication) could make this approach feasible and also help in other approaches.
343 | 
344 | **Rewrite Rules**: Classically, transpilers operate in declarative programming languages (prolog) using rewrite rules. One idea could be to semi-automatically synthesize these rules.
345 | The approach would be limited to match-rule-based rewriting but would allow for more interaction and control. It would especially be open to formal verification. 
346 | 
347 | **ABI/FFI**: An unrelated concept to increase interoperability is to make it easier to call functions from other languages. This can happen at different abstraction levels.
348 | The functions can be linked at the language level, at C level, at assembly level.
349 | The link can happen statically as foreign function using a FFI or by transpilation or
350 | at runtime using communication bridges.
351 | The function needs a common interface between both languages. This can be as simple
352 | as the standard FFI interface, a JSON bridge, or a universal communication language.
353 | _Related Projects_: APIFix, [DSL Project Github](https://github.com/NeuralCoder3/thorin-dsl), serialization, pickling, marshalling, isomorphism type system, [remote procedure call](https://en.wikipedia.org/wiki/Remote_procedure_call)
354 | 
355 | ### Concrete Ideas
356 | 
357 | We can group the ideas in the following categories:
358 | * Interoperability
359 | * Program Transpilation
360 | 
361 | [Ideas/NeuralTransformer](Ideas/NeuralTransformer): A promising but simple approach is to use unsupervised trained neural text transformers. These LLMs (large language models) have demonstrated knowledge about many languages and their (intuitive) semantics (connection to other languages including natural language). The idea in this approach is to synthesize the resulting program and refine it stepwise using (automated) feedback.
362 | 
363 | [see presentation]
364 | 
365 | ## Applications
366 | 
367 | Here, we collect (more) concrete ideas for applications of our approaches:
368 | * Code -> Rust (safety guarantees, checker environment)
369 | * functional -> imperative (possible speedup)
370 | * python <-> language (library support)
371 | * php -> typescript (update old infrastructure)
372 | * java -> typescript (update old infrastructure)
373 | * imaginary markup -> latex (better syntax)
374 | * toy language -> real language (fast prototyping)
375 | * imaginary api -> real api (accessibility, better adoption, change-resistant code)
376 | * pseudocode -> language 
377 | * python -> C++ (speedup)
378 | * imperative -> functional (verification)
379 | * react <-> angular <-> vue (re-usage between apis)
380 | * use arbitrary [Language Gimmicks](https://buttondown.email/hillelwayne/archive/microfeatures-id-like-to-see-in-more-languages/) => language extension (similar to OCaml ppx)
381 | * synthesis by natural language transpilation
382 | * [Dependabot 2.0](https://github.com/vlts1/ruzz): Adapt code to changes automatically
383 | 
384 | Further high-level advantages:
385 | * not all code needs to be able to be converted
386 | * wrappers can be written according to semantics
387 | 
388 | Mottos:
389 | * LLVM (lingua franca) for high-level
390 | * write code you want and get code you need
391 | * 


--------------------------------------------------------------------------------
/attachments/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/attachments/logo.png


--------------------------------------------------------------------------------
/attachments/standards3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuralCoder3/transpilation/24222c009fa53485dc68b59656e47b208aea1280/attachments/standards3.png


--------------------------------------------------------------------------------