├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── Main.lhs
├── Main.md
├── Makefile
├── README.md
├── Setup.hs
├── corrode.cabal
├── doc
    └── .gitignore
├── fixGitSymlinksForWindows.bat
├── scripts
    ├── corrode-cc
    └── csmith-test
├── src
    └── Language
    │   ├── Rust.hs
    │   └── Rust
    │       ├── AST.hs
    │       ├── Corrode
    │           ├── C.lhs
    │           ├── C.md
    │           ├── CFG.lhs
    │           ├── CFG.md
    │           └── CrateMap.hs
    │       └── Idiomatic.hs
├── stack.yaml
└── tests
    └── test.hs


/.gitignore:
--------------------------------------------------------------------------------
1 | /dist/
2 | .stack-work/
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Sudo used for custom apt setup
 2 | sudo: true
 3 | 
 4 | # Add new environments to the build here:
 5 | env:
 6 |  - GHCVER=7.10.1 CABALVER=1.22
 7 |  - GHCVER=8.0.1 CABALVER=1.22
 8 |  - GHCVER=head CABALVER=head
 9 | 
10 | # Allow for develop branch to break
11 | matrix:
12 |   allow_failures:
13 |    - env: GHCVER=head CABALVER=head
14 | 
15 | # Manually install ghc and cabal
16 | before_install:
17 |   - travis_retry sudo add-apt-repository -y ppa:hvr/ghc
18 |   - travis_retry sudo apt-get update
19 |   - travis_retry sudo apt-get install cabal-install-$CABALVER ghc-$GHCVER
20 |   - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH
21 |   - export PATH=$HOME/.cabal/bin:$PATH
22 |   - travis_retry cabal update
23 | 
24 | # install happy and alex first, see: https://github.com/jameysharp/corrode/issues/57
25 | install:
26 |   - echo $PATH
27 |   - cabal --version
28 |   - ghc --version
29 |   - cabal install happy
30 |   - cabal install alex
31 |   - cabal install --verbose --enable-tests
32 | 
33 | script:
34 |   - dist/build/tests/tests --quickcheck-tests 100000
35 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | Thank you for your interest in contributing to Corrode!
  2 | 
  3 | Kinds of Contributions
  4 | ======================
  5 | 
  6 | This project has a goal of being useful and correct, but it also has a
  7 | goal of being helpful for learning. So I value many kinds of
  8 | contributions!
  9 | 
 10 | - Code improvements
 11 | - Bug reports
 12 | - Tests or other verification tools
 13 | - And documentation improvements!
 14 | 
 15 | One of the most valuable contributions you could make is to read through
 16 | any part of the code, take notes on anything you find confusing, and
 17 | ideally, come up with a better way to explain those confusing parts.
 18 | 
 19 | Reporting Bugs
 20 | ==============
 21 | 
 22 | Corrode has plenty of bugs, and I want to know about them!
 23 | 
 24 | Bugs in Corrode can show up either
 25 | 
 26 | - by reporting an error message that it shouldn't have,
 27 | - or by successfully generating Rust source, but the generated Rust
 28 |   doesn't compile or produces the wrong result.
 29 | 
 30 | Before reporting a bug, please check:
 31 | 
 32 | 1. Does the C source you're testing compile successfully, and produce
 33 |    the result you expected, when compiled with GCC or Clang? If not,
 34 |    then you should figure out what those compilers think your code means
 35 |    first. You may have found a case where the C standard is stupid,
 36 |    which would not count as a bug in Corrode.
 37 | 
 38 | 1. Has this bug already been reported? There are quite a few known bugs
 39 |    in Corrode and it's worth taking a moment to search the GitHub issues
 40 |    list to see if anything is related.
 41 | 
 42 | Once you've checked those things, open a new issue on GitHub. In your
 43 | bug report, please clearly explain:
 44 | 
 45 | 1. What did you try to do? For example, include the smallest C source
 46 |    code you can come up with that triggers the bug.
 47 | 1. What result did you expect to see?
 48 | 1. What result did you actually get?
 49 | 
 50 | I like Simon Tatham's essay on [How to Report Bugs
 51 | Effectively](http://www.chiark.greenend.org.uk/~sgtatham/bugs.html).
 52 | 
 53 | At your option, if you can see how to fix the bug you've found, you can
 54 | choose to open a pull request suggesting a fix without opening a
 55 | separate issue first.
 56 | 
 57 | Pull Requests
 58 | =============
 59 | 
 60 | Some project maintainers are picky about the contents of individual
 61 | commits. By contrast, I'm just delighted you want to contribute and I'd
 62 | like to make it as easy as possible for you to make your mark on
 63 | Corrode.
 64 | 
 65 | For example: Please don't use `git rebase` on your pull requests. I'm
 66 | not interested in Linux kernel style "perfect" patches&mdash;I'm going
 67 | to review your patches, but you're unlikely to write something I can't
 68 | follow regardless of how "messy" your commit history is&mdash;and rebase
 69 | loses information that I find valuable about the development history
 70 | that led up to your pull request. (See [Perspectives on Commit
 71 | History](http://jamey.thesharps.us/2016/05/perspectives-on-commit-history.html)
 72 | for more.)
 73 | 
 74 | I encourage you to submit an initial pull request as soon as you have
 75 | something kind of working, even if you know there are problems, so I can
 76 | review it and give you feedback as early as possible. (See [The Gentle
 77 | Art of Patch
 78 | Review](http://sarah.thesharps.us/2014/09/01/the-gentle-art-of-patch-review/)
 79 | for some observations from a maintainer's perspective on this.)
 80 | 
 81 | I'm not picky about code style. It's nice if you can follow the style
 82 | used in the rest of the code, but I'm not likely to reject your patches
 83 | because of code style. I do have a couple of very general guidelines I'd
 84 | appreciate if you could follow:
 85 | 
 86 | - I'd like line lengths to be below, oh, something like 85 or 90
 87 |   characters, because apparently that's how much Pandoc can fit inside
 88 |   1-inch margins on US-Letter paper. I discovered this after writing a
 89 |   bunch of code so there are still some lines that are longer.
 90 | 
 91 | - I don't like having lines indented based on how long things are on a
 92 |   previous line:
 93 | 
 94 |     ```haskell
 95 |     reallyLongName :: a
 96 |                    -> b
 97 |     ```
 98 | 
 99 |     because if you rename `reallyLongName` then you have to re-indent
100 |     all the lines after it, which leads to giant diffs where it's hard
101 |     to spot the real changes. So I generally try to indent exactly four
102 |     spaces anywhere that I need to indent.
103 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:16.04
2 | RUN apt-get update && apt-get install git ghc haskell-stack -y
3 | RUN git clone https://github.com/jameysharp/corrode.git
4 | RUN cd corrode && stack build && stack install
5 | ENV PATH="/root/.local/bin:${PATH}"
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |              GNU GENERAL PUBLIC LICENSE
  2 |                 Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                      Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |              GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                      NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |               END OF TERMS AND CONDITIONS
281 | 
282 |      How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/Main.lhs:
--------------------------------------------------------------------------------
1 | Main.md


--------------------------------------------------------------------------------
/Main.md:
--------------------------------------------------------------------------------
  1 | This is the main driver-program entry point for Corrode.
  2 | 
  3 | It brings together the C parser and preprocessor interface from
  4 | [language-c](http://hackage.haskell.org/package/language-c), Corrode's
  5 | algorithms, and the pretty-printing library
  6 | [pretty](https://hackage.haskell.org/package/pretty), all while
  7 | reporting errors in a consistent way.
  8 | 
  9 | ```haskell
 10 | import Control.Monad
 11 | import Control.Monad.Trans.Class
 12 | import Control.Monad.Trans.Except
 13 | import Data.List
 14 | import Language.C
 15 | import Language.C.System.GCC
 16 | import Language.C.System.Preprocess
 17 | import Language.Rust.Corrode.C
 18 | import Language.Rust.Corrode.CrateMap
 19 | import Language.Rust.Idiomatic
 20 | import System.Environment
 21 | import System.Exit
 22 | import System.FilePath
 23 | import Text.PrettyPrint.HughesPJClass
 24 | ```
 25 | 
 26 | Corrode can produce reasonable single-module output using only the
 27 | information that you would have passed to a C compiler. But with some
 28 | guidance from the user, it can produce better output. Here we remove
 29 | Corrode-specific command-line options; the rest will be passed to GCC.
 30 | 
 31 | ```haskell
 32 | newtype Options = Options
 33 |     { moduleMaps :: [(String, String)]
 34 |     }
 35 | 
 36 | defaultOptions :: Options
 37 | defaultOptions = Options
 38 |     { moduleMaps = []
 39 |     }
 40 | 
 41 | parseCorrodeArgs :: [String] -> Either String (Options, [String])
 42 | parseCorrodeArgs ("-corrode-module-map" : spec : rest) = do
 43 |     let spec' = case span (/= ':') spec of
 44 |             (crate, _ : specFile) -> (crate, specFile)
 45 |             (specFile, []) -> ("", specFile)
 46 |     (opts, other) <- parseCorrodeArgs rest
 47 |     return (opts { moduleMaps = spec' : moduleMaps opts }, other)
 48 | parseCorrodeArgs (arg : rest) = do
 49 |     (opts, other) <- parseCorrodeArgs rest
 50 |     return (opts, arg : other)
 51 | parseCorrodeArgs [] = return (defaultOptions, [])
 52 | ```
 53 | 
 54 | There are lots of steps in this process, and several of them return an
 55 | `Either`, which is used similarly to Rust's `Result` type. In Haskell we
 56 | don't have the `try!` macro that Rust has; instead the `ExceptT` monad
 57 | encapsulates the "return early on failure" pattern.
 58 | 
 59 | We layer `ExceptT` on top of the `IO` monad so that we're permitted to
 60 | access files and command-line arguments, but we need to adapt various
 61 | types of return values from different functions we'll call. For a
 62 | function which performs pure computation and might fail, wrap the
 63 | function call in `try`. If the function can also do I/O, wrap it in
 64 | `tryIO` instead.
 65 | 
 66 | ```haskell
 67 | try :: Either e a -> ExceptT e IO a
 68 | try = tryIO . return
 69 | 
 70 | tryIO :: IO (Either e a) -> ExceptT e IO a
 71 | tryIO = ExceptT
 72 | ```
 73 | 
 74 | We use one other function for dealing with errors. `withExceptT f`
 75 | applies `f` to the error value, if there is one, which lets us convert
 76 | different types of errors to one common error type.
 77 | 
 78 | Here's the pipeline:
 79 | 
 80 | ```haskell
 81 | main :: IO ()
 82 | main = dieOnError $ do
 83 | ```
 84 | 
 85 | 1. Extract the command-line arguments we care about. We'll pass the rest
 86 |    to the preprocessor.
 87 | 
 88 |     ```haskell
 89 |         let cc = newGCC "gcc"
 90 |         cmdline <- lift getArgs
 91 |         (options, cmdline') <- try (parseCorrodeArgs cmdline)
 92 |         (rawArgs, _other) <- try (parseCPPArgs cc cmdline')
 93 |     ```
 94 | 
 95 | 1. The user may have specified the `-o <outputfile>` option. Not only do
 96 |    we ignore that, but we need to suppress it so the preprocessor
 97 |    doesn't write its output where a binary was expected to be written.
 98 |    We also force-undefine preprocessor symbols that would indicate
 99 |    support for language features we can't actually handle, and remove
100 |    optimization flags that make GCC define preprocessor symbols.
101 | 
102 |     ```haskell
103 |         let defines = [Define "_FORTIFY_SOURCE" "0", Define "__NO_CTYPE" "1"]
104 |         let undefines = map Undefine ["__BLOCKS__", "__FILE__", "__LINE__"]
105 |         let warnings = ["-Wno-builtin-macro-redefined"]
106 |         let args = foldl addCppOption
107 |                 (rawArgs
108 |                     { outputFile = Nothing
109 |                     , extraOptions =
110 |                           (filter (not . ("-O" `isPrefixOf`)) (extraOptions rawArgs)) ++
111 |                           warnings
112 |                     })
113 |                 (defines ++ undefines)
114 |     ```
115 | 
116 | 1. Load any specified module-maps.
117 | 
118 |     ```haskell
119 |         allMaps <- fmap mergeCrateMaps $ forM (moduleMaps options) $
120 |             \ (crate, filename) -> tryIO $ do
121 |                 spec <- readFile filename
122 |                 return $ do
123 |                     crateMap <- parseCrateMap spec
124 |                     return (crate, crateMap)
125 |         let modName = takeBaseName (inputFile args)
126 |         let (currentModule, otherModules) = splitModuleMap modName allMaps
127 |         let allRewrites = rewritesFromCratesMap otherModules
128 |     ```
129 | 
130 | 1. Run the preprocessor&mdash;except that if the input appears to have
131 |    already been preprocessed, then we should just read it as-is.
132 | 
133 |     ```haskell
134 |         input <- if isPreprocessed (inputFile args)
135 |             then lift (readInputStream (inputFile args))
136 |             else withExceptT
137 |                 (\ status -> "Preprocessor failed with status " ++ show status)
138 |                 (tryIO (runPreprocessor cc args))
139 |     ```
140 | 
141 | 1. Get language-c to parse the preprocessed source to a `CTranslUnit`.
142 | 
143 |     ```haskell
144 |         unit <- withExceptT show (try (parseC input (initPos (inputFile args))))
145 |     ```
146 | 
147 | 1. Generate a list of Rust items from this C translation unit.
148 | 
149 |     ```haskell
150 |         items <- try (interpretTranslationUnit currentModule allRewrites unit)
151 |     ```
152 | 
153 | 1. Pretty-print all the items as a `String`.
154 | 
155 |     ```haskell
156 |         let output = intercalate "\n"
157 |                 [ prettyShow (itemIdioms item) ++ "\n"
158 |                 | item <- items
159 |                 ]
160 |     ```
161 | 
162 | 1. Write the final string to a file with the same name as the input,
163 |    except with any extension replaced by ".rs".
164 | 
165 |     ```haskell
166 |         let rsfile = replaceExtension (inputFile args) ".rs"
167 |         lift $ do
168 |             writeFile rsfile output
169 |             putStrLn rsfile
170 |             putStrLn $ case outputFile rawArgs of
171 |                 Just outfile -> outfile
172 |                 Nothing -> replaceExtension (inputFile args) ".o"
173 |     ```
174 | 
175 | When the pipeline ends, we need to check whether it resulted in an
176 | error. If so, we call `die` to print the error message to `stderr` and
177 | exit with a failure status code.
178 | 
179 | ```haskell
180 | dieOnError :: ExceptT String IO a -> IO a
181 | dieOnError m = do
182 |     result <- runExceptT m
183 |     case result of
184 |         Left err -> die err
185 |         Right a -> return a
186 | ```
187 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | docs: doc/corrode.pdf doc/cfg.pdf doc/driver.pdf
 2 | 
 3 | doc/corrode.pdf: src/Language/Rust/Corrode/C.md
 4 | 	pandoc --from markdown --to latex --variable papersize=letter --variable geometry=margin=1in --output "$@" "$^"
 5 | 
 6 | doc/cfg.pdf: src/Language/Rust/Corrode/CFG.md
 7 | 	pandoc --from markdown --to latex --variable papersize=letter --variable geometry=margin=1in --output "$@" "$^"
 8 | 
 9 | doc/driver.pdf: Main.md
10 | 	pandoc --from markdown --to latex --variable papersize=letter --variable geometry=margin=1in --output "$@" "$^"
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Corrode: Automatic semantics-preserving translation from C to Rust
  2 | 
  3 | [![Build Status](https://travis-ci.org/jameysharp/corrode.svg?branch=master)](https://travis-ci.org/jameysharp/corrode)
  4 | 
  5 | This program reads a C source file and prints an equivalent module in
  6 | Rust syntax. It's intended for partial automation of migrating legacy
  7 | code that was implemented in C. This tool does not fully automate the
  8 | job because its output is only as safe as the input was; you should
  9 | clean up the output afterward to use Rust features and idioms where
 10 | appropriate.
 11 | 
 12 | ## Quick Start
 13 | 
 14 | As of now, there are no pre-built binaries available, so you need to build the
 15 | project yourself, but don't let that scare you away; clone the project, `cd`
 16 | into it and follow along :)
 17 | 
 18 | ### Windows
 19 | 
 20 | If you're using Windows, start by running `fixGitSymlinksForWindows.bat`
 21 | as admin (this is necessary for Git to create symlinks).
 22 | 
 23 | ### Cabal
 24 | 
 25 | Ensure that you have GHC and the `cabal-install` tool installed by following
 26 | the [directions on haskell.org](https://www.haskell.org/downloads#minimal).
 27 | You'll also need to have the `happy` and `alex` tools available in order to
 28 | build `corrode`: you can install them with the `cabal-install` tool, as well.
 29 | Once you have installed the `cabal-install` tool, you can build `corrode` by
 30 | navigating to the `corrode` directory, installing the `happy` and `alex` tools,
 31 | and then building and installing the `corrode` package:
 32 | 
 33 | ```
 34 | cabal install happy
 35 | cabal install alex
 36 | cabal install
 37 | ```
 38 | 
 39 | This puts the `corrode` executable in `~/.cabal/bin`, so ensure that that
 40 | location is in your `$PATH`.
 41 | 
 42 | ### Alternate instructions: Stack
 43 | 
 44 | Alternately, you can use the [Haskell Stack](http://haskellstack.org) tool
 45 | for Haskell development. If you don't have it, head over to their website
 46 | and follow the instructions for installing it on your machine.
 47 | 
 48 | Install the Glasgow Haskell Compiler using ```stack setup```. You can skip this
 49 | step if you already have a version of GHC installed on your system.
 50 | You can then build and install `corrode` by navigating to the `corrode`
 51 | directory and running:
 52 | 
 53 | ```
 54 | stack install
 55 | ```
 56 | 
 57 | Stack will build and install `corrode` to `~/.local/bin`. For ease of use, make
 58 | sure that directory is in your `$PATH`.
 59 | 
 60 | To experiment with the project itself, you can build it using
 61 | 
 62 | ```
 63 | stack build
 64 | ```
 65 | 
 66 | then run the executable:
 67 | 
 68 | ```bash
 69 | stack exec -- corrode -Wall filename.c -I/usr/local/include -lm
 70 | ```
 71 | 
 72 | ## Usage
 73 | 
 74 | There are two ways to use Corrode. You can simply generate a `.rs` file
 75 | from the C source, or you can do this _and_ compile in one step.
 76 | 
 77 | ### Generating Rust source
 78 | 
 79 | You can now run `corrode`, giving it any options that `gcc` would
 80 | accept.
 81 | 
 82 | ```
 83 | corrode -Wall filename.c -I/usr/local/include -lm
 84 | ```
 85 | 
 86 | It will only use the options that are relevant to the C pre-processor,
 87 | like `-I` or `-D`, but since it accepts and ignores any other options,
 88 | you can usually get going just by changing `gcc` to `corrode` in the
 89 | `gcc` invocation you've been using.
 90 | 
 91 | Unlike a real C compiler, Corrode does not produce an object file or
 92 | executable! Instead, if you ask it to process `filename.c`, it generates
 93 | equivalent Rust source code in `filename.rs`. If you _do_ want object
 94 | code, there is a script to help with that.
 95 | 
 96 | ### Codegen with compilation
 97 | 
 98 | You can either invoke `rustc` on Corrode's output yourself (or import it
 99 | into a Rust project), or use the `scripts/corrode-cc` tool in place of
100 | `gcc` to compile and link. In many build systems, such as `make`, you
101 | can simply set `CC=corrode-cc` without modification.
102 | 
103 | ## Design principles
104 | 
105 | The overarching goal of Corrode is to preserve the original properties
106 | of the source program as much as possible: behavior, ABI compatibility,
107 | and maintainability. We expect the output of Corrode to be used to
108 | replace the original C, not just as an intermediate step in a compiler
109 | toolchain.
110 | 
111 | Corrode aims to produce Rust source code which behaves exactly the same
112 | way that the original C source behaved, if the input is free of
113 | undefined and implementation-defined behavior. In the presence of
114 | undefined behavior, we've tried to pick a behavior that isn't too
115 | surprising. For example, if a signed addition might overflow (which is
116 | undefined behavior in C), Corrode just translates it to Rust's `+`
117 | operator, which panics on overflow in debug builds.
118 | 
119 | The compiled Rust source in turn will be ABI-compatible with the
120 | original C. If you compile Corrode-generated Rust to a `.o` file, you
121 | can link to it exactly as if it were generated from the original C.
122 | Every function that Corrode generates with be annotated with the `extern
123 | "C"` modifier.
124 | 
125 | At the same time, Corrode should produce code which is recognizably
126 | structured like the original, so that the output is as maintainable as
127 | the original. Every statement and every expression should be represented
128 | in the output&mdash;in the same order, where possible. If a programmer
129 | went to the trouble to put something in, we usually want it in the
130 | translated output; if it's not necessary, we can let the Rust compiler
131 | warn about it.
132 | 
133 | If either behavior or ABI is not preserved, we consider that a bug in
134 | Corrode. However, it is not always possible to preserve the structure of
135 | the original code, so we do the best that we can.
136 | 
137 | ## Testing
138 | 
139 | So far, Corrode has primarily been tested by generating random C
140 | programs using [csmith](https://github.com/csmith-project/csmith),
141 | fixing Corrode until it can handle all syntax used in that particular
142 | program, and verifying that the resulting Rust module compiles without
143 | errors.
144 | 
145 | Verifying that the translated output is equivalent to the input is not
146 | trivial. One approach I think is worth trying is to use the
147 | Galois [Software Analysis Workbench](http://saw.galois.com/) to prove
148 | that the LLVM bitcode generated from `clang` on a C source file is
149 | equivalent to the LLVM bitcode generated from `rustc` on a Rust source
150 | file from Corrode. SAW uses a symbolic simulator over LLVM bitcode to
151 | extract logical formulas representing the behavior of each function, and
152 | then uses an SMT solver to prove equivalence between pairs of formulas.
153 | Generating large numbers of random C programs using csmith and then
154 | proving the translation results equivalent for each one should give
155 | pretty good confidence in the implementation.
156 | 
157 | Because the project is still in its early phases, it is not yet possible
158 | to translate most real C programs or libraries. But if you have one you
159 | particularly want to try out, I'd love to get pull requests implementing
160 | more of C!
161 | 
162 | ## Contributing
163 | 
164 | If this seems cool and you'd like to help complete it, welcome! There
165 | are quite a few fundamental pieces of the C standard which are not yet
166 | implemented. I'd love to chat with you if you're not quite sure how to
167 | get started! You can e-mail me at <mailto:jamey@minilop.net>.
168 | 
169 | ## What Corrode is not
170 | 
171 | A Rust module that exactly captures the semantics of a C source file is
172 | a Rust module that doesn't look very much like Rust. ;-) I would like to
173 | build a companion tool which rewrites parts of a valid Rust program in
174 | ways that have the same result but make use of Rust idioms. I think it
175 | should be separate from this tool because I expect it to be useful for
176 | other folks, not just users of Corrode. I propose to call that program
177 | "idiomatic", and I think it should be written in Rust using the Rust AST
178 | from [`syntex_syntax`](https://github.com/serde-rs/syntex).
179 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/corrode.cabal:
--------------------------------------------------------------------------------
 1 | name:                corrode
 2 | version:             0.1.0.0
 3 | license:             GPL-2
 4 | license-file:        LICENSE
 5 | author:              Jamey Sharp
 6 | maintainer:          jamey@minilop.net
 7 | category:            Language
 8 | build-type:          Simple
 9 | cabal-version:       >=1.10
10 | 
11 | library
12 |   hs-source-dirs:      src
13 |   exposed-modules:     Language.Rust,
14 |                        Language.Rust.AST,
15 |                        Language.Rust.Idiomatic,
16 |                        Language.Rust.Corrode.C,
17 |                        Language.Rust.Corrode.CFG,
18 |                        Language.Rust.Corrode.CrateMap
19 |   ghc-options:         -Wall -fwarn-incomplete-uni-patterns -pgmL markdown-unlit
20 |   default-language:    Haskell2010
21 |   build-depends:       base >=4.8,
22 |                        array >= 0.4,
23 |                        containers >= 0.5,
24 |                        language-c >=0.6 && <0.7,
25 |                        markdown-unlit,
26 |                        pretty,
27 |                        transformers
28 | 
29 | executable corrode
30 |   main-is:             Main.lhs
31 |   ghc-options:         -Wall -pgmL markdown-unlit
32 |   default-language:    Haskell2010
33 |   build-depends:       base >=4.8,
34 |                        bytestring,
35 |                        corrode,
36 |                        filepath,
37 |                        language-c >=0.6 && <0.7,
38 |                        markdown-unlit,
39 |                        pretty,
40 |                        transformers >=0.2
41 | 
42 | test-suite tests
43 |   type:                exitcode-stdio-1.0
44 |   hs-source-dirs:      tests
45 |   main-is:             test.hs
46 |   build-depends:       base
47 |                      , containers >= 0.5
48 |                      , corrode
49 |                      , pretty
50 |                      , tasty
51 |                      , tasty-quickcheck
52 |                      , transformers >=0.2
53 |   ghc-options:         -Wall -threaded -rtsopts -with-rtsopts=-N
54 |   default-language:    Haskell2010
55 | 


--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | 


--------------------------------------------------------------------------------
/fixGitSymlinksForWindows.bat:
--------------------------------------------------------------------------------
 1 | @echo.
 2 | @echo For converting Git symlink files to Windows file symlinks.
 3 | @echo    * Run in repository root as Administrator.
 4 | @echo    * Handling of folder symlinks is not implemented.
 5 | @echo    * Intended for windows versions Vista and above (Win 7,8)
 6 | @echo.
 7 | @echo    Thanks to: http://stackoverflow.com/a/5930443/1031870
 8 | @echo    v1.02 (c) 2015 Robert Benko (Quazistax), License: MIT
 9 | @echo.
10 | 
11 | @echo off
12 | pushd "%~dp0"
13 | setlocal EnableDelayedExpansion
14 | call :raiseUACIfNotAdmin || exit /B 1
15 | for /f "tokens=3,*" %%e in ('git ls-files -s ^| findstr /R /C:"^120000"') do (
16 |      call :processFirstLine %%f
17 | )
18 | REM pause
19 | goto :eof
20 | 
21 | :processFirstLine
22 | @echo.
23 | @echo FILE:    %1
24 | 
25 | dir "%~f1" | find "<SYMLINK>" >NUL && (
26 |   @echo FILE already is a symlink
27 |   goto :eof
28 | )
29 | 
30 | for /f "usebackq tokens=*" %%l in ("%~f1") do (
31 |   @echo LINK TO: %%l
32 | 
33 |   del "%~f1"
34 |   if not !ERRORLEVEL! == 0 (
35 |     @echo FAILED: del
36 |     goto :eof
37 |   )
38 | 
39 |   setlocal
40 |   call :expandRelative linkto "%1" "%%l"
41 |   mklink "%~f1" "!linkto!"
42 |   endlocal
43 |   if not !ERRORLEVEL! == 0 (
44 |     @echo FAILED: mklink
45 |     @echo reverting deletion...
46 |     git checkout -- "%~f1"
47 |     goto :eof
48 |   )
49 | 
50 |   git update-index --assume-unchanged "%1"
51 |   if not !ERRORLEVEL! == 0 (
52 |     @echo FAILED: git update-index --assume-unchanged
53 |     goto :eof
54 |   )
55 |   @echo SUCCESS
56 |   goto :eof
57 | )
58 | goto :eof
59 | 
60 | :: param1 = result variable
61 | :: param2 = reference path from which relative will be resolved
62 | :: param3 = relative path
63 | :expandRelative
64 |   pushd .
65 |   cd "%~dp2"
66 |   set %1=%~f3
67 |   popd
68 | goto :eof
69 | 
70 | :raiseUACIfNotAdmin
71 | :: Quick test for Windows generation: UAC aware or not ; all OS before NT4 ignored for simplicity
72 | :: Original from: http://stackoverflow.com/a/14729312/1031870
73 | 
74 | (ver | findstr /IL "5." > NUL  || ver | findstr /IL "4." > NUL) && (
75 |   @echo ERROR: Symlinks are not supported on this version of Windows.
76 |   exit /B 1
77 | )
78 | 
79 | :: Test if Admin
80 | call net session >NUL 2>&1
81 | if not !ERRORLEVEL! == 0 (
82 |   :: Start batch again with UAC
83 |   echo Set UAC = CreateObject^("Shell.Application"^) > "%temp%\getadmin.vbs"
84 |   echo UAC.ShellExecute "cmd.exe", "/K ""cd /d %~dp0 && %~s0""", "%~dp0", "runas", 1 >> "%temp%\getadmin.vbs"
85 |   @echo Requesting administrative privileges...
86 |   "%temp%\getadmin.vbs"
87 |   del "%temp%\getadmin.vbs"
88 |   exit /B 2
89 | )
90 | exit /B 0
91 | goto :eof
92 | 


--------------------------------------------------------------------------------
/scripts/corrode-cc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import fnmatch
 4 | import hashlib
 5 | import os
 6 | from pathlib import PurePath
 7 | import subprocess
 8 | import sys
 9 | 
10 | cflags = []
11 | outfile = None
12 | depflags = []
13 | origflags = iter(sys.argv[1:])
14 | try:
15 |     while True:
16 |         flag = next(origflags)
17 |         if flag in ("-MD", "-MMD"):
18 |             depflags.append(flag[:-1])
19 |         elif flag.startswith("-M"):
20 |             depflags.append(flag)
21 |             if flag in ("-MF", "-MT", "-MQ"):
22 |                 depflags.append(next(origflags))
23 |         elif flag == "-o":
24 |             outfile = next(origflags)
25 |         else:
26 |             cflags.append(flag)
27 | except StopIteration:
28 |     pass
29 | 
30 | outflags = ['-o', outfile] if outfile is not None else []
31 | 
32 | if "-M" in depflags or "-MM" in depflags:
33 |     depflags.extend(flag for flag in cflags if flag not in ('-c', '-S', '-E'))
34 |     result = subprocess.run(['gcc'] + depflags)
35 |     if result.returncode != 0:
36 |         sys.exit(result.returncode)
37 | 
38 | if '-c' in cflags:
39 |     try:
40 |         rsfile = subprocess.run(
41 |             ['corrode'] + cflags,
42 |             stdout=subprocess.PIPE,
43 |             stderr=subprocess.PIPE,
44 |             check=True
45 |         ).stdout.splitlines()[0]
46 | 
47 |         if outfile is None:
48 |             outfile = str(PurePath(rsfile).with_suffix(".o"))
49 | 
50 |         rustwarn = subprocess.run(
51 |             ['rustc', '--crate-type=dylib', '--emit', 'obj', '-o', outfile, rsfile],
52 |             stderr=subprocess.PIPE,
53 |             check=True,
54 |         ).stderr
55 |         if rustwarn:
56 |             sys.stderr.buffer.write(rustwarn)
57 |     except subprocess.SubprocessError as e:
58 |         with open('errors-' + hashlib.md5(e.stderr).hexdigest(), 'wb') as f:
59 |             f.write(e.stderr)
60 | 
61 |         sys.exit(subprocess.run(['gcc'] + outflags + cflags).returncode)
62 | else:
63 |     sysroot = subprocess.run(
64 |         ['rustc', '--print', 'sysroot'],
65 |         stdout=subprocess.PIPE,
66 |         check=True,
67 |     ).stdout.strip()
68 | 
69 |     rustlibdir = sysroot + b'/lib/rustlib/x86_64-unknown-linux-gnu/lib/'
70 | 
71 |     needed = [b'libcompiler-rt.a'] + [
72 |         b'lib' + lib + b'-*.rlib'
73 |         for lib in (b'core', b'std', b'collections', b'panic_unwind', b'rustc_unicode', b'unwind', b'rand', b'alloc', b'alloc_system', b'libc')
74 |     ]
75 |     alllibs = os.listdir(rustlibdir)
76 | 
77 |     rustlibs = [ rustlibdir + fnmatch.filter(alllibs, pat)[0] for pat in needed ]
78 | 
79 |     sys.exit(subprocess.run(['gcc'] + outflags + cflags + rustlibs + ['-lgcc_eh', '-pthread', '-lm', '-ldl']).returncode)
80 | 


--------------------------------------------------------------------------------
/scripts/csmith-test:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Run this script from an empty temporary directory; it places all of
  4 | # its output in the current working directory.
  5 | #
  6 | # You probably need to run this program with the include path for
  7 | # csmith.h; for example:
  8 | #   -I/usr/local/include/csmith-2.3.0
  9 | #
 10 | # You may pass -D or -U flags for the C pre-processor as well.
 11 | #
 12 | # In addition, you may pass flags for csmith. This is especially useful
 13 | # if you have the random seed from a previous run and you want to retry
 14 | # it:
 15 | #    --seed <seed>
 16 | 
 17 | import os
 18 | import os.path
 19 | import re
 20 | import shlex
 21 | import subprocess
 22 | import sys
 23 | 
 24 | # How long to wait, in seconds, for various subprocesses to finish.
 25 | csmith_timeout = 90
 26 | compiler_timeout = 120
 27 | prog_timeout = 8
 28 | 
 29 | # Disable operations Corrode can't handle yet.
 30 | #
 31 | # NOTE: Although Corrode supports pointers, we can't use them together
 32 | # with global variables because Rust doesn't allow storing the address
 33 | # of one static variable in another. Given the choice between globals or
 34 | # pointers, we currently have to pick globals because otherwise csmith's
 35 | # generated test harness doesn't report anything meaningful.
 36 | CSMITH_FLAGS = [
 37 |     '--no-arrays',
 38 |     '--no-bitfields',
 39 |     '--no-jumps',
 40 |     '--no-packed-struct',
 41 |     '--no-pointers',
 42 |     '--no-unions',
 43 |     '--no-volatiles',
 44 |     '--no-builtins',
 45 | ]
 46 | 
 47 | # Disable all warnings from the compilers, using `-w` for gcc/clang and
 48 | # `-A warnings` for rustc. csmith-generated programs trigger plenty of
 49 | # warnings and that's OK. We only care that the program can be compiled
 50 | # without errors.
 51 | #
 52 | # When pre-processing the C source with either the C compiler or
 53 | # Corrode, we also define macros that make `csmith.h` use fewer features
 54 | # of standard C, so Corrode has a better shot at translating it.
 55 | CFLAGS = ['-w', '-DCSMITH_MINIMAL', '-DUSE_MATH_MACROS']
 56 | RUSTFLAGS = ['-A', 'warnings']
 57 | 
 58 | compiler_env = os.environ.copy()
 59 | # If ccache is installed, there's no point caching the builds of these
 60 | # randomly generated C files.
 61 | compiler_env['CCACHE_DISABLE'] = '1'
 62 | 
 63 | def test(cfile):
 64 |     try:
 65 |         # Generate a new random C program. Pass --output last so the
 66 |         # user can't accidentally override it.
 67 |         subprocess.run(['csmith'] + CSMITH_FLAGS + ['--output', cfile],
 68 |             check=True, timeout=csmith_timeout, env=compiler_env)
 69 |     except subprocess.SubprocessError:
 70 |         # If csmith failed, we won't find any interesting Corrode or
 71 |         # Rust bugs with this program.
 72 |         return
 73 | 
 74 |     return check(cfile)
 75 | 
 76 | def check(cfile):
 77 |     rsfile = os.path.splitext(cfile)[0] + '.rs'
 78 |     cprog = './via-c'
 79 |     rsprog = './via-rust'
 80 | 
 81 |     # If csmith didn't generate any variables that it could update the
 82 |     # CRC with, this program is boring and we shouldn't bother testing
 83 |     # it.
 84 |     with open(cfile) as f:
 85 |         crc_count = sum('transparent_crc(' in line for line in f)
 86 |     if not crc_count:
 87 |         return
 88 | 
 89 |     try:
 90 |         # Compile with a real C compiler for reference.
 91 |         subprocess.run([
 92 |             'gcc',
 93 |             '-o', cprog,
 94 |             cfile
 95 |         ] + CFLAGS, check=True, timeout=compiler_timeout, env=compiler_env)
 96 |     except subprocess.SubprocessError:
 97 |         # If the C compiler failed, we won't find any interesting
 98 |         # Corrode or Rust bugs with this program.
 99 |         return
100 | 
101 |     try:
102 |         # Translate this C program to Rust.
103 |         subprocess.run([
104 |             'corrode',
105 |             cfile
106 |         ] + CFLAGS, check=True, timeout=compiler_timeout, env=compiler_env)
107 | 
108 |         # Compile the generated Rust program.
109 |         subprocess.run([
110 |             'rustc',
111 |             '-o', rsprog,
112 |             rsfile
113 |         ] + RUSTFLAGS, check=True, timeout=compiler_timeout, env=compiler_env)
114 |     except subprocess.SubprocessError as e:
115 |         # Found a compile-time bug!
116 |         return "compiling via Rust failed: {}".format(e)
117 | 
118 |     # Get reference output from the version compiled with a native C
119 |     # compiler. If any of the reference output looks wrong, comparing to
120 |     # the output from the Rust version won't be interesting.
121 |     try:
122 |         ref_long = subprocess.run(
123 |             [cprog, '1'], check=True, timeout=prog_timeout,
124 |             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
125 | 
126 |         # Long output must have one line per call to `transparent_crc`.
127 |         if ref_long.stderr or ref_long.stdout.count(b'\n') != crc_count:
128 |             return
129 | 
130 |         ref_short = subprocess.run(
131 |             [cprog], check=True, timeout=prog_timeout,
132 |             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
133 | 
134 |         # Short output must have one checksum line.
135 |         if ref_short.stderr or not re.match(b'checksum = [0-9a-fA-F]+\n$', ref_short.stdout):
136 |             return
137 |     except subprocess.SubprocessError:
138 |         # Give up if either mode of the reference version timed out or
139 |         # exited unsuccessfully.
140 |         return
141 | 
142 |     # Report any differences between the reference output and the Rust
143 |     # version, for both short and long output. Prefer long-output
144 |     # differences as they allow reducing smaller test cases, but if the
145 |     # long output is the same we should still catch differences in the
146 |     # short output.
147 |     return (
148 |         result_differences([rsprog, '1'], ref_long) or
149 |         result_differences([rsprog], ref_short)
150 |     )
151 | 
152 | def result_differences(cmd, ref):
153 |     cmdstr = ' '.join(cmd)
154 | 
155 |     # Get output from the Rust version. If the command fails in any way,
156 |     # return a message rather than propagating an exception.
157 |     try:
158 |         test = subprocess.run(
159 |             cmd, timeout=prog_timeout,
160 |             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
161 |     except subprocess.TimeoutExpired as e:
162 |         return "'{}' timeout after {} seconds".format(cmdstr, e.timeout)
163 | 
164 |     if test.stderr:
165 |         return "'{}' error:\n{}".format(cmdstr, test.stderr.decode())
166 | 
167 |     if test.returncode != 0:
168 |         return "'{}' failed with status {}".format(cmdstr, test.returncode)
169 | 
170 |     if not test.stdout:
171 |         return "'{}' produced no output".format(cmdstr)
172 | 
173 |     ref_lines = ref.stdout.splitlines()
174 |     test_lines = test.stdout.splitlines()
175 | 
176 |     if len(ref_lines) != len(test_lines):
177 |         return "'{}' produced wrong output:\n{}".format(cmdstr, test.stdout.decode())
178 | 
179 |     differences = [
180 |         "expected '{}', got '{}'".format(r.decode(), t.decode())
181 |         for (r,t) in zip(ref_lines, test_lines)
182 |         if r != t
183 |     ]
184 |     if differences:
185 |         return "'{}' produced wrong output:\n{}".format(cmdstr, '\n'.join(differences))
186 | 
187 |     # Otherwise, the test output matched the reference output. Hooray!
188 |     return None
189 | 
190 | def creduce(origfile, errorfile):
191 |     reduced = 'reduced.c'
192 | 
193 |     # Preprocess the source. C-Reduce seems to be able to produce
194 |     # smaller output if the input was already pre-processed.
195 |     subprocess.run(['gcc',
196 |         '-P', '-E',
197 |         '-o', reduced, origfile
198 |     ] + CFLAGS, check=True)
199 | 
200 |     # Package up a recursive call to the current script into a temporary
201 |     # shell script, preserving all the options we were passed.
202 |     script = "test.sh"
203 |     extra_arg = '--reduce-check={},{}'.format(reduced,
204 |         os.path.abspath(errorfile))
205 |     with open(script, 'w') as f:
206 |         f.write('#!/bin/sh\n')
207 |         f.write(' '.join(
208 |             shlex.quote(arg)
209 |             for arg in ['exec'] + sys.argv + [extra_arg]
210 |         ) + ' > /dev/null 2>&1\n')
211 |     os.chmod(script, 0o755)
212 | 
213 |     # Run creduce using the generated shell script.
214 |     subprocess.run(['creduce', script, reduced], check=True)
215 | 
216 | if __name__ == "__main__":
217 |     checkpath = None
218 | 
219 |     for arg in sys.argv[1:]:
220 |         # Put include-path and macro definitions in CFLAGS.
221 |         if arg.startswith(('-I', '-D', '-U')):
222 |             CFLAGS.append(arg)
223 |         # Check if we're being called recursively under creduce.
224 |         elif arg.startswith("--reduce-check="):
225 |             checkpath = arg.split('=', 1)[1].split(',', 1)
226 |         # Pass anything else through to csmith.
227 |         else:
228 |             CSMITH_FLAGS.append(arg)
229 | 
230 |     if checkpath is not None:
231 |         # This input is "interesting" if `check` returned the same error
232 |         # message that we started with.
233 |         with open(checkpath[1]) as f:
234 |             expected_error = f.read()
235 |         if check(checkpath[0]) == expected_error:
236 |             exit(0)
237 |         # Otherwise it's boring and creduce should backtrack.
238 |         exit(1)
239 | 
240 |     cfile = 'random.c'
241 |     result = test(cfile)
242 |     if result:
243 |         print(result)
244 |         with open('error', 'w') as f:
245 |             f.write(result)
246 |         print("reducing to a minimal test case...")
247 |         creduce(cfile, 'error')
248 |         exit(1)
249 | 


--------------------------------------------------------------------------------
/src/Language/Rust.hs:
--------------------------------------------------------------------------------
1 | module Language.Rust (
2 |     module Language.Rust.AST
3 | ) where
4 | 
5 | import Language.Rust.AST
6 | 


--------------------------------------------------------------------------------
/src/Language/Rust/AST.hs:
--------------------------------------------------------------------------------
  1 | module Language.Rust.AST where
  2 | 
  3 | import Data.Char
  4 | import Numeric
  5 | import Text.PrettyPrint.HughesPJClass
  6 | 
  7 | newtype Lifetime = Lifetime String
  8 |     deriving (Show, Eq)
  9 | newtype Type = TypeName String
 10 |     deriving (Show, Eq)
 11 | data LitIntRepr
 12 |     = DecRepr
 13 |     | OctalRepr
 14 |     | HexRepr
 15 |     deriving (Show, Eq)
 16 | data Lit
 17 |     = LitByteStr String
 18 |     | LitByteChar Char
 19 |     | LitBool Bool
 20 |     | LitInt Integer LitIntRepr Type
 21 |     | LitFloat String Type
 22 |     deriving (Show, Eq)
 23 | newtype Var = VarName String
 24 |     deriving (Show, Eq)
 25 | newtype Path = PathSegments [String]
 26 |     deriving Show
 27 | 
 28 | instance Pretty Lifetime where
 29 |     pPrint (Lifetime s) = text "'" <> text s
 30 | 
 31 | instance Pretty Type where
 32 |     pPrint (TypeName s) = text s
 33 | 
 34 | instance Pretty Lit where
 35 |     pPrint lit = case lit of
 36 |         LitByteStr s -> text $ "b\"" ++ concatMap rustByteLit s ++ "\""
 37 |         LitByteChar ch -> text $ "b'" ++ rustByteLit ch ++ "'"
 38 |         LitBool b -> text $ if b then "true" else "false"
 39 |         LitInt i repr (TypeName ty) -> text $ s ++ ty
 40 |             where
 41 |             s = case repr of
 42 |                 DecRepr -> show i
 43 |                 OctalRepr -> "0o" ++ showOct i ""
 44 |                 HexRepr -> "0x" ++ showHex i ""
 45 |         LitFloat s (TypeName ty) -> text $ s ++ ty
 46 |         where
 47 |         -- Rust character and string literals have only a few special
 48 |         -- escape sequences, so we can't reuse any functions for
 49 |         -- escaping Haskell or C strings.
 50 |         rustByteLit '"' = "\\\""
 51 |         rustByteLit '\'' = "\\'"
 52 |         rustByteLit '\n' = "\\n"
 53 |         rustByteLit '\r' = "\\r"
 54 |         rustByteLit '\t' = "\\t"
 55 |         rustByteLit '\\' = "\\\\"
 56 |         rustByteLit '\NUL' = "\\0"
 57 |         rustByteLit ch | ch >= ' ' && ch <= '~' = [ch]
 58 |         rustByteLit ch = "\\x" ++
 59 |             let (u, l) = ord ch `divMod` 16
 60 |             in map (toUpper . intToDigit) [u, l]
 61 | 
 62 | instance Pretty Var where
 63 |     pPrint (VarName s) = text s
 64 | 
 65 | instance Pretty Path where
 66 |     pPrint (PathSegments names) = hcat (punctuate (text "::") (map text names))
 67 | 
 68 | data Visibility = Public | Private
 69 |     deriving (Show, Eq)
 70 | 
 71 | data Mutable = Immutable | Mutable
 72 |     deriving (Show, Eq)
 73 | 
 74 | data Stmt
 75 |     = Stmt Expr
 76 |     | Let Mutable Var (Maybe Type) (Maybe Expr)
 77 |     | StmtItem [Attribute] ItemKind
 78 |     deriving Show
 79 | 
 80 | instance Pretty Stmt where
 81 |     -- Any statement consisting of an expression whose syntax ends with
 82 |     -- a block does not need to be followed by a semicolon, and
 83 |     -- including one anyway is poor style.
 84 |     pPrint (Stmt (BlockExpr b)) = pPrintBlock empty b -- no parens, no semicolon
 85 |     pPrint (Stmt e@(IfThenElse{})) = pPrint e -- no semicolon
 86 |     pPrint (Stmt e@(Loop{})) = pPrint e -- no semicolon
 87 |     pPrint (Stmt e@(While{})) = pPrint e -- no semicolon
 88 |     pPrint (Stmt e@(For{})) = pPrint e -- no semicolon
 89 |     pPrint (Stmt e) = pPrint e <> text ";"
 90 |     pPrint (Let mut var mty minit) = sep
 91 |         [ hsep [text "let", if mut == Mutable then text "mut" else empty, pPrint var]
 92 |         , nest 4 $ maybe empty (\ ty -> text ":" <+> pPrint ty) mty
 93 |         , nest 4 $ maybe empty (\ initial -> text "=" <+> pPrint initial) minit
 94 |         ] <> text ";"
 95 |     pPrint (StmtItem attrs k) = vcat $
 96 |         [ text "#[" <> text attr <> text "]" | Attribute attr <- attrs ] ++ [pPrint k]
 97 | 
 98 | data Block = Block [Stmt] (Maybe Expr)
 99 |     deriving Show
100 | 
101 | pPrintBlock :: Doc -> Block -> Doc
102 | pPrintBlock pre (Block [] e) = sep [pre <+> text "{", nest 4 (maybe empty pPrint e), text "}"]
103 | pPrintBlock pre (Block ss e) = pre <+> text "{" $+$ nest 4 (vcat (map pPrint ss ++ [maybe empty pPrint e])) $+$ text "}"
104 | 
105 | data Attribute = Attribute String
106 |     deriving Show
107 | data Item = Item [Attribute] Visibility ItemKind
108 |     deriving Show
109 | 
110 | instance Pretty Item where
111 |     pPrint (Item attrs vis k) = vcat $
112 |         [ text "#[" <> text attr <> text "]" | Attribute attr <- attrs ] ++
113 |         [(if vis == Public then zeroWidthText "pub " else empty) <> pPrint k]
114 | 
115 | data FunctionAttribute
116 |     = UnsafeFn
117 |     | ExternABI (Maybe String)
118 |     deriving Show
119 | 
120 | instance Pretty FunctionAttribute where
121 |     pPrint UnsafeFn = text "unsafe"
122 |     pPrint (ExternABI mabi) = text "extern" <+> maybe empty (text . show) mabi
123 | 
124 | data ItemKind
125 |     = Function [FunctionAttribute] String [(Mutable, Var, Type)] Type Block
126 |     | Static Mutable Var Type Expr
127 |     | Struct String [(String, Type)]
128 |     | Extern [ExternItem]
129 |     | Use String
130 |     | Enum String [Enumerator]
131 |     | CloneImpl Type -- TODO: generalize `impl` syntax
132 |     deriving Show
133 | 
134 | instance Pretty ItemKind where
135 |     pPrint (Function attrs nm args ret body) = pPrintBlock (cat
136 |         [ hsep (map pPrint attrs) <+> text "fn" <+> text nm <> text "("
137 |         , nest 4 $ sep $ punctuate (text ",")
138 |             [ sep [case mut of Mutable -> text "mut"; Immutable -> empty, pPrint v, text ":", pPrint t] | (mut, v, t) <- args ]
139 |         , text ")" <+> if ret == TypeName "()" then empty else text "->" <+> pPrint ret
140 |         ]) body
141 |     pPrint (Static mut var ty initial) = sep
142 |         [ hsep [text "static", if mut == Mutable then text "mut" else empty, pPrint var]
143 |         , nest 4 $ text ":" <+> pPrint ty
144 |         , nest 4 $ text "=" <+> pPrint initial
145 |         ] <> text ";"
146 |     pPrint (Struct name fields) =
147 |         text "struct" <+> text name <+> text "{" $+$
148 |         nest 4 (vcat [ text "pub" <+> text field <+> text ":" <+> pPrint ty <> text "," | (field, ty) <- fields ]) $+$
149 |         text "}"
150 |     pPrint (Extern defs) = vcat
151 |         ( text "extern {"
152 |         : map (nest 4 . pPrint) defs
153 |         ++ [text "}"]
154 |         )
155 |     pPrint (Use path) = text "use" <+> text path <> text ";"
156 |     pPrint (Enum name enums) =
157 |         text "enum" <+> text name <+> text "{" $+$
158 |         nest 4 (vcat [ pPrint enum <> text "," | enum <- enums ]) $+$
159 |         text "}"
160 |     pPrint (CloneImpl ty) =
161 |         hsep [text "impl", text "Clone", text "for", pPrint ty] <+> text "{" $+$
162 |             nest 4 (hsep
163 |                 [ text "fn"
164 |                 , text "clone" <> parens (text "&self")
165 |                 , text "->"
166 |                 , text "Self"
167 |                 , text "{"
168 |                 , nest 4 (text "*self")
169 |                 , text "}"
170 |                 ]) $+$
171 |             text "}"
172 | 
173 | data ExternItem
174 |     = ExternFn String [(Var, Type)] Bool Type
175 |     | ExternStatic Mutable Var Type
176 |     deriving Show
177 | 
178 | instance Pretty ExternItem where
179 |     pPrint (ExternFn nm args variadic ret) = cat
180 |         [ text "fn" <+> text nm <> text "("
181 |         , nest 4 $ sep $ punctuate (text ",")
182 |             ( [ sep [pPrint v, text ":", pPrint t] | (v, t) <- args ]
183 |             ++ if variadic then [text "..."] else []
184 |             )
185 |         , text ")" <+> (if ret == TypeName "()" then empty else text "->" <+> pPrint ret) <> text ";"
186 |         ]
187 |     pPrint (ExternStatic mut var ty) = hsep
188 |         [ text "static"
189 |         , if mut == Mutable then text "mut" else empty
190 |         , pPrint var
191 |         , text ":"
192 |         , pPrint ty
193 |         ] <> text ";"
194 | 
195 | data Enumerator
196 |     = EnumeratorAuto String
197 |     | EnumeratorExpr String Expr
198 |     deriving Show
199 | 
200 | instance Pretty Enumerator where
201 |     pPrint (EnumeratorAuto name) = text name
202 |     pPrint (EnumeratorExpr name expr) = text name <+> text "=" <+> pPrint expr
203 | 
204 | data Expr
205 |     = Lit Lit
206 |     | Var Var
207 |     | Path Path
208 |     | Index Expr Expr
209 |     | ArrayExpr [Expr]
210 |     | RepeatArray Expr Expr
211 |     | StructExpr String [(String, Expr)] (Maybe Expr)
212 |     | Call Expr [Expr]
213 |     | MethodCall Expr Var [Expr]
214 |     | Lambda [Var] Expr
215 |     | Member Expr Var
216 |     | BlockExpr Block
217 |     | UnsafeExpr Block
218 |     | IfThenElse Expr Block Block
219 |     | Loop (Maybe Lifetime) Block
220 |     | While (Maybe Lifetime) Expr Block
221 |     | For (Maybe Lifetime) Var Expr Block
222 |     | Break (Maybe Lifetime)
223 |     | Continue (Maybe Lifetime)
224 |     | Return (Maybe Expr)
225 |     -- "Unary operators have the same precedence level and are stronger than any of the binary operators."
226 |     -- precedence 12
227 |     | Neg Expr
228 |     | Deref Expr
229 |     | Not Expr -- NOTE: this is both logical not and bitwise complement
230 |     | Borrow Mutable Expr
231 |     -- "Operators at the same precedence level are evaluated left-to-right."
232 |     -- precedence 11
233 |     | Cast Expr Type
234 |     -- precedence 10
235 |     | Mul Expr Expr
236 |     | Div Expr Expr
237 |     | Mod Expr Expr
238 |     -- precedence 9
239 |     | Add Expr Expr
240 |     | Sub Expr Expr
241 |     -- precedence 8
242 |     | ShiftL Expr Expr
243 |     | ShiftR Expr Expr
244 |     -- precedence 7
245 |     | And Expr Expr
246 |     -- precedence 6
247 |     | Xor Expr Expr
248 |     -- precedence 5
249 |     | Or Expr Expr
250 |     -- precedence 4
251 |     | CmpLT Expr Expr
252 |     | CmpGT Expr Expr
253 |     | CmpLE Expr Expr
254 |     | CmpGE Expr Expr
255 |     | CmpEQ Expr Expr
256 |     | CmpNE Expr Expr
257 |     -- precedence 3
258 |     | LAnd Expr Expr
259 |     -- precedence 2
260 |     | LOr Expr Expr
261 |     -- precedence 1
262 |     | Range Expr Expr
263 |     | Assign Expr AssignOp Expr
264 |     deriving Show
265 | 
266 | data AssignOp
267 |     = (:=)
268 |     | (:+=)
269 |     | (:-=)
270 |     | (:*=)
271 |     | (:/=)
272 |     | (:%=)
273 |     | (:&=)
274 |     | (:|=)
275 |     | (:^=)
276 |     | (:<<=)
277 |     | (:>>=)
278 |     deriving Show
279 | 
280 | -- If a block is at the beginning of a statement, Rust parses it as if
281 | -- it were followed by a semicolon. If we didn't intend to put an
282 | -- implicit semicolon there, then we need to wrap the block in
283 | -- parentheses. The top-most expression doesn't need parentheses, but
284 | -- otherwise the first block encountered by following left children down
285 | -- the AST does. Any sub-expression that has an operator or keyword in
286 | -- front of it is not a left child.
287 | --
288 | -- If we get this wrong, Rust will either mis-parse the expression (if
289 | -- we don't have enough parentheses) or warn about excess parentheses.
290 | -- So it's worth going to some trouble to get this right.
291 | data ExprPosition = TopExpr | LeftExpr | RightExpr
292 |     deriving Show
293 | 
294 | instance Pretty Expr where
295 |     pPrintPrec _ = go TopExpr
296 |         where
297 |         leftParens LeftExpr body = parens body
298 |         leftParens _ body = body
299 |         left TopExpr = LeftExpr
300 |         left pos = pos
301 | 
302 |         go _ _ (Lit x) = pPrint x
303 |         go _ _ (Var x) = pPrint x
304 |         go _ _ (Path x) = pPrint x
305 |         go pos _ (Index arr idx) = cat [go (left pos) 13 arr <> text "[", nest 4 (go RightExpr 0 idx), text "]"]
306 |         go _ _ (ArrayExpr els) = sep
307 |             [ text "["
308 |             , sep (punctuate (text ",") (map (nest 4 . go RightExpr 0) els))
309 |             , text "]"
310 |             ]
311 |         go _ _ (RepeatArray el size) = text "[" <> go RightExpr 0 el <> text ";" <+> go RightExpr 0 size <> text "]"
312 |         go _ _ (StructExpr str fields base) = sep
313 |             ( text str <+> text "{"
314 |             : punctuate (text ",") ([ nest 4 (text name <> text ":" <+> go RightExpr 0 val) | (name, val) <- fields ] ++ maybe [] (\b -> [ text ".." <> go RightExpr 0 b ]) base)
315 |             ++ [text "}"]
316 |             )
317 |         go pos _ (Call f args) = cat
318 |             ( go (left pos) 14 f <> text "("
319 |             : punctuate (text ",") (map (nest 4 . go RightExpr 0) args)
320 |             ++ [text ")"]
321 |             )
322 |         go pos _ (MethodCall self f args) = cat
323 |             ( go (left pos) 13 self <> text "." <> pPrint f <> text "("
324 |             : punctuate (text ",") (map (nest 4 . go RightExpr 0) args)
325 |             ++ [text ")"]
326 |             )
327 |         go _ _ (Lambda args body) =
328 |             let args' = sep (punctuate (text ",") (map pPrint args))
329 |             in text "|" <> args' <> text "|" <+> go RightExpr 0 body
330 |         go pos d (Member obj field) = maybeParens (d > 13) (go (left pos) 13 obj <> text "." <> pPrint field)
331 |         go pos _ (BlockExpr x) = leftParens pos (pPrintBlock empty x)
332 |         go pos _ (UnsafeExpr x) = leftParens pos (pPrintBlock (text "unsafe") x)
333 |         go pos _ (IfThenElse c t f) = leftParens pos ((if any hasStmt clauses then vcat else sep) (concatMap body clauses ++ [text "}"]))
334 |             where
335 |             clauses = (text "if" <+> go RightExpr 0 c <+> text "{", t) : ladder f
336 |             hasStmt (_, Block [] _) = False
337 |             hasStmt _ = True
338 |             body (pre, Block ss e) = pre : map (nest 4) (map pPrint ss ++ [maybe empty (go LeftExpr 0) e])
339 |             ladder (Block [] Nothing) = []
340 |             ladder (Block [] (Just (IfThenElse c' t' f'))) = elseIf c' t' f'
341 |             ladder (Block [Stmt (IfThenElse c' t' f')] Nothing) = elseIf c' t' f'
342 |             ladder f' = [(text "}" <+> text "else" <+> text "{", f')]
343 |             elseIf c' t' f' = (text "} else if" <+> go RightExpr 0 c' <+> text "{", t') : ladder f'
344 |         go pos _ (Loop lt b) = leftParens pos (pPrintBlock (optLabel lt <+> text "loop") b)
345 |         go pos _ (While lt c b) = leftParens pos (pPrintBlock (optLabel lt <+> text "while" <+> go RightExpr 0 c) b)
346 |         go pos _ (For lt v i b) = leftParens pos (pPrintBlock (optLabel lt <+> text "for" <+> pPrint v <+> text "in" <+> go RightExpr 0 i) b)
347 |         go _ _ (Break lt) = text "break" <+> maybe empty pPrint lt
348 |         go _ _ (Continue lt) = text "continue" <+> maybe empty pPrint lt
349 |         go _ _ (Return Nothing) = text "return"
350 |         go _ _ (Return (Just e)) = hang (text "return") 4 (go RightExpr 0 e)
351 |         -- operators:
352 |         go _   d (Neg       e) = unary     d 12 "-" e
353 |         go _   d (Deref     e) = unary     d 12 "*" e
354 |         go _   d (Not       e) = unary     d 12 "!" e
355 |         go _   d (Borrow m  e) = unary     d 12 (case m of Immutable -> "&"; Mutable -> "&mut ") e
356 |         go pos d (Cast   e t) = maybeParens (d > 11) (go (left pos) 11 e <+> text "as" <+> parens (pPrint t))
357 |         go pos d (Mul    a b) = binary pos d 10 a "*" b
358 |         go pos d (Div    a b) = binary pos d 10 a "/" b
359 |         go pos d (Mod    a b) = binary pos d 10 a "%" b
360 |         go pos d (Add    a b) = binary pos d  9 a "+" b
361 |         go pos d (Sub    a b) = binary pos d  9 a "-" b
362 |         go pos d (ShiftL a b) = binary pos d  8 a "<<" b
363 |         go pos d (ShiftR a b) = binary pos d  8 a ">>" b
364 |         go pos d (And    a b) = binary pos d  7 a "&" b
365 |         go pos d (Xor    a b) = binary pos d  6 a "^" b
366 |         go pos d (Or     a b) = binary pos d  5 a "|" b
367 |         go pos d (CmpLT  a b) = nonass pos d  4 a "<" b
368 |         go pos d (CmpGT  a b) = nonass pos d  4 a ">" b
369 |         go pos d (CmpLE  a b) = nonass pos d  4 a "<=" b
370 |         go pos d (CmpGE  a b) = nonass pos d  4 a ">=" b
371 |         go pos d (CmpEQ  a b) = nonass pos d  4 a "==" b
372 |         go pos d (CmpNE  a b) = nonass pos d  4 a "!=" b
373 |         go pos d (LAnd   a b) = binary pos d  3 a "&&" b
374 |         go pos d (LOr    a b) = binary pos d  2 a "||" b
375 |         go pos d (Range  a b) = binary pos d  1 a ".." b
376 |         go pos d (Assign a op b) = binary pos d 1 a (assignOp op ++ "=") b
377 | 
378 |         optLabel = maybe empty (\ label -> pPrint label <> text ":")
379 | 
380 |         unary d n op e = maybeParens (d > n) (text op <> go RightExpr n e)
381 | 
382 |         -- If a same-precedence operator appears nested on the right,
383 |         -- then it needs parens, so increase the precedence there.
384 |         binary pos d n a op b = maybeParens (d > n) (go (left pos) n a <+> text op <+> go RightExpr (n + 1) b)
385 | 
386 |         -- Non-associative operators need parens if they're nested.
387 |         nonass pos d n a op b = maybeParens (d >= n) (go (left pos) n a <+> text op <+> go RightExpr n b)
388 | 
389 |         assignOp (:=) = ""
390 |         assignOp (:+=) = "+"
391 |         assignOp (:-=) = "-"
392 |         assignOp (:*=) = "*"
393 |         assignOp (:/=) = "/"
394 |         assignOp (:%=) = "%"
395 |         assignOp (:&=) = "&"
396 |         assignOp (:|=) = "|"
397 |         assignOp (:^=) = "^"
398 |         assignOp (:<<=) = "<<"
399 |         assignOp (:>>=) = ">>"
400 | 
401 | -- These instances are mostly convenient for typing expressions in GHCi.
402 | 
403 | instance Num Expr where
404 |     (+) = Add
405 |     (-) = Sub
406 |     (*) = Mul
407 |     negate = Neg
408 |     fromInteger i = Lit (LitInt i DecRepr (TypeName ""))
409 | 
410 | instance Fractional Expr where
411 |     (/) = Div
412 |     recip = Div (Lit (LitFloat "1.0" (TypeName "")))
413 |     fromRational r = Lit (LitFloat (show (fromRational r :: Double)) (TypeName ""))
414 | 


--------------------------------------------------------------------------------
/src/Language/Rust/Corrode/C.lhs:
--------------------------------------------------------------------------------
1 | C.md


--------------------------------------------------------------------------------
/src/Language/Rust/Corrode/CFG.lhs:
--------------------------------------------------------------------------------
1 | CFG.md


--------------------------------------------------------------------------------
/src/Language/Rust/Corrode/CFG.md:
--------------------------------------------------------------------------------
  1 | Languages with different primitives for control-flow can be tricky for
  2 | automatic translation. That's especially true if you're translating from
  3 | a language that allows arbitrary `goto` statements, like C, to a
  4 | language that does not, like pretty much every other widely used
  5 | programming language.
  6 | 
  7 | This module takes care of most of that complexity in two steps.
  8 | 
  9 | 1. First, it allows you to construct a Control-Flow Graph (CFG)
 10 | representing all loops, conditionals, and gotos for a function in the
 11 | source program. (This is usually pretty straight-forward.)
 12 | 
 13 | 2. Then this module can analyse that CFG and identify which parts should
 14 | be treated as loops and which should be treated as `if`-statements, and
 15 | what order those should appear in for the translated function.
 16 | 
 17 | If there are `goto` statements in the source, the output of step 2 may
 18 | look very different than the input to step 1!
 19 | 
 20 | ```haskell
 21 | {-# LANGUAGE Rank2Types #-}
 22 | module Language.Rust.Corrode.CFG (
 23 |     Label, Terminator'(..), Terminator, BasicBlock(..),
 24 |     CFG(..), Unordered, DepthFirst, prettyCFG,
 25 |     BuildCFGT, mapBuildCFGT, addBlock, newLabel, buildCFG,
 26 |     removeEmptyBlocks, depthFirstOrder,
 27 |     prettyStructure, relooperRoot, structureCFG,
 28 | ) where
 29 | 
 30 | import Control.Monad
 31 | import Control.Monad.Trans.State
 32 | import Data.Foldable
 33 | import qualified Data.IntMap.Lazy as IntMap
 34 | import qualified Data.IntSet as IntSet
 35 | import Data.Maybe
 36 | import Data.Traversable
 37 | import Text.PrettyPrint.HughesPJClass hiding (empty)
 38 | ```
 39 | 
 40 | 
 41 | Control-Flow Graph representation
 42 | =================================
 43 | 
 44 | A control-flow graph is a collection of "basic blocks" containing
 45 | sequential code, plus arrows indicating what to execute next when the
 46 | computer reaches the end of the current basic block.
 47 | 
 48 | To be a valid basic block, control flow must enter only at the beginning
 49 | of the block, and leave only at the end.
 50 | 
 51 | Basic blocks have a type parameter, `s`, for whatever type you want to
 52 | use to represent the code inside the basic block. This module generally
 53 | doesn't care what representation you use&mdash;a reasonable choice might
 54 | be a list of statements in your target language&mdash;but whatever you
 55 | choose, it should probably have an instance of both `Foldable` and
 56 | `Monoid`. (The built-in list type provides both of these, for instance.)
 57 | Otherwise you won't be able to use some key functions that this module
 58 | provides.
 59 | 
 60 | (We'll discuss the `c` type parameter while explaining terminators,
 61 | next.)
 62 | 
 63 | We assign every basic block an arbitrary "label" that we can use to
 64 | refer to it from elsewhere in the control-flow graph. This could be
 65 | anything, but it's convenient to use distinct integers as labels.
 66 | 
 67 | ```haskell
 68 | data BasicBlock s c = BasicBlock s (Terminator c)
 69 | type Label = Int
 70 | ```
 71 | 
 72 | A basic block ends with a specification of which block to proceed to
 73 | next, which we'll call the block's "terminator".
 74 | 
 75 | We model these cases:
 76 | 
 77 | - `Unreachable` indicates that the source language guarantees that
 78 |   control will never reach the end of this block. This is usually
 79 |   because the block ends with a `return` statement. But it can also
 80 |   happen if the block ends with a call to a function that is known to
 81 |   never return, for example.
 82 | 
 83 | - `Branch` indicates that when this block completes, control always
 84 |   proceeds to the specified block.
 85 | 
 86 | - `CondBranch` is a "conditional branch". If the specified condition is
 87 |   true at runtime, then control goes to the first specified block;
 88 |   otherwise it goes to the second block. Note that we represent
 89 |   conditional branches as always having both a "true" case and a "false"
 90 |   case; there's no implicit "fall-through" behavior like you might find
 91 |   for a conditional jump in assembly language, for instance.
 92 | 
 93 | ```haskell
 94 | data Terminator' c l
 95 |     = Unreachable
 96 |     | Branch l
 97 |     | CondBranch c l l
 98 |     deriving Show
 99 | ```
100 | 
101 | The above `Terminator'` type has two generic type parameters:
102 | 
103 | The first is the type to use for condition expressions. This should
104 | probably be whatever type you use to represent boolean expressions in
105 | your target language, but this module doesn't look at what's inside
106 | those condition expressions at all, so you can use any representation
107 | you want.
108 | 
109 | The second type parameter is for whatever type you want to use for
110 | labels for basic blocks. Although we've chosen a specific `Label` type
111 | above, it's convenient to make this a type parameter so we can define
112 | instances of the standard `Functor` and `Foldable` type-classes, for
113 | generic access to the outgoing edges.
114 | 
115 | For convenience, we define a type alias that specifies that the label
116 | type is specifically the above-chosen `Label`.
117 | 
118 | ```haskell
119 | type Terminator c = Terminator' c Label
120 | 
121 | instance Functor (Terminator' c) where
122 |     fmap = fmapDefault
123 | 
124 | instance Foldable (Terminator' c) where
125 |     foldMap = foldMapDefault
126 | 
127 | instance Traversable (Terminator' c) where
128 |     traverse _ Unreachable = pure Unreachable
129 |     traverse f (Branch l) = Branch <$> f l
130 |     traverse f (CondBranch c l1 l2) = CondBranch c <$> f l1 <*> f l2
131 | ```
132 | 
133 | Now we can define a complete control-flow graph in terms of the previous
134 | types. It has a "start" label, indicating which basic block is the first
135 | one to start executing on entrance to a function; and a map from labels
136 | to their corresponding basic blocks.
137 | 
138 | After the CFG has been constructed, there's a pre-processing step we do
139 | to sort the basic blocks into a useful order. We use a small type-system
140 | trick here to indicate whether that sorting has happened: a value of
141 | type `CFG Unordered` has not been sorted yet, while a `CFG DepthFirst`
142 | has. A function that accepts any `CFG k` doesn't care whether the blocks
143 | have been sorted or not. So keep an eye out for that, below, because the
144 | type signatures serve as documentation of an important precondition.
145 | 
146 | With this type-system trick, the Haskell compiler will enforce that
147 | callers pass only sorted CFGs to functions that require them, which is a
148 | nice sanity check. However, within this module we still have to be
149 | careful to only tag a CFG as sorted if it actually is, and also to tag
150 | functions as requiring a sorted CFG as needed. Haskell can't magically
151 | figure that out!
152 | 
153 | ```haskell
154 | data Unordered
155 | data DepthFirst
156 | data CFG k s c = CFG Label (IntMap.IntMap (BasicBlock s c))
157 | 
158 | instance (Show s, Show c) => Show (CFG k s c) where
159 |     show = render . prettyCFG (text . show) (text . show)
160 | ```
161 | 
162 | When things go wrong, it's handy to be able to print a human-readable
163 | version of an entire control-flow graph so we can inspect it. This
164 | function takes helper functions for formatting statements and
165 | conditional expressions, respectively, and uses them within each basic
166 | block to format the entire control-flow graph.
167 | 
168 | ```haskell
169 | prettyCFG :: (s -> Doc) -> (c -> Doc) -> CFG k s c -> Doc
170 | prettyCFG fmtS fmtC (CFG entry blocks) = vcat $
171 |     (text "start @" <> text (show entry)) : blocks'
172 |     where
173 |     blocks' = do
174 |         (label, BasicBlock stmts term) <- IntMap.toList blocks
175 |         let blockHead = text (show label) <> text ":"
176 |         let blockBody = fmtS stmts
177 |         let blockTail = case term of
178 |                 Unreachable -> text "// unreachable"
179 |                 Branch to -> text ("goto " ++ show to ++ ";")
180 |                 CondBranch cond true false ->
181 |                     text "if(" <> fmtC cond
182 |                         <> text ") goto " <> text (show true)
183 |                         <> text "; else goto " <> text (show false)
184 |                         <> text ";"
185 |         blockHead : map (nest 4) [blockBody, blockTail] ++ [text ""]
186 | ```
187 | 
188 | 
189 | Constructing CFGs
190 | =================
191 | 
192 | This module provides a small monadic interface for constructing
193 | control-flow graphs. It's provided as a "monad transformer", meaning that
194 | you can combine this monad with other monads. For example, if you need
195 | to keep information about variable declarations that are in scope in
196 | order to translate statements and expressions correctly, you can use a
197 | `State` monad for that, and layer this `BuildCFGT` monad on top of it.
198 | Then you can use actions from either monad as needed.
199 | 
200 | ```haskell
201 | type BuildCFGT m s c = StateT (BuildState s c) m
202 | ```
203 | 
204 | Because this is a monad transformer, you may find you need to perform
205 | some operation transforming the underlying monad. For example, `Reader`
206 | monads have a `local` operation that runs some specified monadic action
207 | with a different value in its environment than the outer computation
208 | uses, and similarly with the `Writer` monad's `listen` and `censor`
209 | operations. To use these kinds of operations with `BuildCFGT`, you need
210 | to wrap them in `mapBuildCFGT`.
211 | 
212 | The type signature here is a little bit weird. Your function has to
213 | preserve the current state of the CFG builder, because we're suspending
214 | the usual monad rules that would normally carry that state around behind
215 | the scenes. But we don't allow you to peek at or modify the CFG builder
216 | state along the way. That's enforced by using the GHC `Rank2Types`
217 | language extension (enabled at the top of this module) to declare that
218 | your transformation must work for all possible state types: Code that
219 | must work for all possible types can't possibly do anything but pass
220 | that data along unchanged.
221 | 
222 | ```haskell
223 | mapBuildCFGT
224 |     :: (forall st. m (a, st) -> n (b, st))
225 |     -> BuildCFGT m s c a -> BuildCFGT n s c b
226 | mapBuildCFGT = mapStateT
227 | ```
228 | 
229 | While constructing a new control-flow graph, we need to keep track of
230 | two things: any basic blocks constructed so far, and a unique label to
231 | use for the next basic block. We keep both in a new data type,
232 | `BuildState`.
233 | 
234 | It might seem like we shouldn't need to keep a separate counter for
235 | unique labels. Couldn't we just look at the label for the last block
236 | that was constructed, add 1, and use that as the next block's label?
237 | 
238 | Unfortunately, during CFG construction we often need to refer to blocks
239 | that we haven't constructed yet. For example, to construct a loop, we
240 | might construct the body of the loop with a branch back to the loop
241 | header, and only then construct the loop header with a branch into the
242 | body.
243 | 
244 | That means we may have to generate any number of labels before finishing
245 | the corresponding blocks, so we have to keep track of which IDs we
246 | already handed out.
247 | 
248 | Note that this also means that this intermediate representation of a CFG
249 | is generally not a valid CFG, because it includes blocks that branch to
250 | other blocks that haven't been constructed yet. It's the caller's
251 | responsibility to ensure that all blocks get added eventually.
252 | 
253 | ```haskell
254 | data BuildState s c = BuildState
255 |     { buildLabel :: Label
256 |     , buildBlocks :: IntMap.IntMap (BasicBlock s c)
257 |     }
258 | ```
259 | 
260 | `newLabel` just returns a unique `Label`.
261 | 
262 | ```haskell
263 | newLabel :: Monad m => BuildCFGT m s c Label
264 | newLabel = do
265 |     old <- get
266 |     put old { buildLabel = buildLabel old + 1 }
267 |     return (buildLabel old)
268 | ```
269 | 
270 | `addBlock` saves the given statements and terminator in the state.
271 | 
272 | ```haskell
273 | addBlock :: Monad m => Label -> s -> Terminator c -> BuildCFGT m s c ()
274 | addBlock label stmt terminator = do
275 |     modify $ \ st -> st
276 |         { buildBlocks = IntMap.insert label (BasicBlock stmt terminator)
277 |             (buildBlocks st)
278 |         }
279 | ```
280 | 
281 | Finally we have the function that runs a builder and returns the CFG
282 | that it built. The builder's return value must be the label to use as
283 | the entry-point for the control-flow graph.
284 | 
285 | Note that the constructed CFG is tagged as `Unordered` because we
286 | haven't sorted it yet.
287 | 
288 | ```haskell
289 | buildCFG :: Monad m => BuildCFGT m s c Label -> m (CFG Unordered s c)
290 | buildCFG root = do
291 |     (label, final) <- runStateT root (BuildState 0 IntMap.empty)
292 |     return (CFG label (buildBlocks final))
293 | ```
294 | 
295 | It's normal to write simple translations for building the CFG that
296 | produce some pretty silly-looking control-flow graphs. For example, they
297 | may produce a lot of basic blocks that have no statements in them and
298 | just unconditionally branch somewhere else. Those blocks can be safely
299 | removed, if we're a little careful, without changing the meaning of the
300 | CFG, and that's what `removeEmptyBlocks` does.
301 | 
302 | > **NOTE**: I don't think this is necessary; all of the following
303 | > algorithms should produce the same output even with empty blocks
304 | > present, as far as I can figure. But when something goes wrong and we
305 | > need to report an error, it's nice to have a simpler CFG to examine.
306 | > So I'm not deleting this, but I'm not going to bother documenting how
307 | > it works because it isn't important.
308 | 
309 | > **TODO**: Think about whether this can be folded into
310 | > `depthFirstOrder` without making that function too complicated.
311 | 
312 | ```haskell
313 | removeEmptyBlocks :: Foldable f => CFG k (f s) c -> CFG Unordered (f s) c
314 | removeEmptyBlocks (CFG start blocks) = CFG (rewrite start) blocks'
315 |     where
316 |     go = do
317 |         (empties, done) <- get
318 |         case IntMap.minViewWithKey empties of
319 |             Nothing -> return ()
320 |             Just ((from, to), empties') -> do
321 |                 put (empties', done)
322 |                 step from to
323 |                 go
324 |     step from to = do
325 |         (empties, done) <- get
326 |         case IntMap.splitLookup to empties of
327 |             (_, Nothing, _) -> return ()
328 |             (e1, Just to', e2) -> do
329 |                 put (e1 `IntMap.union` e2, done)
330 |                 step to to'
331 |         (empties', done') <- get
332 |         let to' = IntMap.findWithDefault to to done'
333 |         put (empties', IntMap.insert from to' done')
334 |     isBlockEmpty (BasicBlock s (Branch to)) | null s = Just to
335 |     isBlockEmpty _ = Nothing
336 |     rewrites = snd $ execState go (IntMap.mapMaybe isBlockEmpty blocks, IntMap.empty)
337 |     rewrite to = IntMap.findWithDefault to to rewrites
338 |     discards = IntMap.keysSet (IntMap.filterWithKey (/=) rewrites)
339 |     rewriteBlock from _ | from `IntSet.member` discards = Nothing
340 |     rewriteBlock _ (BasicBlock b term) = Just (BasicBlock b (fmap rewrite term))
341 |     blocks' = IntMap.mapMaybeWithKey rewriteBlock blocks
342 | ```
343 | 
344 | 
345 | Transforming CFGs to structured programs
346 | ========================================
347 | 
348 | Once we've constructed a CFG, the real challenge is to turn that messy
349 | pile of basic blocks back into structured control flow.
350 | 
351 | This implementation would work for a pretty wide variety of languages.
352 | It assumes the target language has:
353 | 
354 | 1. If-then-else,
355 | 2. Loops,
356 | 3. Multi-level exits from loops.
357 | 
358 | That last point needs some explanation. Most languages with loops
359 | provide some way for the programmer to break out of a loop early, or
360 | restart at the beginning of the loop without finishing the current
361 | iteration. (Let's call both kinds of control-flow "loop exits".) Of
362 | those languages, many but not all of them allow the programmer to exit
363 | more than one loop in one go, by giving loops names and specifying which
364 | loop to exit by name. This code assumes that your target language is one
365 | of the latter kind.
366 | 
367 | ```haskell
368 | data StructureLabel s c
369 |     = GoTo { structureLabel :: Label }
370 |     | ExitTo { structureLabel :: Label }
371 |     | Nested [Structure s c]
372 |     deriving Show
373 | 
374 | type StructureTerminator s c = Terminator' c (StructureLabel s c)
375 | type StructureBlock s c = (s, StructureTerminator s c)
376 | 
377 | data Structure' s c a
378 |     = Simple s (StructureTerminator s c)
379 |     | Loop a
380 |     | Multiple (IntMap.IntMap a) a
381 |     deriving Show
382 | 
383 | data Structure s c = Structure
384 |     { structureEntries :: IntSet.IntSet
385 |     , structureBody :: Structure' s c [Structure s c]
386 |     }
387 |     deriving Show
388 | 
389 | prettyStructure :: (Show s, Show c) => [Structure s c] -> Doc
390 | prettyStructure = vcat . map go
391 |     where
392 |     go (Structure _ (Simple s term)) = text (show s ++ ";") $+$ text (show term)
393 |     go (Structure entries (Loop body)) = prettyGroup entries "loop" (prettyStructure body)
394 |     go (Structure entries (Multiple handlers unhandled)) = prettyGroup entries "match" $
395 |         vcat [ text (show entry ++ " =>") $+$ nest 2 (prettyStructure handler) | (entry, handler) <- IntMap.toList handlers ]
396 |         $+$ if null unhandled then mempty else (text "_ =>" $+$ nest 2 (prettyStructure unhandled))
397 | 
398 |     prettyGroup entries kind body =
399 |         text "{" <> hsep (punctuate (text ",") (map (text . show) (IntSet.toList entries))) <> text ("} " ++ kind)
400 |         $+$ nest 2 body
401 | 
402 | relooperRoot :: Monoid s => CFG k s c -> [Structure s c]
403 | relooperRoot (CFG entry blocks) = relooper (IntSet.singleton entry) $
404 |     IntMap.map (\ (BasicBlock s term) -> (s, fmap GoTo term)) blocks
405 | 
406 | relooper :: Monoid s => IntSet.IntSet -> IntMap.IntMap (StructureBlock s c) -> [Structure s c]
407 | relooper entries blocks =
408 | ```
409 | 
410 | First we partition the entry labels into those that some block may
411 | branch to versus those that none can branch to. The key idea is that
412 | entry labels need to be placed early in the output, but if something
413 | later can branch to them, then we need to wrap them in a loop so we can
414 | send control flow back to the entry point again.
415 | 
416 | Each of these cases makes at least one recursive call. To ensure that
417 | this algorithm doesn't get stuck in an infinite loop, we need to make
418 | sure that every recursive call has a "simpler" problem to solve, such
419 | that eventually each subproblem has been made so simple that we can
420 | finish it off immediately. We'll show that the subproblems truly are
421 | simpler in each case.
422 | 
423 | ```haskell
424 |     let (returns, noreturns) = partitionMembers entries $ IntSet.unions $ map successors $ IntMap.elems blocks
425 |         (present, absent) = partitionMembers entries (IntMap.keysSet blocks)
426 |     in case (IntSet.toList noreturns, IntSet.toList returns) of
427 | ```
428 | 
429 | If there are no entry points, then the previous block can't reach any
430 | remaining blocks, so we don't need to generate any code for them. This
431 | is the primary recursive base case for this algorithm.
432 | 
433 | ```haskell
434 |     ([], []) -> []
435 | ```
436 | 
437 | Simple blocks
438 | -------------
439 | 
440 | If there's only one label and it is _not_ the target of a branch in the
441 | current set of blocks, then simply place that label next in the output.
442 | 
443 | This case always removes one block from consideration before making the
444 | recursive call, so the subproblem is one block smaller.
445 | 
446 | ```haskell
447 |     ([entry], []) -> case IntMap.updateLookupWithKey (\ _ _ -> Nothing) entry blocks of
448 |         (Just (s, term), blocks') -> Structure
449 |             { structureEntries = entries
450 |             , structureBody = Simple s term
451 |             } : relooper (successors (s, term)) blocks'
452 | ```
453 | 
454 | If the target is a block that we've already decided to place somewhere
455 | later, then we need to construct a fake block that tells the code
456 | generator to set the current-block state variable appropriately.
457 | 
458 | ```haskell
459 |         (Nothing, _) -> Structure
460 |             { structureEntries = entries
461 |             , structureBody = Simple mempty (Branch (GoTo entry))
462 |             } : []
463 | ```
464 | 
465 | Skipping to blocks placed later
466 | -------------------------------
467 | 
468 | When there are multiple entry labels and some or all of them refer to
469 | blocks that we have already decided to place somewhere later, we need
470 | some way to skip over any intervening code until control flow reaches
471 | wherever we actually placed these blocks. (Note that if we need to
472 | branch to a block that we placed somewhere earlier, then we'll have
473 | already constructed a loop for that, so we don't need to handle that
474 | case here.)
475 | 
476 | We accomplish this by constructing a `Multiple` block with an empty
477 | branch for each absent entry label, and an else-branch that contains the
478 | code we may want to skip. This gets control flow to the end of the
479 | enclosing block. If the target block isn't there either, then we'll do
480 | this again at that point, until we've gotten all the way out to a block
481 | that does contain the target label.
482 | 
483 | However, if we don't have any code to place in the else-branch, then
484 | this procedure would generate a no-op `Multiple` block, so we can avoid
485 | emitting anything at all in that case.
486 | 
487 | ```haskell
488 |     _ | not (IntSet.null absent) ->
489 |         if IntSet.null present then [] else Structure
490 |             { structureEntries = entries
491 |             , structureBody = Multiple
492 |                 (IntMap.fromSet (const []) absent)
493 |                 (relooper present blocks)
494 |             } : []
495 | ```
496 | 
497 | Loops
498 | -----
499 | 
500 | If all the entry labels are targets of branches in some block somewhere,
501 | then construct a loop with all those labels as entry points.
502 | 
503 | To keep the generated code simple, we want to eliminate any absent
504 | entries (the previous case) before constructing a loop. If we generate a
505 | loop with absent entry points, then to handle those inside the loop we'd
506 | need to `break` out of the loop. By doing it in this order instead, we
507 | don't need any code at all in the handlers for the absent branches.
508 | 
509 | In this case, we have one recursive call for the body of the loop, and
510 | another for the labels that go after the loop.
511 | 
512 | - The loop body has the same entry labels. However, for the recursive
513 |   call we remove all the branches that made it a loop, so we're
514 |   guaranteed to not hit this case again with the same set of entry
515 |   labels. As long as the other cases reduce the number of blocks, we're
516 |   set.
517 | 
518 | - For the labels following the loop, we've removed at least the current
519 |   entry labels from consideration, so there are fewer blocks we still
520 |   need to structure.
521 | 
522 | ```haskell
523 |     ([], _) -> Structure
524 |         { structureEntries = entries
525 |         , structureBody = Loop (relooper entries blocks')
526 |         } : relooper followEntries followBlocks
527 |         where
528 | ```
529 | 
530 | The labels that should be included in this loop's body are all those
531 | which can eventually come back to one of the entry points for the loop.
532 | 
533 | Note that `IntMap.keysSet returns' == entries`. If some entry were
534 | not reachable from any other entry, then we would have split it off into
535 | a `Multiple` block first.
536 | 
537 | ```haskell
538 |         returns' = (strictReachableFrom `IntMap.intersection` blocks) `restrictKeys` entries
539 |         bodyBlocks = blocks `restrictKeys`
540 |             IntSet.unions (IntMap.keysSet returns' : IntMap.elems returns')
541 | ```
542 | 
543 | Now that we've identified which labels belong in the loop body, we can
544 | partition the current blocks into those that are inside the loop and
545 | those that follow it.
546 | 
547 | ```haskell
548 |         followBlocks = blocks `IntMap.difference` bodyBlocks
549 | ```
550 | 
551 | Any branches that go from inside this loop to outside it form the entry
552 | points for the block following this one. (There can't be any branches
553 | that go to someplace earlier in the program because we've already
554 | removed those before recursing into some loop that encloses this one.)
555 | 
556 | ```haskell
557 |         followEntries = outEdges bodyBlocks
558 | ```
559 | 
560 | At this point we've identified some branches as either a `break` (so
561 | it's in `followEntries`) or a `continue` (because it was one of this
562 | loop's entry points) branch. When we recurse to structure the body of
563 | this loop, we must not consider those branches again, so we delete them
564 | from the successors of all blocks inside the loop.
565 | 
566 | Note that `structureEntries` for this loop block records the labels that
567 | are `continue` edges, and `structureEntries` for the subsequent block
568 | records the labels that are `break` edges, so we don't need to record
569 | any additional information here.
570 | 
571 | If we fail to delete some branch back to the loop entry, then when we
572 | recurse we'll generate another `Loop` block, which might mean the
573 | algorithm never terminates.
574 | 
575 | If we fail to delete some branch that exits the loop, I think the result
576 | will still be correct, but will have more `Multiple` blocks than
577 | necessary.
578 | 
579 | ```haskell
580 |         markEdge (GoTo label)
581 |             | label `IntSet.member` (followEntries `IntSet.union` entries)
582 |             = ExitTo label
583 |         markEdge edge = edge
584 |         blocks' = IntMap.map (\ (s, term) -> (s, fmap markEdge term)) bodyBlocks
585 | ```
586 | 
587 | Multiple-entry blocks
588 | ---------------------
589 | 
590 | Otherwise, we need to merge multiple control flow paths at this point,
591 | by constructing code that will dynamically check which path we're
592 | supposed to be on.
593 | 
594 | In a `Multiple` block, we construct a separate handler for each entry
595 | label that we can safely split off. We make a recursive call for each
596 | handler, and one more for all the blocks we couldn't handle in this
597 | block.
598 | 
599 | - If there are unhandled blocks, then each handler contains fewer blocks
600 |   than we started with. If we were able to handle all the entry labels,
601 |   then we've partitioned the blocks into at least two non-empty groups,
602 |   so each one is necessarily smaller than we started with. There must be
603 |   at least two entry labels because if there weren't any no-return
604 |   entries then we'd have constructed a loop, and if there were only one
605 |   no-return entry and no entries that can be returned to, we'd have
606 |   constructed a simple block.
607 | 
608 | - Each handler consumes at least its entry label, so as long as we
609 |   generate at least one handler, the recursive call for the unhandled
610 |   blocks will have a smaller subproblem. We can only handle an entry
611 |   label if none of the other entry labels can, through any series of
612 |   branches, branch to this label. But because we aren't in the case
613 |   above for constructing a loop, we know that at least one entry label
614 |   has no branches into it, so we're guaranteed to consume at least one
615 |   block in this pass.
616 | 
617 | ```haskell
618 |     _ -> Structure
619 |         { structureEntries = entries
620 |         , structureBody = Multiple handlers unhandled
621 |         } : relooper followEntries followBlocks
622 |         where
623 | ```
624 | 
625 | The elements in the `singlyReached` map are disjoint sets. Proof: keys
626 | in an `IntMap` are distinct by definition, and the values after `filter`
627 | are singleton sets; so after `flipEdges`, each distinct block can only
628 | be attached to one entry label.
629 | 
630 | ```haskell
631 |         reachableFrom = IntMap.unionWith IntSet.union (IntMap.fromSet IntSet.singleton entries) strictReachableFrom
632 |         singlyReached = flipEdges $ IntMap.filter (\ r -> IntSet.size r == 1) $ IntMap.map (IntSet.intersection entries) reachableFrom
633 | ```
634 | 
635 | Some subset of the entries are now associated with sets of labels that
636 | can only be reached via that entry. Mapping these to their corresponding
637 | blocks preserves the property that they're disjoint.
638 | 
639 | In addition, only labels that are permitted to appear inside this
640 | `Multiple` block will remain after this. Labels which have already been
641 | assigned to a later block won't get duplicated into this one, so we'll
642 | have to generate code to ensure that control continues to the later
643 | copy.
644 | 
645 | ```haskell
646 |         handledEntries = IntMap.map (\ within -> blocks `restrictKeys` within) singlyReached
647 | ```
648 | 
649 | If one of the entry labels can reach another one, then the latter can't
650 | be handled in this `Multiple` block because we'd have no way to make
651 | control flow from one to the other. These unhandled entries must be
652 | handled in subsequent blocks.
653 | 
654 | ```haskell
655 |         unhandledEntries = entries `IntSet.difference` IntMap.keysSet handledEntries
656 | ```
657 | 
658 | All labels that are reachable only from the entry points that we _are_
659 | handling, however, will be placed somewhere inside this `Multiple`
660 | block. Labels that are left over will be placed somewhere after this
661 | block.
662 | 
663 | ```haskell
664 |         handledBlocks = IntMap.unions (IntMap.elems handledEntries)
665 |         followBlocks = blocks `IntMap.difference` handledBlocks
666 | ```
667 | 
668 | The block after this one will have an entry point for each of this
669 | block's unhandled entries, and in addition, one for each branch that
670 | leaves this `Multiple` block.
671 | 
672 | ```haskell
673 |         followEntries = unhandledEntries `IntSet.union` outEdges handledBlocks
674 | ```
675 | 
676 | Finally, we've partitioned the entries and labels into those which
677 | should be inside this `Multiple` block and those which should follow it.
678 | Recurse on each handled entry point.
679 | 
680 | ```haskell
681 |         makeHandler entry blocks' = relooper (IntSet.singleton entry) blocks'
682 |         allHandlers = IntMap.mapWithKey makeHandler handledEntries
683 | ```
684 | 
685 | At this point we could throw all the handlers into a `Multiple` block
686 | and leave the `unhandled` portion empty. However, that generates code
687 | that is both more complicated than necessary, and sometimes wrong, in
688 | the case where we have a handler for every entry label. In that case, if
689 | control reaches the guard for the last handler, then the condition must
690 | always evaluate true, so we can replace a final `else if` statement with
691 | an unconditional `else`.
692 | 
693 | We can prove this using our precise knowledge of the set of values that
694 | the current-block variable could have at this point. But very few
695 | compilers could prove it, because for the general case, tracking precise
696 | value sets is hard and compiler writers don't usually consider the
697 | effort worth-while.
698 | 
699 | As a result, if this block is the last one in a function and every
700 | handler is supposed to return a value, a compiler that verifies that
701 | some value is returned on every path will conclude that some path is
702 | missing a `return` statement, even though we know that path is
703 | unreachable.
704 | 
705 | So, if we have a handler for every entry point, pick one to be the
706 | `else` branch of this block. Otherwise, there is no `else` branch.
707 | 
708 | ```haskell
709 |         (unhandled, handlers) = if IntMap.keysSet allHandlers == entries
710 |             then
711 |                 let (lastHandler, otherHandlers) = IntMap.deleteFindMax allHandlers
712 |                 in (snd lastHandler, otherHandlers)
713 |             else ([], allHandlers)
714 | 
715 |     where
716 |     strictReachableFrom = flipEdges (go (IntMap.map successors blocks))
717 |         where
718 |         grow r = IntMap.map (\ seen -> IntSet.unions $ seen : IntMap.elems (r `restrictKeys` seen)) r
719 |         go r = let r' = grow r in if r /= r' then go r' else r'
720 | 
721 | restrictKeys :: IntMap.IntMap a -> IntSet.IntSet -> IntMap.IntMap a
722 | restrictKeys m s = m `IntMap.intersection` IntMap.fromSet (const ()) s
723 | 
724 | outEdges :: IntMap.IntMap (StructureBlock s c) -> IntSet.IntSet
725 | outEdges blocks = IntSet.unions (map successors $ IntMap.elems blocks) `IntSet.difference` IntMap.keysSet blocks
726 | 
727 | partitionMembers :: IntSet.IntSet -> IntSet.IntSet -> (IntSet.IntSet, IntSet.IntSet)
728 | partitionMembers a b = (a `IntSet.intersection` b, a `IntSet.difference` b)
729 | 
730 | successors :: StructureBlock s c -> IntSet.IntSet
731 | successors (_, term) = IntSet.fromList [ target | GoTo target <- toList term ]
732 | 
733 | flipEdges :: IntMap.IntMap IntSet.IntSet -> IntMap.IntMap IntSet.IntSet
734 | flipEdges edges = IntMap.unionsWith IntSet.union [ IntMap.fromSet (const (IntSet.singleton from)) to | (from, to) <- IntMap.toList edges ]
735 | ```
736 | 
737 | 
738 | Eliminating unnecessary multiple-entry blocks
739 | ---------------------------------------------
740 | 
741 | ```haskell
742 | simplifyStructure :: Monoid s => [Structure s c] -> [Structure s c]
743 | simplifyStructure = foldr go [] . map descend
744 |     where
745 |     descend structure = structure { structureBody =
746 |         case structureBody structure of
747 |         Simple s term -> Simple s term
748 |         Multiple handlers unhandled ->
749 |             Multiple (IntMap.map simplifyStructure handlers) (simplifyStructure unhandled)
750 |         Loop body -> Loop (simplifyStructure body)
751 |     }
752 | ```
753 | 
754 | If there's a `Simple` block immediately followed by a `Multiple` block,
755 | then we know several useful facts immediately:
756 | 
757 | - The `Simple` block terminates with a conditional branch, where both
758 |   targets are distinct `GoTo` labels. Otherwise, the next block wouldn't
759 |   have enough entry points to be a `Multiple` block.
760 | 
761 | - Each target of the conditional branch either has a handler it can be
762 |   replaced by from the `Multiple` block, or it can be replaced with the
763 |   unhandled blocks.
764 | 
765 | - Every non-empty branch of the `Multiple` block will be used by this
766 |   process, so no code will be lost.
767 | 
768 | - This simplification never duplicates code.
769 | 
770 | The one tricky thing here is that under some circumstances we need to
771 | ensure that there's a `mkGoto` statement emitted in some branches.
772 | Conveniently, here we can unconditionally insert an empty `Simple` block
773 | ending in a `GoTo` branch, and let `structureCFG` decide later whether
774 | that requires emitting any actual code.
775 | 
776 | ```haskell
777 |     go (Structure entries (Simple s term))
778 |        (Structure _ (Multiple handlers unhandled) : rest) =
779 |         Structure entries (Simple s (fmap rewrite term)) : rest
780 |         where
781 |         rewrite (GoTo to) = Nested
782 |             $ Structure (IntSet.singleton to) (Simple mempty (Branch (GoTo to)))
783 |             : IntMap.findWithDefault unhandled to handlers
784 |         rewrite _ = error ("simplifyStructure: Simple/Multiple invariants violated in " ++ show entries)
785 | 
786 |     go block rest = block : rest
787 | 
788 | -- We no longer care about ordering, but reachability needs to only include
789 | -- nodes that are reachable from the function entry, and this has the side
790 | -- effect of pruning unreachable nodes from the graph.
791 | depthFirstOrder :: CFG k s c -> CFG DepthFirst s c
792 | depthFirstOrder (CFG start blocks) = CFG start' blocks'
793 |     where
794 |     search label = do
795 |         (seen, order) <- get
796 |         unless (label `IntSet.member` seen) $ do
797 |             put (IntSet.insert label seen, order)
798 |             case IntMap.lookup label blocks of
799 |                 Just (BasicBlock _ term) -> traverse_ search term
800 |                 _ -> return ()
801 |             modify (\ (seen', order') -> (seen', label : order'))
802 |     final = snd (execState (search start) (IntSet.empty, []))
803 |     start' = 0
804 |     mapping = IntMap.fromList (zip final [start'..])
805 |     rewrite label = IntMap.findWithDefault (error "basic block disappeared") label mapping
806 |     rewriteBlock label (BasicBlock body term) = (label, BasicBlock body (fmap rewrite term))
807 |     blocks' = IntMap.fromList (IntMap.elems (IntMap.intersectionWith rewriteBlock mapping blocks))
808 | ```
809 | 
810 | 
811 | Generating final structured code
812 | --------------------------------
813 | 
814 | With all the preliminary analyses out of the way, we're finally ready to
815 | turn a control-flow graph back into a structured program full of loops
816 | and `if`-statements!
817 | 
818 | Since this module is not language-specific, the caller needs to provide
819 | functions for constructing `break`, `continue`, loop, and `if`
820 | statements. The loop-related constructors take a label and generate a
821 | loop name from it, to support multi-level exits.
822 | 
823 | ```haskell
824 | structureCFG
825 |     :: Monoid s
826 |     => (Maybe Label -> s)
827 |     -> (Maybe Label -> s)
828 |     -> (Label -> s -> s)
829 |     -> (c -> s -> s -> s)
830 |     -> (Label -> s)
831 |     -> ([(Label, s)] -> s -> s)
832 |     -> CFG DepthFirst s c
833 |     -> (Bool, s)
834 | structureCFG mkBreak mkContinue mkLoop mkIf mkGoto mkMatch cfg =
835 |     (hasMultiple root, foo [] mempty root)
836 |     where
837 |     root = simplifyStructure (relooperRoot cfg)
838 |     foo exits next' = snd . foldr go (next', mempty)
839 |         where
840 |         go structure (next, rest) = (structureEntries structure, go' structure next `mappend` rest)
841 | 
842 |         go' (Structure entries (Simple body term)) next = body `mappend` case term of
843 |                 Unreachable -> mempty
844 |                 Branch to -> branch to
845 |                 CondBranch c t f -> mkIf c (branch t) (branch f)
846 |             where
847 |             branch (Nested nested) = foo exits next nested
848 |             branch to | structureLabel to `IntSet.member` next =
849 |                 insertGoto (structureLabel to) (next, mempty)
850 |             branch (ExitTo to) | isJust target = insertGoto to (fromJust target)
851 |                 where
852 |                 inScope immediate (label, local) = do
853 |                     (follow, mkStmt) <- IntMap.lookup to local
854 |                     return (follow, mkStmt (immediate label))
855 |                 target = msum (zipWith inScope (const Nothing : repeat Just) exits)
856 |             branch to = error ("structureCFG: label " ++ show (structureLabel to) ++ " is not a valid exit from " ++ show entries)
857 | 
858 |             insertGoto _ (target, s) | IntSet.size target == 1 = s
859 |             insertGoto to (_, s) = mkGoto to `mappend` s
860 | 
861 |         go' (Structure _ (Multiple handlers unhandled)) next =
862 |             mkMatch [ (label, foo exits next body) | (label, body) <- IntMap.toList handlers ] (foo exits next unhandled)
863 | 
864 |         go' (Structure entries (Loop body)) next = mkLoop label (foo exits' entries body)
865 |             where
866 |             label = IntSet.findMin entries
867 |             exits' =
868 |                 ( label
869 |                 , IntMap.union
870 |                     (IntMap.fromSet (const (entries, mkContinue)) entries)
871 |                     (IntMap.fromSet (const (next, mkBreak)) next)
872 |                 ) : exits
873 | 
874 | hasMultiple :: [Structure s c] -> Bool
875 | hasMultiple = any (go . structureBody)
876 |     where
877 |     go (Multiple{}) = True
878 |     go (Simple _ term) = or [ hasMultiple nested | Nested nested <- toList term ]
879 |     go (Loop body) = hasMultiple body
880 | ```
881 | 


--------------------------------------------------------------------------------
/src/Language/Rust/Corrode/CrateMap.hs:
--------------------------------------------------------------------------------
 1 | module Language.Rust.Corrode.CrateMap where
 2 | 
 3 | import Data.Foldable
 4 | import Data.List
 5 | import qualified Data.Map as Map
 6 | import Data.Maybe
 7 | 
 8 | data ItemKind
 9 |     = Enum
10 |     | Struct
11 |     | Union
12 |     | Type
13 |     | Symbol
14 |     deriving (Eq, Ord, Show)
15 | 
16 | type ModuleMap = [((ItemKind, String), String)]
17 | type CrateMap = Map.Map String ModuleMap
18 | type CratesMap = Map.Map String CrateMap
19 | type ItemRewrites = Map.Map (ItemKind, String) [String]
20 | 
21 | parseCrateMap :: String -> Either String CrateMap
22 | parseCrateMap = fmap root . foldrM parseLine (Map.empty, []) . filter (not . null) . map cleanLine . lines
23 |     where
24 |     root (crate, []) = crate
25 |     root (crate, unassigned) = Map.insert "" unassigned crate
26 | 
27 |     cleanLine = words . takeWhile (/= '#')
28 | 
29 |     parseLine ("-" : item) (crate, items) = do
30 |         item' <- parseItem item
31 |         return (crate, item' : items)
32 |     parseLine [name] (crate, items) | ":" `isSuffixOf` name = return (Map.insert (init name) items crate, [])
33 |     parseLine contents _ = Left (unwords ("invalid crate map entry:" : contents))
34 | 
35 |     parseItem contents = case parseItemKind contents of
36 |         (kind, [name]) -> return ((kind, name), name)
37 |         (kind, [old, "as", new]) -> return ((kind, old), new)
38 |         _ -> Left (unwords ("unsupported crate map item:" : contents))
39 | 
40 |     parseItemKind ("enum" : rest) = (Enum, rest)
41 |     parseItemKind ("struct" : rest) = (Struct, rest)
42 |     parseItemKind ("union" : rest) = (Union, rest)
43 |     parseItemKind ("typedef" : rest) = (Type, rest)
44 |     parseItemKind rest = (Symbol, rest)
45 | 
46 | mergeCrateMaps :: [(String, CrateMap)] -> Map.Map String CrateMap
47 | mergeCrateMaps = Map.fromListWith (Map.unionWith (++))
48 | 
49 | splitModuleMap :: String -> CratesMap -> (ModuleMap, CratesMap)
50 | splitModuleMap modName crates = fromMaybe ([], crates) $ do
51 |     thisCrate <- Map.lookup "" crates
52 |     thisModule <- Map.lookup modName thisCrate
53 |     let thisCrate' = Map.delete modName thisCrate
54 |     let crates' = Map.insert "" thisCrate' crates
55 |     return (thisModule, crates')
56 | 
57 | rewritesFromCratesMap :: CratesMap -> ItemRewrites
58 | rewritesFromCratesMap crates = Map.fromList
59 |     [ (item, setCrate [modName, new])
60 |     | (crateName, mods) <- Map.toList crates
61 |     , let setCrate = case crateName of
62 |             "" -> id
63 |             _ -> (crateName :)
64 |     , (modName, items) <- Map.toList mods
65 |     , (item, new) <- items
66 |     ]
67 | 


--------------------------------------------------------------------------------
/src/Language/Rust/Idiomatic.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE ViewPatterns #-}
 2 | 
 3 | module Language.Rust.Idiomatic (
 4 |     itemIdioms
 5 | ) where
 6 | 
 7 | import qualified Language.Rust.AST as Rust
 8 | 
 9 | unsnoc :: [a] -> Maybe ([a], a)
10 | unsnoc [] = Nothing
11 | unsnoc (x:xs) = case unsnoc xs of
12 |     Just (a, b) -> Just (x:a, b)
13 |     Nothing -> Just ([], x)
14 | 
15 | tailExpr :: Rust.Expr -> Maybe (Maybe Rust.Expr)
16 | -- If the last statement in this block is a return statement, extract
17 | -- its expression (if any) to the final expression position.
18 | tailExpr (Rust.Return e) = Just e
19 | -- If the last statement is a block, that's a tail-block.
20 | tailExpr (Rust.BlockExpr b) = Just (Just (Rust.BlockExpr (tailBlock b)))
21 | -- If the last statement is an if-expression, its true and false blocks
22 | -- are themselves tail-blocks.
23 | -- TODO: treat match-expressions like if-expressions.
24 | tailExpr (Rust.IfThenElse c t f) = Just (Just (Rust.IfThenElse c (tailBlock t) (tailBlock f)))
25 | -- Otherwise, there's nothing to rewrite.
26 | tailExpr _ = Nothing
27 | 
28 | -- Eliminate any return statement that has no statements which
29 | -- dynamically follow it.
30 | tailBlock :: Rust.Block -> Rust.Block
31 | -- If this block already has a final expression, just try to get rid of
32 | -- return statements within that expression.
33 | tailBlock (Rust.Block b (Just (tailExpr -> Just e))) = Rust.Block b e
34 | -- If there's no final expression but the final statement consists of an
35 | -- expression that makes sense in the tail position, move it.
36 | tailBlock (Rust.Block (unsnoc -> Just (b, Rust.Stmt (tailExpr -> Just e))) Nothing) = Rust.Block b e
37 | -- Otherwise, leave this block unchanged.
38 | tailBlock b = b
39 | 
40 | itemIdioms :: Rust.Item -> Rust.Item
41 | itemIdioms (Rust.Item attrs vis (Rust.Function fattrs name formals ret b)) = Rust.Item attrs vis (Rust.Function fattrs name formals ret (tailBlock b))
42 | itemIdioms i = i
43 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | flags: {}
2 | extra-package-dbs: []
3 | packages:
4 | - '.'
5 | extra-deps:
6 | - language-c-0.6.1
7 | resolver: lts-8.5
8 | 


--------------------------------------------------------------------------------
/tests/test.hs:
--------------------------------------------------------------------------------
  1 | import Control.Monad
  2 | import Control.Monad.Trans.Class
  3 | import Control.Monad.Trans.State.Lazy
  4 | import Data.Either
  5 | import Data.Foldable
  6 | import Data.Functor.Identity
  7 | import qualified Data.IntMap as IntMap
  8 | import qualified Data.Map as Map
  9 | import Data.List
 10 | import Language.Rust.Corrode.CFG
 11 | import Test.Tasty
 12 | import qualified Test.Tasty.QuickCheck as QC
 13 | import Text.PrettyPrint.HughesPJClass hiding (empty)
 14 | 
 15 | main :: IO ()
 16 | main = defaultMain tests
 17 | 
 18 | tests :: TestTree
 19 | tests = testGroup "Tests"
 20 |     [ QC.testProperty "structuring CFGs conserves code" cfgStructureConservesCode
 21 |     , QC.testProperty "CFG structuring round-trips" cfgRoundTrip
 22 |     ]
 23 | 
 24 | data Stmt
 25 |     = Stmt Int
 26 |     | Return
 27 |     | Loop Label [Stmt]
 28 |     | Break (Maybe Label)
 29 |     | Continue (Maybe Label)
 30 |     | If Cond [Stmt] [Stmt]
 31 |     | Goto Int
 32 |     deriving (Show, Eq)
 33 | 
 34 | data Cond
 35 |     = Cond Int
 36 |     | Match Int
 37 |     deriving (Show, Eq)
 38 | 
 39 | instance Pretty Stmt where
 40 |     pPrint (Loop l b) = vcat
 41 |         [ text ("Loop " ++ show l ++ " {")
 42 |         , nest 4 (prettyStmts b)
 43 |         , text "}"
 44 |         ]
 45 |     pPrint (If c t f) = vcat
 46 |         [ text ("If " ++ show c ++ " {")
 47 |         , nest 4 (prettyStmts t)
 48 |         , text "} else {"
 49 |         , nest 4 (prettyStmts f)
 50 |         , text "}"
 51 |         ]
 52 |     pPrint s = text (show s)
 53 | 
 54 | prettyStmts :: [Stmt] -> Doc
 55 | prettyStmts = vcat . map pPrint
 56 | 
 57 | stmtToCFG :: [Stmt] -> CFG DepthFirst [Stmt] Cond
 58 | stmtToCFG = depthFirstOrder . staticCFG . runIdentity . buildCFG . convert [] ([], Unreachable)
 59 |     where
 60 |     makeBlock ([], Branch to) = return to
 61 |     makeBlock (after, term) = do
 62 |         b <- newLabel
 63 |         addBlock b after term
 64 |         return b
 65 | 
 66 |     convert loops term stmts = convert' loops term stmts >>= makeBlock
 67 |     convert' loops = foldrM go
 68 |         where
 69 |         getLoop Nothing = case loops of
 70 |             (_, labels) : _ -> labels
 71 |             [] -> error "stmtToCFG: loop exit without an enclosing loop"
 72 |         getLoop (Just l) = case lookup l loops of
 73 |             Just labels -> labels
 74 |             Nothing -> error ("stmtToCFG: loop " ++ show l ++ " not in " ++ show loops)
 75 |         go Return _ = return ([Return], Unreachable)
 76 |         go (Loop l stmts) (after, term) = do
 77 |             brk <- makeBlock (after, term)
 78 |             cont <- newLabel
 79 |             (after', term') <- convert' ((l, (brk, cont)) : loops) ([], Branch cont) stmts
 80 |             addBlock cont after' term'
 81 |             return ([], Branch cont)
 82 |         go (Break l) _ = return ([], Branch (fst (getLoop l)))
 83 |         go (Continue l) _ = return ([], Branch (snd (getLoop l)))
 84 |         go (If c t f) term = do
 85 |             term' <- (,) [] <$> Branch <$> makeBlock term
 86 |             t' <- convert loops term' t
 87 |             f' <- convert loops term' f
 88 |             return ([], CondBranch c t' f')
 89 |         go s (after, term) = return (s : after, term)
 90 | 
 91 | staticCFG :: CFG k [Stmt] Cond -> CFG Unordered [Stmt] Cond
 92 | staticCFG (CFG start blocks) = removeEmptyBlocks $ evalState (buildCFG $ foo initialState start) Map.empty
 93 |     where
 94 |     initialState = -1
 95 |     getGoto (Goto l) = Right l
 96 |     getGoto s = Left s
 97 |     extractGoto current stmts = case partitionEithers $ map getGoto stmts of
 98 |         (stmts', gotos) -> (last (current : gotos), stmts')
 99 |     foo current b = do
100 |         let key = (current, b)
101 |         seen <- lift get
102 |         case key `Map.lookup` seen of
103 |             Just b' -> return b'
104 |             Nothing -> case IntMap.lookup b blocks of
105 |                 Nothing -> fail ("staticCFG: block " ++ show b ++ " missing")
106 |                 Just (BasicBlock stmts term) -> do
107 |                     b' <- newLabel
108 |                     lift $ put $ Map.insert key b' seen
109 |                     let (current', stmts') = extractGoto current stmts
110 |                     term' <- case term of
111 |                         CondBranch (Match n) t f
112 |                             | current' == n -> Branch <$> foo current' t
113 |                             | otherwise -> Branch <$> foo current' f
114 |                         _ -> mapM (foo current') term
115 |                     addBlock b' stmts' term'
116 |                     return b'
117 | 
118 | data BigramFst = FstStmt Int | FstCond Int Bool
119 |     deriving (Eq, Ord, Show)
120 | data BigramSnd = SndStmt Int | SndCond Int | SndReturn
121 |     deriving (Eq, Ord, Show)
122 | 
123 | bigrams :: CFG DepthFirst [Stmt] Cond -> Map.Map BigramFst BigramSnd
124 | bigrams (CFG start blocks) = snd $ IntMap.findWithDefault undefined start $ allBlocks $ allBlocks IntMap.empty
125 |     where
126 |     allBlocks seen = IntMap.foldrWithKey perBlock seen blocks
127 | 
128 |     perBlock l (BasicBlock stmts term) seen = IntMap.insert l (foldr perStmt (perTerm go term) stmts) seen
129 |         where
130 |         go to = IntMap.findWithDefault (Nothing, Map.empty) to seen
131 | 
132 |     newBigram _ (Nothing, seen) = seen
133 |     newBigram bigramFst (Just bigramSnd, seen) = Map.insert bigramFst bigramSnd seen
134 | 
135 |     perStmt (Stmt s) next = (Just (SndStmt s), newBigram (FstStmt s) next)
136 |     perStmt Return _ = (Just SndReturn, Map.empty)
137 |     perStmt s _ = error ("bigrams: unsupported statment " ++ show s)
138 | 
139 |     perTerm go term = case term of
140 |         Unreachable -> (Nothing, Map.empty)
141 |         Branch to -> go to
142 |         CondBranch (Cond c) t f -> (Just (SndCond c), bar True t `Map.union` bar False f)
143 |             where
144 |             bar matched to = newBigram (FstCond c matched) (go to)
145 |         CondBranch c _ _ -> error ("bigrams: unsupported condition " ++ show c)
146 | 
147 | fingerprintStmt :: [Stmt] -> (IntMap.IntMap Int, IntMap.IntMap Int)
148 | fingerprintStmt = foldr go (IntMap.empty, IntMap.empty)
149 |     where
150 |     go (Stmt l) (stmts, conds) = (IntMap.insertWith (+) l 1 stmts, conds)
151 |     go Return rest = rest
152 |     go (Loop _ stmts) rest = foldr go rest stmts
153 |     go (Break _) rest = rest
154 |     go (Continue _) rest = rest
155 |     go (If c t f) rest = case c of
156 |         Cond l -> (stmts, IntMap.insertWith (+) l 1 conds)
157 |         Match _ -> (stmts, conds)
158 |         where (stmts, conds) = foldr go (foldr go rest t) f
159 |     go (Goto _) rest = rest
160 | 
161 | fingerprintCFG :: CFG k [Stmt] Cond -> (IntMap.IntMap Int, IntMap.IntMap Int)
162 | fingerprintCFG (CFG _ blocks) = mconcat $ map go $ IntMap.elems blocks
163 |     where
164 |     go (BasicBlock stmt term) =
165 |         let (stmts, conds) = fingerprintStmt stmt in case term of
166 |         CondBranch (Cond c) _ _ -> (stmts, IntMap.insertWith (+) c 1 conds)
167 |         _ -> (stmts, conds)
168 | 
169 | genStmts :: CFG DepthFirst s c -> CFG DepthFirst [Stmt] c
170 | genStmts (CFG start blocks) = CFG start (IntMap.mapWithKey go blocks)
171 |     where
172 |     go _ (BasicBlock _ Unreachable) = BasicBlock [Return] Unreachable
173 |     go l (BasicBlock _ term) = BasicBlock [Stmt l] term
174 | 
175 | genCFG :: QC.Gen (CFG DepthFirst [Stmt] Cond)
176 | genCFG = QC.sized $ \ n -> fmap (genStmts . depthFirstOrder) $ buildCFG $ do
177 |     labels <- replicateM (1 + n) newLabel
178 |     let chooseLabel = QC.elements labels
179 |     forM_ labels $ \ label -> do
180 |         term <- lift $ QC.oneof
181 |             [ return Unreachable
182 |             , Branch <$> chooseLabel
183 |             , CondBranch (Cond label) <$> chooseLabel <*> chooseLabel
184 |             ]
185 |         addBlock label () term
186 |     return (head labels)
187 | 
188 | shrinkCFG :: CFG DepthFirst [Stmt] Cond -> [CFG DepthFirst [Stmt] Cond]
189 | shrinkCFG (CFG entry blocks) = map (genStmts . depthFirstOrder) (removeEdges ++ skipBlocks)
190 |     where
191 |     removeEdges = map (CFG entry . IntMap.fromList) $ go $ IntMap.toList blocks
192 |         where
193 |         go [] = []
194 |         go ((l, BasicBlock b term) : xs) =
195 |             [ (l, BasicBlock b term') : xs | term' <- removeEdge term ] ++
196 |             [ (l, BasicBlock b term) : xs' | xs' <- go xs ]
197 |     removeEdge (CondBranch _ t f) = Unreachable : map Branch (nub [t, f])
198 |     removeEdge (Branch _) = [Unreachable]
199 |     removeEdge Unreachable = []
200 | 
201 |     skipBlocks = [ skipBlock from to | (from, BasicBlock _ (Branch to)) <- IntMap.toList blocks ]
202 |     skipBlock from to = CFG (rewrite entry) (IntMap.map (\ (BasicBlock b term) -> BasicBlock b (fmap rewrite term)) blocks)
203 |         where rewrite label = if label == from then to else label
204 | 
205 | structureStmtCFG :: CFG DepthFirst [Stmt] Cond -> [Stmt]
206 | structureStmtCFG = snd . structureCFG
207 |     (return . Break)
208 |     (return . Continue)
209 |     (\ l b -> [Loop l b])
210 |     (\ c t f -> [If c t f])
211 |     (return . Goto)
212 |     (flip (foldr (\ (l, t) f -> [If (Match l) t f])))
213 | 
214 | subtractMap :: IntMap.IntMap Int -> IntMap.IntMap Int -> IntMap.IntMap Int
215 | subtractMap = IntMap.mergeWithKey (\ _ a b -> Just (a - b)) id (IntMap.map negate)
216 | 
217 | cfgStructureConservesCode :: QC.Property
218 | cfgStructureConservesCode = QC.forAllShrink genCFG shrinkCFG $ \ cfg ->
219 |     let (cfgStmts, cfgConds) = fingerprintCFG cfg
220 |         structured = structureStmtCFG cfg
221 |         (structuredStmts, structuredConds) = fingerprintStmt structured
222 |     in QC.counterexample (render (prettyStructure (relooperRoot cfg) $+$ prettyStmts structured))
223 |         (conserved "statements" structuredStmts cfgStmts QC..&&. conserved "conditions" structuredConds cfgConds)
224 |     where
225 |     conserved kind structured cfg = case IntMap.toList $ IntMap.filter (/= 0) $ subtractMap structured cfg of
226 |         [] -> QC.property True
227 |         miss -> QC.counterexample (kind ++ " not conserved: " ++ show miss) False
228 | 
229 | cfgRoundTrip :: QC.Property
230 | cfgRoundTrip = QC.forAllShrink genCFG shrinkCFG $ \ cfg ->
231 |     let bi = bigrams cfg
232 |         stmts = structureStmtCFG cfg
233 |         cfg' = stmtToCFG stmts
234 |         bi' = bigrams cfg'
235 |     in foldr QC.counterexample (QC.property (bi == bi')) $
236 |         map (++ "\n")
237 |             [ render (prettyStructure (relooperRoot cfg))
238 |             , render (prettyStmts stmts)
239 |             , show bi
240 |             , show bi'
241 |             , show cfg'
242 |             ]
243 | 


--------------------------------------------------------------------------------