├── .gitignore ├── .travis.yml ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── appveyor.yml ├── bench ├── README.md ├── allocation-benchmarks │ └── Main.hs ├── bench.py └── timing-benchmarks │ └── Main.hs ├── get-rust-sources.sh ├── language-rust.cabal ├── sample-sources ├── attributes.rs ├── empty.rs ├── expressions.rs ├── items.rs ├── let.rs ├── literals.rs ├── macros.rs ├── patterns.rs ├── precedences.rs ├── statement-expressions.rs ├── statements.rs └── types.rs ├── src └── Language │ └── Rust │ ├── Data │ ├── Ident.hs │ ├── InputStream.hs │ └── Position.hs │ ├── Parser.hs │ ├── Parser │ ├── Internal.y │ ├── Lexer.x │ ├── Literals.hs │ ├── ParseMonad.hs │ └── Reversed.hs │ ├── Pretty.hs │ ├── Pretty │ ├── Internal.hs │ ├── Literals.hs │ ├── Resolve.hs │ └── Util.hs │ ├── Quote.hs │ ├── Syntax.hs │ └── Syntax │ ├── AST.hs │ ├── Token.hs │ └── Token.hs-boot └── test ├── README.md ├── rustc-tests ├── Diff.hs ├── DiffUtils.hs └── Main.hs └── unit-tests ├── CompleteTest.hs ├── LexerTest.hs ├── Main.hs ├── ParserTest.hs └── PrettyTest.hs /.gitignore: -------------------------------------------------------------------------------- 1 | # Cabal related files 2 | cabal.project.local 3 | dist/ 4 | dist-newstyle/ 5 | resources/ 6 | 7 | # Stack related files 8 | .stack-work/ 9 | stack.yaml 10 | 11 | # Benchmark output folders 12 | bench/allocations/ 13 | bench/timings/ 14 | 15 | # Sample source files 16 | sample-sources/ 17 | !sample-sources/attributes.rs 18 | !sample-sources/empty.rs 19 | !sample-sources/expressions.rs 20 | !sample-sources/items.rs 21 | !sample-sources/let.rs 22 | !sample-sources/literals.rs 23 | !sample-sources/macros.rs 24 | !sample-sources/patterns.rs 25 | !sample-sources/precedences.rs 26 | !sample-sources/statement-expressions.rs 27 | !sample-sources/statements.rs 28 | !sample-sources/types.rs 29 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | # Sudo used for custom apt setup 2 | sudo: true 3 | 4 | # Add new environments to the build here: 5 | env: 6 | - GHCVER=8.0.2 CABALVER=3.0 7 | - GHCVER=8.2.2 CABALVER=3.0 8 | - GHCVER=8.4.4 CABALVER=3.0 9 | - GHCVER=8.6.5 CABALVER=3.0 10 | - GHCVER=8.8.1 CABALVER=3.0 11 | - GHCVER=head CABALVER=head 12 | 13 | # Allow for develop branch to break 14 | matrix: 15 | allow_failures: 16 | - env: GHCVER=8.8.1 CABALVER=3.0 17 | - env: GHCVER=head CABALVER=head 18 | 19 | # Manually install ghc and cabal 20 | before_install: 21 | - travis_retry sudo add-apt-repository -y ppa:hvr/ghc 22 | - travis_retry sudo apt-get update 23 | - travis_retry sudo apt-get install cabal-install-$CABALVER ghc-$GHCVER 24 | - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH 25 | - export PATH=$HOME/.cabal/bin:$PATH 26 | - travis_retry cabal update 27 | 28 | # Install Happy and Alex first, before installing 29 | install: 30 | - echo $PATH 31 | - cabal --version 32 | - ghc --version 33 | - cabal configure --verbose --enable-tests 34 | 35 | script: 36 | - cabal test 37 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Revision history for language-rust 2 | 3 | ## 0.2.0.27 -- 2018-04-22 4 | 5 | * Bump Rust version 6 | 7 | ## 0.1.1.26 -- 2018-03-02 8 | 9 | * Bump test and benchmark dependencies 10 | 11 | ## 0.1.0.26 -- 2018-03-01 12 | 13 | * Parser module (using Alex and Happy) 14 | * Pretty printing module 15 | * Resolving module for validating ASTs 16 | * Unit testsuite 17 | * Difference testsuite 18 | * Allocation benchmarks 19 | * Timing benchmarks 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Alec Theriault 2 | 3 
| All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Alec Theriault nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parser and pretty printer for Rust [![Build Status][4]][5] [![Windows build status][7]][8] [![Hackage Version][11]][12] 2 | 3 | `language-rust` aspires to efficiently and accurately parse and pretty print the [Rust language][0]. 
4 | The underlying AST structures are also intended to be as similar as possible to the [`libsyntax` AST 5 | that `rustc`][10] itself uses. 6 | 7 | A typical use looks like: 8 | 9 | ```haskell 10 | >>> :set -XTypeApplications +t 11 | >>> import Language.Rust.Syntax 12 | 13 | >>> -- Sample use of the parser 14 | >>> import Language.Rust.Parser 15 | >>> let inp = inputStreamFromString "fn main () { println!(\"Hello world!\"); }" 16 | inp :: InputStream 17 | >>> let sourceFile = parse' @(SourceFile Span) inp 18 | sourceFile :: SourceFile Span 19 | 20 | >>> -- Sample use of the pretty printer 21 | >>> import Language.Rust.Pretty 22 | >>> pretty' sourceFile 23 | fn main() { 24 | println!("Hello world!"); 25 | } 26 | it :: Doc b 27 | ``` 28 | 29 | ## Building 30 | 31 | ### Cabal 32 | 33 | With Cabal and GHC, run 34 | 35 | cabal install happy --constraint 'happy >= 1.19.8' 36 | cabal install alex 37 | cabal configure 38 | cabal build 39 | 40 | ### Stack 41 | 42 | With the [Stack][1] tool installed, run 43 | 44 | stack init 45 | stack build 46 | 47 | The second command is responsible for pulling in all of the dependencies (including executable 48 | tools like [Alex][2], [Happy][3], and GHC itself) and then compiling everything. If Stack complains 49 | about the version of Happy installed, you can explicitly install a recent one with `stack install 50 | happy-1.19.8`. 51 | 52 | ## Evolution of Rust 53 | 54 | As Rust evolves, so will `language-rust`. A best effort will be made to support unstable features 55 | from nightly as they come out, but only compatibility with stable is guaranteed. The last component 56 | of the version number indicates the nightly Rust compiler version against which tests were run. For 57 | example, `0.1.0.26` is tested against `rustc 1.26.0-nightly`. 58 | 59 | ## Bugs 60 | 61 | Please report any bugs to the [github issue tracker][9]. 
62 | 63 | ### Parser 64 | 65 | Any difference between what is accepted by the `rustc` parser and the `language-rust` parser 66 | indicates one of 67 | 68 | * a bug in `language-rust` (this is almost always the case) 69 | * a bug in `rustc` 70 | * that there is a newer version of `rustc` which made a breaking change to this syntax 71 | 72 | If the AST/parser of `rustc` changes, the `rustc-tests` test suite should start failing - it 73 | compares the JSON AST debug output of `rustc` to our parsed AST. 74 | 75 | ### Pretty printer 76 | 77 | For the pretty printer, bugs are a bit tougher to list exhaustively. Suggestions for better layout 78 | algorithms are most welcome! The [`fmt-rfcs`][6] repo is loosely used as the reference for "correct" 79 | pretty printing. 80 | 81 | [0]: https://www.rust-lang.org/en-US/ 82 | [1]: https://docs.haskellstack.org/en/stable/README/ 83 | [2]: https://hackage.haskell.org/package/alex 84 | [3]: https://hackage.haskell.org/package/happy 85 | [4]: https://travis-ci.org/harpocrates/language-rust.svg?branch=master 86 | [5]: https://travis-ci.org/harpocrates/language-rust 87 | [6]: https://github.com/rust-lang-nursery/fmt-rfcs 88 | [7]: https://ci.appveyor.com/api/projects/status/um8dxklqmubvn091/branch/master?svg=true 89 | [8]: https://ci.appveyor.com/project/harpocrates/language-rust/branch/master 90 | [9]: https://github.com/harpocrates/language-rust/issues 91 | [10]: https://github.com/rust-lang/rust/blob/master/src/libsyntax/ast.rs 92 | [11]: https://img.shields.io/hackage/v/language-rust.svg 93 | [12]: https://hackage.haskell.org/package/language-rust 94 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /appveyor.yml: 
-------------------------------------------------------------------------------- 1 | # Clone location 2 | clone_folder: c:\language-rust 3 | 4 | # Add new environments to the build here: 5 | environment: 6 | matrix: 7 | # - resolver: lts-6.35 # ghc-7.10.3 8 | # - resolver: lts-7.24 # ghc-8.0.1 9 | # - resolver: lts-9.21 # ghc-8.0.2 10 | - resolver: lts-11.22 # ghc-8.2.2 11 | - resolver: lts-12.14 # ghc-8.4.4 12 | - resolver: lts-14.4 # ghc-8.6.5 13 | - resolver: nightly 14 | 15 | # Manually fetch stack 16 | install: 17 | - set PATH=C:\Program Files\Git\mingw64\bin;%PATH% 18 | - curl --output stack.zip --location --insecure http://www.stackage.org/stack/windows-x86_64 19 | - dir 20 | - 7z x stack.zip stack.exe 21 | - stack --no-terminal init --resolver %resolver% > nul 22 | - stack --no-terminal setup --resolver %resolver% > nul 23 | 24 | # Install Happy and Alex first, before installing 25 | build_script: 26 | - stack --no-terminal install --resolver %resolver% happy-1.19.12 27 | - stack --no-terminal install --resolver %resolver% alex 28 | 29 | test_script: 30 | - stack --no-terminal test --resolver %resolver% :unit-tests --test-arguments "--plain" 31 | 32 | -------------------------------------------------------------------------------- /bench/README.md: -------------------------------------------------------------------------------- 1 | We have two types of benchmarks. If you are using `stack` you can run them with 2 | 3 | ``` 4 | $ stack bench # runs all benchmarks 5 | $ stack bench :allocation-benchmarks # runs allocation benchmarks only (faster) 6 | $ stack bench :timing-benchmarks # runs timing benchmarks only (slower) 7 | ``` 8 | 9 | ## `allocation-benchmarks` 10 | 11 | Benchmarks how much memory is allocated by the runtime when parsing the files inside of the 12 | `sample-sources` directory at the project root. Resulting information is stored in a JSON file in 13 | the `allocations` folder (automatically created in this directory). 
14 | 15 | ## `timing-benchmarks` 16 | 17 | Benchmark how long it takes to parse the files inside the `sample-sources` directory. Resulting 18 | information is stored in a JSON file in the `timings` folder (automatically created in this 19 | directory). 20 | 21 | # Tools 22 | 23 | Since some of these tests take a while, you can add a `.benchignore` file in `sample-sources` which 24 | lists files to skip for benchmarking (one file name per line). 25 | 26 | There is also a `bench.py` utility in this directory which lets you compare benchmarks across 27 | different commits. It relies on the JSON files in `allocations` and `timings`, so you will have to 28 | checkout and run the benchmarks on commits you want to compare against (to generate the 29 | corresponding JSON file). 30 | 31 | ``` 32 | $ ./bench.py --folder allocations # compare the last several commits for allocations 33 | $ ./bench.py --folder timings # compare the last several commits for timings 34 | ``` 35 | 36 | -------------------------------------------------------------------------------- /bench/allocation-benchmarks/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Weigh 4 | 5 | import Control.Monad (filterM) 6 | import Control.Exception (catch, throwIO) 7 | import Data.Foldable (for_) 8 | import Data.Traversable (for) 9 | import GHC.Exts (fromString) 10 | 11 | import Language.Rust.Data.InputStream (InputStream) 12 | import Language.Rust.Syntax (SourceFile) 13 | import Language.Rust.Parser (readInputStream, Span, parse') 14 | 15 | import System.Directory (getCurrentDirectory, listDirectory, createDirectoryIfMissing, doesFileExist, removeFile) 16 | import System.FilePath ((), (<.>), takeFileName) 17 | import System.Process (proc, readCreateProcess) 18 | import System.IO.Error (isDoesNotExistError) 19 | 20 | import Data.Aeson 21 | import qualified Data.ByteString.Lazy as BL 22 | 23 | -- TODO: only allocation and 
GCs seem to be really reproducible. Live and max sometimes are 0. 24 | 25 | main :: IO () 26 | main = do 27 | -- Open the output log file 28 | status <- readCreateProcess (proc "git" ["status", "--porcelain"]) "" 29 | logFileName <- case status of 30 | "" -> init <$> readCreateProcess (proc "git" ["rev-parse", "HEAD"]) "" 31 | _ -> pure "WIP" 32 | 33 | -- Get the test cases 34 | workingDirectory <- getCurrentDirectory 35 | let sampleSources = workingDirectory "sample-sources" 36 | benchIgnore = sampleSources ".benchignore" 37 | benchIgnoreExists <- doesFileExist benchIgnore 38 | ignore <- if benchIgnoreExists 39 | then (\f -> map (sampleSources ) (lines f)) <$> readFile benchIgnore 40 | else pure [] 41 | entries <- map (sampleSources ) <$> listDirectory sampleSources 42 | files <- filterM doesFileExist (filter (`notElem` ignore) entries) 43 | 44 | -- Clear out previous WIP (if there is one) 45 | catch (removeFile (workingDirectory "bench" "allocations" "WIP" <.> "json")) 46 | (\e -> if isDoesNotExistError e then pure () else throwIO e) 47 | 48 | -- Run 'weigh' tests 49 | fileStreams <- for files $ \file -> do { is <- readInputStream file; pure (takeFileName file, is) } 50 | let weigh = do setColumns [ Case, Max, Allocated, GCs, Live ] 51 | for_ fileStreams $ \(file,is) -> func file (parse' :: InputStream -> SourceFile Span) is 52 | mainWith weigh 53 | (wr, _) <- weighResults weigh 54 | let results = object [ case maybeErr of 55 | Nothing -> key .= object [ "allocated" .= weightAllocatedBytes weight 56 | -- , "max" .= weightMaxBytes w 57 | -- , "live" .= weightLiveBytes w 58 | -- , "GCs" .= weightGCs w 59 | ] 60 | Just err -> key .= String (fromString err) 61 | | (weight, maybeErr) <- wr 62 | , let key = fromString (weightLabel weight) 63 | ] 64 | 65 | -- Save the output to JSON 66 | createDirectoryIfMissing False (workingDirectory "bench" "allocations") 67 | let logFile = workingDirectory "bench" "allocations" logFileName <.> "json" 68 | putStrLn $ "writing results 
to: " ++ logFile 69 | logFile `BL.writeFile` encode results 70 | 71 | -------------------------------------------------------------------------------- /bench/bench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import sys 5 | import subprocess 6 | import tabulate 7 | import argparse 8 | 9 | def merge(dict1, dict2, def1, def2, func): 10 | """Merge two nested dictionaries, using default values when it makes sense""" 11 | assert isinstance(dict1, dict) 12 | assert isinstance(dict2, dict) 13 | 14 | toReturn = {} 15 | keys1 = set(dict1.keys()) 16 | keys2 = set(dict2.keys()) 17 | 18 | for key in keys1 | keys2: # change this to | 19 | val1 = dict1.get(key, None) 20 | val2 = dict2.get(key, None) 21 | 22 | if isinstance(val1,dict) or isinstance(val2,dict): 23 | toReturn[key] = merge(val1 or {}, val2 or {}, def1, def2, func) 24 | else: 25 | toReturn[key] = func(val1 or def1, val2 or def2) 26 | 27 | return toReturn 28 | 29 | 30 | def flattenListDict(d, indent=0): 31 | """Flatten a nested dictionary into a list of lists representing a table""" 32 | assert isinstance(d, dict) 33 | result = [] 34 | for k,v in d.items(): 35 | assert isinstance(k, str) 36 | if isinstance(v, list): 37 | first = None 38 | row = [] 39 | for entry in v: 40 | if entry: 41 | if first: 42 | percentDiff = 100 * (float(entry) - first) / first 43 | color = '\033[92m' if percentDiff > -1.0 else '\033[91m' 44 | row.append("%s%2.1f%s" % (color, percentDiff, '%\033[0m')) 45 | else: 46 | first = float(entry) 47 | row.append(entry) 48 | else: 49 | row.append(entry) 50 | 51 | result.append([ '.' * indent + k ] + row) 52 | elif isinstance(v, dict): 53 | result.append([ '.' * indent + k ]) 54 | result.extend(flattenListDict(v, indent + 2)) 55 | else: 56 | raise "List dict can only contain lists or other list dicts" 57 | return result 58 | 59 | # Currently not used... 
60 | def fmtSize(num): 61 | """format a number of bytes on disk into a human readable form""" 62 | for unit in ['','KB','MB','GB','TB','PB','EB','ZB']: 63 | if abs(num) < 1024.0: 64 | return "%3.1f%s" % (num, unit) 65 | num /= 1024.0 66 | return "%.1f%s%s" % (num, 'YB', suffix) 67 | 68 | def revParse(commit, useAbbreviated=False): 69 | """get the hash for a commit""" 70 | abbreviated = subprocess.run( 71 | ["git", "rev-parse", "--abbrev-ref", commit], 72 | stdout=subprocess.PIPE, 73 | check=True 74 | ).stdout.decode("utf8").strip() 75 | 76 | other = subprocess.run( 77 | ["git", "rev-parse", commit], 78 | stdout=subprocess.PIPE, 79 | check=True 80 | ).stdout.decode("utf8").strip() 81 | 82 | return (useAbbreviated and abbreviated) or other 83 | 84 | # Run benchmarks for a commit 85 | def runBenchmarks(commit): 86 | """temporarily check out the given commit to run the benchmarks""" 87 | 88 | print("Running benchmarks for '" + commit + "'") 89 | commit = revParse(commit) 90 | print('\033[31m' + "Do not make any changes to files!" 
+ '\033[0m') 91 | init = revParse("HEAD") 92 | 93 | localChanges = b"" != subprocess.run( 94 | ["git", "status", "--porcelain", "--untracked-files=no"], 95 | stdout=subprocess.PIPE 96 | ).stdout.strip() 97 | 98 | if localChanges: 99 | subprocess.run(["git", "stash"], stdout=subprocess.PIPE) 100 | 101 | subprocess.run(["git", "checkout", commit]) 102 | subprocess.run(["stack", "bench"]) 103 | subprocess.run(["git", "checkout", init]) 104 | 105 | if localChanges: 106 | subprocess.run(["git", "stash", "pop"], stdout=subprocess.PIPE) 107 | 108 | print('\033[32m' + "Back to initial state" + '\033[0m') 109 | 110 | 111 | if __name__ == "__main__": 112 | # Argument parser 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument('--folder', default='.', type=str, help='benchmark folder to analyze') 115 | parser.add_argument('--last', nargs='?', default=5, type=int, help='include benchmarks for the last "n" commits') 116 | parser.add_argument('--exact', nargs='*', default=[], type=str, help='include benchmarks for specific commits') 117 | parsed = parser.parse_args(sys.argv[1:]) 118 | 119 | # Commits 120 | commits = ["WIP", "HEAD"] 121 | if parsed.last: 122 | commits.extend([ "HEAD~" + str(i) for i in range(1,parsed.last) ]) 123 | if parsed.exact: 124 | commits.extend([ str(commit) for commit in parsed.exact ]) 125 | 126 | # Sanitized commits 127 | sanitized = ["WIP"] 128 | for commit in commits[1:]: 129 | try: 130 | sanitized.append(revParse(commit)) 131 | except subprocess.CalledProcessError: 132 | print('Invalid commit "' + commit + '"') 133 | 134 | # Load the JSONs 135 | datas = [] 136 | for sane in sanitized: 137 | try: 138 | with open(parsed.folder + '/' + sane + '.json') as json_data: 139 | datas.append(json.load(json_data)) 140 | except (OSError, ValueError): 141 | print('Could not read file for "' + sane + '.json"') 142 | datas.append({}) 143 | 144 | # Aggregate the output 145 | aggregated = {} 146 | n = 0 147 | for data in datas: 148 | aggregated = merge(aggregated, data, n * [ None ], None, lambda xs, x: xs + [x]) 149 | n += 1
150 | 151 | # Convert to a table 152 | print(tabulate.tabulate(flattenListDict(aggregated), [ '' ] + commits)) 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /bench/timing-benchmarks/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Criterion 4 | import Criterion.Main (defaultConfig) 5 | import Criterion.Types (anMean, reportAnalysis, timeLimit, anOutlierVar, ovEffect, OutlierEffect(Moderate)) 6 | import Statistics.Types (Estimate(..), ConfInt(..)) 7 | 8 | import Control.Monad (filterM) 9 | import Control.Exception (catch, throwIO) 10 | import Data.Foldable (for_) 11 | import Data.Traversable (for) 12 | import GHC.Exts (fromString) 13 | 14 | import Language.Rust.Data.InputStream (InputStream) 15 | import Language.Rust.Syntax (SourceFile) 16 | import Language.Rust.Parser (readInputStream, Span, parse') 17 | 18 | import System.Directory (getCurrentDirectory, listDirectory, createDirectoryIfMissing, doesFileExist, removeFile) 19 | import System.FilePath ((), (<.>), takeFileName) 20 | import System.Process (proc, readCreateProcess) 21 | import System.IO.Error (isDoesNotExistError) 22 | 23 | import Data.Aeson 24 | import qualified Data.ByteString.Lazy as BL 25 | 26 | main :: IO () 27 | main = do 28 | -- Open the output log file 29 | status <- readCreateProcess (proc "git" ["status", "--porcelain"]) "" 30 | logFileName <- case status of 31 | "" -> init <$> readCreateProcess (proc "git" ["rev-parse", "HEAD"]) "" 32 | _ -> pure "WIP" 33 | 34 | -- Get the test cases 35 | workingDirectory <- getCurrentDirectory 36 | let sampleSources = workingDirectory "sample-sources" 37 | benchIgnore = sampleSources ".benchignore" 38 | benchIgnoreExists <- doesFileExist benchIgnore 39 | ignore <- if benchIgnoreExists 40 | then (\f -> map (sampleSources ) (lines f)) <$> readFile benchIgnore 41 | else pure [] 42 | entries <- map 
(sampleSources ) <$> listDirectory sampleSources 43 | files <- filterM doesFileExist (filter (`notElem` ignore) entries) 44 | 45 | -- Clear out previous WIP (if there is one) 46 | catch (removeFile (workingDirectory "bench" "timings" "WIP" <.> "json")) 47 | (\e -> if isDoesNotExistError e then pure () else throwIO e) 48 | 49 | -- Run 'criterion' tests 50 | reports <- for files $ \f -> do 51 | let name = takeFileName f 52 | putStrLn name 53 | is <- readInputStream f 54 | bnch <- benchmarkWith' defaultConfig{ timeLimit = 20 } (nf (parse' :: InputStream -> SourceFile Span) is) 55 | pure (name, bnch) 56 | let results = object [ fromString name .= object [ "mean" .= m 57 | , "lower bound" .= l 58 | , "upper bound" .= u 59 | ] 60 | | (name,report) <- reports 61 | , let Estimate m (ConfInt l u _) = anMean (reportAnalysis report) 62 | , ovEffect (anOutlierVar (reportAnalysis report)) < Moderate 63 | ] 64 | for_ [ name | (name,report) <- reports, ovEffect (anOutlierVar (reportAnalysis report)) >= Moderate ] $ \n -> 65 | putStrLn $ "Benchmark for `" ++ n ++ "' will not be considered since it was inflated" 66 | 67 | -- Save the output to JSON 68 | createDirectoryIfMissing False (workingDirectory "bench" "timings") 69 | let logFile = workingDirectory "bench" "timings" logFileName <.> "json" 70 | putStrLn $ "writing results to: " ++ logFile 71 | logFile `BL.writeFile` encode results 72 | 73 | -------------------------------------------------------------------------------- /get-rust-sources.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Exit if anything goes wrong 4 | set -e 5 | 6 | # Usage info 7 | if ! 
[ $# = 1 ] 8 | then 9 | echo "This script gets all of the (> 1000 LOC) source files in repositories" 10 | echo "under 'rust-lang' and 'rust-lang-nursery' organizations" 11 | echo "" 12 | echo "Expected usage:" 13 | echo " $0 " 14 | echo "" 15 | echo "You probably want to run:" 16 | echo " $0 sample-sources" 17 | exit 1 18 | else 19 | DEST="$1" 20 | fi 21 | 22 | # Work inside a temporary directory 23 | TEMP=temp 24 | mkdir $TEMP 25 | cd $TEMP 26 | 27 | # Get the JSON files 28 | curl https://api.github.com/orgs/rust-lang/repos > rust-lang.json 29 | curl https://api.github.com/orgs/rust-lang-nursery/repos > rust-lang-nursery.json 30 | 31 | # Make one big JSON array of repos and extract the name and clone url 32 | (jq -rs '.[0] + .[1] | .[] | (.name, .clone_url)' rust-lang.json rust-lang-nursery.json \ 33 | ) | while read -r REPO_NAME; read -r REPO_CLONE; do 34 | 35 | # Skip 'multirust-rs-binaries' and 'rustc-timing-archive' in particular 36 | if [ $REPO_NAME = "multirust-rs-binaries" ] || [ $REPO_NAME = "rustc-timing-archive" ] 37 | then 38 | continue 39 | fi 40 | 41 | # Do a shallow clone of the repo 42 | echo "Cloning $REPO_NAME at $REPO_CLONE" 43 | git clone --depth=1 $REPO_CLONE 44 | 45 | # Find all rust files in the repo and copy each of these files to the DEST folder, provided they 46 | # are more than 1000 lines long. The 1000-line stipulation serves two purposes: to 47 | # provide files whose parsing time is non-trivial and also source files which are expected to 48 | # compile.
49 | echo "Finding rust files in $REPO_NAME" 50 | find $REPO_NAME -type f -name '*.rs' | while read -r FILE; do 51 | 52 | # Escaped file name 53 | DEST_FILE="../$DEST/${FILE//\//|}" 54 | 55 | # Check the file is longer than 1000 lines 56 | if (( 1000 < $(wc -l < "$FILE") )) 57 | then 58 | # copy the file over, but filter out lines which contain '#[macro_use]' or a 'mod <name>;' 59 | # declaration, since those cause some issues for the rust compiler (it will go looking for 60 | # those files even during parsing). 61 | grep -Ev "(#\[macro_use\]|mod\s+\w+;)" $FILE > $DEST_FILE 62 | fi 63 | 64 | done; 65 | 66 | # Delete the cloned repo 67 | rm -rf $REPO_NAME 68 | 69 | done; 70 | 71 | # Clean up 72 | cd .. 73 | rm -rf $TEMP 74 | 75 | # Print disclaimer 76 | echo "WARNING: Don't expect the 'rustc-tests' suite to necessarily" 77 | echo " work on all the files this produces. A failure in the" 78 | echo " test suite is only a bug if \`rustc\` succeeds on the" 79 | echo " same file." 80 | 81 | -------------------------------------------------------------------------------- /language-rust.cabal: -------------------------------------------------------------------------------- 1 | name: language-rust 2 | version: 0.2.0.27 3 | 4 | synopsis: Parsing and pretty printing of Rust code 5 | description: Language Rust is a library for the analysis of Rust code. It includes a 6 | complete, well tested parser and pretty printer.
7 | 8 | homepage: https://github.com/harpocrates/language-rust 9 | license: BSD3 10 | license-file: LICENSE 11 | author: Alec Theriault 12 | maintainer: alec.theriault@gmail.com 13 | copyright: (c) 2017-2018 Alec Theriault 14 | stability: provisional 15 | bug-reports: https://github.com/harpocrates/language-rust/issues 16 | category: Language 17 | build-type: Simple 18 | extra-source-files: ChangeLog.md README.md 19 | cabal-version: >=1.10 20 | 21 | source-repository head 22 | type: git 23 | location: https://github.com/harpocrates/language-rust.git 24 | 25 | flag useByteStrings 26 | description: Use 'ByteString' instead of 'String' as 'InputStream' datatype 27 | default: True 28 | 29 | flag enableQuasiquotes 30 | description: Provide the experimental 'Language.Rust.Quote' module 31 | default: True 32 | 33 | library 34 | hs-source-dirs: src 35 | 36 | ghc-options: -Wall 37 | 38 | if impl(ghc >= 8) 39 | ghc-options: 40 | -Wincomplete-patterns 41 | -Wincomplete-uni-patterns 42 | -Wmissing-signatures 43 | 44 | build-tools: alex >=3.1, happy >=1.19.8 45 | default-language: Haskell2010 46 | 47 | exposed-modules: Language.Rust.Syntax 48 | Language.Rust.Parser 49 | Language.Rust.Parser.ParseMonad 50 | Language.Rust.Parser.Lexer 51 | Language.Rust.Parser.Internal 52 | Language.Rust.Pretty 53 | Language.Rust.Pretty.Internal 54 | Language.Rust.Data.Position 55 | Language.Rust.Data.Ident 56 | Language.Rust.Data.InputStream 57 | if flag(enableQuasiquotes) 58 | exposed-modules: Language.Rust.Quote 59 | 60 | other-modules: Language.Rust.Parser.Literals 61 | Language.Rust.Parser.Reversed 62 | Language.Rust.Pretty.Resolve 63 | Language.Rust.Pretty.Literals 64 | Language.Rust.Pretty.Util 65 | Language.Rust.Syntax.AST 66 | Language.Rust.Syntax.Token 67 | 68 | -- Starting in 8.8, `MonadFailDesugaring` is default 69 | if impl(ghc < 8.8) 70 | default-extensions: 71 | MonadFailDesugaring 72 | 73 | other-extensions: FlexibleContexts 74 | , FlexibleInstances 75 | , OverloadedStrings 76 | , 
OverloadedLists 77 | , DeriveFunctor 78 | , DeriveGeneric 79 | , DeriveAnyClass 80 | , DeriveDataTypeable 81 | , TypeFamilies 82 | , TypeOperators 83 | , Rank2Types 84 | , BangPatterns 85 | , CPP 86 | 87 | build-depends: base >=4.9 && <5.0 88 | , prettyprinter >=1.0 && <2.0 89 | , transformers >=0.4 && <0.6 90 | , array >=0.5 && <0.6 91 | , deepseq >=1.1 && <1.5 92 | 93 | if flag(useByteStrings) 94 | cpp-options: -DUSE_BYTESTRING 95 | build-depends: utf8-string >=1.0 96 | , bytestring >=0.10 97 | 98 | if flag(enableQuasiquotes) 99 | build-depends: template-haskell >=2.10 100 | 101 | 102 | test-suite unit-tests 103 | hs-source-dirs: test/unit-tests 104 | ghc-options: -Wall 105 | main-is: Main.hs 106 | other-modules: LexerTest 107 | ParserTest 108 | PrettyTest 109 | CompleteTest 110 | 111 | other-extensions: FlexibleContexts 112 | , OverloadedStrings 113 | , OverloadedLists 114 | , ScopedTypeVariables 115 | , UnicodeSyntax 116 | 117 | type: exitcode-stdio-1.0 118 | default-language: Haskell2010 119 | build-depends: HUnit >=1.3.0.0 120 | , test-framework >=0.8.0 121 | , test-framework-hunit >=0.3.0 122 | 123 | , language-rust 124 | , base 125 | , prettyprinter 126 | 127 | test-suite rustc-tests 128 | hs-source-dirs: test/rustc-tests 129 | ghc-options: -Wall 130 | main-is: Main.hs 131 | other-modules: Diff 132 | DiffUtils 133 | 134 | other-extensions: InstanceSigs 135 | , OverloadedStrings 136 | , OverloadedLists 137 | , MultiParamTypeClasses 138 | , UnicodeSyntax 139 | 140 | type: exitcode-stdio-1.0 141 | default-language: Haskell2010 142 | 143 | build-depends: process >=1.3 144 | , bytestring >=0.10 145 | , aeson >=0.11.0.0 146 | , directory >=1.2.5.0 147 | , filepath >=1.4.0.0 148 | , test-framework >=0.8.0 149 | , vector >=0.10.0 150 | , text >=1.2.0 151 | , unordered-containers >=0.2.7 152 | , time >=1.2.0.0 153 | 154 | , language-rust 155 | , base 156 | , prettyprinter >=1.1 157 | 158 | benchmark timing-benchmarks 159 | hs-source-dirs: bench/timing-benchmarks 160 
| ghc-options: -Wall 161 | main-is: Main.hs 162 | 163 | other-extensions: OverloadedStrings 164 | 165 | type: exitcode-stdio-1.0 166 | default-language: Haskell2010 167 | build-depends: process >=1.3 168 | , bytestring >=0.10 169 | , aeson >=0.11.0.0 170 | , directory >=1.2.5.0 171 | , filepath >=1.4.0.0 172 | , criterion >=1.1.1.0 173 | , statistics >=0.14.0 174 | 175 | , language-rust 176 | , base 177 | 178 | benchmark allocation-benchmarks 179 | hs-source-dirs: bench/allocation-benchmarks 180 | ghc-options: -Wall 181 | main-is: Main.hs 182 | 183 | 184 | other-extensions: OverloadedStrings 185 | 186 | type: exitcode-stdio-1.0 187 | default-language: Haskell2010 188 | build-depends: process >=1.3 189 | , bytestring >=0.10 190 | , aeson >=0.11.0.0 191 | , directory >=1.2.5.0 192 | , filepath >=1.4.0.0 193 | , weigh >=0.0.6 194 | 195 | , language-rust 196 | , base 197 | 198 | -------------------------------------------------------------------------------- /sample-sources/attributes.rs: -------------------------------------------------------------------------------- 1 | fn main () { 2 | match x { 3 | #[arm_outer] 4 | 0 => true, 5 | } 6 | } 7 | 8 | fn expressions() { 9 | #[box_outer] 10 | box 1; 11 | 12 | // #[inplace_outer] 13 | // x <- 1; 14 | 15 | #[vec_outer] 16 | [ #![vec_inner] 1, 2, 3]; 17 | 18 | #[call_outer] 19 | foo(1, 2, 3); 20 | 21 | #[methodcall_outer] 22 | x.foo(1, 2, 3); 23 | 24 | #[tuple_outer] 25 | ( #![tuple_inner] 1, 2, 3); 26 | #[tuple_outer] 27 | ( #![tuple_inner] 1, ); 28 | #[tuple_outer] 29 | ( #![tuple_inner] ); 30 | 31 | // #[binary_outer] 32 | // 1 + 2; 33 | 34 | #[unary_outer] 35 | -1; 36 | 37 | #[lit_outer] 38 | 1; 39 | 40 | // #[cast_outer] 41 | // 1 as f64; 42 | 43 | // #[typeascription_outer] 44 | // 1: f32; 45 | 46 | // #[if_outer] 47 | // if (true) { 48 | // 1; 49 | // } 50 | 51 | // #[iflet_outer] 52 | // if let x = true { 53 | // 1; 54 | // } 55 | 56 | #[while_outer] 57 | while (true) { 58 | #![while_inner] 59 | 1; 60 | } 61 | 62 | 
#[whilelet_outer] 63 | while let x = true { 64 | #![whilelet_inner] 65 | 1; 66 | } 67 | 68 | #[for_outer] 69 | for i in 1..3 { 70 | #![for_inner] 71 | 1; 72 | } 73 | 74 | #[loop_outer] 75 | loop { 76 | #![loop_inner] 77 | 1; 78 | } 79 | 80 | #[match_outer] 81 | match x { 82 | #![match_inner] 83 | _ => 1 84 | } 85 | 86 | #[closure_outer] 87 | move |x| { 1 + 2 }; 88 | 89 | #[blockexpr_outer] 90 | { #![blockexpr_inner] 1; 2 } 91 | #[blockexpr_outer] 92 | unsafe { #![blockexpr_inner] 1; 2 } 93 | 94 | #[catch_outer] 95 | do catch { #![catch_inner] 1 }; 96 | 97 | // #[assign_outer] 98 | // x = 1; 99 | 100 | // #[assignop_outer] 101 | // x += 1; 102 | 103 | #[fieldaccess_outer] 104 | x.foo; 105 | 106 | #[tupfield_outer] 107 | x.0; 108 | 109 | #[index_outer] 110 | x[0]; 111 | 112 | // #[range_outer] 113 | // 1..2; 114 | 115 | #[pathexpr_outer] 116 | math::PI; 117 | 118 | } 119 | 120 | fn foreign_items() { 121 | #[foreign_outer] 122 | extern "C" { 123 | #![foreign_inner] 124 | 125 | #[static_outer] 126 | static ext: u8; 127 | 128 | #[fn_outer] 129 | fn foo(x: i32, ...); 130 | } 131 | } 132 | 133 | #[trait_outer] 134 | trait Trait { 135 | 136 | #[const_outer] 137 | const x: i32; 138 | 139 | #[method_outer] 140 | fn area(&self) -> f64; 141 | 142 | #[type_outer] 143 | type N; 144 | 145 | #[macro_outer] 146 | foo!(); 147 | } 148 | 149 | 150 | #[impl_outer] 151 | impl Impls { 152 | #![impl_inner] 153 | 154 | #[const_outer] 155 | const x: i32 = 1; 156 | 157 | #[method_outer] 158 | fn area(&self) -> f64 { 159 | #![method_inner] 160 | 1f64 161 | } 162 | 163 | #[type_outer] 164 | type N = i32; 165 | 166 | #[macro_outer] 167 | foo!(); 168 | } 169 | 170 | fn items() { 171 | #[use_outer] 172 | use foo::bar as FooBar; 173 | 174 | #[static_outer] 175 | static FOO: i32 = 42; 176 | 177 | #[const_outer] 178 | const FOO: i32 = 42; 179 | 180 | #[fn_outer] 181 | fn foo(bar: usize) -> usize { 182 | #![fn_inner] 183 | 1 184 | } 185 | 186 | #[mod_outer] 187 | mod foo { #![mod_inner] } 188 | 189 | 
#[type_outer] 190 | type Foo = Bar; 191 | 192 | #[enum_outer] 193 | enum Foo { #[variant_outer] C(A), D(B) } 194 | 195 | #[struct_outer] 196 | struct Foo { #[field_outer] x: A } 197 | 198 | #[union_outer] 199 | union Foo { x: A, y: B } 200 | 201 | #[macro_outer] 202 | foo!{ .. } 203 | 204 | #[macrodef_outer] 205 | macro_rules! foo { () => ( .. ) } 206 | 207 | } 208 | 209 | fn foo< 210 | #[lifetimedef_outer] 'a: 'b, 211 | #[typaram_outer] T 212 | >() { } 213 | -------------------------------------------------------------------------------- /sample-sources/empty.rs: -------------------------------------------------------------------------------- 1 | #![no_std = "hi"] 2 | fn main() { } 3 | -------------------------------------------------------------------------------- /sample-sources/expressions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = box "foo"; 3 | let x = [1,2,3]; 4 | let x = foo(1,2,x); 5 | let x = x.foo::(a, b, c, d); 6 | let x = (a, b, c ,d); 7 | let x = a + b; 8 | let x = a * b; 9 | let x = !x; 10 | let x = *x; 11 | let x = true as f64; 12 | let x = 1: f64; 13 | if true { } else { }; 14 | if true { }; 15 | if let y = true { }; 16 | while true { } 17 | 'l: while true { } 18 | while let None = None { continue; } 19 | 'l: while let None = None { continue 'l; } 20 | for i in 1.. 
{ } 21 | 'l: for i in 1..10 { } 22 | loop { break; } 23 | 'l: loop { break 'l 1; } 24 | match x { _ => () } 25 | let x = move |a,b,c| { a + b + c }; 26 | let x = static move |a,b,c| { a + b + c }; 27 | let f = |_||x, y| x+y; 28 | let f = static |_||x, y| x+y; 29 | let x = { 1 }; 30 | let x = unsafe { 1 }; 31 | a = 1; 32 | a += 1; 33 | let x = obj.foo; 34 | let x = foo.0; 35 | let x = foo[2]; 36 | let x = &a; 37 | let x = &mut a; 38 | let x = return 1; 39 | let x = asm!("NOP"); 40 | let x = println!("hi"); 41 | let x = Foo { x: 1, y: 2 }; 42 | let x = Foo { x: 1, ..base }; 43 | let x = [1; 5]; 44 | let x = 1 * (2 + 3); 45 | let x = foo()?; 46 | let x = do catch { 1 }; 47 | return 0; 48 | return; 49 | yield 0; 50 | yield; 51 | let r#return = 0; 52 | 53 | match true { 54 | true => move | | { 1 }, 55 | false => | | { 2} 56 | } 57 | } 58 | 59 | fn precedences() { 60 | x==&y||z|w.0<=x**y*&&z^=l^m<<=n; 61 | y|z..wm; 62 | 1>!x%y-z*-w||u?==v; 63 | } 64 | -------------------------------------------------------------------------------- /sample-sources/items.rs: -------------------------------------------------------------------------------- 1 | extern crate foo; 2 | extern crate foo_bar as foo; 3 | 4 | use foo; 5 | use foo::bar; 6 | use foo::bar as FooBar; 7 | 8 | static FOO: i32 = 42; 9 | static mut FOO: i32 = 42; 10 | static FOO: &'static str = "bar"; 11 | 12 | const FOO: i32 = 42; 13 | 14 | mod foo { } 15 | mod bar { 16 | 17 | extern { } 18 | extern "C" { 19 | fn foo(x: int) -> int; 20 | static x: int; 21 | static mut x: *mut int; 22 | type C; 23 | } 24 | 25 | type Foo = Bar; 26 | 27 | enum Foo { C(A), D(B) } 28 | 29 | struct Foo { x: A } 30 | union Foo { x: A, y: B } 31 | 32 | pub(super) struct Point { x: i32 } 33 | 34 | trait Foo { } 35 | trait Foo { 36 | const ID1: i32; 37 | const ID2: i32 = 1; 38 | 39 | fn area1(self) -> f64; 40 | fn area2(mut self) -> f64 { 1f64 } 41 | fn area1(&self) -> f64; 42 | fn area2(&mut self) -> f64 { 1f64 } 43 | fn area1(&'lt self) -> f64; 44 
| fn area2(&'lt mut self) -> f64 { 1f64 } 45 | fn area1(self: Foo) -> f64; 46 | fn area2(mut self: Foo) -> f64 { 1f64 } 47 | 48 | type N; 49 | type N: fmt::Display; 50 | type N: fmt::Display = i32; 51 | 52 | foo!{} 53 | } 54 | 55 | trait Foo = Bar + Baz; 56 | trait Foo<'a,N> = Bar<'a,N> + Baz + 'a; 57 | 58 | fn foo(x: &T) { } 59 | struct Foo { field: Box } 60 | trait Bar { type Baz: ?Sized; } 61 | 62 | struct Bleh where T: Copy, U: Sized; 63 | 64 | impl Foo { 65 | const ID: i32 = 1; 66 | fn area(&self) -> f64 { 1f64 } 67 | type N = i32; 68 | foo!(); 69 | } 70 | impl Trait for Foo { } 71 | impl !Trait for Foo { } 72 | 73 | macro_rules! foo { } 74 | foo!(); 75 | 76 | enum Foo { 77 | Baz { 78 | foo: i32, 79 | bar: (), 80 | }, 81 | Bar(i32, i32), 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /sample-sources/let.rs: -------------------------------------------------------------------------------- 1 | #![no_std = "hi"] 2 | #![no_std("hi",cfg = "there")] 3 | fn main() { 4 | let x = 1; 5 | let y: () = (); 6 | let z: i32 = 1i32; 7 | let mut w: i32; 8 | let mut u; 9 | let mut v = 1; 10 | } 11 | -------------------------------------------------------------------------------- /sample-sources/literals.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Numeric 3 | 1; 4 | 1isize; 5 | 1i8; 6 | 1i16; 7 | 1i32; 8 | 1i64; 9 | 1i128; 10 | 1usize; 11 | 1u8; 12 | 1u16; 13 | 1u32; 14 | 1u64; 15 | 1u128; 16 | 1f32; 17 | 1f64; 18 | 19 | // Strings 20 | "hello world"; 21 | "hello 22 | world"; 23 | "hello\ 24 | world"; 25 | r"hello world"; 26 | r"hello 27 | world"; 28 | r"hello\ 29 | world"; 30 | r#"hello " world"#; 31 | b"hello world"; 32 | b"hello 33 | world"; 34 | b"hello\ 35 | world"; 36 | br"hello world"; 37 | br#"hello " world"#; 38 | 39 | // Booleans, Characters, and Bytes 40 | 'a'; 41 | '\n'; 42 | b'a'; 43 | true; 44 | false; 45 | } 46 | 
-------------------------------------------------------------------------------- /sample-sources/macros.rs: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | // Every token 4 | token!{ 5 | [= < > & | ! ~ + - * / % ^] 6 | (>= >>= && || << >> == != <= <<= -= &= |= += *= /= ^= %=) 7 | {@ . .. ... , ; : :: -> <- => # $ ?} 8 | ( ) [ ] { } 1.0foo x _ 'lt #! $x 9 | /*! inner doc comment */ /// outer doc comment 10 | } 11 | 12 | // Check new way of tokenization 13 | tokentrees!{ 14 | $x 15 | $x:ty 16 | $($xs:expr),+ 17 | $(1+2),+ 18 | br##"hello "# 19 | world!"###suf 20 | } 21 | 22 | // literals 23 | literals!{ 24 | characters_bytes!{ 25 | b'a' b'\n' b'a'suffix 26 | 'a' '\n' 'a'suffix 27 | } 28 | 29 | integral_numbers!{ 30 | 123 123i32 31 | 0b1100_1101 0b1100_1101isize 32 | 0o3170 0o3170i64 33 | 0xAFAC 0xAFACu32 34 | } 35 | 36 | float_numbers!{ 37 | 123.1 123.1f32 38 | 123.f32 // Gets parsed as [integer token "123", ".", ident token "f32"] 39 | 123.0f32 40 | 0e+10 41 | 00e+10 42 | 9e+10 43 | 123e-9f32 44 | } 45 | 46 | strings!{ 47 | "hello \n world!" 48 | 49 | "hello \n world!"suffix 50 | 51 | r"hello 52 | world!" 53 | 54 | r"hello 55 | world!"suffix 56 | 57 | b"hello \n world!" 58 | 59 | b"hello \n world!"suffix 60 | 61 | br"hello 62 | world!" 63 | 64 | br"hello 65 | world!"suffix 66 | 67 | 68 | "hello \ 69 | world!" 70 | 71 | b"hello \ 72 | world!" 
73 | 74 | br##"hello "# 75 | world!"##suf 76 | } 77 | } 78 | 79 | fn main() { 80 | print!("{}\n", 9e+1); 81 | print!("{}\n", 0.1e+1); 82 | print!("{}\n", 00e+1); 83 | print!("{}\n", 001e+1); 84 | print!("{}\n", 123.); 85 | print!("{}\n", 123.0f32); 86 | print!("{}\n", 0e+1); 87 | print!("{}\n", 123.0__9); 88 | print!("{}\n", 123e-4f32); 89 | 90 | // Not what they look like, see above 91 | print!("{}\n", 123.f32); 92 | } 93 | 94 | -------------------------------------------------------------------------------- /sample-sources/patterns.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | match 0 { 3 | _ => 0, 4 | x => 0, 5 | &x => 0, 6 | mut x => 0, 7 | &mut x => 0, 8 | Person { age, name } => 0, 9 | Point { age: 0, .. } => 0, 10 | Point(x, y, z) => 0, 11 | Point(x, y, .., z) => 0, 12 | std::math::pi => 0, 13 | ::math::e => 0, 14 | (x, y, z) => 0, 15 | (x, y, .., z) => 0, 16 | box x => 0, 17 | &mut (a,b) => 0, 18 | 0 => 0, 19 | 0...1 => 0, 20 | [a, b, i.., y, z] => 0, 21 | [a, b, .., y, z] => 0, 22 | [a, b, c] => 0, 23 | LinkedList!(1,2,3) => 0, 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /sample-sources/precedences.rs: -------------------------------------------------------------------------------- 1 | // See 2 | 3 | fn main() { 4 | a /= - b ? ^= * - c >= - * d [0] ? == box & ! e ? [0] .0 ? [0] .0 ?; 5 | - a == box b [0] [0] [0] .0 || c [0] ^= d % e = f != g; 6 | & a *= ! b ^= c * d | e [0]; 7 | a ^ b < * c ? || box d ? %= e [0] = - f < box g % & h &= i ? % & & & j ?; 8 | a &= - box b ? >> c [0] <= d || & - e ^= f = & g .0 ? [0] - h .0 .0 += i [0] << j ?; 9 | a >> & b && - box c >= d .. e .0 [0] + ! f * g >= h && & - i [0] * j; 10 | * a > b *= c * d && e ^ - & f ? *= g %= h .0 [0] ? & i -= j ?; 11 | * a >> b %= c .0 != * - d [0] || e <= f << g .0 | ! h += - & i >= j .0; 12 | a && ! * box b [0] >> - c || * d ^ * e >= f >= g /= ! h = i [0] ... j; 13 | .. 
a + box b ? [0] * c ? .0 ^ d + & f <= g [0] * h && i + j [0] ?; 14 | a * b &= c & * d ? |= & ! e [0] != & f .0 = g >> ! ! * - h ? ^ i ? %= - j; 15 | & a != b [0] % c - & & ! - box - d & e [0] += g ? << h /= i <<= .. .. j; 16 | a [0] >>= b | & ! c / ! & d [0] ? / box e .. - f * * & * - g < h && j; 17 | a & b - c *= &&*d? << *e.0?.0 ^= !f?[0].0 >> -g - h .. i??[0] | j; 18 | -a[0] - b[0][0]??.0 != c -= d || e[0][0] *= &f = !!g[0] / h ^ i | box!j[0]; 19 | a != -b.0 - box!c.0?.0 *= -e? <= *-f.0 ^= g >> box box h + i + j; 20 | !&&-box a??.0 >> b |= c[0]? ^ d * e | &*f[0] <= box -g? ... &h * &i - j[0][0]; 21 | a & b &= c[0] <= d + -!-box*e[0]?[0] % f.0? + -g >>= h[0] /= i = j; 22 | a[0][0] >>= *b.0 .0? / c ^ d >>= !e >= box f ... g >= h + i? || j; 23 | !&a? |= -b >= c / *-d? *= e % f += !box g.0? != &-*h? + i.0 / j; 24 | a? - b? | c * *-box box box*box d? .0 .0 %= !!!e? &= -box!f.0 += g - h /= i && j; 25 | *box box!a -= box-b.0 % c &= d % e? ^= !&f[0]? != g > h & i /= j; 26 | box---!a.0 ^= b = -&box c? > d << e.0 >= f[0] + g -= h >>= i * j; 27 | a .0 .0 >>= -b >= -box*c || d? &= box&-e | f << g * h[0][0] |= i * j; 28 | box-a / b != c -= d == &e.0 >>= f - *g[0] %= h & i << &-j??; 29 | !&-a.0 == b *= c.0 <<= --d && e? ^= !f <<= g[0] > h += -i >>= j; 30 | return box break 'lt 1? + 2?; 31 | break 'lt box return 1? + 2?; 32 | } 33 | 34 | struct Point { x: i32 } 35 | fn range_expressions() { 36 | 37 | // general expression 38 | let _ = ..Point { x: 1 }; 39 | let _ = ..{}; 40 | let _ = |x| { 1 }; 41 | 42 | // non-block expression 43 | ..Point { x: 1 }; 44 | ..{}; 45 | - { 1 }; 46 | return |x: i32| x + 1; 47 | box 1 + 2; 48 | |x| { 1 }; 49 | x || y(); 50 | x && y(); 51 | 52 | // block expression / starting with block statement 53 | { 1 }?.0 + 1; 54 | if true { 1 } else { 2 }.toString(); 55 | if true { 1 }.toString(); 56 | { x }[2]?.foo * { 3 }; 57 | 58 | // no struct expression 59 | for x in 1.. { } 60 | for x in ..1 { } 61 | for x in ..Point{} { } 62 | for x in .. 
{ } 63 | for x in {}.. { } 64 | for x in |x| { 1 } { } 65 | 66 | // no struct/block expression (to the right of '..') 67 | for x in ..1 { } 68 | for x in ..1 + 1 { } 69 | for x in ..1 + { 2 } { } 70 | for x in ..|x| { 1 } { } 71 | 72 | // precedences of ranges in general 73 | fn general_ranges() { 74 | let _ = ..; 75 | let _ = a - c .. 3; 76 | let _ = a - c == 3; 77 | 78 | let _ = 1 + 2..; 79 | let _ = 1 == 2..; 80 | let _ = 1 == 2..3; 81 | let _ = 2..3 == 1; 82 | let _ = ..1 + 2; 83 | let _ = ..1 == 2; 84 | let _ = 1 == ..2; 85 | // let _ = 1.. == 2; Shouldn't parse according to rustc 86 | 87 | let _ = .. - 1; 88 | let _ = - 1 .. 2; 89 | let _ = .. box 1; 90 | let _ = .. .. 1; 91 | let _ = 1.. ..; 92 | let _ = 2 & ..2 + ..3; 93 | } 94 | 95 | } 96 | 97 | 98 | -------------------------------------------------------------------------------- /sample-sources/statement-expressions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | 3 | { 1 }[1]; // !!! Parses as { 1 }; [1]; 4 | { 1 }(0); // !!! Parses as { 1 }; (0); 5 | 6 | { 1 }.foo; // Parses as a field access 7 | { 1 }.foo(0); // Parses as a method call 8 | { 1 }.0; // Parses as a tup field access 9 | { 1 }?; // Parses as a try 10 | 11 | 12 | { 1 }? 
+ 1; // SHOULD WORK 13 | { 1 }[0] + 1; // SHOULD WORK 14 | { 1 }(0,1,2) + 1; // SHOULD WORK 15 | { 1 }.foo(0,1,2) + 1; // SHOULD WORK 16 | { 1 }.foo + 1; // SHOULD WORK 17 | { 1 }.0 + 1; // SHOULD WORK 18 | 19 | // { 1 } as i32 + 1; // SHOULD NOT WORK 20 | // { 1 } + 1; // SHOULD NOT WORK 21 | 22 | { 1 }[1]; 23 | { 1 }(); 24 | { 1 }.bar; 25 | { 1 }.bar(); 26 | { 1 }.0; 27 | 28 | 29 | if true { 1 } [1]; 30 | if true { 1 } (); 31 | if true { 1 } .bar; 32 | if true { 1 } .bar(); 33 | 34 | if true { 1 } else { 2 }[1]; 35 | if true { 1 } else { 2 }(); 36 | if true { 1 } else { 2 }.bar; 37 | if true { 1 } else { 2 }.bar(); 38 | 39 | 40 | loop { 1 } [1]; 41 | loop { 1 } (); 42 | loop { 1 } .bar; 43 | loop { 1 } .bar(); 44 | 45 | match true { 46 | // true => { 1 } + 2, // SHOULD NOT WORK 47 | true => { 1 }? + 2, // SHOULD WORK 48 | false => 1, 49 | true => move | | { 1 }, 50 | false => 1 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /sample-sources/statements.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Local 3 | #[d = 2] 4 | let x = 1; 5 | 6 | // Item 7 | fn foo() { return 1 } 8 | 9 | // Empty 10 | ;;; 11 | 12 | // NoSemi 13 | if true { 14 | foo() 15 | } 16 | let b = { let y = 3; y }; 17 | 18 | // Semi 19 | 2 + { 1 }; 20 | 21 | // Mac 22 | println!("hi") 23 | } 24 | -------------------------------------------------------------------------------- /sample-sources/types.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x: [i32]; 3 | let x: [i32; 128]; 4 | let x: *mut i32; 5 | let x: *const i32; 6 | let x: &'a mut i32; 7 | let x: &mut i32; 8 | let x: &'a i32; 9 | let x: &i32; 10 | let x: fn() -> i32; 11 | let x: fn(i32) -> i32; 12 | let x: fn(i32,i32); 13 | let x: !; 14 | let x: (i32,); 15 | let x: (i32,!); 16 | let x: i32; 17 | let x: T; 18 | let x: as SomeTrait>::SomeType; 19 | let x: Bound1 + Bound2 
+ 'static; 20 | let x: impl Bound1 + Bound2 + 'static; 21 | let x: dyn Bound1 + Bound2 + 'static; 22 | let x: dyn for<'a> Debug; 23 | let x: (i32); 24 | let x: typeof(1i32); 25 | let x: _; 26 | let x: HList![i32,(),u8]; 27 | } 28 | 29 | fn foo() -> impl Bound1 + Bound2 + Bound3 { } 30 | 31 | pub fn walk(x: i32, it: &mut F) -> bool 32 | where 33 | F: FnMut(&Pat) -> bool, 34 | 'a: 'b + 'c, 35 | F = i32, 36 | { 37 | foo::<'a,A,B,C=i32> 38 | } 39 | -------------------------------------------------------------------------------- /src/Language/Rust/Data/Ident.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Data.Ident 3 | Description : Identifiers 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : portable 9 | 10 | Data structure behind identifiers. 11 | -} 12 | {-# LANGUAGE DeriveDataTypeable #-} 13 | {-# LANGUAGE DeriveGeneric #-} 14 | {-# LANGUAGE DeriveAnyClass #-} 15 | 16 | module Language.Rust.Data.Ident (Ident(..), mkIdent, Name) where 17 | 18 | import GHC.Generics ( Generic ) 19 | 20 | import Control.DeepSeq ( NFData ) 21 | import Data.Data ( Data ) 22 | import Data.Typeable ( Typeable ) 23 | 24 | import Data.List ( foldl' ) 25 | import Data.Char ( ord ) 26 | import Data.String ( IsString(..) ) 27 | import Data.Semigroup as Sem 28 | 29 | -- | An identifier 30 | data Ident 31 | = Ident { name :: Name -- ^ payload of the identifier 32 | , raw :: Bool -- ^ whether the identifier is raw 33 | , hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparison 34 | } deriving (Data, Typeable, Generic, NFData) 35 | 36 | -- | Shows the identifier as a string (for use with @-XOverloadedStrings@) 37 | instance Show Ident where 38 | show = show . 
name 39 | 40 | instance IsString Ident where 41 | fromString = mkIdent 42 | 43 | -- | Uses 'hash' to short-circuit 44 | instance Eq Ident where 45 | i1 == i2 = hash i1 == hash i2 && name i1 == name i2 && raw i1 == raw i2 46 | i1 /= i2 = hash i1 /= hash i2 || name i1 /= name i2 || raw i1 /= raw i2 47 | 48 | -- | Uses 'hash' to short-circuit: orders by 'hash' first and only falls back to comparing @(raw, name)@ when the hashes are equal, so the ordering is consistent with '==' but is not lexicographic 49 | instance Ord Ident where 50 | compare i1 i2 = case compare (hash i1) (hash i2) of 51 | EQ -> compare (raw i1, name i1) (raw i2, name i2) 52 | rt -> rt 53 | 54 | -- | "Forgets" about whether either argument was raw 55 | instance Monoid Ident where 56 | mappend = (<>) 57 | mempty = mkIdent "" 58 | 59 | -- | "Forgets" about whether either argument was raw 60 | instance Sem.Semigroup Ident where 61 | Ident n1 _ _ <> Ident n2 _ _ = mkIdent (n1 <> n2) 62 | 63 | 64 | -- | Smart constructor for making an 'Ident'. 65 | mkIdent :: String -> Ident 66 | mkIdent s = Ident s False (hashString s) 67 | 68 | -- | Hash a string into an 'Int' 69 | hashString :: String -> Int 70 | hashString = foldl' f golden 71 | where f m c = fromIntegral (ord c) * magic + m 72 | magic = 0xdeadbeef 73 | golden = 1013904242 74 | 75 | -- | The payload of an identifier 76 | type Name = String 77 | 78 | -------------------------------------------------------------------------------- /src/Language/Rust/Data/InputStream.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Data.InputStream 3 | Description : Interface to the underlying input of parsing 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : portable 9 | 10 | These are the only functions that need to be implemented in order to use the parser. Whether this 11 | wraps 'BS.ByteString' or 'String' depends on whether the @useByteStrings@ option is on or not (it is 12 | by default). 
Using 'BS.ByteString' means better handling of weird characters ('takeByte' for plain 13 | 'String' fails badly if you try to take a byte that doesn't fall on a character boundary), but it 14 | means incurring a dependency on the [utf8-string](https://hackage.haskell.org/package/utf8-string) 15 | package. 16 | -} 17 | {-# LANGUAGE CPP #-} 18 | 19 | module Language.Rust.Data.InputStream ( 20 | -- * InputStream type 21 | InputStream, 22 | countLines, 23 | inputStreamEmpty, 24 | 25 | -- * Introduction forms 26 | readInputStream, 27 | hReadInputStream, 28 | inputStreamFromString, 29 | 30 | -- * Elimination forms 31 | inputStreamToString, 32 | takeByte, 33 | takeChar, 34 | peekChars, 35 | ) where 36 | 37 | import Data.Word ( Word8 ) 38 | import Data.Coerce ( coerce ) 39 | import Data.String ( IsString(..) ) 40 | import System.IO 41 | 42 | #ifdef USE_BYTESTRING 43 | import qualified Data.ByteString as BS 44 | import qualified Data.ByteString.UTF8 as BE 45 | #else 46 | import qualified Data.Char as Char 47 | #endif 48 | 49 | -- | Read an encoded file into an 'InputStream' 50 | readInputStream :: FilePath -> IO InputStream 51 | {-# INLINE readInputStream #-} 52 | 53 | -- | Read an 'InputStream' from a 'Handle' 54 | hReadInputStream :: Handle -> IO InputStream 55 | {-# INLINE hReadInputStream #-} 56 | 57 | -- | Convert 'InputStream' to 'String'. 58 | inputStreamToString :: InputStream -> String 59 | {-# INLINE inputStreamToString #-} 60 | 61 | -- | Convert a 'String' to an 'InputStream'. 62 | inputStreamFromString :: String -> InputStream 63 | {-# INLINE inputStreamFromString #-} 64 | 65 | -- | Uses 'inputStreamFromString' 66 | instance IsString InputStream where fromString = inputStreamFromString 67 | 68 | -- | Read the first byte from an 'InputStream' and return that byte with what remains of the 69 | -- 'InputStream'. Behaviour is undefined when 'inputStreamEmpty' returns 'True'. 
70 | -- 71 | -- >>> takeByte "foo bar" 72 | -- (102, "oo bar") 73 | -- 74 | -- >>> takeByte "Ĥăƨĸëļļ" 75 | -- (196, "\ETX\168\&8\235<<") 76 | -- 77 | takeByte :: InputStream -> (Word8, InputStream) 78 | {-# INLINE takeByte #-} 79 | 80 | -- | Read the first character from an 'InputStream' and return that 'Char' with what remains of the 81 | -- 'InputStream'. Behaviour is undefined when 'inputStreamEmpty' returns 'True'. 82 | -- 83 | -- >>> takeChar "foo bar" 84 | -- ('f', "oo bar") 85 | -- 86 | -- >>> takeChar "Ĥăƨĸëļļ" 87 | -- ('Ĥ', "ăƨĸëļļ") 88 | -- 89 | takeChar :: InputStream -> (Char, InputStream) 90 | {-# INLINE takeChar #-} 91 | 92 | -- | Return @True@ if the given input stream is empty. 93 | -- 94 | -- >>> inputStreamEmpty "" 95 | -- True 96 | -- 97 | -- >>> inputStreamEmpty "foo" 98 | -- False 99 | -- 100 | inputStreamEmpty :: InputStream -> Bool 101 | {-# INLINE inputStreamEmpty #-} 102 | 103 | -- | Returns the first @n@ characters of the given input stream, without removing them. 104 | -- 105 | -- >>> peekChars 5 "foo bar" 106 | -- "foo b" 107 | -- 108 | -- >>> peekChars 5 "foo" 109 | -- "foo" 110 | -- 111 | -- >>> peekChars 3 "Ĥăƨĸëļļ" 112 | -- "Ĥăƨ" 113 | -- 114 | peekChars :: Int -> InputStream -> String 115 | {-# INLINE peekChars #-} 116 | 117 | -- | Returns the number of text lines in the given 'InputStream' 118 | -- 119 | -- >>> countLines "" 120 | -- 0 121 | -- 122 | -- >>> countLines "foo" 123 | -- 1 124 | -- 125 | -- >>> countLines "foo\n\nbar" 126 | -- 3 127 | -- 128 | -- >>> countLines "foo\n\nbar\n" 129 | -- 3 130 | -- 131 | countLines :: InputStream -> Int 132 | {-# INLINE countLines #-} 133 | 134 | #ifdef USE_BYTESTRING 135 | 136 | -- | Opaque input type. 137 | newtype InputStream = IS BS.ByteString deriving (Eq, Ord) 138 | takeByte bs = (BS.head (coerce bs), coerce (BS.tail (coerce bs))) 139 | takeChar bs = maybe (error "takeChar: no char left") coerce (BE.uncons (coerce bs)) 140 | inputStreamEmpty = BS.null . 
coerce 141 | peekChars n = BE.toString . BE.take n . coerce 142 | readInputStream f = coerce <$> BS.readFile f 143 | hReadInputStream h = coerce <$> BS.hGetContents h 144 | inputStreamToString = BE.toString . coerce 145 | inputStreamFromString = IS . BE.fromString 146 | countLines = length . BE.lines . coerce 147 | 148 | instance Show InputStream where 149 | show (IS bs) = show bs 150 | 151 | #else 152 | 153 | -- | Opaque input type. 154 | newtype InputStream = IS String deriving (Eq, Ord) 155 | takeByte (IS ~(c:str)) 156 | | Char.isLatin1 c = let b = fromIntegral (Char.ord c) in b `seq` (b, IS str) 157 | | otherwise = error "takeByte: not a latin-1 character" 158 | takeChar (IS ~(c:str)) = (c, IS str) 159 | inputStreamEmpty (IS str) = null str 160 | peekChars n (IS str) = take n str 161 | readInputStream f = IS <$> readFile f 162 | hReadInputStream h = IS <$> hGetContents h 163 | inputStreamToString = coerce 164 | inputStreamFromString = IS 165 | countLines (IS str) = length . lines $ str 166 | 167 | instance Show InputStream where 168 | show (IS bs) = show bs 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /src/Language/Rust/Data/Position.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Data.Position 3 | Description : Positions and spans in files 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : GHC 9 | 10 | Everything to do with describing a position or a contiguous region in a file. 
11 | -} 12 | {-# LANGUAGE CPP #-} 13 | {-# LANGUAGE DeriveDataTypeable #-} 14 | {-# LANGUAGE DeriveGeneric #-} 15 | {-# LANGUAGE DeriveAnyClass #-} 16 | 17 | module Language.Rust.Data.Position ( 18 | -- * Positions in files 19 | Position(..), 20 | prettyPosition, 21 | maxPos, 22 | minPos, 23 | initPos, 24 | incPos, 25 | retPos, 26 | incOffset, 27 | 28 | -- * Spans in files 29 | Span(..), 30 | unspan, 31 | prettySpan, 32 | subsetOf, 33 | (#), 34 | Spanned(..), 35 | Located(..), 36 | ) where 37 | 38 | import GHC.Generics ( Generic ) 39 | 40 | import Control.DeepSeq ( NFData ) 41 | import Data.Data ( Data ) 42 | import Data.Typeable ( Typeable ) 43 | 44 | import Data.List.NonEmpty ( NonEmpty(..) ) 45 | import Data.Monoid as Mon 46 | import Data.Semigroup as Sem 47 | 48 | 49 | -- | A position in a source file. The row and column information is kept only for its convenience 50 | -- and human-readability. Analogous to the information encoded in a cursor. 51 | data Position = Position { 52 | absoluteOffset :: {-# UNPACK #-} !Int, -- ^ absolute offset in the source file. 53 | row :: {-# UNPACK #-} !Int, -- ^ row (line) in the source file. 54 | col :: {-# UNPACK #-} !Int -- ^ column in the source file. 55 | } 56 | | NoPosition 57 | deriving (Eq, Ord, Data, Typeable, Generic, NFData) 58 | 59 | -- | Field names are not shown 60 | instance Show Position where 61 | showsPrec _ NoPosition = showString "NoPosition" 62 | showsPrec p (Position a r c) = showParen (p >= 11) 63 | ( showString "Position" 64 | . showString " " . showsPrec 11 a 65 | . showString " " . showsPrec 11 r 66 | . showString " " . showsPrec 11 c ) 67 | 68 | -- | Pretty print a 'Position' 69 | prettyPosition :: Position -> String 70 | prettyPosition NoPosition = "???" 71 | prettyPosition (Position _ r c) = show r ++ ":" ++ show c 72 | 73 | -- | Maximum of two positions, bias for actual positions. 
74 | -- 75 | -- >>> maxPos (Position 30 5 8) (Position 37 5 15) 76 | -- Position 37 5 15 77 | -- 78 | -- >>> maxPos NoPosition (Position 30 5 8) 79 | -- Position 30 5 8 80 | -- 81 | {-# INLINE maxPos #-} 82 | maxPos :: Position -> Position -> Position 83 | maxPos NoPosition p2 = p2 84 | maxPos p1 NoPosition = p1 85 | maxPos p1@(Position a1 _ _) p2@(Position a2 _ _) = if a1 > a2 then p1 else p2 86 | 87 | -- | Minimum of two positions, bias for actual positions. 88 | -- 89 | -- >>> minPos (Position 30 5 8) (Position 37 5 15) 90 | -- Position 30 5 8 91 | -- 92 | -- >>> minPos NoPosition (Position 30 5 8) 93 | -- Position 30 5 8 94 | -- 95 | {-# INLINE minPos #-} 96 | minPos :: Position -> Position -> Position 97 | minPos NoPosition p2 = p2 98 | minPos p1 NoPosition = p1 99 | minPos p1@(Position a1 _ _) p2@(Position a2 _ _) = if a1 < a2 then p1 else p2 100 | 101 | -- | Starting position in a file. 102 | {-# INLINE initPos #-} 103 | initPos :: Position 104 | initPos = Position 0 1 0 105 | 106 | -- | Advance both the column and the absolute offset by the given amount, staying on the same row. 107 | {-# INLINE incPos #-} 108 | incPos :: Position -> Int -> Position 109 | incPos NoPosition _ = NoPosition 110 | incPos p@Position{ absoluteOffset = a, col = c } offset = p { absoluteOffset = a + offset, col = c + offset } 111 | 112 | -- | Advance to the next line. 113 | {-# INLINE retPos #-} 114 | retPos :: Position -> Position 115 | retPos NoPosition = NoPosition 116 | retPos (Position a r _) = Position { absoluteOffset = a + 1, row = r + 1, col = 1 } 117 | 118 | -- | Advance only the absolute offset, not the row and column information. Only use this if you 119 | -- know what you are doing! 120 | {-# INLINE incOffset #-} 121 | incOffset :: Position -> Int -> Position 122 | incOffset NoPosition _ = NoPosition 123 | incOffset p@Position{ absoluteOffset = a } offset = p { absoluteOffset = a + offset } 124 | 125 | -- | Spans represent a contiguous region of code, delimited by two 'Position's. 
The endpoints are 126 | inclusive. Analogous to the information encoded in a selection. 127 | data Span = Span { lo, hi :: !Position } 128 | deriving (Eq, Ord, Data, Typeable, Generic, NFData) 129 | 130 | -- | Field names are not shown 131 | instance Show Span where 132 | showsPrec p (Span l h) = showParen (p >= 11) 133 | ( showString "Span" 134 | . showString " " . showsPrec 11 l 135 | . showString " " . showsPrec 11 h ) 136 | 137 | 138 | -- | Check if the first span is a subset of the second span, i.e. it starts no earlier and ends no later than the second one 139 | subsetOf :: Span -> Span -> Bool 140 | Span l1 h1 `subsetOf` Span l2 h2 = minPos l1 l2 == l2 && maxPos h1 h2 == h2 141 | 142 | -- | Convenience function lifting 'Mon.<>' to work on all 'Located' things 143 | {-# INLINE (#) #-} 144 | (#) :: (Located a, Located b) => a -> b -> Span 145 | left # right = spanOf left Mon.<> spanOf right 146 | 147 | -- | smallest covering 'Span' 148 | instance Sem.Semigroup Span where 149 | {-# INLINE (<>) #-} 150 | Span l1 h1 <> Span l2 h2 = Span (l1 `minPos` l2) (h1 `maxPos` h2) 151 | 152 | instance Mon.Monoid Span where 153 | {-# INLINE mempty #-} 154 | mempty = Span NoPosition NoPosition 155 | 156 | {-# INLINE mappend #-} 157 | mappend = (Sem.<>) 158 | 159 | -- | Pretty print a 'Span' 160 | prettySpan :: Span -> String 161 | prettySpan (Span lo' hi') = show lo' ++ " - " ++ show hi' 162 | 163 | -- | A "tagging" of something with a 'Span' that describes its extent. 
164 | data Spanned a = Spanned a {-# UNPACK #-} !Span 165 | deriving (Eq, Ord, Data, Typeable, Generic, NFData) 166 | 167 | -- | Extract the wrapped value from 'Spanned' 168 | {-# INLINE unspan #-} 169 | unspan :: Spanned a -> a 170 | unspan (Spanned x _) = x 171 | 172 | instance Functor Spanned where 173 | {-# INLINE fmap #-} 174 | fmap f (Spanned x s) = Spanned (f x) s 175 | 176 | instance Applicative Spanned where 177 | {-# INLINE pure #-} 178 | pure x = Spanned x mempty 179 | 180 | {-# INLINE (<*>) #-} 181 | Spanned f s1 <*> Spanned x s2 = Spanned (f x) (s1 Sem.<> s2) 182 | 183 | instance Monad Spanned where 184 | return = pure 185 | Spanned x s1 >>= f = let Spanned y s2 = f x in Spanned y (s1 Sem.<> s2) 186 | 187 | instance Show a => Show (Spanned a) where 188 | show = show . unspan 189 | 190 | 191 | -- | Describes nodes that can be located - their span can be extracted from them. In general, we 192 | -- expect that for a value constructed as @Con x y z@ where @Con@ is an arbitrary constructor 193 | -- 194 | -- prop> (spanOf x <> spanOf y <> spanOf z) `subsetOf` spanOf (Con x y z) == True 195 | -- 196 | class Located a where 197 | spanOf :: a -> Span 198 | 199 | instance Located Span where 200 | {-# INLINE spanOf #-} 201 | spanOf = id 202 | 203 | instance Located (Spanned a) where 204 | {-# INLINE spanOf #-} 205 | spanOf (Spanned _ s) = s 206 | 207 | instance Located a => Located (Maybe a) where 208 | {-# INLINE spanOf #-} 209 | spanOf = foldMap spanOf 210 | 211 | -- | /O(n)/ time complexity 212 | instance Located a => Located [a] where 213 | {-# INLINE spanOf #-} 214 | spanOf = foldMap spanOf 215 | 216 | -- | /O(n)/ time complexity 217 | instance Located a => Located (NonEmpty a) where 218 | {-# INLINE spanOf #-} 219 | spanOf = foldMap spanOf 220 | 221 | 222 | -------------------------------------------------------------------------------- /src/Language/Rust/Parser.hs: -------------------------------------------------------------------------------- 1 | {-| 2 
| Module : Language.Rust.Parser 3 | Description : Parsing and lexing 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : GHC 9 | 10 | Selecting the right parser may require adding an annotation or using @-XTypeApplications@ to avoid 11 | an "Ambiguous type variable" error. 12 | 13 | Using 'Control.Monad.void' (as in the examples below) exploits the fact that most AST nodes are 14 | instances of 'Functor' to discard the 'Span' annotation that is attached to most parsed AST nodes. 15 | Conversely, if you wish to extract the 'Span' annotation, the 'Language.Rust.Syntax.AST.Located' 16 | typeclass provides a 'Language.Rust.Syntax.AST.spanOf' method. 17 | 18 | The examples below assume the following GHCi flags and imports: 19 | 20 | >>> :set -XTypeApplications -XOverloadedStrings 21 | >>> import Language.Rust.Syntax.AST 22 | >>> import Control.Monad ( void ) 23 | >>> import System.IO 24 | 25 | -} 26 | {-# LANGUAGE FlexibleInstances #-} 27 | 28 | module Language.Rust.Parser ( 29 | -- * Parsing 30 | parse, 31 | parse', 32 | readSourceFile, 33 | readTokens, 34 | Parse(..), 35 | P, 36 | execParser, 37 | execParserTokens, 38 | initPos, 39 | Span, 40 | 41 | -- * Lexing 42 | lexToken, 43 | lexNonSpace, 44 | lexTokens, 45 | translateLit, 46 | 47 | -- * Input stream 48 | readInputStream, 49 | hReadInputStream, 50 | inputStreamToString, 51 | inputStreamFromString, 52 | 53 | -- * Error reporting 54 | lexicalError, 55 | parseError, 56 | ParseFail(..), 57 | ) where 58 | 59 | import Language.Rust.Syntax 60 | 61 | import Language.Rust.Data.InputStream 62 | import Language.Rust.Data.Position ( Position, Span, Spanned, initPos ) 63 | 64 | import Language.Rust.Parser.Internal 65 | import Language.Rust.Parser.Lexer ( lexToken, lexNonSpace, lexTokens, lexicalError ) 66 | import Language.Rust.Parser.Literals ( translateLit ) 67 | import Language.Rust.Parser.ParseMonad ( P, execParser, 
parseError, pushToken, ParseFail(..) )

import Control.Exception ( throw, throwIO )
import Data.Foldable ( traverse_ )
import System.IO ( Handle )

-- | Parse something from an input stream (it is assumed the initial position is 'initPos').
--
-- >>> fmap void $ parse @(Expr Span) "x + 1"
-- Right (Binary [] AddOp (PathExpr [] Nothing (Path False [PathSegment "x" Nothing ()] ()) ())
--                        (Lit [] (Int Dec 1 Unsuffixed ()) ())
--                        ())
--
-- >>> fmap void $ parse @(Expr Span) "x + "
-- Left (parse failure at 1:4 (Syntax error: unexpected `<EOF>' (expected an expression)))
--
parse :: Parse a => InputStream -> Either ParseFail a
parse is = execParser parser is initPos

-- | Same as 'parse', but throws a 'ParseFail' exception if it cannot parse. This function is
-- intended for situations in which you are already stuck catching exceptions - otherwise you should
-- prefer 'parse'.
--
-- Because this is a pure function, the exception is only raised when the result is demanded.
--
-- >>> void $ parse' @(Expr Span) "x + 1"
-- Binary [] AddOp (PathExpr [] Nothing (Path False [PathSegment "x" Nothing ()] ()) ())
--                 (Lit [] (Int Dec 1 Unsuffixed ()) ())
--                 ()
--
-- >>> void $ parse' @(Expr Span) "x + "
-- *** Exception: parse failure at 1:4 (Syntax error: unexpected `<EOF>' (expected an expression))
--
parse' :: Parse a => InputStream -> a
parse' = either throw id . parse

-- | Same as 'execParser', but working from a list of tokens instead of an 'InputStream'.
--
-- The tokens are pushed in reverse order before the parser runs, and the parser is given an empty
-- input stream.
execParserTokens :: P a -> [Spanned Token] -> Position -> Either ParseFail a
execParserTokens p toks = execParser (pushTokens toks *> p) (inputStreamFromString "")
  where pushTokens = traverse_ pushToken . reverse

-- | Given a handle to a Rust source file, read that file and parse it into a 'SourceFile'.
--
-- A parse failure is raised as a 'ParseFail' exception from within the 'IO' action itself (via
-- 'throwIO'), so it can be caught by wrapping the call to 'readSourceFile'.
--
-- >>> writeFile "empty_main.rs" "fn main() { }"
-- >>> fmap void $ withFile "empty_main.rs" ReadMode readSourceFile
-- SourceFile Nothing [] [Fn [] InheritedV "main"
--                           (FnDecl [] Nothing False ())
--                           Normal NotConst Rust
--                           (Generics [] [] (WhereClause [] ()) ())
--                           (Block [] Normal ()) ()]
--
readSourceFile :: Handle -> IO (SourceFile Span)
readSourceFile hdl = either throwIO pure . parse =<< hReadInputStream hdl

-- | Given a handle to a Rust source file, read that file and lex it (ignoring whitespace).
--
-- A lexing failure is raised as a 'ParseFail' exception from within the 'IO' action.
--
-- >>> writeFile "empty_main.rs" "fn main() { }"
-- >>> withFile "empty_main.rs" ReadMode readTokens
-- [fn,main,(,),{,}]
--
readTokens :: Handle -> IO [Spanned Token]
readTokens hdl = do
  inp <- hReadInputStream hdl
  either throwIO pure (execParser (lexTokens lexNonSpace) inp initPos)

-- | Describes things that can be parsed
class Parse a where
  -- | Complete parser (fails if not all of the input is consumed)
  parser :: P a

instance Parse (Lit Span)         where parser = parseLit
instance Parse (Attribute Span)   where parser = parseAttr
instance Parse (Ty Span)          where parser = parseTy
instance Parse (Pat Span)         where parser = parsePat
instance Parse (Expr Span)        where parser = parseExpr
instance Parse (Stmt Span)        where parser = parseStmt
instance Parse (Item Span)        where parser = parseItem
instance Parse (SourceFile Span)  where parser = parseSourceFile
instance Parse TokenTree          where parser = parseTt
instance Parse TokenStream        where parser = parseTokenStream
instance Parse (Block Span)       where parser = parseBlock
instance Parse (ImplItem Span)    where parser = parseImplItem
instance Parse (TraitItem Span)
where parser = parseTraitItem 150 | instance Parse (TyParam Span) where parser = parseTyParam 151 | instance Parse (LifetimeDef Span) where parser = parseLifetimeDef 152 | instance Parse (Generics Span) where parser = parseGenerics 153 | instance Parse (WhereClause Span) where parser = parseWhereClause 154 | -------------------------------------------------------------------------------- /src/Language/Rust/Parser/Lexer.x: -------------------------------------------------------------------------------- 1 | { 2 | {-| 3 | Module : Language.Rust.Parser.Lexer 4 | Description : Rust lexer 5 | Copyright : (c) Alec Theriault, 2017-2018 6 | License : BSD-style 7 | Maintainer : alec.theriault@gmail.com 8 | Stability : experimental 9 | Portability : portable 10 | 11 | As much as possible, this follows Rust's choices for tokenization, including punting some things to 12 | the parser. For instance, the last two @>@ in @Vec\<Option\<i32\>\>@ are lexed as a single 13 | 'GreaterGreater' token while the last two tokens of @Vec\<Option\<Option\<i32\>\>\>@ are 14 | 'GreaterGreater' and 'Greater'. 15 | 16 | Yet weirder (but very useful in parsing for dealing with conflicts and precedences of logical and, 17 | bitwise and, and unary reference), @&&&x&&&y@ lexes into 'AmpersandAmpersand', 'Ampersand', 18 | @'IdentTok' "x"@, 'AmpersandAmpersand', 'Ampersand', @'IdentTok' "y"@. Although the parser sometimes 19 | needs to "break apart" tokens, it never has to think about putting them together. That means it can 20 | easily figure out that @&&&x&&&y@ parses as @&(&(&x)) && (&y)@ and not @&(&(&x)) & (&(&y))@ even if 21 | bitwise conjunctions bind more tightly than logical conjunctions.
22 | 23 | This sort of ambiguity, where one token needs to be broken up by the parser, occurs for 24 | 25 | * @&&@ in patterns like @&&mut x@ 26 | * @||@ in closures with no arguments like @|| x@ 27 | * @<<@ in qualified type paths like @FromIterator\<\<A as B\>::Item\>@ 28 | * @>>@ in qualified paths like @\<Self as Foo\<T\>\>::Bar@ 29 | * @>=@ in equality predicates like @F\<A\>=i32@ 30 | * @>>=@ in equality predicates like @F\<B\<T\>\>=i32@ 31 | -} 32 | 33 | module Language.Rust.Parser.Lexer ( 34 | -- * Lexing 35 | lexToken, 36 | lexNonSpace, 37 | lexTokens, 38 | lexShebangLine, 39 | 40 | -- * Tokens 41 | Token(..), 42 | 43 | -- * Error reporting 44 | lexicalError, 45 | ) where 46 | 47 | import Language.Rust.Data.Ident ( mkIdent, Ident(..) ) 48 | import Language.Rust.Data.InputStream 49 | import Language.Rust.Data.Position 50 | import Language.Rust.Parser.ParseMonad 51 | import Language.Rust.Syntax.Token 52 | 53 | import Data.Char ( chr ) 54 | import Data.Word ( Word8 ) 55 | 56 | -- Things to review: 57 | -- * improved error messages 58 | 59 | -- Based heavily on: 60 | -- * 61 | -- * 62 | -- * 63 | 64 | } 65 | 66 | -- XID_START unicode character class 67 | @xid_start 68 | = [\x0041-\x005a] 69 | | "_" 70 | | [\x0061-\x007a] 71 | | \x00aa 72 | | \x00b5 73 | | \x00ba 74 | | [\x00c0-\x00d6] 75 | | [\x00d8-\x00f6] 76 | | [\x00f8-\x0236] 77 | | [\x0250-\x02c1] 78 | | [\x02c6-\x02d1] 79 | | [\x02e0-\x02e4] 80 | | \x02ee 81 | | \x0386 82 | | [\x0388-\x038a] 83 | | \x038c 84 | | [\x038e-\x03a1] 85 | | [\x03a3-\x03ce] 86 | | [\x03d0-\x03f5] 87 | | [\x03f7-\x03fb] 88 | | [\x0400-\x0481] 89 | | [\x048a-\x04ce] 90 | | [\x04d0-\x04f5] 91 | | [\x04f8-\x04f9] 92 | | [\x0500-\x050f] 93 | | [\x0531-\x0556] 94 | | \x0559 95 | | [\x0561-\x0587] 96 | | [\x05d0-\x05ea] 97 | | [\x05f0-\x05f2] 98 | | [\x0621-\x063a] 99 | | [\x0640-\x064a] 100 | | [\x066e-\x066f] 101 | | [\x0671-\x06d3] 102 | | \x06d5 103 | | [\x06e5-\x06e6] 104 | | [\x06ee-\x06ef] 105 | | [\x06fa-\x06fc] 106 | | \x06ff 107 | | \x0710 108 | | [\x0712-\x072f]
109 | | [\x074d-\x074f] 110 | | [\x0780-\x07a5] 111 | | \x07b1 112 | | [\x0904-\x0939] 113 | | \x093d 114 | | \x0950 115 | | [\x0958-\x0961] 116 | | [\x0985-\x098c] 117 | | [\x098f-\x0990] 118 | | [\x0993-\x09a8] 119 | | [\x09aa-\x09b0] 120 | | \x09b2 121 | | [\x09b6-\x09b9] 122 | | \x09bd 123 | | [\x09dc-\x09dd] 124 | | [\x09df-\x09e1] 125 | | [\x09f0-\x09f1] 126 | | [\x0a05-\x0a0a] 127 | | [\x0a0f-\x0a10] 128 | | [\x0a13-\x0a28] 129 | | [\x0a2a-\x0a30] 130 | | [\x0a32-\x0a33] 131 | | [\x0a35-\x0a36] 132 | | [\x0a38-\x0a39] 133 | | [\x0a59-\x0a5c] 134 | | \x0a5e 135 | | [\x0a72-\x0a74] 136 | | [\x0a85-\x0a8d] 137 | | [\x0a8f-\x0a91] 138 | | [\x0a93-\x0aa8] 139 | | [\x0aaa-\x0ab0] 140 | | [\x0ab2-\x0ab3] 141 | | [\x0ab5-\x0ab9] 142 | | \x0abd 143 | | \x0ad0 144 | | [\x0ae0-\x0ae1] 145 | | [\x0b05-\x0b0c] 146 | | [\x0b0f-\x0b10] 147 | | [\x0b13-\x0b28] 148 | | [\x0b2a-\x0b30] 149 | | [\x0b32-\x0b33] 150 | | [\x0b35-\x0b39] 151 | | \x0b3d 152 | | [\x0b5c-\x0b5d] 153 | | [\x0b5f-\x0b61] 154 | | \x0b71 155 | | \x0b83 156 | | [\x0b85-\x0b8a] 157 | | [\x0b8e-\x0b90] 158 | | [\x0b92-\x0b95] 159 | | [\x0b99-\x0b9a] 160 | | \x0b9c 161 | | [\x0b9e-\x0b9f] 162 | | [\x0ba3-\x0ba4] 163 | | [\x0ba8-\x0baa] 164 | | [\x0bae-\x0bb5] 165 | | [\x0bb7-\x0bb9] 166 | | [\x0c05-\x0c0c] 167 | | [\x0c0e-\x0c10] 168 | | [\x0c12-\x0c28] 169 | | [\x0c2a-\x0c33] 170 | | [\x0c35-\x0c39] 171 | | [\x0c60-\x0c61] 172 | | [\x0c85-\x0c8c] 173 | | [\x0c8e-\x0c90] 174 | | [\x0c92-\x0ca8] 175 | | [\x0caa-\x0cb3] 176 | | [\x0cb5-\x0cb9] 177 | | \x0cbd 178 | | \x0cde 179 | | [\x0ce0-\x0ce1] 180 | | [\x0d05-\x0d0c] 181 | | [\x0d0e-\x0d10] 182 | | [\x0d12-\x0d28] 183 | | [\x0d2a-\x0d39] 184 | | [\x0d60-\x0d61] 185 | | [\x0d85-\x0d96] 186 | | [\x0d9a-\x0db1] 187 | | [\x0db3-\x0dbb] 188 | | \x0dbd 189 | | [\x0dc0-\x0dc6] 190 | | [\x0e01-\x0e30] 191 | | \x0e32 192 | | [\x0e40-\x0e46] 193 | | [\x0e81-\x0e82] 194 | | \x0e84 195 | | [\x0e87-\x0e88] 196 | | \x0e8a 197 | | \x0e8d 198 | | [\x0e94-\x0e97] 199 | | 
[\x0e99-\x0e9f] 200 | | [\x0ea1-\x0ea3] 201 | | \x0ea5 202 | | \x0ea7 203 | | [\x0eaa-\x0eab] 204 | | [\x0ead-\x0eb0] 205 | | \x0eb2 206 | | \x0ebd 207 | | [\x0ec0-\x0ec4] 208 | | \x0ec6 209 | | [\x0edc-\x0edd] 210 | | \x0f00 211 | | [\x0f40-\x0f47] 212 | | [\x0f49-\x0f6a] 213 | | [\x0f88-\x0f8b] 214 | | [\x1000-\x1021] 215 | | [\x1023-\x1027] 216 | | [\x1029-\x102a] 217 | | [\x1050-\x1055] 218 | | [\x10a0-\x10c5] 219 | | [\x10d0-\x10f8] 220 | | [\x1100-\x1159] 221 | | [\x115f-\x11a2] 222 | | [\x11a8-\x11f9] 223 | | [\x1200-\x1206] 224 | | [\x1208-\x1246] 225 | | \x1248 226 | | [\x124a-\x124d] 227 | | [\x1250-\x1256] 228 | | \x1258 229 | | [\x125a-\x125d] 230 | | [\x1260-\x1286] 231 | | \x1288 232 | | [\x128a-\x128d] 233 | | [\x1290-\x12ae] 234 | | \x12b0 235 | | [\x12b2-\x12b5] 236 | | [\x12b8-\x12be] 237 | | \x12c0 238 | | [\x12c2-\x12c5] 239 | | [\x12c8-\x12ce] 240 | | [\x12d0-\x12d6] 241 | | [\x12d8-\x12ee] 242 | | [\x12f0-\x130e] 243 | | \x1310 244 | | [\x1312-\x1315] 245 | | [\x1318-\x131e] 246 | | [\x1320-\x1346] 247 | | [\x1348-\x135a] 248 | | [\x13a0-\x13f4] 249 | | [\x1401-\x166c] 250 | | [\x166f-\x1676] 251 | | [\x1681-\x169a] 252 | | [\x16a0-\x16ea] 253 | | [\x16ee-\x16f0] 254 | | [\x1700-\x170c] 255 | | [\x170e-\x1711] 256 | | [\x1720-\x1731] 257 | | [\x1740-\x1751] 258 | | [\x1760-\x176c] 259 | | [\x176e-\x1770] 260 | | [\x1780-\x17b3] 261 | | \x17d7 262 | | \x17dc 263 | | [\x1820-\x1877] 264 | | [\x1880-\x18a8] 265 | | [\x1900-\x191c] 266 | | [\x1950-\x196d] 267 | | [\x1970-\x1974] 268 | | [\x1d00-\x1d6b] 269 | | [\x1e00-\x1e9b] 270 | | [\x1ea0-\x1ef9] 271 | | [\x1f00-\x1f15] 272 | | [\x1f18-\x1f1d] 273 | | [\x1f20-\x1f45] 274 | | [\x1f48-\x1f4d] 275 | | [\x1f50-\x1f57] 276 | | \x1f59 277 | | \x1f5b 278 | | \x1f5d 279 | | [\x1f5f-\x1f7d] 280 | | [\x1f80-\x1fb4] 281 | | [\x1fb6-\x1fbc] 282 | | \x1fbe 283 | | [\x1fc2-\x1fc4] 284 | | [\x1fc6-\x1fcc] 285 | | [\x1fd0-\x1fd3] 286 | | [\x1fd6-\x1fdb] 287 | | [\x1fe0-\x1fec] 288 | | [\x1ff2-\x1ff4] 289 | | 
[\x1ff6-\x1ffc] 290 | | \x2071 291 | | \x207f 292 | | \x2102 293 | | \x2107 294 | | [\x210a-\x2113] 295 | | \x2115 296 | | [\x2118-\x211d] 297 | | \x2124 298 | | \x2126 299 | | \x2128 300 | | [\x212a-\x2131] 301 | | [\x2133-\x2139] 302 | | [\x213d-\x213f] 303 | | [\x2145-\x2149] 304 | | [\x2160-\x2183] 305 | | [\x3005-\x3007] 306 | | [\x3021-\x3029] 307 | | [\x3031-\x3035] 308 | | [\x3038-\x303c] 309 | | [\x3041-\x3096] 310 | | [\x309d-\x309f] 311 | | [\x30a1-\x30fa] 312 | | [\x30fc-\x30ff] 313 | | [\x3105-\x312c] 314 | | [\x3131-\x318e] 315 | | [\x31a0-\x31b7] 316 | | [\x31f0-\x31ff] 317 | | [\x3400-\x4db5] 318 | | [\x4e00-\x9fa5] 319 | | [\xa000-\xa48c] 320 | | [\xac00-\xd7a3] 321 | | [\xf900-\xfa2d] 322 | | [\xfa30-\xfa6a] 323 | | [\xfb00-\xfb06] 324 | | [\xfb13-\xfb17] 325 | | \xfb1d 326 | | [\xfb1f-\xfb28] 327 | | [\xfb2a-\xfb36] 328 | | [\xfb38-\xfb3c] 329 | | \xfb3e 330 | | [\xfb40-\xfb41] 331 | | [\xfb43-\xfb44] 332 | | [\xfb46-\xfbb1] 333 | | [\xfbd3-\xfc5d] 334 | | [\xfc64-\xfd3d] 335 | | [\xfd50-\xfd8f] 336 | | [\xfd92-\xfdc7] 337 | | [\xfdf0-\xfdf9] 338 | | \xfe71 339 | | \xfe73 340 | | \xfe77 341 | | \xfe79 342 | | \xfe7b 343 | | \xfe7d 344 | | [\xfe7f-\xfefc] 345 | | [\xff21-\xff3a] 346 | | [\xff41-\xff5a] 347 | | [\xff66-\xff9d] 348 | | [\xffa0-\xffbe] 349 | | [\xffc2-\xffc7] 350 | | [\xffca-\xffcf] 351 | | [\xffd2-\xffd7] 352 | | [\xffda-\xffdc] 353 | | \xd800 [\xdc00-\xdc0a] 354 | | \xd800 [\xdc0d-\xdc25] 355 | | \xd800 [\xdc28-\xdc39] 356 | | \xd800 [\xdc3c-\xdc3c] 357 | | \xd800 [\xdc3f-\xdc4c] 358 | | \xd800 [\xdc50-\xdc5c] 359 | | \xd800 [\xdc80-\xdcf9] 360 | | \xd800 [\xdf00-\xdf1d] 361 | | \xd800 [\xdf30-\xdf49] 362 | | \xd800 [\xdf80-\xdf9c] 363 | | \xd801 [\xe000-\xe09c] 364 | | \xd802 [\xe400-\xe404] 365 | | \xd802 \x0808 366 | | \xd802 [\xe40a-\xe434] 367 | | \xd802 [\xe437-\xe437] 368 | | \xd802 \x083c 369 | | \xd802 \x083f 370 | | \xd835 [\xb000-\xb053] 371 | | \xd835 [\xb056-\xb09b] 372 | | \xd835 [\xb09e-\xb09e] 373 | | \xd835 \xd4a2 
374 | | \xd835 [\xb0a5-\xb0a5] 375 | | \xd835 [\xb0a9-\xb0ab] 376 | | \xd835 [\xb0ae-\xb0b8] 377 | | \xd835 \xd4bb 378 | | \xd835 [\xb0bd-\xb0c2] 379 | | \xd835 [\xb0c5-\xb104] 380 | | \xd835 [\xb107-\xb109] 381 | | \xd835 [\xb10d-\xb113] 382 | | \xd835 [\xb116-\xb11b] 383 | | \xd835 [\xb11e-\xb138] 384 | | \xd835 [\xb13b-\xb13d] 385 | | \xd835 [\xb140-\xb143] 386 | | \xd835 \xd546 387 | | \xd835 [\xb14a-\xb14f] 388 | | \xd835 [\xb152-\xb2a2] 389 | | \xd835 [\xb2a8-\xb2bf] 390 | | \xd835 [\xb2c2-\xb2d9] 391 | | \xd835 [\xb2dc-\xb2f9] 392 | | \xd835 [\xb2fc-\xb313] 393 | | \xd835 [\xb316-\xb333] 394 | | \xd835 [\xb336-\xb34d] 395 | | \xd835 [\xb350-\xb36d] 396 | | \xd835 [\xb370-\xb387] 397 | | \xd835 [\xb38a-\xb3a7] 398 | | \xd835 [\xb3aa-\xb3c1] 399 | | \xd835 [\xb3c4-\xb3c8] 400 | | \xd840 [\xdc00-\xdffe] 401 | | \xd841 [\xe000-\xe3fe] 402 | | \xd842 [\xe400-\xe7fe] 403 | | \xd843 [\xe800-\xebfe] 404 | | \xd844 [\xec00-\xeffe] 405 | | \xd845 [\xf000-\xf3fe] 406 | | \xd846 [\xf400-\xf7fe] 407 | | \xd847 [\xf800-\xfbfe] 408 | | \xd848 [\xfc00-\xfffe] 409 | | \xd849 [\x0000-\x03fe] 410 | | \xd84a [\x0400-\x07fe] 411 | | \xd84b [\x0800-\x0bfe] 412 | | \xd84c [\x0c00-\x0ffe] 413 | | \xd84d [\x1000-\x13fe] 414 | | \xd84e [\x1400-\x17fe] 415 | | \xd84f [\x1800-\x1bfe] 416 | | \xd850 [\x1c00-\x1ffe] 417 | | \xd851 [\x2000-\x23fe] 418 | | \xd852 [\x2400-\x27fe] 419 | | \xd853 [\x2800-\x2bfe] 420 | | \xd854 [\x2c00-\x2ffe] 421 | | \xd855 [\x3000-\x33fe] 422 | | \xd856 [\x3400-\x37fe] 423 | | \xd857 [\x3800-\x3bfe] 424 | | \xd858 [\x3c00-\x3ffe] 425 | | \xd859 [\x4000-\x43fe] 426 | | \xd85a [\x4400-\x47fe] 427 | | \xd85b [\x4800-\x4bfe] 428 | | \xd85c [\x4c00-\x4ffe] 429 | | \xd85d [\x5000-\x53fe] 430 | | \xd85e [\x5400-\x57fe] 431 | | \xd85f [\x5800-\x5bfe] 432 | | \xd860 [\x5c00-\x5ffe] 433 | | \xd861 [\x6000-\x63fe] 434 | | \xd862 [\x6400-\x67fe] 435 | | \xd863 [\x6800-\x6bfe] 436 | | \xd864 [\x6c00-\x6ffe] 437 | | \xd865 [\x7000-\x73fe] 438 | | \xd866 [\x7400-\x77fe] 
439 | | \xd867 [\x7800-\x7bfe] 440 | | \xd868 [\x7c00-\x7ffe] 441 | | \xd869 [\x8000-\x82d5] 442 | | \xd87e [\xd400-\xd61c] 443 | 444 | -- XID_CONTINUE unicode character class 445 | @xid_continue 446 | = [\x0030-\x0039] 447 | | [\x0041-\x005a] 448 | | \x005f 449 | | [\x0061-\x007a] 450 | | \x00aa 451 | | \x00b5 452 | | \x00b7 453 | | \x00ba 454 | | [\x00c0-\x00d6] 455 | | [\x00d8-\x00f6] 456 | | [\x00f8-\x0236] 457 | | [\x0250-\x02c1] 458 | | [\x02c6-\x02d1] 459 | | [\x02e0-\x02e4] 460 | | \x02ee 461 | | [\x0300-\x0357] 462 | | [\x035d-\x036f] 463 | | \x0386 464 | | [\x0388-\x038a] 465 | | \x038c 466 | | [\x038e-\x03a1] 467 | | [\x03a3-\x03ce] 468 | | [\x03d0-\x03f5] 469 | | [\x03f7-\x03fb] 470 | | [\x0400-\x0481] 471 | | [\x0483-\x0486] 472 | | [\x048a-\x04ce] 473 | | [\x04d0-\x04f5] 474 | | [\x04f8-\x04f9] 475 | | [\x0500-\x050f] 476 | | [\x0531-\x0556] 477 | | \x0559 478 | | [\x0561-\x0587] 479 | | [\x0591-\x05a1] 480 | | [\x05a3-\x05b9] 481 | | [\x05bb-\x05bd] 482 | | \x05bf 483 | | [\x05c1-\x05c2] 484 | | \x05c4 485 | | [\x05d0-\x05ea] 486 | | [\x05f0-\x05f2] 487 | | [\x0610-\x0615] 488 | | [\x0621-\x063a] 489 | | [\x0640-\x0658] 490 | | [\x0660-\x0669] 491 | | [\x066e-\x06d3] 492 | | [\x06d5-\x06dc] 493 | | [\x06df-\x06e8] 494 | | [\x06ea-\x06fc] 495 | | \x06ff 496 | | [\x0710-\x074a] 497 | | [\x074d-\x074f] 498 | | [\x0780-\x07b1] 499 | | [\x0901-\x0939] 500 | | [\x093c-\x094d] 501 | | [\x0950-\x0954] 502 | | [\x0958-\x0963] 503 | | [\x0966-\x096f] 504 | | [\x0981-\x0983] 505 | | [\x0985-\x098c] 506 | | [\x098f-\x0990] 507 | | [\x0993-\x09a8] 508 | | [\x09aa-\x09b0] 509 | | \x09b2 510 | | [\x09b6-\x09b9] 511 | | [\x09bc-\x09c4] 512 | | [\x09c7-\x09c8] 513 | | [\x09cb-\x09cd] 514 | | \x09d7 515 | | [\x09dc-\x09dd] 516 | | [\x09df-\x09e3] 517 | | [\x09e6-\x09f1] 518 | | [\x0a01-\x0a03] 519 | | [\x0a05-\x0a0a] 520 | | [\x0a0f-\x0a10] 521 | | [\x0a13-\x0a28] 522 | | [\x0a2a-\x0a30] 523 | | [\x0a32-\x0a33] 524 | | [\x0a35-\x0a36] 525 | | [\x0a38-\x0a39] 526 | | 
\x0a3c 527 | | [\x0a3e-\x0a42] 528 | | [\x0a47-\x0a48] 529 | | [\x0a4b-\x0a4d] 530 | | [\x0a59-\x0a5c] 531 | | \x0a5e 532 | | [\x0a66-\x0a74] 533 | | [\x0a81-\x0a83] 534 | | [\x0a85-\x0a8d] 535 | | [\x0a8f-\x0a91] 536 | | [\x0a93-\x0aa8] 537 | | [\x0aaa-\x0ab0] 538 | | [\x0ab2-\x0ab3] 539 | | [\x0ab5-\x0ab9] 540 | | [\x0abc-\x0ac5] 541 | | [\x0ac7-\x0ac9] 542 | | [\x0acb-\x0acd] 543 | | \x0ad0 544 | | [\x0ae0-\x0ae3] 545 | | [\x0ae6-\x0aef] 546 | | [\x0b01-\x0b03] 547 | | [\x0b05-\x0b0c] 548 | | [\x0b0f-\x0b10] 549 | | [\x0b13-\x0b28] 550 | | [\x0b2a-\x0b30] 551 | | [\x0b32-\x0b33] 552 | | [\x0b35-\x0b39] 553 | | [\x0b3c-\x0b43] 554 | | [\x0b47-\x0b48] 555 | | [\x0b4b-\x0b4d] 556 | | [\x0b56-\x0b57] 557 | | [\x0b5c-\x0b5d] 558 | | [\x0b5f-\x0b61] 559 | | [\x0b66-\x0b6f] 560 | | \x0b71 561 | | [\x0b82-\x0b83] 562 | | [\x0b85-\x0b8a] 563 | | [\x0b8e-\x0b90] 564 | | [\x0b92-\x0b95] 565 | | [\x0b99-\x0b9a] 566 | | \x0b9c 567 | | [\x0b9e-\x0b9f] 568 | | [\x0ba3-\x0ba4] 569 | | [\x0ba8-\x0baa] 570 | | [\x0bae-\x0bb5] 571 | | [\x0bb7-\x0bb9] 572 | | [\x0bbe-\x0bc2] 573 | | [\x0bc6-\x0bc8] 574 | | [\x0bca-\x0bcd] 575 | | \x0bd7 576 | | [\x0be7-\x0bef] 577 | | [\x0c01-\x0c03] 578 | | [\x0c05-\x0c0c] 579 | | [\x0c0e-\x0c10] 580 | | [\x0c12-\x0c28] 581 | | [\x0c2a-\x0c33] 582 | | [\x0c35-\x0c39] 583 | | [\x0c3e-\x0c44] 584 | | [\x0c46-\x0c48] 585 | | [\x0c4a-\x0c4d] 586 | | [\x0c55-\x0c56] 587 | | [\x0c60-\x0c61] 588 | | [\x0c66-\x0c6f] 589 | | [\x0c82-\x0c83] 590 | | [\x0c85-\x0c8c] 591 | | [\x0c8e-\x0c90] 592 | | [\x0c92-\x0ca8] 593 | | [\x0caa-\x0cb3] 594 | | [\x0cb5-\x0cb9] 595 | | [\x0cbc-\x0cc4] 596 | | [\x0cc6-\x0cc8] 597 | | [\x0cca-\x0ccd] 598 | | [\x0cd5-\x0cd6] 599 | | \x0cde 600 | | [\x0ce0-\x0ce1] 601 | | [\x0ce6-\x0cef] 602 | | [\x0d02-\x0d03] 603 | | [\x0d05-\x0d0c] 604 | | [\x0d0e-\x0d10] 605 | | [\x0d12-\x0d28] 606 | | [\x0d2a-\x0d39] 607 | | [\x0d3e-\x0d43] 608 | | [\x0d46-\x0d48] 609 | | [\x0d4a-\x0d4d] 610 | | \x0d57 611 | | [\x0d60-\x0d61] 612 | | 
[\x0d66-\x0d6f] 613 | | [\x0d82-\x0d83] 614 | | [\x0d85-\x0d96] 615 | | [\x0d9a-\x0db1] 616 | | [\x0db3-\x0dbb] 617 | | \x0dbd 618 | | [\x0dc0-\x0dc6] 619 | | \x0dca 620 | | [\x0dcf-\x0dd4] 621 | | \x0dd6 622 | | [\x0dd8-\x0ddf] 623 | | [\x0df2-\x0df3] 624 | | [\x0e01-\x0e3a] 625 | | [\x0e40-\x0e4e] 626 | | [\x0e50-\x0e59] 627 | | [\x0e81-\x0e82] 628 | | \x0e84 629 | | [\x0e87-\x0e88] 630 | | \x0e8a 631 | | \x0e8d 632 | | [\x0e94-\x0e97] 633 | | [\x0e99-\x0e9f] 634 | | [\x0ea1-\x0ea3] 635 | | \x0ea5 636 | | \x0ea7 637 | | [\x0eaa-\x0eab] 638 | | [\x0ead-\x0eb9] 639 | | [\x0ebb-\x0ebd] 640 | | [\x0ec0-\x0ec4] 641 | | \x0ec6 642 | | [\x0ec8-\x0ecd] 643 | | [\x0ed0-\x0ed9] 644 | | [\x0edc-\x0edd] 645 | | \x0f00 646 | | [\x0f18-\x0f19] 647 | | [\x0f20-\x0f29] 648 | | \x0f35 649 | | \x0f37 650 | | \x0f39 651 | | [\x0f3e-\x0f47] 652 | | [\x0f49-\x0f6a] 653 | | [\x0f71-\x0f84] 654 | | [\x0f86-\x0f8b] 655 | | [\x0f90-\x0f97] 656 | | [\x0f99-\x0fbc] 657 | | \x0fc6 658 | | [\x1000-\x1021] 659 | | [\x1023-\x1027] 660 | | [\x1029-\x102a] 661 | | [\x102c-\x1032] 662 | | [\x1036-\x1039] 663 | | [\x1040-\x1049] 664 | | [\x1050-\x1059] 665 | | [\x10a0-\x10c5] 666 | | [\x10d0-\x10f8] 667 | | [\x1100-\x1159] 668 | | [\x115f-\x11a2] 669 | | [\x11a8-\x11f9] 670 | | [\x1200-\x1206] 671 | | [\x1208-\x1246] 672 | | \x1248 673 | | [\x124a-\x124d] 674 | | [\x1250-\x1256] 675 | | \x1258 676 | | [\x125a-\x125d] 677 | | [\x1260-\x1286] 678 | | \x1288 679 | | [\x128a-\x128d] 680 | | [\x1290-\x12ae] 681 | | \x12b0 682 | | [\x12b2-\x12b5] 683 | | [\x12b8-\x12be] 684 | | \x12c0 685 | | [\x12c2-\x12c5] 686 | | [\x12c8-\x12ce] 687 | | [\x12d0-\x12d6] 688 | | [\x12d8-\x12ee] 689 | | [\x12f0-\x130e] 690 | | \x1310 691 | | [\x1312-\x1315] 692 | | [\x1318-\x131e] 693 | | [\x1320-\x1346] 694 | | [\x1348-\x135a] 695 | | [\x1369-\x1371] 696 | | [\x13a0-\x13f4] 697 | | [\x1401-\x166c] 698 | | [\x166f-\x1676] 699 | | [\x1681-\x169a] 700 | | [\x16a0-\x16ea] 701 | | [\x16ee-\x16f0] 702 | | [\x1700-\x170c] 703 
| | [\x170e-\x1714] 704 | | [\x1720-\x1734] 705 | | [\x1740-\x1753] 706 | | [\x1760-\x176c] 707 | | [\x176e-\x1770] 708 | | [\x1772-\x1773] 709 | | [\x1780-\x17b3] 710 | | [\x17b6-\x17d3] 711 | | \x17d7 712 | | [\x17dc-\x17dd] 713 | | [\x17e0-\x17e9] 714 | | [\x180b-\x180d] 715 | | [\x1810-\x1819] 716 | | [\x1820-\x1877] 717 | | [\x1880-\x18a9] 718 | | [\x1900-\x191c] 719 | | [\x1920-\x192b] 720 | | [\x1930-\x193b] 721 | | [\x1946-\x196d] 722 | | [\x1970-\x1974] 723 | | [\x1d00-\x1d6b] 724 | | [\x1e00-\x1e9b] 725 | | [\x1ea0-\x1ef9] 726 | | [\x1f00-\x1f15] 727 | | [\x1f18-\x1f1d] 728 | | [\x1f20-\x1f45] 729 | | [\x1f48-\x1f4d] 730 | | [\x1f50-\x1f57] 731 | | \x1f59 732 | | \x1f5b 733 | | \x1f5d 734 | | [\x1f5f-\x1f7d] 735 | | [\x1f80-\x1fb4] 736 | | [\x1fb6-\x1fbc] 737 | | \x1fbe 738 | | [\x1fc2-\x1fc4] 739 | | [\x1fc6-\x1fcc] 740 | | [\x1fd0-\x1fd3] 741 | | [\x1fd6-\x1fdb] 742 | | [\x1fe0-\x1fec] 743 | | [\x1ff2-\x1ff4] 744 | | [\x1ff6-\x1ffc] 745 | | [\x203f-\x2040] 746 | | \x2054 747 | | \x2071 748 | | \x207f 749 | | [\x20d0-\x20dc] 750 | | \x20e1 751 | | [\x20e5-\x20ea] 752 | | \x2102 753 | | \x2107 754 | | [\x210a-\x2113] 755 | | \x2115 756 | | [\x2118-\x211d] 757 | | \x2124 758 | | \x2126 759 | | \x2128 760 | | [\x212a-\x2131] 761 | | [\x2133-\x2139] 762 | | [\x213d-\x213f] 763 | | [\x2145-\x2149] 764 | | [\x2160-\x2183] 765 | | [\x3005-\x3007] 766 | | [\x3021-\x302f] 767 | | [\x3031-\x3035] 768 | | [\x3038-\x303c] 769 | | [\x3041-\x3096] 770 | | [\x3099-\x309a] 771 | | [\x309d-\x309f] 772 | | [\x30a1-\x30ff] 773 | | [\x3105-\x312c] 774 | | [\x3131-\x318e] 775 | | [\x31a0-\x31b7] 776 | | [\x31f0-\x31ff] 777 | | [\x3400-\x4db5] 778 | | [\x4e00-\x9fa5] 779 | | [\xa000-\xa48c] 780 | | [\xac00-\xd7a3] 781 | | [\xf900-\xfa2d] 782 | | [\xfa30-\xfa6a] 783 | | [\xfb00-\xfb06] 784 | | [\xfb13-\xfb17] 785 | | [\xfb1d-\xfb28] 786 | | [\xfb2a-\xfb36] 787 | | [\xfb38-\xfb3c] 788 | | \xfb3e 789 | | [\xfb40-\xfb41] 790 | | [\xfb43-\xfb44] 791 | | [\xfb46-\xfbb1] 792 | | 
[\xfbd3-\xfc5d] 793 | | [\xfc64-\xfd3d] 794 | | [\xfd50-\xfd8f] 795 | | [\xfd92-\xfdc7] 796 | | [\xfdf0-\xfdf9] 797 | | [\xfe00-\xfe0f] 798 | | [\xfe20-\xfe23] 799 | | [\xfe33-\xfe34] 800 | | [\xfe4d-\xfe4f] 801 | | \xfe71 802 | | \xfe73 803 | | \xfe77 804 | | \xfe79 805 | | \xfe7b 806 | | \xfe7d 807 | | [\xfe7f-\xfefc] 808 | | [\xff10-\xff19] 809 | | [\xff21-\xff3a] 810 | | \xff3f 811 | | [\xff41-\xff5a] 812 | | [\xff65-\xffbe] 813 | | [\xffc2-\xffc7] 814 | | [\xffca-\xffcf] 815 | | [\xffd2-\xffd7] 816 | | [\xffda-\xffdc] 817 | | \xd800 [\xdc00-\xdc0a] 818 | | \xd800 [\xdc0d-\xdc25] 819 | | \xd800 [\xdc28-\xdc39] 820 | | \xd800 [\xdc3c-\xdc3c] 821 | | \xd800 [\xdc3f-\xdc4c] 822 | | \xd800 [\xdc50-\xdc5c] 823 | | \xd800 [\xdc80-\xdcf9] 824 | | \xd800 [\xdf00-\xdf1d] 825 | | \xd800 [\xdf30-\xdf49] 826 | | \xd800 [\xdf80-\xdf9c] 827 | | \xd801 [\xe000-\xe09c] 828 | | \xd801 [\xe0a0-\xe0a8] 829 | | \xd802 [\xe400-\xe404] 830 | | \xd802 \x0808 831 | | \xd802 [\xe40a-\xe434] 832 | | \xd802 [\xe437-\xe437] 833 | | \xd802 \x083c 834 | | \xd802 \x083f 835 | | \xd834 [\xad65-\xad68] 836 | | \xd834 [\xad6d-\xad71] 837 | | \xd834 [\xad7b-\xad81] 838 | | \xd834 [\xad85-\xad8a] 839 | | \xd834 [\xadaa-\xadac] 840 | | \xd835 [\xb000-\xb053] 841 | | \xd835 [\xb056-\xb09b] 842 | | \xd835 [\xb09e-\xb09e] 843 | | \xd835 \xd4a2 844 | | \xd835 [\xb0a5-\xb0a5] 845 | | \xd835 [\xb0a9-\xb0ab] 846 | | \xd835 [\xb0ae-\xb0b8] 847 | | \xd835 \xd4bb 848 | | \xd835 [\xb0bd-\xb0c2] 849 | | \xd835 [\xb0c5-\xb104] 850 | | \xd835 [\xb107-\xb109] 851 | | \xd835 [\xb10d-\xb113] 852 | | \xd835 [\xb116-\xb11b] 853 | | \xd835 [\xb11e-\xb138] 854 | | \xd835 [\xb13b-\xb13d] 855 | | \xd835 [\xb140-\xb143] 856 | | \xd835 \xd546 857 | | \xd835 [\xb14a-\xb14f] 858 | | \xd835 [\xb152-\xb2a2] 859 | | \xd835 [\xb2a8-\xb2bf] 860 | | \xd835 [\xb2c2-\xb2d9] 861 | | \xd835 [\xb2dc-\xb2f9] 862 | | \xd835 [\xb2fc-\xb313] 863 | | \xd835 [\xb316-\xb333] 864 | | \xd835 [\xb336-\xb34d] 865 | | \xd835 [\xb350-\xb36d] 866 | 
| \xd835 [\xb370-\xb387] 867 | | \xd835 [\xb38a-\xb3a7] 868 | | \xd835 [\xb3aa-\xb3c1] 869 | | \xd835 [\xb3c4-\xb3c8] 870 | | \xd835 [\xb3ce-\xb3fe] 871 | | \xd840 [\xdc00-\xdffe] 872 | | \xd841 [\xe000-\xe3fe] 873 | | \xd842 [\xe400-\xe7fe] 874 | | \xd843 [\xe800-\xebfe] 875 | | \xd844 [\xec00-\xeffe] 876 | | \xd845 [\xf000-\xf3fe] 877 | | \xd846 [\xf400-\xf7fe] 878 | | \xd847 [\xf800-\xfbfe] 879 | | \xd848 [\xfc00-\xfffe] 880 | | \xd849 [\x0000-\x03fe] 881 | | \xd84a [\x0400-\x07fe] 882 | | \xd84b [\x0800-\x0bfe] 883 | | \xd84c [\x0c00-\x0ffe] 884 | | \xd84d [\x1000-\x13fe] 885 | | \xd84e [\x1400-\x17fe] 886 | | \xd84f [\x1800-\x1bfe] 887 | | \xd850 [\x1c00-\x1ffe] 888 | | \xd851 [\x2000-\x23fe] 889 | | \xd852 [\x2400-\x27fe] 890 | | \xd853 [\x2800-\x2bfe] 891 | | \xd854 [\x2c00-\x2ffe] 892 | | \xd855 [\x3000-\x33fe] 893 | | \xd856 [\x3400-\x37fe] 894 | | \xd857 [\x3800-\x3bfe] 895 | | \xd858 [\x3c00-\x3ffe] 896 | | \xd859 [\x4000-\x43fe] 897 | | \xd85a [\x4400-\x47fe] 898 | | \xd85b [\x4800-\x4bfe] 899 | | \xd85c [\x4c00-\x4ffe] 900 | | \xd85d [\x5000-\x53fe] 901 | | \xd85e [\x5400-\x57fe] 902 | | \xd85f [\x5800-\x5bfe] 903 | | \xd860 [\x5c00-\x5ffe] 904 | | \xd861 [\x6000-\x63fe] 905 | | \xd862 [\x6400-\x67fe] 906 | | \xd863 [\x6800-\x6bfe] 907 | | \xd864 [\x6c00-\x6ffe] 908 | | \xd865 [\x7000-\x73fe] 909 | | \xd866 [\x7400-\x77fe] 910 | | \xd867 [\x7800-\x7bfe] 911 | | \xd868 [\x7c00-\x7ffe] 912 | | \xd869 [\x8000-\x82d5] 913 | | \xd87e [\xd400-\xd61c] 914 | | \xdb40 [\xdd00-\xddee] 915 | 916 | @ident = @xid_start @xid_continue* 917 | @raw_ident = r \# @ident 918 | 919 | @lifetime = \' @ident 920 | 921 | $hexit = [0-9a-fA-F] 922 | 923 | @char_escape 924 | = [nrt\\'"0] 925 | | [xX] [0-7] $hexit 926 | | u\{ $hexit \} 927 | | u\{ $hexit $hexit \} 928 | | u\{ $hexit $hexit $hexit \} 929 | | u\{ $hexit $hexit $hexit $hexit \} 930 | | u\{ $hexit $hexit $hexit $hexit $hexit \} 931 | | u\{ $hexit $hexit $hexit $hexit $hexit $hexit \} 932 | 933 | @byte_escape 934 | = 
[xX] $hexit $hexit
  | [nrt\\'"0]

-- literals

@lit_char
  = \' ( \\ @char_escape
       | [^\\'\n\t\r]
       | [ \ud800-\udbff \udc00-\udfff ]
       )
    \'

@lit_byte
  = b\' ( \\ @byte_escape
        | [^\\'\n\t\r] [ \udc00-\udfff ]?
        )
     \'

-- Octal literals only admit the digits 0 through 7 (plus the @_@ separator).
@lit_integer
  = [0-9][0-9_]*
  | 0b [01_]+
  | 0o [0-7_]+
  | 0x [0-9a-fA-F_]+

@decimal_suffix = \. [0-9][0-9_]*
@exponent_suffix = [eE] [\-\+]? [0-9][0-9_]*

@lit_float = [0-9][0-9_]* @decimal_suffix? @exponent_suffix?
@lit_float2 = [0-9][0-9_]* \.

@lit_str = \" (\\\n | \\\r\n | \\ @char_escape | [^\\\"] | \n | \r)* \"
@lit_byte_str = b \" (\\\n | \\\r\n | \\ @byte_escape | [^\\\"] | \n | \r)* \"

@lit_raw_str = r \#* \"
@lit_raw_bstr = br \#* \"


-- Comments

@outer_doc_line = "///" ( [^\r\n\/] [^\r\n]* )?
@outer_doc_inline = "/**"

@inner_doc_line = "//!" [^\r\n]*
@inner_doc_inline = "/*!"

@line_comment = "//" ( [^\n\/]* [^\n]* )?
@inline_comment = "/*"

-- Macro related

@subst_nt = "$" @ident

tokens :-

$white+         { \s -> pure (Space Whitespace s) }

"="             { token Equal }
"<"             { token Less }
">"             { token Greater }
"&"             { token Ampersand }
"|"             { token Pipe }
"!"             { token Exclamation }
"~"             { token Tilde }
"+"             { token Plus }
"-"             { token Minus }
"*"             { token Star }
"/"             { token Slash }
"%"             { token Percent }
"^"             { token Caret }

"||"            { token PipePipe }
"&&"            { token AmpersandAmpersand }
">="            { token GreaterEqual }
">>="           { token GreaterGreaterEqual }
"<<"            { token LessLess }
">>"            { token GreaterGreater }

"=="            { token EqualEqual }
"!="            { token NotEqual }
"<="            { token LessEqual }
"<<="           { token LessLessEqual }
"-="            { token MinusEqual }
"&="            { token AmpersandEqual }
"|="            { token PipeEqual }
"+="            { token PlusEqual }
"*="            { token StarEqual }
"/="            { token SlashEqual }
"^="            { token CaretEqual }
"%="            { token PercentEqual }


"@"             { token At }
"."             { token Dot }
".."            { token DotDot }
"..."           { token DotDotDot }
"..="           { token DotDotEqual }
","             { token Comma }
";"             { token Semicolon }
":"             { token Colon }
"::"            { token ModSep }
"->"            { token RArrow }
"<-"            { token LArrow }
"=>"            { token FatArrow }
"("             { token (OpenDelim Paren) }
")"             { token (CloseDelim Paren) }
"["             { token (OpenDelim Bracket) }
"]"             { token (CloseDelim Bracket) }
"{"             { token (OpenDelim Brace) }
"}"             { token (CloseDelim Brace) }
"#"             { token Pound }
"$"             { token Dollar }

@lit_integer    { \i -> literal (IntegerTok i) }
@lit_float      { \f -> literal (FloatTok f) }
@lit_float2 / ( [^\._a-zA-Z] | \r | \n )
                { \f -> literal (FloatTok f) }
@lit_float2 / { \_ _ _ (_,is) -> inputStreamEmpty is }
                { \f -> literal (FloatTok f) }

@lit_byte       { \c -> literal (ByteTok (drop 2 (init c))) }
@lit_char       { \c -> literal (CharTok (drop 1 (init c))) }
@lit_str        { \s -> literal (StrTok (cleanWindowsNewlines (drop 1 (init s)))) }
@lit_byte_str   { \s -> literal (ByteStrTok (cleanWindowsNewlines (drop 2 (init s)))) }

@lit_raw_str    { \s -> let n = length s - 2
                        in do
                            str <- cleanWindowsNewlines `fmap` rawString n
                            literal (StrRawTok str (fromIntegral n))
                }
@lit_raw_bstr   { \s -> let n = length s - 3
                        in do
                            str <- cleanWindowsNewlines `fmap` rawString n
                            literal (ByteStrRawTok str (fromIntegral n))
                }

-- Rules for the @lits@ start code, which 'literal' scans with to pick up a literal suffix:
-- either an identifier directly follows the literal, or the empty match is skipped and
-- 'literal' treats the literal as unsuffixed.
<lits> ""       ;
<lits> @ident   { \s -> pure (IdentTok (mkIdent s)) }

\?              { token Question }
@raw_ident      { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) }
@ident          { \s -> pure (IdentTok (mkIdent s)) }
@lifetime       { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) }


@outer_doc_line     { \c -> pure (Doc (drop 3 c) Outer False) }
@outer_doc_line \r  { \c -> pure (Doc (drop 3 (init c)) Outer False) }
@outer_doc_inline / ( [^\*] | \r | \n )
                    { \_ -> Doc <$> nestedComment <*> pure Outer <*> pure True }

@inner_doc_line     { \c -> pure (Doc (drop 3 c) Inner False) }
@inner_doc_inline   { \_ -> Doc <$> nestedComment <*> pure Inner <*> pure True }

@line_comment       { \c -> pure (Space Comment (drop 2 c)) }
@inline_comment     { \_ -> Space Comment <$> nestedComment }

{

-- | Make a token, ignoring the text that was matched.
token :: Token -> String -> P Token
token t _ = pure t

-- | Given the first part of a literal, try to parse also a suffix. Even if
-- the allowed suffixes are very well defined and only valid on integer and
-- float literals, we need to put in the same token whatever suffix follows.
-- This is for backwards compatibility if Rust decides to ever add suffixes.
literal :: LitTok -> P Token
literal lit = do
    start <- getPosition
    input <- getInput
    -- Scan once more in the 'lits' start code, looking for a suffix.
    case alexScan (start, input) lits of
      AlexToken (end, rest) len action -> do
        scanned <- action (peekChars len input)
        case scanned of
          -- Only a non-raw identifier counts as a suffix; commit the scan.
          IdentTok (Ident suffix False _) -> do
            setPosition end
            setInput rest
            pure (LiteralTok lit (Just suffix))
          _ -> unsuffixed
      _ -> unsuffixed
  where
    -- No suffix found: the parser state is left where the literal ended.
    unsuffixed :: P Token
    unsuffixed = pure (LiteralTok lit Nothing)

-- | Parses a raw string, the closing quotation, and the appropriate number of
-- '#' characters. The argument is the number of '#' that opened the string.
rawString :: Int -> P String
rawString n = nextChar >>= \next -> case next of
    -- The string was never closed
    Nothing -> fail "Invalid raw (byte)string"

    -- A quote closes the string only when followed by all @n@ hashes;
    -- otherwise the quote and the hashes consumed so far are string content.
    Just '"' -> do
      seen <- greedyChar '#' n
      if seen /= n
        then (('"' : replicate seen '#') ++) <$> rawString n
        else pure ""

    -- Just another character...
    Just c -> (c :) <$> rawString n

-- | Consume a full inline comment (which may be nested). The opening
-- delimiter is expected to have been consumed already; the returned text
-- excludes the final closing delimiter.
nestedComment :: P String
nestedComment = scan 1 ""
  where
    unclosed :: P a
    unclosed = fail "Unclosed comment"

    -- First argument: nesting depth. Second: characters seen so far, reversed.
    scan :: Int -> String -> P String
    scan 0 acc = pure (reverse (drop 2 acc))
    scan depth acc = nextChar >>= \c -> case c of
        Nothing  -> unclosed
        Just '*' -> delimiter '*' '/' (depth - 1)
        Just '/' -> delimiter '/' '*' (depth + 1)
        Just ch  -> scan depth (ch : acc)
      where
        -- Having consumed @first@, check whether @second@ follows to complete
        -- a comment delimiter; if so continue at the adjusted depth.
        delimiter :: Char -> Char -> Int -> P String
        delimiter first second depth' = peekChar >>= \c' -> case c' of
          Nothing -> unclosed
          Just x
            | x == second -> nextChar *> scan depth' (second : first : acc)
            | otherwise   -> scan depth (first : acc)


-- Monadic functions

-- | Retrieve the next character (if there is one), updating the parser state accordingly.
nextChar :: P (Maybe Char)
nextChar = do
    here  <- getPosition
    input <- getInput
    advance here input
  where
    advance here input
      | inputStreamEmpty input = pure Nothing
      | otherwise =
          let (c, rest) = takeChar input
              there     = alexMove here c
          -- Force the new position before storing it in the state.
          in there `seq` (setPosition there *> setInput rest *> pure (Just c))

-- | Retrieve the next character (if there is one), without updating the
-- parser state.
peekChar :: P (Maybe Char)
peekChar = peek <$> getInput
  where
    peek input
      | inputStreamEmpty input = Nothing
      | otherwise              = Just (fst (takeChar input))

-- | Greedily try to eat as many of a given character as possible (and return
-- how many characters were eaten). The second argument is an upper limit.
greedyChar :: Char -> Int -> P Int
greedyChar _ 0 = pure 0
greedyChar c limit = peekChar >>= \next ->
  if next == Just c
    then nextChar *> ((+ 1) <$> greedyChar c (limit - 1))
    else pure 0

-- | Signal a lexical error, reporting the next character (if any).
lexicalError :: P a
lexicalError = peekChar >>= \c ->
  fail ("Lexical error: the character " ++ show c ++ " does not fit here")


-- Functions required by Alex

-- | type passed around by Alex functions (required by Alex)
type AlexInput = (Position,    -- current position,
                  InputStream) -- current input string

-- | get previous character (required by Alex). Since this is never used, the
-- implementation just raises an error.
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"

-- | get the next byte and new input from the current input (required by Alex
-- 3.0)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (pos, inp)
  | inputStreamEmpty inp = Nothing
  | otherwise            = moved `seq` Just (byte, (moved, rest))
  where
    (byte, rest) = takeByte inp
    -- this is safe for latin-1, but ugly
    moved = alexMove pos (chr (fromIntegral byte))

-- | find the new position given the next character
alexMove :: Position -> Char -> Position
alexMove pos c = case c of
  '\n' -> retPos pos
  '\r' -> incOffset pos 1
  _    -> incPos pos 1      -- every other character (spaces included)

-- | Lexer for one 'Token'. The only token this cannot produce is 'Interpolated'.
lexToken :: P (Spanned Token)
lexToken = popToken >>= maybe scan pure   -- pushed-back tokens take priority
  where
    scan = do
      start <- getPosition
      input <- getInput
      case alexScan (start, input) 0 of
        AlexEOF -> pure (Spanned Eof (Span start start))
        AlexError _ -> fail "lexical error"
        AlexSkip (next, rest) _ -> do
          setPosition next
          setInput rest
          lexToken
        AlexToken (next, rest) len action -> do
          setPosition next
          setInput rest
          -- The action may consume further input (raw strings, comments,
          -- suffixes), so the end of the span is read back afterwards.
          tok <- action (peekChars len input) >>= swapToken
          end <- getPosition
          pure (Spanned tok (Span start end))

-- | Lexer for one non-whitespace 'Token'. The only tokens this cannot produce are 'Interpolated'
-- and 'Space' (which includes comments that aren't doc comments).
lexNonSpace :: P (Spanned Token)
lexNonSpace = lexToken >>= \tok -> case tok of
  Spanned Space{} _ -> lexNonSpace   -- skip and try again
  _                 -> pure tok

-- | Apply the given lexer repeatedly until (but not including) the 'Eof' token. Meant for debugging
-- purposes - in general this defeats the point of a threaded lexer.
lexTokens :: P (Spanned Token) -> P [Spanned Token]
lexTokens lexer = lexer >>= \tok -> case tok of
  Spanned Eof _ -> pure []
  _             -> (tok :) <$> lexTokens lexer

-- | Lex the first line, if it immediately starts with @#!@ (but not @#![@ - that should be an
-- inner attribute). If this fails to find a shebang line, it consumes no input.
lexShebangLine :: P (Maybe String)
lexShebangLine = getInput >>= \inp -> case peekChars 3 inp of
    '#' : '!' : rest | rest /= "[" -> Just <$> restOfLine
    _ -> pure Nothing
  where
    -- Consumes and returns everything up to (not including) the next newline
    -- or the end of the input.
    restOfLine :: P String
    restOfLine = peekChar >>= \next -> case next of
      Just ch | ch /= '\n' -> nextChar *> ((ch :) <$> restOfLine)
      _ -> pure ""

-- | If we're running on Windows, we need to normalize to "\n" instead of "\r\n", to match Rust's
-- handling of newlines in strings.
cleanWindowsNewlines :: String -> String
cleanWindowsNewlines str = case str of
  '\r' : '\n' : rest -> '\n' : cleanWindowsNewlines rest
  c : rest           -> c : cleanWindowsNewlines rest
  []                 -> []
}

--------------------------------------------------------------------------------
/src/Language/Rust/Parser/Literals.hs:
--------------------------------------------------------------------------------
{-|
Module      : Language.Rust.Parser.Literals
Description : Parsing literals
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : portable

Functions for parsing literals from valid literal tokens. Note the functions in this module fail
badly if fed invalid 'LitTok's; it is expected their input is coming from Alex and is correct.
-}

module Language.Rust.Parser.Literals (
  translateLit
) where

import Language.Rust.Syntax.Token ( LitTok(..) )
import Language.Rust.Syntax.AST   ( IntRep(..), Lit(..), StrStyle(..), Suffix(..) )

import Data.Char                  ( chr, digitToInt, ord, isHexDigit, isSpace )
import Data.List                  ( unfoldr )
import Data.Maybe                 ( fromMaybe )
import Data.Word                  ( Word8 )

import Text.Read                  ( readMaybe )

-- | Parse a valid 'LitTok' into a 'Lit'.
translateLit :: LitTok -> Suffix -> a -> Lit a
translateLit tok = case tok of
  ByteTok s         -> Byte (unescapeByte' s)
  CharTok s         -> Char (unescapeChar' s)
  FloatTok s        -> Float (unescapeFloat s)
  StrTok s          -> Str (unfoldr (unescapeChar True) s) Cooked
  StrRawTok s n     -> Str s (Raw n)
  ByteStrTok s      -> ByteStr (unfoldr (unescapeByte True) s) Cooked
  ByteStrRawTok s n -> ByteStr (map (fromIntegral . ord) s) (Raw n)
  -- A decimal integer token carrying a float suffix is a float literal.
  IntegerTok s      -> \suf -> case (suf, unescapeInteger s) of
    (F32, (Dec, n)) -> Float (fromInteger n) F32
    (F64, (Dec, n)) -> Float (fromInteger n) F64
    (_,   (rep, n)) -> Int rep n suf

-- | Given a string of characters read from a Rust source, extract the next underlying char taking
-- into account escapes and unicode. Returns the character together with the unconsumed input.
unescapeChar :: Bool                    -- ^ multi-line strings allowed
             -> String                  -- ^ input string
             -> Maybe (Char, String)
unescapeChar multiline ('\\':c:cs) = case c of
    'n'  -> simple '\n'
    'r'  -> simple '\r'
    't'  -> simple '\t'
    '\\' -> simple '\\'
    '\'' -> simple '\''
    '"'  -> simple '"'
    '0'  -> simple '\0'
    'x'  -> hexPrefix 2
    'X'  -> hexPrefix 2
    'U'  -> hexPrefix 8
    'u'  -> case cs of
      '{':x1:'}':cs'                -> braced [x1] cs'
      '{':x1:x2:'}':cs'             -> braced [x1,x2] cs'
      '{':x1:x2:x3:'}':cs'          -> braced [x1,x2,x3] cs'
      '{':x1:x2:x3:x4:'}':cs'       -> braced [x1,x2,x3,x4] cs'
      '{':x1:x2:x3:x4:x5:'}':cs'    -> braced [x1,x2,x3,x4,x5] cs'
      '{':x1:x2:x3:x4:x5:x6:'}':cs' -> braced [x1,x2,x3,x4,x5,x6] cs'
      _                             -> hexPrefix 4
    -- An escaped newline swallows the newline and all following whitespace.
    '\n' | multiline -> unescapeChar multiline $ dropWhile isSpace cs
    _    -> error "unescape char: bad escape sequence"
  where
    -- A single-character escape: the rest of the input is untouched.
    simple ch = pure (ch, cs)

    -- A fixed number of hex digits directly following the escape letter.
    hexPrefix k = do (h, rest) <- readHex k cs; pure (chr h, rest)

    -- Hex digits that were enclosed in braces; @rest@ is what follows the '}'.
    braced ds rest = do (h, _) <- readHex (length ds) ds; pure (chr h, rest)
unescapeChar _ (c:cs) = Just (c, cs)
unescapeChar _ [] = fail "unescape char: empty string"

-- | Given a string of characters read from a Rust source, extract the next underlying byte taking
-- into account escapes.
unescapeByte :: Bool                    -- ^ multi-line strings allowed
             -> String                  -- ^ input string
             -> Maybe (Word8, String)
unescapeByte multiline ('\\':c:cs) = case c of
    'n'  -> byte '\n'
    'r'  -> byte '\r'
    't'  -> byte '\t'
    '\\' -> byte '\\'
    '\'' -> byte '\''
    '"'  -> byte '"'
    '0'  -> byte '\0'
    'x'  -> readHex 2 cs
    'X'  -> readHex 2 cs
    -- An escaped newline swallows the newline and all following whitespace.
    '\n' | multiline -> unescapeByte multiline $ dropWhile isSpace cs
    _    -> error "unescape byte: bad escape sequence"
  where
    -- A single-character escape: the rest of the input is untouched.
    byte ch = pure (toEnum (fromEnum ch), cs)
unescapeByte _ (c:cs) = Just (toEnum $ fromEnum c, cs)
unescapeByte _ [] = fail "unescape byte: empty string"

-- | Given a string Rust representation of a character, parse it into a character
unescapeChar' :: String -> Char
unescapeChar' s
  | Just (c, "") <- unescapeChar False s = c
  | otherwise = error "unescape char: bad character literal"

-- | Given a string Rust representation of a byte, parse it into a byte
unescapeByte' :: String -> Word8
unescapeByte' s
  | Just (w8, "") <- unescapeByte False s = w8
  | otherwise = error "unescape byte: bad byte literal"

-- | Given a string Rust representation of an integer, parse it into a number
unescapeInteger :: Num a => String -> (IntRep,a)
unescapeInteger str = case str of
    '0':'b':cs@(_:_) | cs `madeOf` "_01"                     -> (Bin, value 2 cs)
    '0':'o':cs@(_:_) | cs `madeOf` "_01234567"               -> (Oct, value 8 cs)
    '0':'x':cs@(_:_) | cs `madeOf` "_0123456789abcdefABCDEF" -> (Hex, value 16 cs)
    cs@(_:_)         | cs `madeOf` "_0123456789"             -> (Dec, value 10 cs)
    _ -> error "unescape integer: bad decimal literal"
  where
    madeOf cs alphabet = all (`elem` alphabet) cs

    -- Digit separators carry no value and are dropped before conversion.
    value base = numBase base . filter (/= '_')

-- | Given a string Rust representation of a float, parse it into a float.
-- NOTE: this is a bit hacky. Eventually, we might not do this and change the internal
-- representation of a float to a string (what language-c has opted to do).
unescapeFloat :: String -> Double
unescapeFloat cs = case readMaybe cleaned of
    Just x  -> x
    Nothing -> error $ "unescape float: cannot parse float " ++ cleaned
  where
    -- A literal ending in '.' is completed with a '0' before being read.
    padded
      | last cs == '.' = cs ++ "0"
      | otherwise      = cs
    cleaned = filter (/= '_') padded

-- | Try to read a hexadecimal number of the specified length off of the front of the string
-- provided. If there are not enough characters to do this, or the characters don't fall in the
-- right range, this fails with 'Nothing'.
readHex :: Num a => Int -> String -> Maybe (a, String)
readHex n cs
  | length digits == n && all isHexDigit digits = Just (numBase 16 digits, rest)
  | otherwise = Nothing
  where
    (digits, rest) = splitAt n cs

-- | Convert a list of characters to the number they represent.
numBase :: Num a => a -> String -> a
numBase b digits = foldl step 0 digits
  where
    step acc d = fromIntegral (digitToInt d) + b * acc

--------------------------------------------------------------------------------
/src/Language/Rust/Parser/ParseMonad.hs:
--------------------------------------------------------------------------------
{-|
Module      : Language.Rust.Parser.ParseMonad
Description : Parsing monad for lexer/parser
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : GHC

Both the lexer and the parser run inside of the 'P' monad. As detailed in the section on
[threaded-lexers](https://www.haskell.org/happy/doc/html/sec-monads.html#sec-lexers) in Happy's
instruction manual, the benefits of this are that:

  * Lexical errors can be treated in the same way as parse errors
  * Information such as the current position in the file is shared between the lexer and parser
  * General information can be passed back from the parser to the lexer too

In our case, this shared information is held in 'PState'.
-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}

module Language.Rust.Parser.ParseMonad (
  -- * Parsing monad
  P,
  execParser,
  execParser',
  initPos,
  PState(..),

  -- * Monadic operations
  getPState,
  setPState,
  getPosition,
  setPosition,
  getInput,
  setInput,
  popToken,
  pushToken,
  swapToken,

  -- * Error reporting
  ParseFail(..),
  parseError,
) where

import Language.Rust.Data.InputStream  ( InputStream )
import Language.Rust.Data.Position     ( Spanned, Position, initPos, prettyPosition )
import Language.Rust.Syntax.Token      ( Token )

import Control.Monad.Fail as Fail
import Control.Exception               ( Exception )
import Data.Maybe                      ( listToMaybe )
import Data.Typeable                   ( Typeable )

-- | Parsing and lexing monad. A value of type @'P' a@ represents a parser that can be run (using
-- 'execParser') to possibly produce a value of type @a@.
newtype P a = P { unParser :: forall r. PState                      -- State being passed along
                                     -> (a -> PState -> r)          -- Successful parse continuation
                                     -> (String -> Position -> r)   -- Failed parse continuation
                                     -> r                           -- Final output
                }

-- | State that the lexer and parser share
data PState = PState
  { curPos       :: !Position        -- ^ position at current input location
  , curInput     :: !InputStream     -- ^ the current input
  , prevPos      ::  Position        -- ^ position at previous input location
  , pushedTokens :: [Spanned Token]  -- ^ tokens manually pushed by the user
  , swapFunction :: Token -> Token   -- ^ function to swap token
  }

instance Functor P where
  fmap f m = P $ \ !st ok bad -> unParser m st (ok . f) bad

instance Applicative P where
  pure x = P $ \ !st ok _ -> ok x st

  -- Run the function parser first, then the argument parser on the state it
  -- leaves behind.
  m <*> k = P $ \ !st ok bad ->
    unParser m st (\f st' -> unParser k st' (ok . f) bad) bad

instance Monad P where
  return = pure

  m >>= k = P $ \ !st ok bad ->
    unParser m st (\x st' -> unParser (k x) st' ok bad) bad

instance Fail.MonadFail P where
  -- Failures are reported at the current position held in the state.
  fail msg = P $ \ !st _ bad -> bad msg (curPos st)

-- | Exceptions that occur during parsing
data ParseFail = ParseFail Position String deriving (Eq, Typeable)

instance Show ParseFail where
  showsPrec p (ParseFail pos msg) = showParen (p > 10) (showString description)
    where
      description = unwords [ "parse failure at", prettyPosition pos, "(" ++ msg ++ ")" ]

instance Exception ParseFail

-- | Execute the given parser on the supplied input stream at the given start position, returning
-- either the position of an error and the error message, or the value parsed.
execParser :: P a -> InputStream -> Position -> Either ParseFail a
execParser p input pos = execParser' p input pos id

-- | Generalized version of 'execParser' that expects an extra argument that lets you hot-swap a
-- token that was just lexed before it gets passed to the parser.
execParser' :: P a -> InputStream -> Position -> (Token -> Token) -> Either ParseFail a
execParser' parser input pos swap = unParser parser initialState succeeded failed
  where
    succeeded result _      = Right result
    failed message errPos   = Left (ParseFail errPos message)

    initialState = PState
      { curPos = pos
      , curInput = input
        -- There is no previous position yet; touching it is a bug.
      , prevPos = error "ParseMonad.execParser: Touched undefined position!"
      , pushedTokens = []
      , swapFunction = swap
      }

-- | Swap a token using the swap function. The token is forced before the
-- swap function is applied to it.
swapToken :: Token -> P Token
swapToken t = P $ \ !st ok _ -> ok (swapFunction st $! t) st

-- | Extract the state stored in the parser.
getPState :: P PState
getPState = P $ \ !st ok _ -> ok st st

-- | Update the state stored in the parser.
setPState :: PState -> P ()
setPState st = P $ \ _ ok _ -> ok () st

-- | Modify the state stored in the parser.
modifyPState :: (PState -> PState) -> P ()
modifyPState f = P $ \ !st ok _ -> ok () (f $! st)

-- | Retrieve the current position of the parser.
getPosition :: P Position
getPosition = fmap curPos getPState

-- | Update the current position of the parser.
setPosition :: Position -> P ()
setPosition pos = modifyPState (\st -> st{ curPos = pos })

-- | Retrieve the current 'InputStream' of the parser.
getInput :: P InputStream
getInput = fmap curInput getPState

-- | Update the current 'InputStream' of the parser.
setInput :: InputStream -> P ()
setInput i = modifyPState (\st -> st{ curInput = i })

-- | Manually push a @'Spanned' 'Token'@. This turns out to be useful when parsing tokens that need
-- to be broken up. For example, when seeing a 'Language.Rust.Syntax.GreaterEqual' token but only
-- expecting a 'Language.Rust.Syntax.Greater' token, one can consume the
-- 'Language.Rust.Syntax.GreaterEqual' token and push back an 'Language.Rust.Syntax.Equal' token.
pushToken :: Spanned Token -> P ()
pushToken tok = modifyPState (\st -> st{ pushedTokens = tok : pushedTokens st })

-- | Manually pop a @'Spanned' 'Token'@ (if there are no tokens to pop, returns 'Nothing'). See
-- 'pushToken' for more details.
popToken :: P (Maybe (Spanned Token))
popToken = P $ \ !st ok _ ->
  let pending = pushedTokens st
  in ok (listToMaybe pending) st{ pushedTokens = drop 1 pending }

-- | Signal a syntax error.
parseError :: Show b => b -> P a
parseError b = Fail.fail ("Syntax error: the symbol `" ++ show b ++ "' does not fit here")

--------------------------------------------------------------------------------
/src/Language/Rust/Parser/Reversed.hs:
--------------------------------------------------------------------------------
{-|
Module      : Language.Rust.Parser.Reversed
Description : Lists and non-empty lists stored in reverse
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : GHC

Datatypes wrapping lists and non-empty lists designed for fast append (as opposed to prepend)
along with the usual class instances.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
#if __GLASGOW_HASKELL__ < 800
{-# LANGUAGE FlexibleContexts #-}
#endif

module Language.Rust.Parser.Reversed (
  Reversed(..),
  toNonEmpty,
  unsnoc,
  snoc,
) where

import Language.Rust.Data.Position

import Data.Foldable                  ( Foldable(toList) )
import Data.Semigroup as Sem          ( Semigroup(..) )

import qualified Data.List.NonEmpty as N
import qualified GHC.Exts as G

-- | Wrap a data type where all the operations are reversed
newtype Reversed f a = Reversed (f a)

instance Functor f => Functor (Reversed f) where
  fmap f (Reversed inner) = Reversed (f <$> inner)

-- Folding visits the elements in logical order, i.e. the stored order reversed.
instance Foldable (Reversed []) where
  foldMap f (Reversed inner) = foldMap f (reverse inner)
  toList (Reversed inner) = reverse inner

instance Foldable (Reversed N.NonEmpty) where
  foldMap f (Reversed inner) = foldMap f (N.reverse inner)
  toList (Reversed inner) = reverse (toList inner)

-- Combining swaps the operands of the underlying operation.
instance Sem.Semigroup (f a) => Sem.Semigroup (Reversed f a) where
  Reversed front <> Reversed back = Reversed (back Sem.<> front)

instance Monoid (f a) => Monoid (Reversed f a) where
  mempty = Reversed mempty
  mappend (Reversed front) (Reversed back) = Reversed (mappend back front)

instance G.IsList (f a) => G.IsList (Reversed f a) where
  type Item (Reversed f a) = G.Item (f a)
  fromList items = Reversed (G.fromList (reverse items))
  toList (Reversed inner) = reverse (G.toList inner)

instance Located (f a) => Located (Reversed f a) where
  spanOf (Reversed inner) = spanOf inner

-- | Convert a reversed 'N.NonEmpty' back into a normal one.
{-# INLINE toNonEmpty #-}
toNonEmpty :: Reversed N.NonEmpty a -> N.NonEmpty a
toNonEmpty (Reversed stored) = N.reverse stored

-- | Remove an element from the end of a non-empty reversed sequence
{-# INLINE unsnoc #-}
unsnoc :: Reversed N.NonEmpty a -> (Reversed [] a, a)
unsnoc (Reversed (final N.:| before)) = (Reversed before, final)

-- | Add an element to the end of a reversed sequence to produce a non-empty
-- reversed sequence
{-# INLINE snoc #-}
snoc :: Reversed [] a -> a -> Reversed N.NonEmpty a
snoc (Reversed before) final = Reversed (final N.:| before)

--------------------------------------------------------------------------------
/src/Language/Rust/Pretty.hs:
--------------------------------------------------------------------------------
{-|
Module      : Language.Rust.Pretty
Description : Pretty printing
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : portable

This module provides functions for turning ASTs into values of type 'Doc'. These values can then be
rendered into concrete string types using functions from the @prettyprinter@ package. This has some
advantages over printing plain old strings:

  * /Backend independent/: you can use a variety of existing backends to efficiently render to all
    sorts of formats like 'Data.Text.Text', 'String', HTML, and terminal.

  * /Dynamic layouts/: the AST will render differently depending on the desired page width

        >>> :set -XTypeApplications -XOverloadedStrings
        >>> import Language.Rust.Parser
        >>> import Data.Text.Prettyprint.Doc.Util ( putDocW )
        >>> let src = parse' @(SourceFile Span) "fn foo(x: i32, y: i32, z: i32) -> i32 { x - y + z }"
        >>> let doc = pretty' src <> "\n"
        >>> putDocW 80 doc
        fn foo(x: i32, y: i32, z: i32) -> i32 {
          x - y + z
        }
        >>> putDocW 10 doc
        fn foo(
          x: i32,
          y: i32,
          z: i32,
        ) -> i32 {
          x - y + z
        }

  * /Annotations/: Depending on the backend you are using to render the 'Doc', annotations can
    determine colours, styling, links, etc.

The examples below assume the following GHCi flag and import:

>>> :set -XOverloadedStrings
>>> import Language.Rust.Syntax.AST

-}
{-# OPTIONS_GHC -Wall -fno-warn-orphans #-}

module Language.Rust.Pretty (
  -- * Printing
  pretty,
  pretty',
  prettyAnnotated,
  prettyAnnotated',
  writeSourceFile,
  writeTokens,
  Pretty(..),
  PrettyAnnotated(..),
  Doc,

  -- * Resolving
  Resolve(..),

  -- * Error reporting
  ResolveFail(..),
  Issue(..),
  Severity(..),
) where

import Language.Rust.Data.Ident
import Language.Rust.Data.Position

import Language.Rust.Syntax.AST
import Language.Rust.Syntax.Token

import Language.Rust.Pretty.Internal
import Language.Rust.Pretty.Resolve

import System.IO                             ( Handle )
import Data.Typeable                         ( Typeable )
import Data.Text.Prettyprint.Doc.Render.Text ( renderIO )
import Data.Text.Prettyprint.Doc             ( Doc )
import qualified Data.Text.Prettyprint.Doc as PP

import Control.Exception                     ( throw )

-- | Resolve (see the 'Resolve' typeclass) and pretty print something.
--
-- >>> let one = Lit [] (Int Dec 1 Unsuffixed ()) ()
-- >>> let two = Lit [] (Int Dec 2 Unsuffixed ()) ()
-- >>> let three = Lit [] (Int Dec 3 Unsuffixed ()) ()
-- >>> let bogusVar = PathExpr [] Nothing (Path False [PathSegment "let" Nothing ()] ()) ()
-- >>> pretty (Binary [] MulOp (Binary [] AddOp one two ()) three ())
-- Right (1 + 2) * 3
-- >>> pretty (Binary [] AddOp one bogusVar ())
-- Left (invalid AST (identifier `let' is a keyword))
--
pretty :: (Resolve a, Pretty a) => a -> Either ResolveFail (Doc b)
pretty x = prettyUnresolved <$> resolve x

-- | Same as 'pretty', but throws a 'ResolveFail' exception on invalid ASTs. This function is
-- intended for situations in which you are already stuck catching exceptions - otherwise you should
-- prefer 'pretty'.
--
-- >>> let one = Lit [] (Int Dec 1 Unsuffixed ()) ()
-- >>> let two = Lit [] (Int Dec 2 Unsuffixed ()) ()
-- >>> let three = Lit [] (Int Dec 3 Unsuffixed ()) ()
-- >>> let bogusVar = PathExpr [] Nothing (Path False [PathSegment "let" Nothing ()] ()) ()
-- >>> pretty' (Binary [] MulOp (Binary [] AddOp one two ()) three ())
-- (1 + 2) * 3
-- >>> pretty' (Binary [] AddOp one bogusVar ())
-- *** Exception: invalid AST (identifier `let' is a keyword))
--
pretty' :: (Resolve a, Pretty a) => a -> Doc b
pretty' x = case pretty x of
  Left err  -> throw err
  Right doc -> doc

-- | Resolve (see the 'Resolve' typeclass) and pretty print something with annotations. Read more
-- about annotations in "Data.Text.Prettyprint.Doc".
--
-- prop> fmap Data.Text.Prettyprint.Doc.noAnnotate . prettyAnnotated = pretty
--
prettyAnnotated :: (Resolve (f a), PrettyAnnotated f) => f a -> Either ResolveFail (Doc a)
prettyAnnotated x = prettyAnnUnresolved <$> resolve x

-- | Same as 'prettyAnnotated', but throws a 'ResolveFail' exception on invalid ASTs. This function
-- is intended for situations in which you are already stuck catching exceptions - otherwise you
-- should prefer 'prettyAnnotated'.
--
-- prop> Data.Text.Prettyprint.Doc.noAnnotate . prettyAnnotated' = pretty'
--
prettyAnnotated' :: (Resolve (f a), PrettyAnnotated f) => f a -> Doc a
prettyAnnotated' x = case prettyAnnotated x of
  Left err  -> throw err
  Right doc -> doc

-- | Given a handle to a file, write a 'SourceFile' to it with a desired width of 100 characters.
writeSourceFile :: (Monoid a, Typeable a) => Handle -> SourceFile a -> IO ()
writeSourceFile hdl src = renderIO hdl (PP.layoutPretty layout (prettyAnnotated' src))
  where layout = PP.LayoutOptions (PP.AvailablePerLine 100 1.0)

-- | Given a handle to a file, write a sequence of tokens to it with a desired width of 100
-- characters.
--
-- The 'Span' associated with the tokens (if present) will be used as a hint for laying out and
-- spacing the tokens.
writeTokens :: Handle -> [Spanned Token] -> IO ()
writeTokens hdl toks = renderIO hdl (PP.layoutPretty layout (pretty' (Stream (map mkTT toks))))
  where layout = PP.LayoutOptions (PP.AvailablePerLine 100 1.0)
        -- Wrap each spanned token as a single-token tree.
        mkTT (Spanned tok sp) = Tree (Token sp tok)

-- | Describes things that can be pretty printed.
class Pretty a where
  -- | Pretty print the given value without resolving it.
  prettyUnresolved :: a -> Doc b

instance Pretty Abi                 where prettyUnresolved = printAbi
instance Pretty BindingMode         where prettyUnresolved = printBindingMode
instance Pretty BinOp               where prettyUnresolved = printBinOp
instance Pretty Ident               where prettyUnresolved = printIdent
instance Pretty ImplPolarity        where prettyUnresolved = printPolarity
instance Pretty Suffix              where prettyUnresolved = printLitSuffix
instance Pretty LitTok              where prettyUnresolved = printLitTok
instance Pretty Mutability          where prettyUnresolved = printMutability
instance Pretty RangeLimits         where prettyUnresolved = printRangeLimits
instance Pretty Token               where prettyUnresolved = printToken
instance Pretty TokenTree           where prettyUnresolved = printTt
instance Pretty TokenStream         where prettyUnresolved = printTokenStream
instance Pretty UnOp                where prettyUnresolved = printUnOp
instance Pretty Unsafety            where prettyUnresolved = printUnsafety
instance Pretty (Attribute a)       where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Block a)           where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (SourceFile a)      where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Expr a)            where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Field a)           where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (FieldPat a)        where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (FnDecl a)          where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (ForeignItem a)     where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Generics a)        where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (ImplItem a)        where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Item a)            where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Lifetime a)        where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (LifetimeDef a)     where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Lit a)             where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Mac a)             where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Nonterminal a)     where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Pat a)             where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Path a)            where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (PolyTraitRef a)    where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Stmt a)            where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (StructField a)     where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (TraitItem a)       where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (TraitRef a)        where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Ty a)              where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (TyParam a)         where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (TyParamBound a)    where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Variant a)         where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (UseTree a)         where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (Visibility a)      where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (WhereClause a)     where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty (WherePredicate a)  where prettyUnresolved = PP.unAnnotate . prettyAnnUnresolved
instance Pretty Position            where prettyUnresolved = PP.pretty . prettyPosition
instance Pretty Span                where prettyUnresolved = PP.pretty . prettySpan

-- | Similar to 'Pretty', but for types which are parametrized over an annotation type.
class PrettyAnnotated p where
  -- | Pretty print the given value without resolving it, adding annotations in the 'Doc' whenever
  -- possible.
  prettyAnnUnresolved :: p a -> Doc a

-- | This instance prints attributes inline
instance PrettyAnnotated Attribute       where prettyAnnUnresolved = flip printAttr True
instance PrettyAnnotated Block           where prettyAnnUnresolved = printBlock
instance PrettyAnnotated SourceFile      where prettyAnnUnresolved = printSourceFile
instance PrettyAnnotated Expr            where prettyAnnUnresolved = printExpr
instance PrettyAnnotated Field           where prettyAnnUnresolved = printField
instance PrettyAnnotated FieldPat        where prettyAnnUnresolved = printFieldPat
instance PrettyAnnotated FnDecl          where prettyAnnUnresolved = printFnArgsAndRet
instance PrettyAnnotated ForeignItem     where prettyAnnUnresolved = printForeignItem
instance PrettyAnnotated Generics        where prettyAnnUnresolved = printGenerics
instance PrettyAnnotated ImplItem        where prettyAnnUnresolved = printImplItem
instance PrettyAnnotated Item            where prettyAnnUnresolved = printItem
instance PrettyAnnotated Lifetime        where prettyAnnUnresolved = printLifetime
instance PrettyAnnotated LifetimeDef     where prettyAnnUnresolved = printLifetimeDef
instance PrettyAnnotated Lit             where prettyAnnUnresolved = printLit
instance PrettyAnnotated Mac             where prettyAnnUnresolved = printMac Paren
instance PrettyAnnotated Nonterminal     where prettyAnnUnresolved = printNonterminal
instance PrettyAnnotated Pat             where prettyAnnUnresolved =
printPat 224 | instance PrettyAnnotated Path where prettyAnnUnresolved = flip printPath False 225 | instance PrettyAnnotated PolyTraitRef where prettyAnnUnresolved = printPolyTraitRef 226 | instance PrettyAnnotated Stmt where prettyAnnUnresolved = printStmt 227 | instance PrettyAnnotated StructField where prettyAnnUnresolved = printStructField 228 | instance PrettyAnnotated TraitItem where prettyAnnUnresolved = printTraitItem 229 | instance PrettyAnnotated TraitRef where prettyAnnUnresolved = printTraitRef 230 | instance PrettyAnnotated Ty where prettyAnnUnresolved = printType 231 | instance PrettyAnnotated TyParam where prettyAnnUnresolved = printTyParam 232 | instance PrettyAnnotated TyParamBound where prettyAnnUnresolved = printBound 233 | instance PrettyAnnotated Variant where prettyAnnUnresolved = printVariant 234 | instance PrettyAnnotated UseTree where prettyAnnUnresolved = printUseTree 235 | instance PrettyAnnotated Visibility where prettyAnnUnresolved = printVis 236 | instance PrettyAnnotated WhereClause where prettyAnnUnresolved = printWhereClause True 237 | instance PrettyAnnotated WherePredicate where prettyAnnUnresolved = printWherePredicate 238 | 239 | -------------------------------------------------------------------------------- /src/Language/Rust/Pretty/Literals.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Pretty.Literals 3 | Description : Pretty printing literals 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : portable 9 | 10 | Functions for pretty printing literals.
11 | -} 12 | {-# LANGUAGE OverloadedStrings #-} 13 | 14 | module Language.Rust.Pretty.Literals ( 15 | printLit, 16 | printLitSuffix, 17 | ) where 18 | 19 | import Language.Rust.Syntax.AST 20 | import Language.Rust.Pretty.Util 21 | 22 | import Data.Text.Prettyprint.Doc ( hcat, annotate, (<>), Doc, pretty, group, hardline, flatAlt ) 23 | 24 | import Data.Char ( intToDigit, ord, chr ) 25 | import Data.Word ( Word8 ) 26 | 27 | -- | Print a literal (@print_literal@) 28 | printLit :: Lit a -> Doc a 29 | printLit lit = noIndent $ case lit of 30 | Str str Cooked s x -> annotate x (hcat [ "\"", group (foldMap (escapeChar True) str), "\"", suf s ]) 31 | Str str (Raw m) s x -> annotate x (hcat [ "r", pad m, "\"", string hardline str, "\"", pad m, suf s ]) 32 | ByteStr str Cooked s x -> annotate x (hcat [ "b\"", group (foldMap (escapeByte True) str), "\"", suf s ]) 33 | ByteStr str (Raw m) s x -> annotate x (hcat [ "br", pad m, "\"", string hardline (map byte2Char str), "\"", pad m, suf s ]) 34 | Char c s x -> annotate x (hcat [ "'", escapeChar False c, "'", suf s ]) 35 | Byte b s x -> annotate x (hcat [ "b'", escapeByte False b, "'", suf s ]) 36 | Int b i s x -> annotate x (hcat [ printIntLit i b, suf s ]) 37 | Float d s x -> annotate x (hcat [ pretty d, suf s ]) 38 | Bool True s x -> annotate x (hcat [ "true", suf s ]) 39 | Bool False s x -> annotate x (hcat [ "false", suf s ]) 40 | where 41 | pad :: Int -> Doc a 42 | pad m = pretty (replicate m '#') 43 | 44 | suf :: Suffix -> Doc a 45 | suf = printLitSuffix 46 | 47 | -- | Print literal suffix 48 | printLitSuffix :: Suffix -> Doc a 49 | printLitSuffix Unsuffixed = mempty 50 | printLitSuffix Is = "isize" 51 | printLitSuffix I8 = "i8" 52 | printLitSuffix I16 = "i16" 53 | printLitSuffix I32 = "i32" 54 | printLitSuffix I64 = "i64" 55 | printLitSuffix I128 = "i128" 56 | printLitSuffix Us = "usize" 57 | printLitSuffix U8 = "u8" 58 | printLitSuffix U16 = "u16" 59 | printLitSuffix U32 = "u32" 60 | printLitSuffix U64 = "u64" 61 | 
printLitSuffix U128 = "u128" 62 | printLitSuffix F32 = "f32" 63 | printLitSuffix F64 = "f64" 64 | 65 | -- | Print an integer literal 66 | printIntLit :: Integer -> IntRep -> Doc a 67 | printIntLit i r | i < 0 = "-" <> baseRep r <> toNBase (abs i) (baseVal r) 68 | | i == 0 = baseRep r <> "0" 69 | | otherwise = baseRep r <> toNBase (abs i) (baseVal r) 70 | where 71 | baseRep :: IntRep -> Doc a 72 | baseRep Bin = "0b" 73 | baseRep Oct = "0o" 74 | baseRep Dec = mempty 75 | baseRep Hex = "0x" 76 | 77 | baseVal :: IntRep -> Integer 78 | baseVal Bin = 2 79 | baseVal Oct = 8 80 | baseVal Dec = 10 81 | baseVal Hex = 16 82 | 83 | toDigit :: Integer -> Char 84 | toDigit l = "0123456789ABCDEF" !! fromIntegral l 85 | 86 | toNBase :: Integer -> Integer -> Doc a 87 | l `toNBase` b | l < b = pretty (toDigit l) 88 | | otherwise = let ~(d,e) = l `quotRem` b in toNBase d b <> pretty (toDigit e) 89 | 90 | 91 | -- | Extend a byte into a unicode character 92 | byte2Char :: Word8 -> Char 93 | byte2Char = chr . fromIntegral 94 | 95 | -- | Constrain a unicode character to a byte 96 | -- This assumes the character is in the right range already 97 | char2Byte :: Char -> Word8 98 | char2Byte = fromIntegral . ord 99 | 100 | -- | Escape a byte. Based on @std::ascii::escape_default@. 101 | -- 102 | -- If the first argument is true, newlines may become literal newline characters if the string is 103 | -- too long. 104 | escapeByte :: Bool -> Word8 -> Doc a 105 | escapeByte nl w8 = case byte2Char w8 of 106 | '\t' -> "\\t" 107 | '\r' -> "\\r" 108 | '\\' -> "\\\\" 109 | '\'' -> "\\'" 110 | '"' -> "\\\"" 111 | '\n'| nl -> flatAlt hardline "\\n" 112 | | otherwise -> "\\n" 113 | c | 0x20 <= w8 && w8 <= 0x7e -> pretty c 114 | _ -> "\\x" <> padHex 2 w8 115 | 116 | -- | Escape a unicode character. Based on @std::ascii::escape_default@. 117 | -- 118 | -- If the first argument is true, newlines may become literal newline characters if the string is 119 | -- too long.
120 | escapeChar :: Bool -> Char -> Doc a 121 | escapeChar nl c | c <= '\x7f' = escapeByte nl (char2Byte c) 122 | | c <= '\xffff' = "\\u{" <> padHex 4 (ord c) <> "}" 123 | | otherwise = "\\u{" <> padHex 6 (ord c) <> "}" 124 | 125 | -- | Convert a number to its padded hexadecimal form 126 | padHex :: Integral a => Int -> a -> Doc b 127 | padHex i 0 = pretty (replicate i '0') 128 | padHex i m = let (m',r) = m `divMod` 0x10 129 | in padHex (i-1) m' <> pretty (intToDigit (fromIntegral r)) 130 | 131 | -------------------------------------------------------------------------------- /src/Language/Rust/Pretty/Util.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Pretty.Util 3 | Description : pretty printing utilities 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : portable 9 | 10 | This module contains a variety of utility functions for pretty printing. Most of these require 11 | inspecting the internal structure of 'Doc'. 12 | 13 | Wadler's take on pretty printing is super useful because it allows us to print things like blocks 14 | much more nicely (see [this](http://stackoverflow.com/a/41424946/3072788)) than Hughes'. 15 | Unfortunately, unlike Hughes', Wadler does not have 'mempty' as the identity of @<+>@ - the 16 | space between the arguments of @<+>@ does not go away even if either argument is empty. The 17 | same problem shows up for @hsep@, @<#>@, @vsep@, @</>@, etc. 18 | 19 | My solution has been to redefine my versions of these functions which _do_ treat 'mempty' as a 20 | neutral element for @<+>@, @hsep@, @<#>@, @vsep@, and @</>@. 21 | -} 22 | {-# LANGUAGE OverloadedStrings #-} 23 | 24 | module Language.Rust.Pretty.Util where 25 | 26 | import Data.Monoid as M 27 | 28 | import qualified Data.Text.Prettyprint.Doc as PP 29 | import Data.Text.Prettyprint.Doc.Internal.Type ( Doc(..)
) 30 | 31 | import Language.Rust.Syntax.Token ( Delim(..) ) 32 | 33 | -- | Indentation level 34 | n :: Int 35 | n = 2 36 | 37 | -- | Similar to 'maybe', but where the 'Nothing' case is an empty 'Doc' 38 | emptyElim :: Doc a -- ^ result if scrutinee is empty 39 | -> (Doc a -> Doc a) -- ^ how to process scrutinee if it is not empty 40 | -> Doc a -- ^ scrutinee 'Doc' 41 | -> Doc a 42 | emptyElim a _ Empty = a 43 | emptyElim _ f doc = f doc 44 | 45 | -- | Vertically concatenate two 'Doc's with a collapsible line between them 46 | (<##>) :: Doc a -> Doc a -> Doc a 47 | d1 <##> d2 = d1 M.<> PP.line' M.<> d2 48 | 49 | -- | Flatten a 'Doc' 50 | flatten :: Doc a -> Doc a 51 | flatten d@Fail{} = d 52 | flatten d@Empty{} = d 53 | flatten d@Char{} = d 54 | flatten d@Text{} = d 55 | flatten d@Line{} = d 56 | flatten (FlatAlt _ d) = d 57 | flatten (Cat d1 d2) = Cat (flatten d1) (flatten d2) 58 | flatten (Nest i d) = Nest i (flatten d) 59 | flatten (Union d _) = flatten d 60 | flatten (Column f) = Column (flatten . f) 61 | flatten (WithPageWidth f) = WithPageWidth (flatten . f) 62 | flatten (Nesting f) = Nesting (flatten . 
f) 63 | flatten (Annotated a d) = Annotated a (flatten d) 64 | 65 | -- | Map the list of items into 'Doc's using the provided function and add comma punctuation 66 | commas :: [a] -> (a -> Doc b) -> Doc b 67 | commas xs f = PP.hsep (PP.punctuate "," (map f xs)) 68 | 69 | -- | Take a binary operation on docs and lift it to one that has (left and right) identity 'mempty' 70 | liftOp :: (Doc a -> Doc a -> Doc a) -> Doc a -> Doc a -> Doc a 71 | liftOp _ Empty d = d 72 | liftOp _ d Empty = d 73 | liftOp (#) d d' = d # d' 74 | 75 | -- | Lifted version of Wadler's @<+>@ 76 | (<+>) :: Doc a -> Doc a -> Doc a 77 | (<+>) = liftOp (PP.<+>) 78 | 79 | -- | Lifted version of Wadler's @hsep@ 80 | hsep :: Foldable f => f (Doc a) -> Doc a 81 | hsep = foldr (<+>) mempty 82 | 83 | -- | Lifted version of Wadler's @<#>@ 84 | (<#>) :: Doc a -> Doc a -> Doc a 85 | (<#>) = liftOp (\x y -> x <> PP.line <> y) 86 | 87 | -- | Lifted version of Wadler's @vsep@ 88 | vsep :: Foldable f => f (Doc a) -> Doc a 89 | vsep = foldr (<#>) mempty 90 | 91 | -- | Lifted version of Wadler's @</>@ 92 | (</>) :: Doc a -> Doc a -> Doc a 93 | (</>) = liftOp (\x y -> x <> PP.softline <> y) 94 | 95 | -- | Unless the condition holds, print the document 96 | unless :: Bool -> Doc a -> Doc a 97 | unless b = when (not b) 98 | 99 | -- | When the condition holds, print the document 100 | when :: Bool -> Doc a -> Doc a 101 | when cond d = if cond then d else mempty 102 | 103 | -- | Apply a printing function to an optional value. If the value is 'Nothing', 'perhaps' returns 104 | -- the empty 'Doc'. 105 | perhaps :: (a -> Doc b) -> Maybe a -> Doc b 106 | perhaps = maybe mempty 107 | 108 | -- | Indent the given 'Doc', but only if multi-line 109 | indent :: Int -> Doc a -> Doc a 110 | indent m doc = PP.flatAlt (PP.indent m doc) (flatten doc) 111 | 112 | -- | Undo what group does. This function is pretty dangerous...
113 | ungroup :: Doc a -> Doc a 114 | ungroup (Union _ x) = x 115 | ungroup y = y 116 | 117 | -- | Remove all indent 118 | noIndent :: Doc a -> Doc a 119 | noIndent d = PP.nesting (\i -> PP.nest (negate i) d) 120 | 121 | -- | Translate '\n' in a string using the provided 'Doc' instead of 'line' 122 | string :: Doc a -> String -> Doc a 123 | string new = foldMap (\c -> case c of { '\n' -> new; _ -> Char c }) 124 | 125 | -- | This is the most general function for printing blocks. It operates with any delimiter, any 126 | -- separator, an optional leading attribute doc (which isn't followed by a separator), and wraps a 127 | -- list of entries. It has been tweaked to look Just Right (TM) for the usual cases. 128 | -- 129 | -- Note that this will try to fit things on one line when possible, so if you want a block that is 130 | -- sure /not/ to be condensed on one line (e.g. for a function), you have to construct it manually. 131 | block :: Delim -- ^ outer delimiters 132 | -> Bool -- ^ prefer to be on one line (as opposed to multiline)? 
133 | -> Doc a -- ^ separator 134 | -> Doc a -- ^ attributes doc, after which no separator will be added (use 'mempty' to ignore) 135 | -> [Doc a] -- ^ entries 136 | -> Doc a 137 | block delim p s as xs = group' (lDel # PP.vsep (as' ++ ys) # rDel) 138 | where 139 | group' = if p || null (as' ++ ys) then PP.group else id 140 | 141 | -- left and right delimiter lists 142 | (lDel,rDel) = case delim of 143 | Paren -> ("(", ")") 144 | Bracket -> ("[", "]") 145 | Brace -> ("{", "}") 146 | NoDelim -> (mempty, mempty) 147 | 148 | -- method of concatenating delimiters with the rest of the body 149 | (#) = case delim of { Paren -> (<##>); _ -> (<#>) } 150 | 151 | -- attributes 152 | as' = case as of 153 | Empty -> [] 154 | _ -> [ PP.flatAlt (PP.indent n as) (flatten as) ] 155 | 156 | -- list of entries 157 | ys = go xs where go [] = [] 158 | go [z] = [ PP.flatAlt (PP.indent n z <> s) (flatten z) ] 159 | go (z:zs) = PP.flatAlt (PP.indent n z <> s) (flatten z <> s) : go zs 160 | 161 | 162 | -------------------------------------------------------------------------------- /src/Language/Rust/Quote.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Quote 3 | Description : Quasiquotes for Rust AST 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : GHC 9 | 10 | Quasiquoters for converting Rust code into the equivalent Haskell patterns and expressions. 11 | These are just convenience wrappers over 'dataToExpQ' and 'dataToPatQ'. These quasiquoters 12 | only work as expressions and patterns, not as declarations or types. The pattern quasiquoters 13 | recursively ignore any 'Span' or 'Position' fields (replacing them with wildcard patterns).
14 | 15 | Using quasiquotes instead of manually writing out the AST means that even if the AST evolves 16 | (perhaps by adding extra fields to certain nodes), your code is likely to continue to work. 17 | 18 | The examples below assume the following GHCi flag and import: 19 | 20 | >>> :set -XQuasiQuotes 21 | >>> import Control.Monad ( void ) 22 | -} 23 | 24 | 25 | module Language.Rust.Quote ( 26 | lit, attr, ty, pat, stmt, expr, item, sourceFile, implItem, traitItem, tokenTree, block 27 | ) where 28 | 29 | {- 30 | In the future, we may try to do something similar to Rust macros to extract or inject ASTs out or 31 | into the quasiquotes. 32 | 33 | Eventually, one should be able to just import this module for code generation. The following 34 | interaction is what should eventually work. 35 | 36 | >>> import qualified Language.Rust.Quote as Q 37 | >>> :set -XQuasiQuotes +t 38 | >>> let one = [Q.expr| 1i32 |] 39 | one :: Expr Span 40 | >>> [Q.expr| |x: i32| -> $retTy:ty $body:block |] = [Q.expr| |x: i32| -> i32 { ($one) + x } |] 41 | retTy :: Ty Span 42 | body :: Block Span 43 | >>> import Language.Rust.Pretty 44 | >>> pretty retTy 45 | i32 46 | >>> pretty body 47 | { (1i32) + x } 48 | 49 | For now, however, you cannot use @$x@ or @$x:ty@ meta variables. 50 | -} 51 | 52 | import Language.Rust.Parser.ParseMonad 53 | import Language.Rust.Parser.Internal 54 | import Language.Rust.Data.InputStream ( inputStreamFromString ) 55 | import Language.Rust.Data.Position ( Position(..), Span ) 56 | 57 | import Language.Haskell.TH 58 | import Language.Haskell.TH.Quote ( QuasiQuoter(..), dataToExpQ, dataToPatQ ) 59 | 60 | import Control.Applicative ( (<|>) ) 61 | import Control.Monad ( (>=>) ) 62 | import Data.Functor ( ($>) ) 63 | import Data.Typeable ( cast, Typeable ) 64 | import Data.Data ( Data ) 65 | 66 | -- | Given a parser, convert it into a quasiquoter. The quasiquoter produced does not support 67 | -- declarations and types. 
For patterns, it replaces any 'Span' and 'Position' field with a 68 | -- wild pattern. 69 | quoter :: Data a => P a -> QuasiQuoter 70 | quoter p = QuasiQuoter 71 | { quoteExp = parse >=> dataToExpQ (const Nothing) 72 | , quotePat = parse >=> dataToPatQ wildSpanPos 73 | , quoteDec = error "this quasiquoter does not support declarations" 74 | , quoteType = error "this quasiquoter does not support types" 75 | } 76 | where 77 | -- | Given a parser and an input string, turn it into the corresponding Haskell expression/pattern. 78 | parse inp = do 79 | Loc{ loc_start = (r,c) } <- location 80 | 81 | -- Run the parser 82 | case execParser p (inputStreamFromString inp) (Position 0 r c) of 83 | Left (ParseFail _ msg) -> fail msg 84 | Right x -> pure x 85 | 86 | -- | Replace 'Span' and 'Position' with wild patterns 87 | wildSpanPos :: Typeable b => b -> Maybe (Q Pat) 88 | wildSpanPos x = ((cast x :: Maybe Span) $> wildP) <|> ((cast x :: Maybe Position) $> wildP) 89 | 90 | 91 | -- | Quasiquoter for literals (see 'Language.Rust.Syntax.Lit'). 
92 | -- 93 | -- >>> void [lit| 1.4e29f64 |] 94 | -- Float 1.4e29 F64 () 95 | -- 96 | lit :: QuasiQuoter 97 | lit = quoter parseLit 98 | 99 | -- | Quasiquoter for attributes (see 'Language.Rust.Syntax.Attribute') 100 | -- 101 | -- >>> void [attr| #[no_mangle] |] 102 | -- Attribute Outer (Path False [PathSegment "no_mangle" Nothing ()] ()) (Stream []) () 103 | -- 104 | attr :: QuasiQuoter 105 | attr = quoter parseAttr 106 | 107 | -- | Quasiquoter for types (see 'Language.Rust.Syntax.Ty') 108 | -- 109 | -- >>> void [ty| &(_,_) |] 110 | -- Rptr Nothing Immutable (TupTy [Infer (),Infer ()] ()) () 111 | -- 112 | ty :: QuasiQuoter 113 | ty = quoter parseTy 114 | 115 | -- | Quasiquoter for patterns (see 'Language.Rust.Syntax.Pat') 116 | -- 117 | -- >>> void [pat| x @ 1...5 |] 118 | -- IdentP (ByValue Immutable) "x" (Just (RangeP (Lit [] (Int Dec 1 Unsuffixed ()) ()) 119 | -- (Lit [] (Int Dec 5 Unsuffixed ()) ()) ())) () 120 | -- 121 | pat :: QuasiQuoter 122 | pat = quoter parsePat 123 | 124 | -- | Quasiquoter for statements (see 'Language.Rust.Syntax.Stmt') 125 | -- 126 | -- >>> void [stmt| let x = 4i32; |] 127 | -- Local (IdentP (ByValue Immutable) "x" Nothing ()) Nothing (Just (Lit [] (Int Dec 4 I32 ()) ())) [] () 128 | -- 129 | stmt :: QuasiQuoter 130 | stmt = quoter parseStmt 131 | 132 | -- | Quasiquoter for expressions (see 'Language.Rust.Syntax.Expr') 133 | -- 134 | -- >>> void [expr| (x,) |] 135 | -- TupExpr [] [PathExpr [] Nothing (Path False [PathSegment "x" Nothing ()] ()) ()] () 136 | -- 137 | expr :: QuasiQuoter 138 | expr = quoter parseExpr 139 | 140 | -- | Quasiquoter for items (see 'Language.Rust.Syntax.Item') 141 | -- 142 | -- >>> void [item| type Unit = (); |] 143 | -- TyAlias [] InheritedV "Unit" (TupTy [] ()) (Generics [] [] (WhereClause [] ()) ()) () 144 | -- 145 | item :: QuasiQuoter 146 | item = quoter parseItem 147 | 148 | -- | Quasiquoter for a whole source file (see 'Language.Rust.Syntax.SourceFile') 149 | -- 150 | -- >>> void [sourceFile| fn 
main() { } |] 151 | -- SourceFile Nothing [] [Fn [] InheritedV "main" 152 | -- (FnDecl [] Nothing False ()) 153 | -- Normal NotConst Rust 154 | -- (Generics [] [] (WhereClause [] ()) ()) 155 | -- (Block [] Normal ()) ()] 156 | -- 157 | sourceFile :: QuasiQuoter 158 | sourceFile = quoter parseSourceFile 159 | 160 | -- | Quasiquoter for blocks (see 'Language.Rust.Syntax.Block') 161 | -- 162 | -- >>> void [block| unsafe { 1i32 } |] 163 | -- Block [NoSemi (Lit [] (Int Dec 1 I32 ()) ()) ()] Unsafe () 164 | -- 165 | block :: QuasiQuoter 166 | block = quoter parseBlock 167 | 168 | -- | Quasiquoter for impl items (see 'Language.Rust.Syntax.ImplItem') 169 | -- 170 | -- >>> void [implItem| type Item = (); |] 171 | -- TypeI [] InheritedV Final "Item" (TupTy [] ()) () 172 | -- 173 | implItem :: QuasiQuoter 174 | implItem = quoter parseImplItem 175 | 176 | -- | Quasiquoter for trait items (see 'Language.Rust.Syntax.TraitItem') 177 | -- 178 | -- >>> void [traitItem| type Item; |] 179 | -- TypeT [] "Item" [] Nothing () 180 | -- 181 | traitItem :: QuasiQuoter 182 | traitItem = quoter parseTraitItem 183 | 184 | -- | Quasiquoter for token trees (see 'Language.Rust.Syntax.TokenTree') 185 | -- 186 | -- >>> [tokenTree| fn |] 187 | -- Token (Span (Position 1 2 14) (Position 3 2 16)) fn 188 | -- 189 | tokenTree :: QuasiQuoter 190 | tokenTree = quoter parseTt 191 | 192 | -------------------------------------------------------------------------------- /src/Language/Rust/Syntax.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Syntax 3 | Description : Syntax data definitions 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : GHC 9 | 10 | This module defines Haskell data types corresponding to the abstract syntax tree(s) of the Rust 11 | language, based on the definitions @rustc@ uses (defined in @libsyntax@) whenever
possible. 12 | Unfortunately, since the internals of @rustc@ are not exposed, there are no official 13 | docs. are the 14 | unofficial docs. 15 | -} 16 | 17 | module Language.Rust.Syntax ( 18 | -- * Abstract syntax trees 19 | module Language.Rust.Syntax.AST, 20 | -- * Tokens 21 | module Language.Rust.Syntax.Token, 22 | ) where 23 | 24 | import Language.Rust.Syntax.AST 25 | import Language.Rust.Syntax.Token 26 | 27 | -- Using import/export shortcut screws up Haddock 28 | {-# ANN module "HLint: ignore Use import/export shortcut" #-} 29 | 30 | -------------------------------------------------------------------------------- /src/Language/Rust/Syntax/Token.hs: -------------------------------------------------------------------------------- 1 | {-| 2 | Module : Language.Rust.Syntax.Token 3 | Description : Token definitions 4 | Copyright : (c) Alec Theriault, 2017-2018 5 | License : BSD-style 6 | Maintainer : alec.theriault@gmail.com 7 | Stability : experimental 8 | Portability : GHC 9 | 10 | Contains roughly the same stuff as @syntax::parse::token@ - data definitions for tokens. 11 | -} 12 | {-# LANGUAGE DeriveDataTypeable #-} 13 | {-# LANGUAGE DeriveGeneric #-} 14 | {-# LANGUAGE DeriveAnyClass #-} 15 | 16 | module Language.Rust.Syntax.Token ( 17 | Token(..), 18 | spaceNeeded, 19 | Space(..), 20 | Delim(..), 21 | LitTok(..), 22 | AttrStyle(..), 23 | ) where 24 | 25 | import GHC.Generics ( Generic ) 26 | 27 | import Control.DeepSeq ( NFData ) 28 | import Data.Data ( Data ) 29 | import Data.Maybe ( fromMaybe ) 30 | import Data.Typeable ( Typeable ) 31 | 32 | import Language.Rust.Data.Ident ( Ident(name), Name ) 33 | import Language.Rust.Data.Position ( Span ) 34 | import Language.Rust.Syntax.AST ( Nonterminal, AttrStyle(..) ) 35 | 36 | -- | A general token (based on @syntax::parse::token::Token@). 37 | -- 38 | -- Unlike its @libsyntax@ counterpart, 'Token' has folded in @syntax::parse::token::BinOpToken@ 39 | -- and @syntax::parse::token::BinOpEqToken@ as regular tokens. 
40 | data Token 41 | -- Single character expression-operator symbols. 42 | = Equal -- ^ @=@ token 43 | | Less -- ^ @<@ token 44 | | Greater -- ^ @>@ token 45 | | Ampersand -- ^ @&@ token 46 | | Pipe -- ^ @|@ token 47 | | Exclamation -- ^ @!@ token 48 | | Tilde -- ^ @~@ token 49 | | Plus -- ^ @+@ token 50 | | Minus -- ^ @-@ token 51 | | Star -- ^ @*@ token 52 | | Slash -- ^ @/@ token 53 | | Percent -- ^ @%@ token 54 | | Caret -- ^ @^@ token 55 | 56 | -- Multi character expression-operator symbols 57 | | GreaterEqual -- ^ @>=@ token 58 | | GreaterGreaterEqual -- ^ @>>=@ token 59 | | AmpersandAmpersand -- ^ @&&@ token 60 | | PipePipe -- ^ @||@ token 61 | | LessLess -- ^ @<<@ token 62 | | GreaterGreater -- ^ @>>@ token 63 | | EqualEqual -- ^ @==@ token 64 | | NotEqual -- ^ @!=@ token 65 | | LessEqual -- ^ @<=@ token 66 | | LessLessEqual -- ^ @<<=@ token 67 | | MinusEqual -- ^ @-=@ token 68 | | AmpersandEqual -- ^ @&=@ token 69 | | PipeEqual -- ^ @|=@ token 70 | | PlusEqual -- ^ @+=@ token 71 | | StarEqual -- ^ @*=@ token 72 | | SlashEqual -- ^ @/=@ token 73 | | CaretEqual -- ^ @^=@ token 74 | | PercentEqual -- ^ @%=@ token 75 | 76 | -- Structural symbols 77 | | At -- ^ @\@@ token 78 | | Dot -- ^ @.@ token 79 | | DotDot -- ^ @..@ token 80 | | DotDotEqual -- ^ @..=@ token 81 | | DotDotDot -- ^ @...@ token 82 | | Comma -- ^ @,@ token 83 | | Semicolon -- ^ @;@ token 84 | | Colon -- ^ @:@ token 85 | | ModSep -- ^ @::@ token 86 | | RArrow -- ^ @->@ token 87 | | LArrow -- ^ @<-@ token 88 | | FatArrow -- ^ @=>@ token 89 | | Pound -- ^ @#@ token 90 | | Dollar -- ^ @$@ token 91 | | Question -- ^ @?@ token 92 | 93 | -- Delimiters 94 | | OpenDelim !Delim -- ^ One of @(@, @[@, @{@ 95 | | CloseDelim !Delim -- ^ One of @)@, @]@, @}@ 96 | 97 | -- Literals 98 | | LiteralTok LitTok (Maybe Name) -- ^ a literal token with an optional suffix (something like @i32@) 99 | 100 | -- Name components 101 | | IdentTok Ident -- ^ an arbitrary identifier (something like @x@ or @foo@ or @and_then@) 
102 | | LifetimeTok Ident -- ^ a lifetime (something like @\'a@ or @\'static@) 103 | | Space Space Name -- ^ whitespace 104 | | Doc String !AttrStyle !Bool 105 | -- ^ doc comment with its contents, whether it is outer/inner, and whether it is inline or not 106 | | Shebang -- ^ @#!@ shebang token 107 | | Eof -- ^ end of file token 108 | 109 | -- NOT PRODUCED IN TOKENIZATION!! 110 | | Interpolated (Nonterminal Span) -- ^ can be expanded into several tokens in macro-expansion 111 | deriving (Eq, Ord, Data, Typeable, Generic, NFData) 112 | 113 | -- | Rust is whitespace independent. Short of providing space between tokens, whitespace is all the 114 | -- same to the parser. 115 | data Space 116 | = Whitespace -- ^ usual white space: @[\\ \\t\\n\\f\\v\\r]+@ 117 | | Comment -- ^ comment (either inline or not) 118 | deriving (Eq, Ord, Show, Enum, Bounded, Data, Typeable, Generic, NFData) 119 | 120 | -- TODO: BANISH NoDelim! (or rather: distinguish DelimToken from Delim, as rustc does) 121 | -- | A delimiter token (@syntax::parse::token::DelimToken@). 122 | data Delim 123 | = Paren -- ^ round parenthesis: @(@ or @)@ 124 | | Bracket -- ^ square bracket: @[@ or @]@ 125 | | Brace -- ^ curly brace: @{@ or @}@ 126 | | NoDelim -- ^ empty delimiter 127 | deriving (Eq, Ord, Enum, Bounded, Show, Data, Typeable, Generic, NFData) 128 | 129 | -- | A literal token (@syntax::parse::token::Lit@) 130 | data LitTok 131 | = ByteTok Name -- ^ byte 132 | | CharTok Name -- ^ character 133 | | IntegerTok Name -- ^ integral literal (could have type @i32@, @int@, @u128@, etc.) 134 | | FloatTok Name -- ^ floating point literal (could have type @f32@, @f64@, etc.) 
135 | | StrTok Name -- ^ string literal 136 | | StrRawTok Name !Int -- ^ raw string literal and the number of @#@ marks around it 137 | | ByteStrTok Name -- ^ byte string literal 138 | | ByteStrRawTok Name !Int -- ^ raw byte string literal and the number of @#@ marks around it 139 | deriving (Eq, Ord, Show, Data, Typeable, Generic, NFData) 140 | 141 | 142 | -- | Check whether a space is needed between two tokens to avoid confusion. 143 | spaceNeeded :: Token -> Token -> Bool 144 | -- conflicts with 'GreaterEqual' 145 | spaceNeeded Greater Equal = True 146 | spaceNeeded Greater EqualEqual = True 147 | spaceNeeded Greater FatArrow = True 148 | 149 | -- conflicts with 'GreaterGreaterEqual' 150 | spaceNeeded Greater GreaterEqual = True 151 | spaceNeeded GreaterGreater Equal = True 152 | spaceNeeded GreaterGreater EqualEqual = True 153 | spaceNeeded GreaterGreater FatArrow = True 154 | 155 | -- conflicts with 'AmpersandAmpersand' 156 | spaceNeeded Ampersand Ampersand = True 157 | spaceNeeded Ampersand AmpersandAmpersand = True 158 | spaceNeeded Ampersand AmpersandEqual = True 159 | 160 | -- conflicts with 'PipePipe' 161 | spaceNeeded Pipe Pipe = True 162 | spaceNeeded Pipe PipePipe = True 163 | spaceNeeded Pipe PipeEqual = True 164 | 165 | -- conflicts with 'LessLess' 166 | spaceNeeded Less Less = True 167 | spaceNeeded Less LessLess = True 168 | spaceNeeded Less LessLessEqual = True 169 | spaceNeeded Less LArrow = True 170 | 171 | -- conflicts with 'GreaterGreater' 172 | spaceNeeded Greater Greater = True 173 | spaceNeeded Greater GreaterGreater = True 174 | spaceNeeded Greater GreaterGreaterEqual = True 175 | 176 | -- conflicts with 'EqualEqual' 177 | spaceNeeded Equal Equal = True 178 | spaceNeeded Equal EqualEqual = True 179 | spaceNeeded Equal FatArrow = True 180 | 181 | -- conflicts with 'NotEqual' 182 | spaceNeeded Exclamation Equal = True 183 | spaceNeeded Exclamation EqualEqual = True 184 | spaceNeeded Exclamation FatArrow = True 185 | 186 | -- conflicts with 
'LessEqual' 187 | spaceNeeded Less Equal = True 188 | spaceNeeded Less EqualEqual = True 189 | spaceNeeded Less FatArrow = True 190 | 191 | -- conflicts with 'LessLessEqual' 192 | spaceNeeded Less LessEqual = True 193 | spaceNeeded LessLess Equal = True 194 | spaceNeeded LessLess EqualEqual = True 195 | spaceNeeded LessLess FatArrow = True 196 | 197 | -- conflicts with 'MinusEqual' 198 | spaceNeeded Minus Equal = True 199 | spaceNeeded Minus EqualEqual = True 200 | spaceNeeded Minus FatArrow = True 201 | 202 | -- conflicts with 'AmpersandEqual' 203 | spaceNeeded Ampersand Equal = True 204 | spaceNeeded Ampersand EqualEqual = True 205 | spaceNeeded Ampersand FatArrow = True 206 | 207 | -- conflicts with 'PipeEqual' 208 | spaceNeeded Pipe Equal = True 209 | spaceNeeded Pipe EqualEqual = True 210 | spaceNeeded Pipe FatArrow = True 211 | 212 | -- conflicts with 'PlusEqual' 213 | spaceNeeded Plus Equal = True 214 | spaceNeeded Plus EqualEqual = True 215 | spaceNeeded Plus FatArrow = True 216 | 217 | -- conflicts with 'StarEqual' 218 | spaceNeeded Star Equal = True 219 | spaceNeeded Star EqualEqual = True 220 | spaceNeeded Star FatArrow = True 221 | 222 | -- conflicts with 'SlashEqual' 223 | spaceNeeded Slash Equal = True 224 | spaceNeeded Slash EqualEqual = True 225 | spaceNeeded Slash FatArrow = True 226 | 227 | -- conflicts with 'CaretEqual' 228 | spaceNeeded Caret Equal = True 229 | spaceNeeded Caret EqualEqual = True 230 | spaceNeeded Caret FatArrow = True 231 | 232 | -- conflicts with 'PercentEqual' 233 | spaceNeeded Percent Equal = True 234 | spaceNeeded Percent EqualEqual = True 235 | spaceNeeded Percent FatArrow = True 236 | 237 | -- conflicts with 'DotDot' 238 | spaceNeeded Dot Dot = True 239 | spaceNeeded Dot DotDot = True 240 | spaceNeeded Dot DotDotDot = True 241 | 242 | -- conflicts with 'DotDotDot' 243 | spaceNeeded DotDot Dot = True 244 | spaceNeeded DotDot DotDot = True 245 | spaceNeeded DotDot DotDotDot = True 246 | 247 | -- conflicts with 'DotDotEqual' 
248 | spaceNeeded DotDot Equal = True 249 | spaceNeeded DotDot EqualEqual = True 250 | spaceNeeded DotDot FatArrow = True 251 | 252 | -- conflicts with 'ModSep' 253 | spaceNeeded Colon Colon = True 254 | spaceNeeded Colon ModSep = True 255 | 256 | -- conflicts with 'RArrow' 257 | spaceNeeded Minus Greater = True 258 | spaceNeeded Minus GreaterGreater = True 259 | spaceNeeded Minus GreaterEqual = True 260 | spaceNeeded Minus GreaterGreaterEqual = True 261 | 262 | -- conflicts with 'LArrow' 263 | spaceNeeded Less Minus = True 264 | spaceNeeded Less MinusEqual = True 265 | spaceNeeded Less RArrow = True 266 | 267 | -- conflicts with 'FatArrow' 268 | spaceNeeded Equal Greater = True 269 | spaceNeeded Equal GreaterGreater = True 270 | spaceNeeded Equal GreaterEqual = True 271 | spaceNeeded Equal GreaterGreaterEqual = True 272 | 273 | -- conflicts with 'LiteralTok' 274 | spaceNeeded LiteralTok{} IdentTok{} = True 275 | 276 | -- conflicts with 'IdentTok' 277 | spaceNeeded IdentTok{} IdentTok{} = True 278 | 279 | -- conflicts with 'Shebang' 280 | spaceNeeded Pound Exclamation = True 281 | spaceNeeded Pound NotEqual = True 282 | 283 | -- there are no other conflicts 284 | spaceNeeded _ _ = False 285 | 286 | 287 | -- | This instance is only for error messages and debugging purposes. 288 | instance Show Token where 289 | -- Single character expression-operator symbols. 290 | show Equal = "=" 291 | show Less = "<" 292 | show Greater = ">" 293 | show Ampersand = "&" 294 | show Pipe = "|" 295 | show Exclamation = "!" 
296 | show Tilde = "~" 297 | show Plus = "+" 298 | show Minus = "-" 299 | show Star = "*" 300 | show Slash = "/" 301 | show Percent = "%" 302 | -- Multi-character expression-operator symbols 303 | show GreaterEqual = ">=" 304 | show GreaterGreaterEqual = ">>=" 305 | show AmpersandAmpersand = "&&" 306 | show PipePipe = "||" 307 | show LessLess = "<<" 308 | show GreaterGreater = ">>" 309 | show EqualEqual = "==" 310 | show NotEqual = "!=" 311 | show LessEqual = "<=" 312 | show LessLessEqual = "<<=" 313 | show MinusEqual = "-=" 314 | show AmpersandEqual = "&=" 315 | show PipeEqual = "|=" 316 | show PlusEqual = "+=" 317 | show StarEqual = "*=" 318 | show SlashEqual = "/=" 319 | show CaretEqual = "^=" 320 | show PercentEqual = "%=" 321 | show Caret = "^" 322 | -- Structural symbols 323 | show At = "@" 324 | show Dot = "." 325 | show DotDot = ".." 326 | show DotDotDot = "..." 327 | show DotDotEqual = "..=" 328 | show Comma = "," 329 | show Semicolon = ";" 330 | show Colon = ":" 331 | show ModSep = "::" 332 | show RArrow = "->" 333 | show LArrow = "<-" 334 | show FatArrow = "=>" 335 | show Pound = "#" 336 | show Dollar = "$" 337 | show Question = "?" 338 | -- Delimiters, eg.
@{@, @]@, @(@ 339 | show (OpenDelim Paren) = "(" 340 | show (OpenDelim Bracket) = "[" 341 | show (OpenDelim Brace) = "{" 342 | show (OpenDelim NoDelim) = "" 343 | show (CloseDelim Paren) = ")" 344 | show (CloseDelim Bracket) = "]" 345 | show (CloseDelim Brace) = "}" 346 | show (CloseDelim NoDelim) = "" 347 | -- Literals 348 | show (LiteralTok (ByteTok n) s) = "b'" ++ n ++ "'" ++ fromMaybe "" s 349 | show (LiteralTok (CharTok n) s) = "'" ++ n ++ "'" ++ fromMaybe "" s 350 | show (LiteralTok (IntegerTok n) s) = n ++ fromMaybe "" s 351 | show (LiteralTok (FloatTok n) s) = n ++ fromMaybe "" s 352 | show (LiteralTok (StrTok n) s) = "\"" ++ n ++ "\"" ++ fromMaybe "" s 353 | show (LiteralTok (StrRawTok n i) s) = "r" ++ replicate i '#' ++ "\"" ++ n ++ "\"" ++ replicate i '#' ++ fromMaybe "" s 354 | show (LiteralTok (ByteStrTok n) s) = "b\"" ++ n ++ "\"" ++ fromMaybe "" s 355 | show (LiteralTok (ByteStrRawTok n i) s) = "br" ++ replicate i '#' ++ "\"" ++ n ++ "\"" ++ replicate i '#' ++ fromMaybe "" s 356 | -- Name components 357 | show (IdentTok i) = name i 358 | show (LifetimeTok l) = "'" ++ show l -- NOTE(review): 'IdentTok' renders through 'name' but this case uses 'show' on the identifier; confirm both produce the same text 359 | show (Space Whitespace _) = "" 360 | show (Space Comment n) = "/*" ++ show n ++ " */" -- NOTE(review): 'show n' quotes and escapes the comment text, unlike the literal cases above which splice 'n' in directly; confirm this is intended 361 | show (Doc d Inner True) = "/*!" ++ d ++ "*/" 362 | show (Doc d Outer True) = "/**" ++ d ++ "*/" 363 | show (Doc d Inner False) = "//!" ++ d 364 | show (Doc d Outer False) = "///" ++ d 365 | show Shebang = "#!"
366 | show Eof = "" 367 | -- Macro related 368 | show Interpolated{} = "" 369 | 370 | -------------------------------------------------------------------------------- /src/Language/Rust/Syntax/Token.hs-boot: -------------------------------------------------------------------------------- 1 | -- .hs-boot files play badly with associated type families like 'Rep' 2 | {-# LANGUAGE CPP #-} 3 | #if __GLASGOW_HASKELL__ >= 800 4 | {-# OPTIONS_GHC -Wno-missing-methods #-} 5 | #endif 6 | 7 | module Language.Rust.Syntax.Token where 8 | 9 | import GHC.Generics (Generic) 10 | import Data.Data (Data) 11 | -- import Data.Typeable (Typeable) 12 | import Control.DeepSeq (NFData) 13 | 14 | data LitTok 15 | 16 | data Token 17 | instance Eq Token 18 | instance Ord Token 19 | instance Show Token 20 | instance Data Token 21 | -- instance Typeable Token 22 | instance Generic Token 23 | instance NFData Token 24 | 25 | data Delim 26 | instance Eq Delim 27 | instance Ord Delim 28 | instance Data Delim 29 | -- instance Typeable Delim 30 | instance Generic Delim 31 | instance Show Delim 32 | instance NFData Delim 33 | 34 | data Space 35 | instance NFData Space 36 | instance Eq Space 37 | instance Ord Space 38 | 39 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | We have two categories of tests. If you are using `stack` you can run them with 2 | 3 | ``` 4 | $ stack test # runs all tests 5 | $ stack test :unit-tests # runs unit test suite 6 | $ stack test :rustc-tests # runs rustc test suite 7 | $ stack test :rustc-tests --test-arguments "-t regex|src|dfa.rs" # runs just one test file 8 | $ stack test --test-arguments "--hide-successes" # show only failing tests 9 | $ stack test --test-arguments "--help" # learn about more options! 10 | ``` 11 | 12 | ## `unit-tests` 13 | 14 | These are mostly regression/coverage style tests. 
They cover lexing, parsing, and pretty printing. 15 | The case for parsing is actually a bit more involved: on top of just checking that inputs parse 16 | correctly, it also checks that: 17 | 18 | * resolving the parsed AST leaves it unchanged 19 | * pretty printing the AST and then re-parsing it yields the same AST 20 | * parsing the substring of the input corresponding to the span of each sub-AST yields that same 21 | sub-AST 22 | 23 | Whenever a bug is fixed or a feature is introduced, new tests covering that case should be added to 24 | `unit-tests` to prevent regressions. 25 | 26 | ## `rustc-tests` 27 | 28 | These require `rustup` and `rustc` to be installed (the suite is skipped if either is missing). The idea is to feed in as input many Rust source files, run 29 | them through the Rust compiler with `rustc -Z ast-json-noexpand -Z no-analysis` (only available on 30 | nightly) to get a JSON of the AST, then parse the same file on our side, and finally find the 31 | differences between the two. Specifically, these tests check that: 32 | 33 | * we parse the same thing as `rustc` 34 | * pretty printing the AST and then reparsing it doesn't change anything 35 | 36 | Any test source files should be placed in the `sample-sources` directory at the project root. 37 | 38 | This test suite also automatically downloads and sets `rustc` to nightly (only for this folder). In 39 | the interest of reproducibility, we pin the specific nightly version (see the top of the `Main` 40 | module). If the version of nightly being tested against is more than a month old, you will get 41 | warnings when the tests are run.
42 | -------------------------------------------------------------------------------- /test/rustc-tests/DiffUtils.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | #if __GLASGOW_HASKELL__ >= 800 3 | {-# OPTIONS_GHC -Wno-missing-methods #-} 4 | #endif 5 | module DiffUtils where 6 | 7 | import qualified Data.Aeson as Aeson 8 | import qualified Data.HashMap.Lazy as HM 9 | import qualified Data.Vector as V 10 | import qualified Data.List.NonEmpty as N 11 | import Control.Monad 12 | import Data.String 13 | import Data.ByteString.Lazy.Char8 (unpack) 14 | import Control.Exception 15 | import Data.Typeable 16 | import Data.Foldable 17 | import Data.Word (Word8) 18 | 19 | 20 | -- | This type is a straightforward hack to let me index by both 'String' and 'Int' in '(!)' below. 21 | data AesonKey = Index Int | Key String 22 | instance Num AesonKey where fromInteger = Index . fromIntegral 23 | instance IsString AesonKey where fromString = Key 24 | 25 | -- | Accessor method for JSON with helpful error messages. 26 | (!) :: Aeson.Value -> AesonKey -> Aeson.Value 27 | val@(Aeson.Object hashmap) ! Key key = 28 | case HM.lookup (fromString key) hashmap of 29 | Nothing -> error $ "No key `" ++ key ++ "' on JSON object `" ++ showAeson val ++ "'" 30 | Just v -> v 31 | val ! Key key = error $ "Cannot lookup key `" ++ key ++ "' on non-object JSON `" ++ showAeson val ++ "'" 32 | val@(Aeson.Array vect) ! Index key = 33 | case vect V.!? key of 34 | Nothing -> error $ "Index `" ++ show key ++ "' is OOB on JSON array `" ++ showAeson val ++ "'" 35 | Just v -> v 36 | val ! Index key = error $ "Cannot lookup index `" ++ show key ++ "' on non-array JSON `" ++ showAeson val ++ "'" 37 | 38 | -- | Pretty print 'Value' 39 | showAeson :: Aeson.Value -> String 40 | showAeson = unpack . Aeson.encode 41 | 42 | -- | Accessor method for JSON which fails with 'Nothing' 43 | (!?) 
:: Aeson.Value -> AesonKey -> Maybe Aeson.Value 44 | Aeson.Object hashmap !? Key key = HM.lookup (fromString key) hashmap 45 | Aeson.Array vect !? Index key = vect V.!? key 46 | _ !? _ = Nothing 47 | 48 | -- | This lets us do whatever we want while comparing @rustc@ with our parser 49 | type Diff = IO () 50 | 51 | -- | This data type exists only as an easy way to throw a new type of error 52 | data DiffError = DiffError String deriving (Typeable) 53 | instance Exception DiffError 54 | instance Show DiffError where show (DiffError msg) = msg 55 | 56 | -- | Class of things that can be diff-ed against their JSON debug output 57 | class Show a => Diffable a where 58 | (===) :: a -> Aeson.Value -> Diff 59 | 60 | instance Diffable a => Diffable (N.NonEmpty a) where 61 | xs === json = toList xs === json 62 | 63 | instance Diffable a => Diffable [a] where 64 | xs === json@(Aeson.Array v) = do 65 | let xs' = toList v 66 | when (length xs /= length xs') $ 67 | diff ("arrays have different lengths " ++ show (length xs) ++ " /= " ++ show (length xs')) xs json 68 | sequence_ (zipWith (===) xs xs') 69 | xs === json = diff "comparing array to non-array" xs json 70 | 71 | -- | Solely for an instance of 'Diffable [a]' where the empty list == null 72 | newtype NullList a = NullList [a] deriving (Show) 73 | instance Diffable a => Diffable (NullList a) where 74 | NullList xs === val = (if null xs then Nothing else Just xs) === val 75 | 76 | -- | A comparison that accepts JSON 'null' as 'Nothing' 77 | instance Diffable a => Diffable (Maybe a) where 78 | Just x === json = x === json 79 | Nothing === Aeson.Null = pure () 80 | n@Nothing === json = diff "expected the JSON to be null" n json 81 | 82 | instance Diffable Bool where 83 | b1 === j@(Aeson.Bool b2) | b1 == b2 = pure () 84 | | otherwise = diff "boolean values are different" b1 j 85 | b === j = diff "expected the JSON to be a boolean" b j 86 | 87 | instance Diffable Word8 where (===) = diffIntegral 88 | instance Diffable Int where (===) =
diffIntegral 89 | instance Diffable Integer where (===) = diffIntegral 90 | 91 | -- | Diff something that is a number and can be shown 92 | diffIntegral :: (Show i, Integral i) => i -> Aeson.Value -> Diff 93 | diffIntegral i (Aeson.Number s) | fromIntegral i == s = pure () 94 | diffIntegral i val = diff "different integral values" i val 95 | 96 | -- | Report a difference 97 | diff :: Show a => String -> a -> Aeson.Value -> IO b 98 | diff explanation v j = throw (DiffError msg) 99 | where msg = unlines [ explanation ++ " in" 100 | , " * parsed AST" 101 | , cropped (show v) 102 | , " * dumped JSON" 103 | , cropped (unpack (Aeson.encode j)) 104 | ] 105 | cropped msg' | length msg' > 500 = take 500 msg' ++ "..." 106 | | otherwise = msg' 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /test/rustc-tests/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | module Main where 3 | 4 | import Diff () 5 | import DiffUtils 6 | 7 | import Control.Monad (filterM, when) 8 | import Control.Exception (catch, SomeException, evaluate) 9 | import Data.Typeable (Typeable) 10 | 11 | import Data.ByteString.Lazy (hGetContents) 12 | import Data.ByteString.Lazy.Char8 (unpack) 13 | import Data.Aeson (decode', Value) 14 | 15 | import Language.Rust.Parser (readSourceFile) 16 | import Language.Rust.Pretty (prettyUnresolved, Resolve(..), Issue(..), Severity(Clean)) 17 | import Language.Rust.Syntax (SourceFile) 18 | 19 | import System.Directory (getCurrentDirectory, getTemporaryDirectory, listDirectory, doesFileExist, findExecutable) 20 | import System.Process (withCreateProcess, proc, CreateProcess(..), StdStream(..), callProcess, readProcess) 21 | import System.FilePath ((</>), takeFileName) 22 | import System.IO (withFile, IOMode(WriteMode,ReadMode)) 23 | import System.Exit (exitSuccess) 24 | 25 | import Data.Time.Clock (utctDay, getCurrentTime) 26 | import
Data.Time.Calendar (fromGregorian, showGregorian, diffDays) 27 | 28 | import qualified Data.Text.Prettyprint.Doc as PP 29 | import Data.Text.Prettyprint.Doc.Render.Text (renderIO) 30 | 31 | import Test.Framework (defaultMain) 32 | import Test.Framework.Providers.API 33 | 34 | main :: IO () 35 | main = do 36 | -- Check last time `rustc` version was bumped 37 | let lastDay = fromGregorian 2018 4 19 38 | today <- utctDay <$> getCurrentTime 39 | when (diffDays today lastDay > 32) $ 40 | putStrLn $ "\x1b[33m" ++ "\nThe version of `rustc' the tests will try to use is older than 1 month" ++ "\x1b[0m" 41 | 42 | -- Don't bother running the tests if you don't have `rustup` or `rustc` installed. 43 | missingProgs <- any null <$> traverse findExecutable ["rustup","rustc"] 44 | when missingProgs $ do 45 | putStrLn $ "Could not find `rustup`/`rustc`, so skipping these tests" 46 | exitSuccess 47 | 48 | -- Setting `rustc` version to the right nightly, just in this directory 49 | callProcess "rustup" ["override", "set", "nightly-" ++ showGregorian lastDay] 50 | version <- readProcess "rustc" ["--version"] "" 51 | putStrLn $ "\x1b[32m" ++ "Running tests with " ++ version ++ "\x1b[0m" 52 | 53 | -- Run the tests 54 | workingDirectory <- getCurrentDirectory 55 | let folder = workingDirectory </> "sample-sources" 56 | entries <- map (folder </>) <$> listDirectory folder 57 | files <- filterM doesFileExist (filter (/= folder </> ".benchignore") entries) 58 | defaultMain (map (\f -> Test (takeFileName f) (DiffTest f)) files) 59 | 60 | -- | Given a path pointing to a Rust source file, run `rustc` on it and decode the JSON AST it writes to stdout 61 | getJsonAST :: FilePath -> IO Value 62 | getJsonAST fileName = do 63 | let cp = (proc "rustc" [ "-Z", "ast-json-noexpand" 64 | , "-Z", "no-analysis" 65 | , fileName ]){ std_out = CreatePipe 66 | , std_err = NoStream 67 | , std_in = NoStream 68 | } 69 | withCreateProcess cp $ \_ (Just hOut) _ _ -> do 70 | jsonContents <- hGetContents hOut 71 | case decode' jsonContents of 72 |
Just value -> pure value 73 | Nothing -> error ("Failed to get `rustc' JSON\n" ++ unpack jsonContents) 74 | 75 | -- | Given an AST and a file name, print it into a temporary file (without resolving) and return 76 | -- that path 77 | prettySourceFile :: FilePath -> SourceFile a -> IO FilePath 78 | prettySourceFile path ast = do 79 | tmp <- getTemporaryDirectory 80 | let path' = tmp </> takeFileName path 81 | opts = PP.LayoutOptions (PP.AvailablePerLine 100 1.0) 82 | withFile path' WriteMode (\hdl -> renderIO hdl (PP.layoutPretty opts (prettyUnresolved ast))) 83 | pure path' 84 | 85 | resolveDiff :: (Monoid a, Typeable a) => SourceFile a -> IO () 86 | resolveDiff ast = when (sev /= Clean) $ 87 | error ("Resolve thinks there is (are) some " ++ show sev ++ "\n" ++ msgs) 88 | where (_, sev, iss) = resolveVerbose ast 89 | msgs = unlines [ " " ++ show sev' ++ " " ++ desc | Issue desc sev' _ <- iss ] 90 | 91 | 92 | -- * Difference tests 93 | 94 | -- | A 'DiffTest' only needs to know the name of the file it is diffing 95 | data DiffTest = DiffTest String 96 | 97 | -- | These are the possible pending statuses of a 'DiffTest' 98 | data DiffRunning = ParsingReference 99 | | ParsingImplementation 100 | | ParsingDiffing 101 | | PrintingParsed 102 | | ReparsingReference 103 | | ReparsingDiffing 104 | | ResolveInvariant 105 | 106 | 107 | instance Show DiffRunning where 108 | show ParsingReference = "Parsing using `rustc'" 109 | show ParsingImplementation = "Parsing using our parser" 110 | show ParsingDiffing = "Comparing the two parsed outputs" 111 | show PrintingParsed = "Pretty printing the parsed syntax tree" 112 | show ReparsingReference = "Reparsing using `rustc'" 113 | show ReparsingDiffing = "Comparing to the reparsed output" 114 | show ResolveInvariant = "Checking that the parsed output is unchanged by `resolve'" 115 | 116 | -- | These are the possible final states of a 'DiffTest' 117 | data DiffResult = Error DiffRunning String 118 | | Done 119 | 120 | instance Show
DiffResult where 121 | show (Error improvement message) = "ERROR (" ++ show improvement ++ "): " ++ message 122 | show Done = "OK" 123 | 124 | -- | A test is successful if it finishes and has no diffs 125 | instance TestResultlike DiffRunning DiffResult where 126 | testSucceeded Done = True 127 | testSucceeded (Error _ _) = False 128 | 129 | -- | With timeouts and catching errors 130 | instance Testlike DiffRunning DiffResult DiffTest where 131 | testTypeName _ = "Difference tests" 132 | 133 | runTest TestOptions{ topt_timeout = K timeout } (DiffTest file) = runImprovingIO $ 134 | step timeout ParsingReference (getJsonAST file) $ \parsedRustc -> 135 | step timeout ParsingImplementation (evaluate =<< withFile file ReadMode readSourceFile) $ \parsedOurs -> 136 | step timeout ParsingDiffing (parsedOurs === parsedRustc) $ \_ -> 137 | step timeout PrintingParsed (prettySourceFile file parsedOurs) $ \tmpFile -> 138 | step timeout ReparsingReference (getJsonAST tmpFile) $ \reparsedRustc -> 139 | step timeout ReparsingDiffing (parsedOurs === reparsedRustc) $ \_ -> 140 | step timeout ResolveInvariant (resolveDiff parsedOurs) $ \_ -> 141 | pure Done 142 | 143 | 144 | step :: Maybe Int -- ^ timeout for the step 145 | -> DiffRunning -- ^ improvement for the step 146 | -> IO a -- ^ content of the step 147 | -> (a -> ImprovingIO DiffRunning DiffResult DiffResult) -- ^ continuation to run afterwards 148 | -> ImprovingIO DiffRunning DiffResult DiffResult 149 | step timeout improvement action continuation = do 150 | yieldImprovement improvement 151 | val_me <- maybeTimeoutImprovingIO timeout $ liftIO (try' action) 152 | case val_me of 153 | Nothing -> pure (Error improvement "Timed out") 154 | Just (Left e) -> pure (Error improvement e) 155 | Just (Right val) -> continuation val 156 | 157 | 158 | 159 | 160 | -- | Variant of 'try' which separates the error case by just returning 'Left msg' when there is an 161 | -- exception. 
162 | try' :: IO a -> IO (Either String a) 163 | try' io = catch (Right <$> io) 164 | (\e -> pure (Left (show (e :: SomeException)))) 165 | 166 | -------------------------------------------------------------------------------- /test/unit-tests/CompleteTest.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables #-} 2 | module CompleteTest (completeSuite) where 3 | 4 | import Test.Framework (testGroup, Test) 5 | import Test.Framework.Providers.HUnit 6 | import Test.HUnit hiding (Test) 7 | 8 | import Language.Rust.Syntax (SourceFile) 9 | import Language.Rust.Parser (parse, Span, inputStreamFromString, ParseFail(..)) 10 | import Language.Rust.Pretty (pretty') 11 | 12 | import Data.Text.Prettyprint.Doc (layoutPretty, LayoutOptions(..), PageWidth(..)) 13 | import Data.Text.Prettyprint.Doc.Render.String (renderShowS) 14 | 15 | -- The following tests render with width 50 and ribbon length 50 too. 16 | -- | | 17 | 18 | completeSuite :: Test 19 | completeSuite = testGroup "complete suite" 20 | [ testComplete "short mod" 21 | "mod foo { }" 22 | , testComplete "short function in mod" 23 | "mod foo {\n\ 24 | \ pub fn bar(x: i32) -> i32 {\n\ 25 | \ return x + 1\n\ 26 | \ }\n\ 27 | \}" 28 | , functionArgs 29 | , functionCalls 30 | , methodCalls 31 | , lets 32 | , generics 33 | , whereClauses 34 | , functions 35 | , typeAliases 36 | , traits 37 | , structs 38 | , enums 39 | , matchExpressions 40 | ] 41 | 42 | functionArgs :: Test 43 | functionArgs = testGroup "function args" 44 | [ testComplete "function with args" 45 | "fn foo(\n\ 46 | \ u: i32,\n\ 47 | \ v: i32,\n\ 48 | \ x: i32,\n\ 49 | \ y: i32,\n\ 50 | \ z: i32,\n\ 51 | \ w: i32,\n\ 52 | \) -> i32 { }" 53 | , testComplete "short function with args" 54 | "fn foo(x: i32, y: i32) -> i32 { }" 55 | ] 56 | 57 | functionCalls :: Test 58 | functionCalls = testGroup "function calls" 59 | [ testComplete "short call" 60 | "fn main() {\n\ 61 | \ foo(1, 2, 3, 4);\n\ 62 | 
\}" 63 | , testComplete "multi line call" 64 | "fn main() {\n\ 65 | \ foo(\n\ 66 | \ foooooooooooooooo,\n\ 67 | \ baaaaaaaaaaaaaaar,\n\ 68 | \ baaaaaaaaaaaaaaaz,\n\ 69 | \ );\n\ 70 | \}" 71 | , testComplete "nested mutli line call" 72 | "fn main() {\n\ 73 | \ foo(\n\ 74 | \ 0,\n\ 75 | \ bar(\n\ 76 | \ foooooooooooooooo,\n\ 77 | \ baaaaaaaaaaaaaaar,\n\ 78 | \ baaaaaaaaaaaaaaaz,\n\ 79 | \ ),\n\ 80 | \ );\n\ 81 | \}" 82 | , testComplete "nested one-arg multi line call" 83 | "fn main() {\n\ 84 | \ foo(bar(\n\ 85 | \ foooooooooooooooo,\n\ 86 | \ baaaaaaaaaaaaaaar,\n\ 87 | \ baaaaaaaaaaaaaaaz,\n\ 88 | \ ));\n\ 89 | \}" 90 | , testComplete "nested one-arg multi line call" 91 | "fn main() {\n\ 92 | \ foo(bar(baz(boo(far(faz(\n\ 93 | \ foooooooooooooooo,\n\ 94 | \ baaaaaaaaaaaaaaar,\n\ 95 | \ baaaaaaaaaaaaaaaz,\n\ 96 | \ ))))));\n\ 97 | \}" 98 | ] 99 | 100 | methodCalls :: Test 101 | methodCalls = testGroup "method calls" 102 | [ testComplete "short chained method call" 103 | "fn foo() {\n\ 104 | \ obj.bar()?.baz();\n\ 105 | \}" 106 | , testComplete "long chained method call" 107 | "fn foo() {\n\ 108 | \ (foo as ObjectBuilderFactory)\n\ 109 | \ .baaaaaaaaaaar(foo, bar, baz)\n\ 110 | \ .baaaaaaaaaaz(\n\ 111 | \ fooooooooo,\n\ 112 | \ baaaaaaaar,\n\ 113 | \ baaaaaaaaz,\n\ 114 | \ )\n\ 115 | \ .clone();\n\ 116 | \}" 117 | , testComplete "long chained method call / fields / try / index" 118 | "fn foo() {\n\ 119 | \ (foo as ObjectBuilderFactory)\n\ 120 | \ .baaaaaaaaaaar(foo, bar, baz)\n\ 121 | \ .foo\n\ 122 | \ .bar[0]\n\ 123 | \ .baz?\n\ 124 | \ .baf[0][0].4\n\ 125 | \ .bar(baaaaaaaaaaz(\n\ 126 | \ fooooooooo,\n\ 127 | \ baaaaaaaar,\n\ 128 | \ baaaaaaaaz,\n\ 129 | \ ))\n\ 130 | \ .clone();\n\ 131 | \}" 132 | , testComplete "long caller" 133 | "fn foo() {\n\ 134 | \ Point {\n\ 135 | \ withExceeeeeeeeeedingly: 1,\n\ 136 | \ looooooooooooongFields: 2,\n\ 137 | \ }\n\ 138 | \ .baaaaaaaaaaar(foo, bar, baz)\n\ 139 | \ .foo\n\ 140 | \ .bar[0]\n\ 141 | \ .baz?\n\ 142 | \}" 143 | ] 144 
| 145 | lets :: Test 146 | lets = testGroup "let statements" 147 | [ testComplete "short let" 148 | "fn foo() {\n\ 149 | \ let shortVar: i32 = otherVariable;\n\ 150 | \}" 151 | , testComplete "long let" 152 | "fn foo() {\n\ 153 | \ let looooooooooooooooooongVar: i32 =\n\ 154 | \ otherVariable;\n\ 155 | \}" 156 | , testComplete "longer let" 157 | "fn foo() {\n\ 158 | \ let looooooooooooooooooongVar:\n\ 159 | \ loooooooooooooooooooongType =\n\ 160 | \ otherVariable;\n\ 161 | \}" 162 | ] 163 | 164 | -- See 165 | generics :: Test 166 | generics = testGroup "generics" 167 | [ testComplete "one line" 168 | "fn func1(x: Vec, y: Vec) {\n\ 169 | \ return;\n\ 170 | \}" 171 | , testComplete "break arguments before generics" 172 | "fn func1(\n\ 173 | \ looooooooooongx: Vec,\n\ 174 | \ y: Vec,\n\ 175 | \) {\n\ 176 | \ return;\n\ 177 | \}" 178 | , testComplete "break one generic" 179 | "fn ridiculously_long_name_1<\n\ 180 | \ AnotherExcessivelyLongGenericName,\n\ 181 | \>(\n\ 182 | \ x: Vec,\n\ 183 | \ y: Vec,\n\ 184 | \) -> ReturnType {\n\ 185 | \ return;\n\ 186 | \}" 187 | , testComplete "break two generics" 188 | "fn ridiculously_long_name_1<\n\ 189 | \ ExcessivelyLongGenericName,\n\ 190 | \ AnotherExcessivelyLongGenericName,\n\ 191 | \>(\n\ 192 | \ x: Vec,\n\ 193 | \ y: Vec,\n\ 194 | \) -> ReturnType {\n\ 195 | \ return;\n\ 196 | \}" 197 | , testComplete "break generics and bounds" 198 | "fn ridiculously_long_name_1<\n\ 199 | \ ExcessivelyLongGenericName:\n\ 200 | \ Sized +\n\ 201 | \ PartialEq +\n\ 202 | \ PartialOrd +,\n\ 203 | \ AnotherExcessivelyLongGenericName: Sized,\n\ 204 | \>(\n\ 205 | \ x: Vec,\n\ 206 | \ y: Vec,\n\ 207 | \) -> ReturnType {\n\ 208 | \ return;\n\ 209 | \}" 210 | ] 211 | 212 | -- See 213 | whereClauses :: Test 214 | whereClauses = testGroup "where clauses" 215 | [ testComplete "where" 216 | "fn function(args: i32)\n\ 217 | \where\n\ 218 | \ T: Bound,\n\ 219 | \ U: AnotherBound,\n\ 220 | \{\n\ 221 | \ body\n\ 222 | \}" 223 | , testComplete "method 
without body where" 224 | "trait T {\n\ 225 | \ fn foo() -> ReturnType\n\ 226 | \ where\n\ 227 | \ T: Bound,\n\ 228 | \ U: AnotherBound;\n\ 229 | \}" 230 | , testComplete "long where" 231 | "fn itemize_list<'a, T, I, F1, F2, F3>(\n\ 232 | \ codemap: &'a CodeMap,\n\ 233 | \ inner: I,\n\ 234 | \ terminator: &'a str,\n\ 235 | \ get_lo: F1,\n\ 236 | \ get_hi: F2,\n\ 237 | \ get_item_string: F3,\n\ 238 | \ prev_span_end: BytePos,\n\ 239 | \ next_span_start: BytePos,\n\ 240 | \) -> ListItems<'a, I, F1, F2, F3>\n\ 241 | \where\n\ 242 | \ I: Iterator,\n\ 243 | \ F1: Fn(&T) -> BytePos,\n\ 244 | \ F2: Fn(&T) -> BytePos,\n\ 245 | \ F3: Fn(&T) -> Option,\n\ 246 | \{\n\ 247 | \ ListItems {\n\ 248 | \ codemap: codemap,\n\ 249 | \ inner: inner.peekable(),\n\ 250 | \ get_lo: get_lo,\n\ 251 | \ get_hi: get_hi,\n\ 252 | \ get_item_string: get_item_string,\n\ 253 | \ prev_span_end: prev_span_end,\n\ 254 | \ next_span_start: next_span_start,\n\ 255 | \ terminator: terminator,\n\ 256 | \ }\n\ 257 | \}" 258 | , testComplete "impl no where" 259 | "impl HashMap {\n\ 260 | \ fn add(key: K, value: V) { }\n\ 261 | \}" 262 | , testComplete "impl" 263 | "impl HashMap\n\ 264 | \where\n\ 265 | \ K: Hash + Eq,\n\ 266 | \{\n\ 267 | \ fn add(key: K, value: V) { }\n\ 268 | \}" 269 | ] 270 | 271 | functions :: Test 272 | functions = testGroup "functions" 273 | -- The '-> ret_ty' should never be on a line of its own - split the args instead. 
-- See
  [ testComplete "long return type"
      "fn bit(\n\
      \ x: i32,\n\
      \) -> LoooooooooooooooooooooooongType { }"
  ]

-- NOTE(review): several fixture strings below look degraded in this copy --
-- e.g. "generic unit struct" carries the same text as "unit struct", and
-- nested lines inside the strings have only a single leading space. Confirm
-- the literals against the upstream file before relying on them.

-- | Round-trip fixtures for type alias items: a short alias, an alias with a
-- long name that is split over two lines, and an alias with a @where@ clause.
--
-- NOTE(review): the original comment here reads only "See"; whatever it
-- referred to is not present in this copy.
typeAliases :: Test
typeAliases = testGroup "type aliases"
  [ testComplete "type alias short"
      "type FormattedAlias = Vec;"
  , testComplete "type alias long"
      "type LoooooooooooooooonnnnnnnnnnnnnngAlias =\n\
      \ Vec;"
  , testComplete "type alias where"
      "type FormattedAlias\n\
      \where\n\
      \ T: Copy,\n\
      \= Vec;"
  ]

-- | Round-trip fixtures for trait items: a plain trait, one labelled
-- "generic", and a public trait with supertrait bounds.
traits :: Test
traits = testGroup "traits"
  [ testComplete "simple trait"
      "trait Animal {\n\
      \ fn new(name: &'static str) -> Self;\n\
      \}"
  , testComplete "generic trait"
      "trait DoubleDrop {\n\
      \ fn double_drop(self, _: T);\n\
      \}"
  , testComplete "trait with bounds"
      "pub trait Ord: Eq + PartialOrd {\n\
      \ fn cmp(&self, other: &Self) -> Ordering;\n\
      \}"
  ]

-- | Round-trip fixtures for struct items. The cases cover unit, tuple and
-- regular (braced) structs, each in a plain form, a form labelled "generic",
-- and a form with a @where@ clause.
structs :: Test
structs = testGroup "structs"
  [ testComplete "unit struct"
      "struct Bleh;"
  , testComplete "tuple struct"
      "struct Bleh(i32, i32);"
  , testComplete "regular struct"
      "struct Baz {\n\
      \ field: i32,\n\
      \}"
  , testComplete "generic unit struct"
      "struct Bleh;"
  , testComplete "generic tuple struct"
      "struct Bleh(T, U);"
  , testComplete "generic regular struct"
      "struct Baz {\n\
      \ field: T,\n\
      \}"
  , testComplete "where unit struct"
      "struct Bleh\n\
      \where\n\
      \ T: Copy,\n\
      \ U: Sized;"
  , testComplete "where tuple struct"
      "struct Bleh(T, U)\n\
      \where\n\
      \ T: Copy,\n\
      \ U: Sized;"
  , testComplete "where regular struct"
      "struct Baz\n\
      \where\n\
      \ T: Copy,\n\
      \{\n\
      \ field: T,\n\
      \}"
  ]

-- | Round-trip fixtures for enum items: an empty enum, then the same set of
-- variants (unit, unit with discriminant, struct-like, tuple-like) in a plain
-- form, a form labelled "generic", and a form with a @where@ clause.
enums :: Test
enums = testGroup "enums"
  [ testComplete "empty enum"
      "enum Foo { }"
  , testComplete "basic enum"
      "enum Foo {\n\
      \ UnitCon,\n\
      \ UnitCon = 3,\n\
      \ Baz {\n\
      \ foo: i32,\n\
      \ bar: (),\n\
      \ },\n\
      \ Bar(i32, i32),\n\
      \}"
  , testComplete "generic enum"
      "enum Foo {\n\
      \ UnitCon,\n\
      \ UnitCon = 3,\n\
      \ Baz {\n\
      \ foo: T,\n\
      \ bar: (),\n\
      \ },\n\
      \ Bar(T, i32),\n\
      \}"
  , testComplete "where enum"
      "enum Foo\n\
      \where\n\
      \ T: Sized,\n\
      \{\n\
      \ UnitCon,\n\
      \ UnitCon = 3,\n\
      \ Baz {\n\
      \ foo: T,\n\
      \ bar: (),\n\
      \ },\n\
      \ Bar(T, i32),\n\
      \}"
  ]

-- | Round-trip fixtures for @match@ expressions inside a function body: an
-- empty match, simple arms, several alternatives on one line, and several
-- alternatives spread over multiple lines.
--
-- NOTE(review): the original comment here reads only "See"; whatever it
-- referred to is not present in this copy.
matchExpressions :: Test
matchExpressions = testGroup "match expressions"
  [ testComplete "empty match"
      "fn foo() {\n\
      \ match expr { }\n\
      \}"
  , testComplete "simple match"
      "fn foo() {\n\
      \ match expr {\n\
      \ 0 => 1,\n\
      \ 1 => { 2 },\n\
      \ 2 => 3,\n\
      \ }\n\
      \}"
  , testComplete "multiple patterns one line match"
      "fn foo() {\n\
      \ match expr {\n\
      \ 0 => 1,\n\
      \ 1 | 2 | 3 | 4 => { 2 },\n\
      \ 5 => 3,\n\
      \ }\n\
      \}"
  , testComplete "multiple patterns multiple lines match"
      "fn foo() {\n\
      \ match expr {\n\
      \ 0 => 1,\n\
      \ 1432482379423 |\n\
      \ 2423894732 |\n\
      \ 3423423 |\n\
      \ 4234273 => { 2 },\n\
      \ 5 => 3,\n\
      \ }\n\
      \}"
  ]

-- | Build a named round-trip test: parse the given text as a source file,
-- pretty-print the result, and assert that the rendered text equals the
-- original input exactly.
--
--   * @name@ - label of the generated test case
--   * @inp@  - source text, used both as parser input and as the expected
--              pretty-printer output
--
-- A parse failure aborts the test through 'fail', reporting the failure
-- position followed by the parser's message.
testComplete :: String -> String -> Test
testComplete name inp = testCase name $ do
  -- Parse the file
  x :: SourceFile Span
    <- case parse (inputStreamFromString inp) of
         Left (ParseFail pos msg) -> fail $ show pos ++ " " ++ msg
         Right x -> pure x

  -- Pretty print it
  -- NOTE(review): presumably 50 columns per line with a ribbon fraction of
  -- 1.0; confirm against the pretty-printing library's 'AvailablePerLine'.
  let opts = LayoutOptions (AvailablePerLine 50 1.0)
      inp' = renderShowS (layoutPretty opts (pretty' x)) ""

  -- Assert that the input and output are the same
  inp @=? inp'
--------------------------------------------------------------------------------
/test/unit-tests/LexerTest.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings, UnicodeSyntax #-}
module LexerTest (lexerSuite) where

import Test.Framework (testGroup, Test)
import Test.Framework.Providers.HUnit
import Test.HUnit hiding (Test)

import Language.Rust.Parser.Lexer
import Language.Rust.Parser.ParseMonad
import Language.Rust.Syntax
import Language.Rust.Data.Ident
import Language.Rust.Data.Position
import Language.Rust.Data.InputStream

-- | Top-level lexer test group; it combines the code-fragment tests with the
-- literal tests.
lexerSuite :: Test
lexerSuite = testGroup "lexer suite" [ commonCode, literals ]

-- | This contains some random real-life code fragments. The purpose here is
-- primarily black-box testing.
--
-- Each case pairs a fragment with the exact token list expected from the
-- lexer; whitespace and comments appear in that list as 'Space' tokens.
commonCode :: Test
commonCode = testGroup "lexing common code fragments"
  [ testCode "let span = $p.span;"
      [ IdentTok (mkIdent "let")
      , Space Whitespace " "
      , IdentTok (mkIdent "span")
      , Space Whitespace " "
      , Equal
      , Space Whitespace " "
      , Dollar
      , IdentTok (mkIdent "p")
      , Dot
      , IdentTok (mkIdent "span")
      , Semicolon
      ]
  , testCode "$(p.span),+"
      [ Dollar
      , OpenDelim Paren
      , IdentTok (mkIdent "p")
      , Dot
      , IdentTok (mkIdent "span")
      , CloseDelim Paren
      , Comma
      , Plus
      ]
  , testCode "pub s: pp::Printer<'a>,"
      [ IdentTok (mkIdent "pub")
      , Space Whitespace " "
      , IdentTok (mkIdent "s")
      , Colon
      , Space Whitespace " "
      , IdentTok (mkIdent "pp")
      , ModSep
      , IdentTok (mkIdent "Printer")
      , Less
      , LifetimeTok (mkIdent "a")
      , Greater
      , Comma
      ]
  , testCode "impl<'a,T> Tr for &'a T {}"
      [ IdentTok (mkIdent "impl")
      , Less
      , LifetimeTok (mkIdent "a")
      , Comma
      , IdentTok (mkIdent "T")
      , Greater
      , Space Whitespace " "
      , IdentTok (mkIdent "Tr")
      , Space Whitespace " "
      , IdentTok (mkIdent "for")
      , Space Whitespace " "
      , Ampersand
      , LifetimeTok (mkIdent "a")
      , Space Whitespace " "
      , IdentTok (mkIdent "T")
      , Space Whitespace " "
      , OpenDelim Brace
      , CloseDelim Brace
      ]
    -- A block comment becomes a single 'Space Comment' token holding the text
    -- between the delimiters.
  , testCode "x /* some comment */ y"
      [ IdentTok (mkIdent "x")
      , Space Whitespace " "
      , Space Comment " some comment "
      , Space Whitespace " "
      , IdentTok (mkIdent "y")
      ]
    -- A nested block comment stays inside the one outer comment token.
  , testCode "x /* some /* nested */ comment */ y"
      [ IdentTok (mkIdent "x")
      , Space Whitespace " "
      , Space Comment " some /* nested */ comment "
      , Space Whitespace " "
      , IdentTok (mkIdent "y")
      ]
    -- Non-ASCII characters in an identifier and in a string literal.
  , testCode "fn ܐ_ܐ() { println!(\"Hello, čušpajž日本語\"); }"
      [ IdentTok (mkIdent "fn")
      , Space Whitespace " "
      , IdentTok (mkIdent "ܐ_ܐ")
      , OpenDelim Paren
      , CloseDelim Paren
      , Space Whitespace " "
      , OpenDelim Brace
      , Space Whitespace " "
      , IdentTok (mkIdent "println")
      , Exclamation
      , OpenDelim Paren
      , LiteralTok (StrTok "Hello, čušpajž日本語") Nothing
      , CloseDelim Paren
      , Semicolon
      , Space Whitespace " "
      , CloseDelim Brace
      ]
    -- "123.f32" is an integer, a dot and an identifier, not a suffixed float.
  , testCode "123.f32"
      [ LiteralTok (IntegerTok "123") Nothing
      , Dot
      , IdentTok (mkIdent "f32")
      ]
    -- A signed exponent stays part of a single float token.
  , testCode "0e+10"
      [ LiteralTok (FloatTok "0e+10") Nothing
      ]
    -- Here the trailing dot belongs to the float; '+' and "1" follow as
    -- separate tokens.
  , testCode "123.+1"
      [ LiteralTok (FloatTok "123.") Nothing
      , Plus
      , LiteralTok (IntegerTok "1") Nothing
      ]

  ]


-- | Test group for literals. Note that literals can have any suffix (even if
-- almost all of those suffixes end up being invalid).
literals :: Test
literals = testGroup "literals (numbers, characters, strings, etc.)"
  -- bytes (the escape sequence is expected back unprocessed, as written)
  [ testCode "b'a'" [ LiteralTok (ByteTok "a") Nothing ]
  , testCode "b'\\n'" [ LiteralTok (ByteTok "\\n") Nothing ]
  , testCode "b'a'suffix" [ LiteralTok (ByteTok "a") (Just "suffix") ]
  -- chars
  , testCode "'a'" [ LiteralTok (CharTok "a") Nothing ]
  , testCode "'\\n'" [ LiteralTok (CharTok "\\n") Nothing ]
  , testCode "'a'suffix" [ LiteralTok (CharTok "a") (Just "suffix") ]
  -- integers (the base prefix and underscores stay in the token text; the
  -- suffix is reported separately)
  , testCode "123" [ LiteralTok (IntegerTok "123") Nothing ]
  , testCode "123i32" [ LiteralTok (IntegerTok "123") (Just "i32") ]
  , testCode "0b1100_1101" [ LiteralTok (IntegerTok "0b1100_1101") Nothing ]
  , testCode "0b1100_1101isize" [ LiteralTok (IntegerTok "0b1100_1101") (Just "isize") ]
  , testCode "0o3170" [ LiteralTok (IntegerTok "0o3170") Nothing ]
  , testCode "0o3170i64" [ LiteralTok (IntegerTok "0o3170") (Just "i64") ]
  , testCode "0xAFAC" [ LiteralTok (IntegerTok "0xAFAC") Nothing ]
  , testCode "0xAFACu32" [ LiteralTok (IntegerTok "0xAFAC") (Just "u32") ]
  -- floats
  , testCode "123." [ LiteralTok (FloatTok "123.") Nothing ]
  , testCode "123.1" [ LiteralTok (FloatTok "123.1") Nothing ]
  , testCode "123.1f32" [ LiteralTok (FloatTok "123.1") (Just "f32") ]
  , testCode "123e-9f32" [ LiteralTok (FloatTok "123e-9") (Just "f32") ]
  , testCode "9e+10" [ LiteralTok (FloatTok "9e+10") Nothing ]
  -- strings
  , testCode "\"hello \\n world!\"" [ LiteralTok (StrTok "hello \\n world!") Nothing ]
  , testCode "\"hello \\n world!\"suffix" [ LiteralTok (StrTok "hello \\n world!") (Just "suffix") ]
  -- raw strings (in these cases the numeric field equals the number of '#'
  -- characters around the literal)
  , testCode "r\"hello \n world!\"" [ LiteralTok (StrRawTok "hello \n world!" 0) Nothing ]
  , testCode "r\"hello \n world!\"suffix" [ LiteralTok (StrRawTok "hello \n world!" 0) (Just "suffix") ]
  , testCode "r##\"hello \"#\n world!\"##suffix" [ LiteralTok (StrRawTok "hello \"#\n world!" 2) (Just "suffix") ]
  -- byte strings
  , testCode "b\"hello \\n world!\"" [ LiteralTok (ByteStrTok "hello \\n world!") Nothing ]
  , testCode "b\"hello \\n world!\"suffix" [ LiteralTok (ByteStrTok "hello \\n world!") (Just "suffix") ]
  -- raw byte strings
  , testCode "br\"hello \n world!\"" [ LiteralTok (ByteStrRawTok "hello \n world!" 0) Nothing ]
  , testCode "br\"hello \n world!\"suffix" [ LiteralTok (ByteStrRawTok "hello \n world!" 0) (Just "suffix") ]
  , testCode "br##\"hello \"#\n world!\"##suffix" [ LiteralTok (ByteStrRawTok "hello \"#\n world!" 2) (Just "suffix") ]
  -- multiline strings (a backslash directly followed by a real newline)
  , testCode "\"hello \\\n world!\"" [ LiteralTok (StrTok "hello \\\n world!") Nothing ]
  , testCode "b\"hello \\\n world!\"" [ LiteralTok (ByteStrTok "hello \\\n world!") Nothing ]
  ]

-- | Create a test for a code fragment that should tokenize.
--
-- The fragment doubles as the test case name. The test passes only when
-- lexing it succeeds and yields exactly the given tokens, compared after
-- spans have been removed.
testCode :: String -> [Token] -> Test
testCode inp toks = testCase inp $ Right toks @=? lexTokensNoSpans (inputStreamFromString inp)

-- | Turn an InputStream into either an error or a list of tokens.
lexTokensNoSpans :: InputStream -> Either ParseFail [Token]
lexTokensNoSpans stream = fmap (map unspan) lexed
  where
    -- Run the token lexer over the stream, with 'initPos' as the position
    -- argument; the result is either a failure or a list of spanned tokens.
    lexed = execParser (lexTokens lexToken) stream initPos

--------------------------------------------------------------------------------
/test/unit-tests/Main.hs:
--------------------------------------------------------------------------------
module Main where

import System.IO

import LexerTest (lexerSuite)
import ParserTest (parserSuite)
import PrettyTest (prettySuite)
import CompleteTest (completeSuite)

import Test.Framework (defaultMain)

-- | Entry point of the unit test executable: set stdout to UTF-8, then run
-- every suite.
main :: IO ()
main = do
  -- Set the encoding first, before the test runner is started.
  hSetEncoding stdout utf8
  defaultMain suites
  where
    -- Suites are run in this order.
    suites = [ lexerSuite, parserSuite, prettySuite, completeSuite ]
--------------------------------------------------------------------------------