├── .gitignore ├── Rakefile ├── lib ├── antlr4-native │ ├── version.rb │ ├── string_helpers.rb │ ├── context_method_arg.rb │ ├── context_method.rb │ ├── visitor_generator.rb │ ├── context.rb │ └── generator.rb └── antlr4-native.rb ├── spec ├── lua-parser-rb │ ├── lib │ │ ├── lua_parser │ │ │ └── version.rb │ │ └── lua_parser.rb │ ├── .gitignore │ ├── Gemfile │ ├── parse_test.rb │ ├── Rakefile │ ├── lua_parser.gemspec │ ├── ext │ │ └── lua_parser │ │ │ └── extconf.rb │ └── Lua.g4 └── run-lua-test.sh ├── vendor └── antlr-4.10.1-complete.jar ├── Gemfile ├── Dockerfile ├── .github └── workflows │ └── ci.yml ├── antlr4-native.gemspec ├── LICENSE.txt ├── CHANGELOG.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | pkg/ 3 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | 3 | Bundler::GemHelper.install_tasks 4 | -------------------------------------------------------------------------------- /lib/antlr4-native/version.rb: -------------------------------------------------------------------------------- 1 | module Antlr4Native 2 | VERSION = '2.2.1' 3 | end 4 | -------------------------------------------------------------------------------- /spec/lua-parser-rb/lib/lua_parser/version.rb: -------------------------------------------------------------------------------- 1 | module LuaParser 2 | VERSION = '1.0.0' 3 | end 4 | -------------------------------------------------------------------------------- /vendor/antlr-4.10.1-complete.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camertron/antlr4-native-rb/HEAD/vendor/antlr-4.10.1-complete.jar -------------------------------------------------------------------------------- /spec/lua-parser-rb/lib/lua_parser.rb: 
-------------------------------------------------------------------------------- 1 | # load the native extension 2 | require File.expand_path(File.join('..', 'ext', 'lua_parser', 'lua_parser'), __dir__) 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | group :development do 6 | gem 'pry-byebug' 7 | end 8 | 9 | group :development, :test do 10 | gem 'rake' 11 | end 12 | -------------------------------------------------------------------------------- /spec/lua-parser-rb/.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | pkg/ 3 | *.o 4 | *.o.tmp 5 | ext/lua_parser/Makefile 6 | ext/lua_parser/mkmf.log 7 | *.bundle 8 | lua_parser.cpp 9 | lua_parser.bundle 10 | lua_parser.so 11 | -------------------------------------------------------------------------------- /spec/lua-parser-rb/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | group :development do 6 | gem 'pry-byebug' 7 | gem 'antlr4-native', path: '../../../' 8 | end 9 | 10 | group :development, :test do 11 | gem 'rake' 12 | end 13 | -------------------------------------------------------------------------------- /spec/run-lua-test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | antlr_version=$(bundle exec ruby -Ilib -rantlr4-native -e "puts Antlr4Native::Generator::ANTLR_VERSION") 4 | docker build --build-arg ANTLR_VERSION="${antlr_version}" -t antlr4-native-rb:latest . 
# Continuous integration: runs the Lua parser smoke test on every push.
name: CI
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions must be quoted: YAML parses a bare 3.0 as the number 3,
        # which would select the newest 3.x release rather than Ruby 3.0.
        ruby-version: ['2.7', '3.0', '3.1', '3.2']
    steps:
      - uses: actions/checkout@v2
      - name: Set up Ruby ${{ matrix.ruby-version }}
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby-version }}
          bundler-cache: true
      - name: Run Lua Parser Smoke Test
        run: ./spec/run-lua-test.sh
module Antlr4Native
  # String-casing utilities for converting identifiers between naming
  # styles. Can be mixed into a class or called directly on the module,
  # e.g. StringHelpers.underscore('fooBar').
  module StringHelpers
    # Expose every helper as a module-level method too.
    extend self

    # Upcases the first character of +str+; the rest is left untouched.
    def capitalize(str)
      str.sub(/\A(.)/) { |first_char| first_char.upcase }
    end

    # Converts CamelCase and dashed text to snake_case.
    def underscore(str)
      # Split an upper-case run from a following capitalised word.
      acronyms_split = str.gsub(/([A-Z\d]+)([A-Z][a-z])/, '\1_\2')
      # Split a lower-case letter or digit from a following capital.
      words_split = acronyms_split.gsub(/([a-z\d])([A-Z])/, '\1_\2')
      words_split.tr('-', '_').downcase
    end

    # Same as #underscore, but joins the words with dashes.
    def dasherize(str)
      underscore(str).tr('_', '-')
    end
  end

  # Wraps one C++ parameter declaration (for example "size_t i") and
  # exposes the type and name scanned out of it.
  class ContextMethodArg
    attr_reader :raw_arg

    # @param raw_arg [String] the parameter declaration text
    def initialize(raw_arg)
      @raw_arg = raw_arg
    end

    # Second scanned chunk of the declaration, or nil when there is none.
    def name
      @name ||= parts[1]
    end

    # First scanned chunk with every space removed ("Foo *" becomes "Foo*").
    def type
      @type ||= parts.first.delete(' ')
    end

    # True when the type ends in a pointer star.
    def pointer?
      type.end_with?('*')
    end

    private

    # Chunks of the declaration: an identifier (may contain "::") followed
    # by an optional space, star and space.
    def parts
      @parts ||= raw_arg.scan(/([\w\d:]+\s?\*?\s?)/).map(&:first)
    end
  end
end
# Gem specification for antlr4-native.
$:.unshift File.join(File.dirname(__FILE__), 'lib')
require 'antlr4-native/version'

Gem::Specification.new do |s|
  s.name     = 'antlr4-native'
  s.version  = ::Antlr4Native::VERSION
  s.authors  = ['Cameron Dutro']
  s.email    = ['camertron@gmail.com']
  s.homepage = 'https://github.com/camertron/antlr4-native-rb'
  # Declared so the license is visible in the gem metadata (see LICENSE.txt).
  s.license  = 'MIT'

  s.description = s.summary = 'Create a Ruby native extension from any ANTLR4 grammar.'

  s.platform = Gem::Platform::RUBY

  s.require_path = 'lib'
  # LICENSE.txt is packaged because the MIT text requires the notice to
  # accompany copies of the software; CHANGELOG.md ships for reference.
  s.files = Dir[
    '{lib,spec,vendor}/**/*',
    'Gemfile',
    'LICENSE.txt',
    'CHANGELOG.md',
    'README.md',
    'Rakefile',
    'antlr4-native.gemspec'
  ]

  s.add_runtime_dependency "rice", "~> 4.0"
end
30 | ] 31 | end 32 | -------------------------------------------------------------------------------- /spec/lua-parser-rb/ext/lua_parser/extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf-rice' 2 | 3 | extension_name = 'lua_parser' 4 | dir_config(extension_name) 5 | 6 | have_library('stdc++') 7 | 8 | $CFLAGS << ' -std=c++14' 9 | 10 | if enable_config('static') 11 | $defs.push '-DANTLR4CPP_STATIC' unless $defs.include?('-DANTLR4CPP_STATIC') 12 | end 13 | 14 | include_paths = [ 15 | '.', 16 | 'antlrgen', 17 | 'antlr4-upstream/runtime/Cpp/runtime/src', 18 | 'antlr4-upstream/runtime/Cpp/runtime/src/atn', 19 | 'antlr4-upstream/runtime/Cpp/runtime/src/dfa', 20 | 'antlr4-upstream/runtime/Cpp/runtime/src/misc', 21 | 'antlr4-upstream/runtime/Cpp/runtime/src/support', 22 | 'antlr4-upstream/runtime/Cpp/runtime/src/tree', 23 | 'antlr4-upstream/runtime/Cpp/runtime/src/tree/pattern', 24 | 'antlr4-upstream/runtime/Cpp/runtime/src/tree/xpath' 25 | ] 26 | 27 | $srcs = [] 28 | 29 | include_paths.each do |include_path| 30 | $INCFLAGS << " -I#{include_path}" 31 | $VPATH << include_path 32 | 33 | Dir.glob("#{include_path}/*.cpp").each do |path| 34 | $srcs << path 35 | end 36 | end 37 | 38 | create_makefile(extension_name) 39 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Cameron Dutro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 
| The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 2.2.1 2 | * Remove the `Return().keepAlive()` statements added in v2.0.1, as they cause segfaults for the README example. 3 | 4 | ## 2.2.0 5 | * Use underscores in gem names instead of dashes (@dsisnero, #14) 6 | 7 | ## 2.1.0 8 | * Upgrade to ANTLR 4.10 (#13, @maxirmx) 9 | 10 | ## 2.0.1 11 | * Address segfaults for enhanced stability (#11, @maxirmx) 12 | - Return a copy of children from `getChildren()` calls instead of a reference. 13 | - Add `Return().keepAlive()` to several key methods to prevent the ANTLR parser, tokens, lexer, etc from being destroyed if the Ruby interpreter holds a reference to them. 14 | 15 | ## 2.0.0 16 | * Upgrade to Rice v4 (#8, @lutaml) 17 | 18 | ## 1.1.0 19 | * Add support for MS Windows (#2, @zakjan) 20 | * Return values from visit methods (#3, @zakjan) 21 | * Support optional tokens in rules (#4, @zakjan) 22 | * Add root method to ParserProxy (#5, @zakjan) 23 | - Designed to enable passing the root node to Visitor#visit, which is more consistent with ANTLR patterns. 24 | 25 | ## 1.0.2 26 | * Fix terminal node declaration. 27 | 28 | ## 1.0.1 29 | * Include ANTLR jar in gem release. 30 | 31 | ## 1.0.0 32 | * Birthday! 
module Antlr4Native
  # Describes one member function found on a generated context class and
  # classifies it (token accessor, rule hook, meta method, constructor, or
  # a plain context accessor).
  class ContextMethod
    RULE_METHODS = %w(enterRule exitRule getRuleIndex).freeze
    META_METHODS = %w(accept copyFrom).freeze

    attr_reader :name, :raw_args, :return_type, :context

    # @param name [String] member function name
    # @param raw_args [String] comma-separated parameter list text
    # @param return_type [String] declared return type text
    # @param context [#name] the owning context
    def initialize(name, raw_args, return_type, context)
      @name = name
      @raw_args = raw_args
      @return_type = return_type
      @context = context
    end

    # Name used for the proxy member: "<name>At" when the only parameter is
    # called "i", otherwise the name joined with its parameter names by "_".
    def cpp_name
      @cpp_name ||= begin
        arg_names = args.map(&:name)
        arg_names == ['i'] ? "#{name}At" : [name, *arg_names].join('_')
      end
    end

    # Parsed parameters, one ContextMethodArg per comma-separated entry.
    def args
      @args ||= raw_args.split(',').map { |raw| ContextMethodArg.new(raw.strip) }
    end

    def returns_vector?
      return_type.start_with?('std::vector')
    end

    # @TODO: consider revising this
    # A context method is whatever is left once every other category is
    # ruled out.
    def context_method?
      !(token_method? || rule_method? || meta_method? || constructor?)
    end

    # True when the first character of the name is unchanged by upcasing.
    def token_method?
      initial = name[0]
      initial.upcase == initial
    end

    def rule_method?
      RULE_METHODS.member?(name)
    end

    def meta_method?
      META_METHODS.member?(name)
    end

    # True when the method is named after its owning context.
    def constructor?
      name == context.name
    end
  end
end
module Antlr4Native
  # Renders the C++ visitor proxy class and the binding lines that expose
  # each generated visit* method to Ruby.
  class VisitorGenerator
    # Methods handled explicitly by the proxy (ruby_visit and
    # ruby_visitChildren), so no per-rule override is generated for them.
    VISITOR_METHOD_BLACKLIST = %w(visit visitChildren).freeze

    include StringHelpers

    attr_reader :visitor_methods, :antlr_ns, :parser_ns

    # @param visitor_methods [Array<String>] visit* method names
    # @param antlr_ns [String] prefix of the generated "<prefix>BaseVisitor" class
    # @param parser_ns [String] C++ parser class that scopes the contexts
    def initialize(visitor_methods, antlr_ns, parser_ns)
      @visitor_methods = visitor_methods
      @antlr_ns = antlr_ns
      @parser_ns = parser_ns
    end

    # Name of the visitor class as seen from Ruby.
    def class_name
      @class_name ||= 'Visitor'
    end

    # Name of the generated C++ proxy class.
    def cpp_class_name
      @cpp_class_name ||= 'VisitorProxy'
    end

    # Yields every visitor method that is not blacklisted; returns an
    # Enumerator when called without a block.
    def each_visitor_method
      return to_enum(__method__) unless block_given?

      visitor_methods.each do |visitor_method|
        yield visitor_method unless VISITOR_METHOD_BLACKLIST.include?(visitor_method)
      end
    end

    # C++ source of the proxy class: one override per visitor method that
    # forwards to the snake_case method on the wrapped Ruby object.
    #
    # @return [String]
    def visitor_proxy
      vms = each_visitor_method.flat_map do |visitor_method|
        context = "#{capitalize(visitor_method.sub(/\Avisit/, ''))}Context"

        [
          "  virtual antlrcpp::Any #{visitor_method}(#{parser_ns}::#{context} *ctx) override {",
          "    #{context}Proxy proxy(ctx);",
          "    return getSelf().call(\"#{underscore(visitor_method)}\", &proxy);",
          "  }\n"
        ]
      end

      # std::any_cast needs an explicit target type; Object matches the
      # declared return type of both helpers. The exception is caught by
      # const reference instead of by value.
      <<~END
        class #{cpp_class_name} : public #{antlr_ns}BaseVisitor, public Director {
        public:
          #{cpp_class_name}(Object self) : Director(self) { }

          Object ruby_visit(ContextProxy* proxy) {
            auto result = visit(proxy -> getOriginal());
            try {
              return std::any_cast<Object>(result);
            } catch(const std::bad_cast&) {
              return Qnil;
            }
          }

          Object ruby_visitChildren(ContextProxy* proxy) {
            auto result = visitChildren(proxy -> getOriginal());
            try {
              return std::any_cast<Object>(result);
            } catch(const std::bad_cast&) {
              return Qnil;
            }
          }

        #{vms.join("\n")}
        };
      END
    end

    # Binding lines that define each snake_case visit method on the Ruby
    # class, all pointing at ruby_visitChildren.
    #
    # Results are cached per +indent+, so a call with a different indent no
    # longer gets lines built for an earlier one.
    #
    # @param indent [String] prefix placed before every line
    # @return [Array<String>]
    def visitor_proxy_methods(indent)
      @visitor_proxy_methods ||= {}
      @visitor_proxy_methods[indent] ||= each_visitor_method.map do |visitor_method|
        "#{indent}.define_method(\"#{underscore(visitor_method)}\", &#{cpp_class_name}::ruby_visitChildren)"
      end
    end
  end
end
14 | 15 | ## Code Generation 16 | 17 | Here's how to generate a native extension for a given lexer and parser (Python in this case), defined in two .g4 files: 18 | 19 | ```ruby 20 | require 'antlr4-native' 21 | 22 | generator = Antlr4Native::Generator.new( 23 | grammar_files: ['Python3Lexer.g4', 'Python3Parser.g4'], 24 | output_dir: 'ext', 25 | parser_root_method: 'file_input' 26 | ) 27 | 28 | generator.generate 29 | ``` 30 | 31 | In the example above, the output directory is set to the standard Ruby native extensions directory, 'ext'. Antlr4-native will generate code into ext/\<name\>, where \<name\> is the name of the parser as defined in the grammar file(s). In this case, Python3Parser.g4 contains: 32 | 33 | ```antlr 34 | parser grammar Python3Parser; 35 | ``` 36 | 37 | so antlr4-native will generate code into the ext/python3_parser directory. 38 | 39 | Finally, the `parser_root_method` option tells antlr4-native which context represents the root of the parse tree. This context functions as the starting point for visitors. 40 | 41 | ## Using extensions in Ruby 42 | 43 | Parsers contain several methods for parsing source code. Use `#parse` to parse a string and `#parse_file` to parse the contents of a file: 44 | 45 | 46 | ```ruby 47 | parser = Python3Parser::Parser.parse(File.read('path/to/file.py')) 48 | 49 | # equivalent to: 50 | parser = Python3Parser::Parser.parse_file('path/to/file.py') 51 | ``` 52 | 53 | Use the `#visit` method on an instance of `Parser` to make use of a visitor: 54 | 55 | ```ruby 56 | visitor = MyVisitor.new 57 | parser.visit(visitor) 58 | ``` 59 | 60 | See the next section for more info regarding creating and using visitors. 61 | 62 | ## Visitors 63 | 64 | A visitor class is automatically created during code generation. Visitors are just classes with a bunch of special methods, each corresponding to a specific part of the source language's syntax. The methods are essentially callbacks that are triggered in-order as the parser walks over the parse tree. 
For example, here's a visitor with a method that will be called whenever the parser walks over a Python function definition: 65 | 66 | 67 | ```ruby 68 | class FuncDefVisitor < Python3Parser::Visitor 69 | def visit_func_def(ctx) 70 | puts ctx.NAME.text # print the name of the method 71 | visit_children(ctx) 72 | end 73 | end 74 | ``` 75 | 76 | Make sure to always call `#visit_children` at some point in your `visit_*` methods. If you don't, the subtree under the current context won't get visited. 77 | 78 | Finally, if you override `#initialize` in your visitor subclasses, don't forget to call `super`. If you don't, you'll get a nice big segfault. 79 | 80 | ## Caveats 81 | 82 | 1. Avoid retaining references to contexts, tokens, etc anywhere in your Ruby code. Contexts (i.e. the `ctx` variables in the examples above) and other objects that are created by ANTLR's C++ runtime are automatically cleaned up without the Ruby interpreter's knowledge. You'll almost surely see a segfault if you retain a reference to one of these objects and try to use it after the call to `Parser#visit`. 83 | 1. Due to an ANTLR limitation, parsers cannot be used in a multi-threaded environment, even if each parser instance is used entirely in the context of a single thread (i.e. parsers are not shared between threads). According to the ANTLR C++ developers, parsers should be threadsafe. Unfortunately firsthand experience has proven otherwise. Your mileage may vary. 84 | 1. The description of this gem says "(almost) any ANTLR4 grammar" because many grammars contain target-specific code. For example, the Python3 grammar referenced in the examples above contains inline Java code that the C++ compiler won't understand. You'll need to port any such code to C++ before you'll be able to compile and use the native extension. 85 | 86 | ## System Requirements 87 | 88 | * A Java runtime (version 1.6 or higher) is required to generate parsers, since ANTLR is a Java tool. 
module Antlr4Native
  # Represents one context class found in the generated C++ parser source
  # and renders the C++ proxy class, Rice conversions and binding lines
  # for it.
  class Context
    include StringHelpers

    attr_reader :name, :parser_ns, :cpp_parser_source

    # @param name [String] C++ context class name
    # @param parser_ns [String] C++ parser class that scopes the context
    # @param cpp_parser_source [String] text of the generated parser source
    def initialize(name, parser_ns, cpp_parser_source)
      @name = name
      @parser_ns = parser_ns
      @cpp_parser_source = cpp_parser_source
    end

    # Yields each method classified as a context accessor; returns an
    # Enumerator when called without a block.
    def each_context_method
      return to_enum(__method__) unless block_given?

      mtds.each do |mtd|
        yield mtd if mtd.context_method?
      end
    end

    # Yields each method classified as a token accessor; returns an
    # Enumerator when called without a block.
    def each_token_method
      return to_enum(__method__) unless block_given?

      mtds.each do |mtd|
        yield mtd if mtd.token_method?
      end
    end

    # Name of the C++ variable that holds the Ruby class for this context.
    def proxy_class_variable
      @proxy_class_variable ||= "rb_c#{name}"
    end

    # C++ declaration of the proxy class, with one member per context and
    # token method.
    #
    # @return [String]
    def proxy_class_header
      @proxy_class_header ||= begin
        <<~END
          class #{name}Proxy : public ContextProxy {
          public:
            #{name}Proxy(tree::ParseTree* ctx) : ContextProxy(ctx) {};
          #{method_signatures_for(each_context_method)}
          #{method_signatures_for(each_token_method)}
          };
        END
      end
    end

    # One "Object <cpp_name>(<raw args>);" declaration per method.
    def method_signatures_for(mtds)
      mtds
        .map { |mtd| "  Object #{mtd.cpp_name}(#{mtd.raw_args});" }
        .join("\n")
    end

    # C++ To_Ruby specialisations for the context pointer and its proxy.
    #
    # @return [String]
    def conversions
      @class_conversions ||= <<~END
        namespace Rice::detail {
          template <>
          class To_Ruby<#{parser_ns}::#{name}*> {
          public:
            VALUE convert(#{parser_ns}::#{name}* const &x) {
              if (!x) return Nil;
              return Data_Object<#{parser_ns}::#{name}>(x, false, #{proxy_class_variable});
            }
          };

          template <>
          class To_Ruby<#{name}Proxy*> {
          public:
            VALUE convert(#{name}Proxy* const &x) {
              if (!x) return Nil;
              return Data_Object<#{name}Proxy>(x, false, #{proxy_class_variable});
            }
          };
        }
      END
    end

    # All proxy member definitions: context accessors, then token accessors.
    #
    # @return [Array<String>]
    def proxy_class_methods
      proxy_class_context_methods + proxy_class_token_methods
    end

    # C++ definitions of the context accessors. Vector-returning accessors
    # build a Ruby array by calling the indexed "<name>At" variant; single
    # accessors return the matching entry from getChildren().
    #
    # @return [Array<String>]
    def proxy_class_context_methods
      each_context_method.map do |ctx_method|
        params = ctx_method.args.map(&:name).join(', ')

        if ctx_method.returns_vector?
          <<~END
            Object #{name}Proxy::#{ctx_method.cpp_name}(#{ctx_method.raw_args}) {
              Array a;

              if (orig != nullptr) {
                size_t count = ((#{parser_ns}::#{name}*)orig) -> #{ctx_method.name}(#{params}).size();

                for (size_t i = 0; i < count; i ++) {
                  a.push(#{ctx_method.name}At(i));
                }
              }

              return std::move(a);
            }
          END
        else
          # From_Ruby is a class template, so the target type is spelled out;
          # ContextProxy is the type whose getOriginal() is called on the result.
          <<~END
            Object #{name}Proxy::#{ctx_method.cpp_name}(#{ctx_method.raw_args}) {
              if (orig == nullptr) {
                return Qnil;
              }

              auto ctx = ((#{parser_ns}::#{name}*)orig) -> #{ctx_method.name}(#{params});

              if (ctx == nullptr) {
                return Qnil;
              }

              for (auto child : getChildren()) {
                if (ctx == detail::From_Ruby<ContextProxy>().convert(child.value()).getOriginal()) {
                  return child;
                }
              }

              return Nil;
            }
          END
        end
      end
    end

    # C++ definitions of the token accessors; each terminal node is wrapped
    # in a TerminalNodeProxy before conversion.
    #
    # @return [Array<String>]
    def proxy_class_token_methods
      each_token_method.map do |token_mtd|
        params = token_mtd.args.map(&:name).join(', ')

        if token_mtd.returns_vector?
          # To_Ruby is a class template, so the converted type is spelled out.
          <<~END
            Object #{name}Proxy::#{token_mtd.cpp_name}(#{token_mtd.raw_args}) {
              Array a;

              if (orig == nullptr) {
                return std::move(a);
              }

              auto vec = ((#{parser_ns}::#{name}*)orig) -> #{token_mtd.name}(#{params});

              for (auto it = vec.begin(); it != vec.end(); it ++) {
                TerminalNodeProxy proxy(*it);
                a.push(detail::To_Ruby<TerminalNodeProxy>().convert(proxy));
              }

              return std::move(a);
            }
          END
        else
          <<~END
            Object #{name}Proxy::#{token_mtd.cpp_name}(#{token_mtd.raw_args}) {
              if (orig == nullptr) {
                return Qnil;
              }

              auto token = ((#{parser_ns}::#{name}*)orig) -> #{token_mtd.name}(#{params});

              if (token == nullptr) {
                return Qnil;
              }

              TerminalNodeProxy proxy(token);
              return detail::To_Ruby<TerminalNodeProxy>().convert(proxy);
            }
          END
        end
      end
    end

    # Binding lines that define the Ruby class under +module_var+ and attach
    # every context and token method. The last line ends the statement.
    #
    # Token methods are bound through cpp_name, the name the proxy class
    # declares, so an indexed "<TOKEN>At" accessor points at its own member
    # instead of the unindexed one. Results are cached per +module_var+.
    #
    # @param module_var [String] C++ variable naming the enclosing Ruby module
    # @return [Array<String>]
    def class_wrapper(module_var)
      @class_wrapper ||= {}
      @class_wrapper[module_var] ||= begin
        lines = [
          %(#{proxy_class_variable} = define_class_under<#{name}Proxy, ContextProxy>(#{module_var}, "#{name}"))
        ]

        each_context_method do |ctx_method|
          lines << ".define_method(\"#{underscore(ctx_method.cpp_name)}\", &#{name}Proxy::#{ctx_method.cpp_name})"
        end

        each_token_method do |token_method|
          lines << ".define_method(\"#{token_method.cpp_name}\", &#{name}Proxy::#{token_method.cpp_name})"
        end

        lines[-1] << ';'

        lines
      end
    end

    private

    # Every "<return type> <parser_ns>::<name>::<method>(<args>)" definition
    # found in the parser source, wrapped as ContextMethod objects.
    def mtds
      @mtds ||= begin
        puts "Finding methods for #{name}"

        cpp_parser_source
          .scan(/^([^\n]+) #{parser_ns}::#{name}::([^\(]*)\(([^\)]*)\)/)
          .map do |return_type, mtd_name, args|
            ContextMethod.new(mtd_name, args, return_type, self)
          end
      end
    end
  end
end
--------------------------------------------------------------------------------
/*
BSD License

Copyright (c) 2013, Kazunori Sakamoto
Copyright (c) 2016, Alexander Alexeev
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. Neither the NAME of Rainer Schuster nor the NAMEs of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

This grammar file derived from:

    Lua 5.3 Reference Manual
    http://www.lua.org/manual/5.3/manual.html

    Lua 5.2 Reference Manual
    http://www.lua.org/manual/5.2/manual.html

    Lua 5.1 grammar written by Nicolai Mainiero
    http://www.antlr3.org/grammar/1178608849736/Lua.g

Tested by Kazunori Sakamoto with Test suite for Lua 5.2 (http://www.lua.org/tests/5.2/)

Tested by Alexander Alexeev with Test suite for Lua 5.3 http://www.lua.org/tests/lua-5.3.2-tests.tar.gz
*/

grammar Lua;

// PARSER

// Entry rule: a whole source file is one block followed by end of input.
chunk
    : block EOF
    ;

block
    : stat* retstat?
    ;

stat
    : ';'
    | varlist '=' explist
    | functioncall
    | label
    | 'break'
    | 'goto' NAME
    | 'do' block 'end'
    | 'while' exp 'do' block 'end'
    | 'repeat' block 'until' exp
    | 'if' exp 'then' block ('elseif' exp 'then' block)* ('else' block)? 'end'
    | 'for' NAME '=' exp ',' exp (',' exp)? 'do' block 'end'
    | 'for' namelist 'in' explist 'do' block 'end'
    | 'function' funcname funcbody
    | 'local' 'function' NAME funcbody
    | 'local' namelist ('=' explist)?
    ;

retstat
    : 'return' explist? ';'?
    ;

label
    : '::' NAME '::'
    ;

funcname
    : NAME ('.' NAME)* (':' NAME)?
    ;

varlist
    : var (',' var)*
    ;

namelist
    : NAME (',' NAME)*
    ;

explist
    : exp (',' exp)*
    ;

// Binary/unary alternatives are ordered from tightest to loosest binding:
// ANTLR 4 gives earlier alternatives of a left-recursive rule higher
// precedence. Binary operators default to left-associative, so the two
// operators Lua defines as right-associative ('^' and '..') must carry an
// explicit <assoc=right>; without it `2^3^2` parses as `(2^3)^2`.
//
// NOTE(review): Lua 5.3 ranks the bitwise operators above the comparison
// operators, whereas operatorBitwise is the loosest alternative here, so
// mixed bitwise/comparison expressions may not group as Lua does. Left
// unchanged; confirm before relying on the tree shape for such expressions.
exp
    : 'nil' | 'false' | 'true'
    | number
    | string
    | '...'
    | functiondef
    | prefixexp
    | tableconstructor
    | <assoc=right> exp operatorPower exp
    | operatorUnary exp
    | exp operatorMulDivMod exp
    | exp operatorAddSub exp
    | <assoc=right> exp operatorStrcat exp
    | exp operatorComparison exp
    | exp operatorAnd exp
    | exp operatorOr exp
    | exp operatorBitwise exp
    ;

prefixexp
    : varOrExp nameAndArgs*
    ;

functioncall
    : varOrExp nameAndArgs+
    ;

varOrExp
    : var | '(' exp ')'
    ;

var
    : (NAME | '(' exp ')' varSuffix) varSuffix*
    ;

varSuffix
    : nameAndArgs* ('[' exp ']' | '.' NAME)
    ;

nameAndArgs
    : (':' NAME)? args
    ;

/*
var
    : NAME | prefixexp '[' exp ']' | prefixexp '.' NAME
    ;

prefixexp
    : var | functioncall | '(' exp ')'
    ;

functioncall
    : prefixexp args | prefixexp ':' NAME args
    ;
*/

args
    : '(' explist? ')' | tableconstructor | string
    ;

functiondef
    : 'function' funcbody
    ;

funcbody
    : '(' parlist? ')' block 'end'
    ;

parlist
    : namelist (',' '...')? | '...'
    ;

tableconstructor
    : '{' fieldlist? '}'
    ;

fieldlist
    : field (fieldsep field)* fieldsep?
    ;

field
    : '[' exp ']' '=' exp | NAME '=' exp | exp
    ;

fieldsep
    : ',' | ';'
    ;

operatorOr
    : 'or';

operatorAnd
    : 'and';

operatorComparison
    : '<' | '>' | '<=' | '>=' | '~=' | '==';

operatorStrcat
    : '..';

operatorAddSub
    : '+' | '-';

operatorMulDivMod
    : '*' | '/' | '%' | '//';

operatorBitwise
    : '&' | '|' | '~' | '<<' | '>>';

operatorUnary
    : 'not' | '#' | '-' | '~';

operatorPower
    : '^';

number
    : INT | HEX | FLOAT | HEX_FLOAT
    ;

string
    : NORMALSTRING | CHARSTRING | LONGSTRING
    ;

// LEXER

NAME
    : [a-zA-Z_][a-zA-Z_0-9]*
    ;

NORMALSTRING
    : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

CHARSTRING
    : '\'' ( EscapeSequence | ~('\''|'\\') )* '\''
    ;

LONGSTRING
    : '[' NESTED_STR ']'
    ;

// Matches the inside of a long bracket: the recursion pairs each leading '='
// with a trailing '=', so the opening and closing levels are equal.
fragment
NESTED_STR
    : '=' NESTED_STR '='
    | '[' .*? ']'
    ;

INT
    : Digit+
    ;

HEX
    : '0' [xX] HexDigit+
    ;

FLOAT
    : Digit+ '.' Digit* ExponentPart?
    | '.' Digit+ ExponentPart?
    | Digit+ ExponentPart
    ;

HEX_FLOAT
    : '0' [xX] HexDigit+ '.' HexDigit* HexExponentPart?
    | '0' [xX] '.' HexDigit+ HexExponentPart?
    | '0' [xX] HexDigit+ HexExponentPart
    ;

fragment
ExponentPart
    : [eE] [+-]? Digit+
    ;

fragment
HexExponentPart
    : [pP] [+-]? Digit+
    ;

fragment
EscapeSequence
    : '\\' [abfnrtvz"'\\]
    | '\\' '\r'?
'\n' 283 | | DecimalEscape 284 | | HexEscape 285 | | UtfEscape 286 | ; 287 | 288 | fragment 289 | DecimalEscape 290 | : '\\' Digit 291 | | '\\' Digit Digit 292 | | '\\' [0-2] Digit Digit 293 | ; 294 | 295 | fragment 296 | HexEscape 297 | : '\\' 'x' HexDigit HexDigit 298 | ; 299 | 300 | fragment 301 | UtfEscape 302 | : '\\' 'u{' HexDigit+ '}' 303 | ; 304 | 305 | fragment 306 | Digit 307 | : [0-9] 308 | ; 309 | 310 | fragment 311 | HexDigit 312 | : [0-9a-fA-F] 313 | ; 314 | 315 | COMMENT 316 | : '--[' NESTED_STR ']' -> channel(HIDDEN) 317 | ; 318 | 319 | LINE_COMMENT 320 | : '--' 321 | ( // -- 322 | | '[' '='* // --[== 323 | | '[' '='* ~('='|'['|'\r'|'\n') ~('\r'|'\n')* // --[==AA 324 | | ~('['|'\r'|'\n') ~('\r'|'\n')* // --AAA 325 | ) ('\r\n'|'\r'|'\n'|EOF) 326 | -> channel(HIDDEN) 327 | ; 328 | 329 | WS 330 | : [ \t\u000C\r\n]+ -> skip 331 | ; 332 | 333 | SHEBANG 334 | : '#' '!' ~('\n'|'\r')* -> channel(HIDDEN) 335 | ; 336 | -------------------------------------------------------------------------------- /lib/antlr4-native/generator.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | 3 | module Antlr4Native 4 | class Generator 5 | ANTLR_VERSION = '4.10.1'.freeze 6 | 7 | ANTLR_JAR = File.expand_path( 8 | File.join('..', '..', 'vendor', "antlr-#{ANTLR_VERSION}-complete.jar"), __dir__ 9 | ).freeze 10 | 11 | include StringHelpers 12 | 13 | attr_reader :grammar_files, :output_dir, :parser_root_method 14 | 15 | def initialize(grammar_files:, output_dir:, parser_root_method:) 16 | @grammar_files = grammar_files 17 | @output_dir = output_dir 18 | @parser_root_method = parser_root_method 19 | 20 | end 21 | 22 | def generate 23 | generate_antlr_code 24 | write_interop_file 25 | end 26 | 27 | def gem_name 28 | @gem_name ||= underscore(parser_ns) 29 | end 30 | 31 | def antlr_ns 32 | grammar_names['parser'] || grammar_names['default'] 33 | end 34 | 35 | def parser_ns 36 | @parser_ns ||= grammar_names['parser'] || 
"#{grammar_names['default']}Parser" 37 | end 38 | 39 | def lexer_ns 40 | @lexer_ns ||= grammar_names['lexer'] || "#{grammar_names['default']}Lexer" 41 | end 42 | 43 | def ext_name 44 | @ext_name ||= underscore(parser_ns) 45 | end 46 | 47 | private 48 | 49 | def generate_antlr_code 50 | FileUtils.mkdir_p(antlrgen_dir) 51 | 52 | system(<<~END) 53 | java -jar #{ANTLR_JAR} \ 54 | -o #{antlrgen_dir} \ 55 | -Dlanguage=Cpp \ 56 | -visitor \ 57 | #{grammar_files.join(' ')} 58 | END 59 | end 60 | 61 | def write_interop_file 62 | File.write(interop_file, interop_code) 63 | end 64 | 65 | def interop_code 66 | <<~END 67 | #include 68 | 69 | #include 70 | 71 | #include "antlrgen/#{parser_ns}.h" 72 | #include "antlrgen/#{antlr_ns}BaseVisitor.h" 73 | #include "antlrgen/#{lexer_ns}.h" 74 | 75 | #include 76 | #include 77 | 78 | #ifdef _WIN32 79 | #undef OPTIONAL 80 | #undef IN 81 | #undef OUT 82 | #endif 83 | 84 | #undef FALSE 85 | #undef TRUE 86 | 87 | #undef TYPE 88 | 89 | using namespace std; 90 | using namespace Rice; 91 | using namespace antlr4; 92 | 93 | #{proxy_class_declarations} 94 | 95 | namespace Rice::detail { 96 | template <> 97 | class To_Ruby { 98 | public: 99 | VALUE convert(Token* const &x) { 100 | if (!x) return Nil; 101 | return Data_Object(x, false, rb_cToken); 102 | } 103 | }; 104 | 105 | template <> 106 | class To_Ruby { 107 | public: 108 | VALUE convert(tree::ParseTree* const &x) { 109 | if (!x) return Nil; 110 | return Data_Object(x, false, rb_cParseTree); 111 | } 112 | }; 113 | 114 | template <> 115 | class To_Ruby { 116 | public: 117 | VALUE convert(tree::TerminalNode* const &x) { 118 | if (!x) return Nil; 119 | return Data_Object(x, false, rb_cTerminalNode); 120 | } 121 | }; 122 | } 123 | 124 | class ContextProxy { 125 | public: 126 | ContextProxy(tree::ParseTree* orig) { 127 | this -> orig = orig; 128 | } 129 | 130 | tree::ParseTree* getOriginal() { 131 | return orig; 132 | } 133 | 134 | std::string getText() { 135 | return orig -> getText(); 136 | } 137 
| 138 | Object getStart() { 139 | auto token = ((ParserRuleContext*) orig) -> getStart(); 140 | 141 | return detail::To_Ruby().convert(token); 142 | } 143 | 144 | Object getStop() { 145 | auto token = ((ParserRuleContext*) orig) -> getStop(); 146 | 147 | return detail::To_Ruby().convert(token); 148 | } 149 | 150 | Array getChildren() { 151 | Array children; 152 | if (orig != nullptr) { 153 | for (auto it = orig -> children.begin(); it != orig -> children.end(); it ++) { 154 | Object parseTree = ContextProxy::wrapParseTree(*it); 155 | 156 | if (parseTree != Nil) { 157 | children.push(parseTree); 158 | } 159 | } 160 | } 161 | return children; 162 | } 163 | 164 | Object getParent() { 165 | return orig == nullptr ? Nil : ContextProxy::wrapParseTree(orig -> parent); 166 | } 167 | 168 | size_t childCount() { 169 | return orig == nullptr ? 0 : orig -> children.size(); 170 | } 171 | 172 | bool doubleEquals(Object other) { 173 | if (other.is_a(rb_cContextProxy)) { 174 | return detail::From_Ruby().convert(other) -> getOriginal() == getOriginal(); 175 | } else { 176 | return false; 177 | } 178 | } 179 | 180 | private: 181 | 182 | static Object wrapParseTree(tree::ParseTree* node); 183 | 184 | protected: 185 | tree::ParseTree* orig = nullptr; 186 | }; 187 | 188 | class TerminalNodeProxy : public ContextProxy { 189 | public: 190 | TerminalNodeProxy(tree::ParseTree* tree) : ContextProxy(tree) { } 191 | }; 192 | 193 | 194 | #{proxy_class_headers} 195 | 196 | #{conversions} 197 | 198 | #{proxy_class_methods} 199 | 200 | #{visitor_generator.visitor_proxy} 201 | 202 | #{parser_class} 203 | 204 | #{context_proxy_methods} 205 | 206 | #{init_function} 207 | END 208 | end 209 | 210 | def proxy_class_headers 211 | @proxy_class_headers ||= contexts 212 | .map(&:proxy_class_header) 213 | .join("\n") 214 | end 215 | 216 | def proxy_class_declarations 217 | @proxy_class_declarations ||= contexts 218 | .map { |ctx| "Class #{ctx.proxy_class_variable};" } 219 | .concat([ 220 | 'Class 
rb_cToken;', 221 | 'Class rb_cParser;', 222 | 'Class rb_cParseTree;', 223 | 'Class rb_cTerminalNode;', 224 | 'Class rb_cContextProxy;' 225 | ]) 226 | .join("\n") 227 | end 228 | 229 | def conversions 230 | @conversions ||= contexts.map(&:conversions).join("\n") 231 | end 232 | 233 | def proxy_class_methods 234 | @proxy_class_methods ||= contexts.flat_map(&:proxy_class_methods).join("\n") 235 | end 236 | 237 | def parser_class 238 | @parser_class ||= <<~END 239 | class ParserProxy { 240 | public: 241 | static ParserProxy* parse(string code) { 242 | auto input = new ANTLRInputStream(code); 243 | return parseStream(input); 244 | } 245 | 246 | static ParserProxy* parseFile(string file) { 247 | ifstream stream; 248 | stream.open(file); 249 | 250 | auto input = new ANTLRInputStream(stream); 251 | auto parser = parseStream(input); 252 | 253 | stream.close(); 254 | 255 | return parser; 256 | } 257 | 258 | Object #{parser_root_method}() { 259 | auto ctx = this -> parser -> #{parser_root_method}(); 260 | 261 | #{capitalize(parser_root_method)}ContextProxy proxy((#{parser_ns}::#{capitalize(parser_root_method)}Context*) ctx); 262 | return detail::To_Ruby<#{capitalize(parser_root_method)}ContextProxy>().convert(proxy); 263 | } 264 | 265 | Object visit(VisitorProxy* visitor) { 266 | auto result = visitor -> visit(this -> parser -> #{parser_root_method}()); 267 | 268 | // reset for the next visit call 269 | this -> lexer -> reset(); 270 | this -> parser -> reset(); 271 | 272 | return std::any_cast(result); 273 | } 274 | 275 | ~ParserProxy() { 276 | delete this -> parser; 277 | delete this -> tokens; 278 | delete this -> lexer; 279 | delete this -> input; 280 | } 281 | 282 | private: 283 | static ParserProxy* parseStream(ANTLRInputStream* input) { 284 | ParserProxy* parser = new ParserProxy(); 285 | 286 | parser -> input = input; 287 | parser -> lexer = new #{lexer_ns}(parser -> input); 288 | parser -> tokens = new CommonTokenStream(parser -> lexer); 289 | parser -> parser = new 
#{parser_ns}(parser -> tokens); 290 | 291 | return parser; 292 | } 293 | 294 | ParserProxy() {}; 295 | 296 | ANTLRInputStream* input; 297 | #{lexer_ns}* lexer; 298 | CommonTokenStream* tokens; 299 | #{parser_ns}* parser; 300 | }; 301 | 302 | namespace Rice::detail { 303 | template <> 304 | class To_Ruby { 305 | public: 306 | VALUE convert(ParserProxy* const &x) { 307 | if (!x) return Nil; 308 | return Data_Object(x, false, rb_cParser); 309 | } 310 | }; 311 | } 312 | END 313 | end 314 | 315 | def init_function 316 | <<~END 317 | extern "C" 318 | void Init_#{ext_name}() { 319 | Module rb_m#{parser_ns} = define_module("#{capitalize(parser_ns)}"); 320 | 321 | rb_cToken = define_class_under(rb_m#{parser_ns}, "Token") 322 | .define_method("text", &Token::getText) 323 | .define_method("channel", &Token::getChannel) 324 | .define_method("token_index", &Token::getTokenIndex); 325 | 326 | rb_cParseTree = define_class_under(rb_m#{parser_ns}, "ParseTree"); 327 | 328 | rb_cContextProxy = define_class_under(rb_m#{parser_ns}, "Context") 329 | .define_method("children", &ContextProxy::getChildren) 330 | .define_method("child_count", &ContextProxy::childCount) 331 | .define_method("text", &ContextProxy::getText) 332 | .define_method("start", &ContextProxy::getStart) 333 | .define_method("stop", &ContextProxy::getStop) 334 | .define_method("parent", &ContextProxy::getParent) 335 | .define_method("==", &ContextProxy::doubleEquals); 336 | 337 | rb_cTerminalNode = define_class_under(rb_m#{parser_ns}, "TerminalNodeImpl"); 338 | 339 | define_class_under<#{antlr_ns}BaseVisitor>(rb_m#{parser_ns}, "#{visitor_generator.class_name}") 340 | .define_director<#{visitor_generator.cpp_class_name}>() 341 | .define_constructor(Constructor<#{visitor_generator.cpp_class_name}, Object>()) 342 | .define_method("visit", &#{visitor_generator.cpp_class_name}::ruby_visit) 343 | .define_method("visit_children", &#{visitor_generator.cpp_class_name}::ruby_visitChildren) 344 | 
#{visitor_generator.visitor_proxy_methods(' ').join("\n")}; 345 | 346 | rb_cParser = define_class_under(rb_m#{parser_ns}, "Parser") 347 | .define_singleton_function("parse", &ParserProxy::parse) 348 | .define_singleton_function("parse_file", &ParserProxy::parseFile) 349 | .define_method("#{parser_root_method}", &ParserProxy::#{parser_root_method}) 350 | .define_method("visit", &ParserProxy::visit); 351 | 352 | #{class_wrappers_str(' ')} 353 | } 354 | END 355 | end 356 | 357 | def context_proxy_methods 358 | @context_proxy_methods ||= begin 359 | wrapper_branches = contexts.flat_map.with_index do |context, idx| 360 | [ 361 | " #{idx == 0 ? 'if' : 'else if'} (antlrcpp::is<#{parser_ns}::#{context.name}*>(node)) {", 362 | " #{context.name}Proxy proxy((#{parser_ns}::#{context.name}*)node);", 363 | " return detail::To_Ruby<#{context.name}Proxy>().convert(proxy);", 364 | " }" 365 | ] 366 | end 367 | 368 | <<~END 369 | Object ContextProxy::wrapParseTree(tree::ParseTree* node) { 370 | #{wrapper_branches.join("\n")} 371 | else if (antlrcpp::is(node)) { 372 | TerminalNodeProxy proxy(node); 373 | return detail::To_Ruby().convert(proxy); 374 | } else { 375 | return Nil; 376 | } 377 | } 378 | END 379 | end 380 | end 381 | 382 | def class_wrappers_str(indent) 383 | class_wrappers.map do |cw| 384 | ["#{indent}#{cw[0]}", *cw[1..-1].map { |line| "#{indent} #{line}" }].join("\n") 385 | end.join("\n\n") 386 | end 387 | 388 | def class_wrappers 389 | @class_wrappers ||= contexts.map do |ctx| 390 | ctx.class_wrapper("rb_m#{parser_ns}") 391 | end 392 | end 393 | 394 | def contexts 395 | @contexts ||= cpp_parser_source 396 | .scan(/#{parser_ns}::([^\s:\(\)]+Context)/) 397 | .flatten 398 | .uniq 399 | .reject { |c| c == '_sharedContext' } 400 | .map { |name| Context.new(name, parser_ns, cpp_parser_source) } 401 | end 402 | 403 | def visitor_methods 404 | @visitor_methods ||= cpp_visitor_source 405 | .scan(/visit[A-Z][^\(\s]*/) 406 | .flatten 407 | .uniq 408 | end 409 | 410 | def 
visitor_generator 411 | @visitor_generator ||= VisitorGenerator.new(visitor_methods, antlr_ns, parser_ns) 412 | end 413 | 414 | def antlrgen_dir 415 | @antlrgen_dir ||= File.join(output_dir, gem_name, 'antlrgen') 416 | end 417 | 418 | def interop_file 419 | @interop_file ||= File.join(output_dir, gem_name, "#{ext_name}.cpp") 420 | end 421 | 422 | def grammar_names 423 | @grammar_names ||= begin 424 | grammar_files.each_with_object({}) do |grammar_file, ret| 425 | kind, name = File.read(grammar_file).scan(/^(parser|lexer)?\s*grammar\s*([^;]+);/).flatten 426 | ret[kind&.strip || 'default'] = name 427 | end 428 | end 429 | end 430 | 431 | def cpp_parser_source 432 | @cpp_parser_source ||= File.read(File.join(antlrgen_dir, "#{parser_ns}.cpp")) 433 | end 434 | 435 | def cpp_visitor_source 436 | @cpp_visitor_source ||= File.read(File.join(antlrgen_dir, "#{antlr_ns}BaseVisitor.h")) 437 | end 438 | end 439 | end 440 | --------------------------------------------------------------------------------