├── CREDITS ├── ebnf.rb ├── VERSION ├── spec ├── spec.opts ├── .gitignore ├── matchers.rb ├── spec_helper.rb ├── unescape_spec.rb ├── bnf_spec.rb ├── ll1 │ ├── parser_spec.rb │ ├── scanner_spec.rb │ └── data │ │ └── parser.rb ├── peg_spec.rb ├── ebnf_spec.rb ├── peg │ ├── data │ │ └── parser.rb │ └── parser_spec.rb ├── native_spec.rb ├── parser_spec.rb └── base_spec.rb ├── AUTHORS ├── examples ├── ebnf-ll1-parser │ ├── .gitignore │ ├── Rakefile │ └── parse ├── ebnf-peg-parser │ ├── .gitignore │ ├── Rakefile │ └── parse ├── abnf │ ├── examples │ │ ├── 1star.abnf │ │ ├── strs.abnf │ │ ├── postal-address.abnf │ │ ├── json.abnf │ │ ├── uri.abnf │ │ └── http.abnf │ ├── Rakefile │ ├── core.rb │ ├── parse │ ├── abnf.sxp │ └── abnf.peg.sxp ├── calc │ ├── Rakefile │ ├── calc.ebnf │ ├── calc.peg.sxp │ ├── README.md │ ├── calc │ ├── .byebug_history │ └── calc.rb ├── isoebnf │ ├── Rakefile │ ├── examples │ │ ├── pascal.isoebnf │ │ ├── ebnf.isoebnf │ │ ├── postal-address.isoebnf │ │ └── html.isoebnf │ ├── parse │ ├── iso-ebnf.sxp │ ├── iso-ebnf.peg.sxp │ └── meta.rb └── JSON │ ├── JSON.ebnf │ └── JSON-ab.ebnf ├── .coveralls.yml ├── .gitignore ├── .yardopts ├── lib ├── ebnf │ ├── version.rb │ ├── bnf.rb │ ├── peg.rb │ ├── terminals.rb │ ├── abnf │ │ └── core.rb │ ├── unescape.rb │ ├── ll1 │ │ └── scanner.rb │ └── isoebnf │ │ └── meta.rb └── ebnf.rb ├── Gemfile ├── UNLICENSE ├── .github └── workflows │ ├── ci.yml │ └── generate-docs.yml ├── etc ├── abnf-core.ebnf ├── ebnf.sxp ├── ebnf.ebnf ├── doap.ttl ├── abnf.sxp ├── iso-ebnf.sxp ├── abnf.abnf ├── turtle.ebnf ├── abnf.ebnf ├── ebnf.peg.sxp ├── turtle.sxp ├── iso-ebnf.isoebnf ├── iso-ebnf.ebnf └── ebnf.ll1.sxp ├── ebnf.gemspec ├── Rakefile └── bin └── ebnf /CREDITS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ebnf.rb: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 2.6.0 2 | -------------------------------------------------------------------------------- /spec/spec.opts: -------------------------------------------------------------------------------- 1 | --colour -------------------------------------------------------------------------------- /spec/.gitignore: -------------------------------------------------------------------------------- 1 | /uri-cache/ 2 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | * Gregg Kellogg 2 | -------------------------------------------------------------------------------- /examples/ebnf-ll1-parser/.gitignore: -------------------------------------------------------------------------------- 1 | /.byebug_history 2 | -------------------------------------------------------------------------------- /examples/ebnf-peg-parser/.gitignore: -------------------------------------------------------------------------------- 1 | /.byebug_history 2 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: nZpCGeEQmrpsuh3Er4qfKarr00VUGU5Lx 2 | -------------------------------------------------------------------------------- /examples/abnf/examples/1star.abnf: -------------------------------------------------------------------------------- 1 | suffix = 1*("I" / "V" / "X") 2 | -------------------------------------------------------------------------------- /examples/abnf/examples/strs.abnf: -------------------------------------------------------------------------------- 1 | x = *y %d1.3.10 2 | y = %s"Ab" / (2z / %x30-39) 3 | z = x ["ab"] 4 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | /doc/ 15 | 16 | # YARD artifacts 17 | .yardoc 18 | _yardoc 19 | /Gemfile.lock 20 | /.byebug_history 21 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --title "EBNF parser and parser generator" 2 | --output-dir doc/yard 3 | --protected 4 | --no-private 5 | --hide-void-return 6 | --markup markdown 7 | --readme README.md 8 | lib/**/*.rb 9 | examples/**/*.rb 10 | - 11 | AUTHORS 12 | CREDITS 13 | VERSION 14 | UNLICENSE 15 | etc/ebnf.ebnf 16 | etc/ebnf.ll1.sxp 17 | etc/ebnf.peg.sxp 18 | examples/**/README.md 19 | examples/**/*.rb 20 | -------------------------------------------------------------------------------- /examples/calc/Rakefile: -------------------------------------------------------------------------------- 1 | task default: ["calc.peg.sxp", :doc] 2 | 3 | file "calc.peg.sxp" => "calc.ebnf" do |t| 4 | sh %{ 5 | ebnf --peg --output calc.peg.sxp calc.ebnf 6 | } 7 | end 8 | 9 | desc "Generate literal documentation for parser" 10 | task doc: %w(doc/calc.html) 11 | 12 | file "doc/calc.html" => "calc.rb" do 13 | `rocco -t doc/layout.mustache calc.rb -o doc` 14 | end 15 | -------------------------------------------------------------------------------- /examples/calc/calc.ebnf: -------------------------------------------------------------------------------- 1 | /* 2 | * The following grammar is an example from the 3 | * [Wikipedia entry on Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Examples). 
4 | */ 5 | [1] Expr ::= Sum 6 | [2] Sum ::= Product (('+' | '-') Product)* 7 | [3] Product ::= Power (('*' | '/') Power)* 8 | [4] Power ::= Value ('^' Power)? 9 | [5] Value ::= NUMBER | '(' Expr ')' 10 | [6] NUMBER ::= [0-9]+ 11 | -------------------------------------------------------------------------------- /examples/ebnf-peg-parser/Rakefile: -------------------------------------------------------------------------------- 1 | task default: [:meta, :doc] 2 | 3 | desc 'Build rules table' 4 | task meta: "meta.rb" 5 | 6 | file "meta.rb" => "../../etc/ebnf.ebnf" do |t| 7 | sh %{ 8 | ebnf --peg --format rb \ 9 | --mod-name EBNFPegMeta \ 10 | --output meta.rb \ 11 | #{t.prerequisites.first} 12 | } 13 | end 14 | 15 | desc "Generate literal documentation for parser" 16 | task doc: %w(doc/parser.html) 17 | 18 | file "doc/parser.html" => "parser.rb" do 19 | `rocco -t doc/layout.mustache parser.rb -o doc` 20 | end 21 | -------------------------------------------------------------------------------- /examples/ebnf-ll1-parser/Rakefile: -------------------------------------------------------------------------------- 1 | task default: [:meta, :doc] 2 | 3 | desc 'Build first, follow and branch tables' 4 | task meta: "meta.rb" 5 | 6 | file "meta.rb" => "../../etc/ebnf.ebnf" do |t| 7 | sh %{ 8 | ebnf --ll1 ebnf --format rb \ 9 | --mod-name EBNFParserMeta \ 10 | --output meta.rb \ 11 | #{t.prerequisites.first} 12 | } 13 | end 14 | 15 | desc "Generate literal documentation for parser" 16 | task doc: %w(doc/parser.html) 17 | 18 | file "doc/parser.html" => "parser.rb" do 19 | `rocco -t doc/layout.mustache parser.rb -o doc` 20 | end 21 | -------------------------------------------------------------------------------- /lib/ebnf/version.rb: -------------------------------------------------------------------------------- 1 | module EBNF 2 | module VERSION 3 | VERSION_FILE = File.join(File.expand_path(File.dirname(__FILE__)), "..", "..", "VERSION") 4 | MAJOR, MINOR, TINY, EXTRA = 
File.read(VERSION_FILE).chomp.split(".") 5 | 6 | STRING = [MAJOR, MINOR, TINY, EXTRA].compact.join('.') 7 | 8 | ## 9 | # @return [String] 10 | def self.to_s() STRING end 11 | 12 | ## 13 | # @return [String] 14 | def self.to_str() STRING end 15 | 16 | ## 17 | # @return [Array(String, String, String)] 18 | def self.to_a() [MAJOR, MINOR, TINY] end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /examples/calc/calc.peg.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule Expr "1" (seq Sum)) 3 | (rule Sum "2" (seq Product _Sum_1)) 4 | (rule _Sum_1 "2.1" (star _Sum_2)) 5 | (rule _Sum_2 "2.2" (seq _Sum_3 Product)) 6 | (rule _Sum_3 "2.3" (alt "+" "-")) 7 | (rule Product "3" (seq Power _Product_1)) 8 | (rule _Product_1 "3.1" (star _Product_2)) 9 | (rule _Product_2 "3.2" (seq _Product_3 Power)) 10 | (rule _Product_3 "3.3" (alt "*" "/")) 11 | (rule Power "4" (seq Value _Power_1)) 12 | (rule _Power_1 "4.1" (opt _Power_2)) 13 | (rule _Power_2 "4.2" (seq "^" Power)) 14 | (rule Value "5" (alt NUMBER _Value_1)) 15 | (rule _Value_1 "5.1" (seq "(" Expr ")")) 16 | (terminal NUMBER "6" (plus _NUMBER_1)) 17 | (terminal _NUMBER_1 "6.1" (range "0-9"))) 18 | -------------------------------------------------------------------------------- /examples/abnf/examples/postal-address.abnf: -------------------------------------------------------------------------------- 1 | postal-address = name-part street zip-part 2 | 3 | name-part = *(personal-part SP) last-name [SP suffix] CRLF 4 | name-part =/ personal-part CRLF 5 | 6 | personal-part = first-name / (initial ".") 7 | first-name = *ALPHA 8 | initial = ALPHA 9 | last-name = *ALPHA 10 | suffix = ("Jr." / "Sr." 
/ 1*("I" / "V" / "X")) 11 | 12 | street = [apt SP] house-num SP street-name CRLF 13 | apt = 1*4DIGIT 14 | house-num = 1*8(DIGIT / ALPHA) 15 | street-name = 1*VCHAR 16 | 17 | zip-part = town-name "," SP state 1*2SP zip-code CRLF 18 | town-name = 1*(ALPHA / SP) 19 | state = 2ALPHA 20 | zip-code = 5DIGIT ["-" 4DIGIT] 21 | -------------------------------------------------------------------------------- /lib/ebnf/bnf.rb: -------------------------------------------------------------------------------- 1 | module EBNF 2 | module BNF 3 | ## 4 | # Transform EBNF Rule set to BNF: 5 | # 6 | # * Add rule [0] (_empty rule (seq)) 7 | # * Transform each rule into a set of rules that are just BNF, using {Rule#to_bnf}. 8 | # @return [EBNF] self 9 | def make_bnf 10 | progress("make_bnf") {"Start: #{@ast.length} rules"} 11 | new_ast = [Rule.new(:_empty, "0", [:seq], kind: :rule)] 12 | 13 | ast.each do |rule| 14 | debug("make_bnf") {"expand from: #{rule.inspect}"} 15 | new_rules = rule.to_bnf 16 | debug(" => ") {new_rules.map(&:sym).join(', ')} 17 | new_ast += new_rules 18 | end 19 | 20 | @ast = new_ast 21 | progress("make_bnf") {"End: #{@ast.length} rules"} 22 | self 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | gem 'sxp', github: "dryruby/sxp.rb", branch: "develop" 5 | gem 'rdf', github: "ruby-rdf/rdf", branch: "develop" 6 | 7 | group :development do 8 | gem 'rdf-isomorphic', github: "ruby-rdf/rdf-isomorphic", branch: "develop" 9 | gem 'rdf-spec', github: "ruby-rdf/rdf-spec", branch: "develop" 10 | gem 'rdf-turtle', github: "ruby-rdf/rdf-turtle", branch: "develop" 11 | gem "byebug", platforms: :mri 12 | gem 'psych', platforms: [:mri, :rbx] 13 | gem "redcarpet", platforms: :mri 14 | gem "rocco", platforms: :mri 15 | gem "pygmentize", platforms: :mri 16 | gem 'getoptlong' 17 | 
end 18 | 19 | group :development, :test do 20 | gem 'simplecov', '~> 0.22', platforms: :mri 21 | gem 'simplecov-lcov', '~> 0.8', platforms: :mri 22 | end 23 | -------------------------------------------------------------------------------- /examples/isoebnf/Rakefile: -------------------------------------------------------------------------------- 1 | task default: ['iso-ebnf.sxp', 'iso-ebnf.peg.sxp', :meta, :doc] 2 | 3 | desc 'Build rules table' 4 | task meta: "meta.rb" 5 | 6 | file "meta.rb" => "../../etc/iso-ebnf.ebnf" do |t| 7 | sh %{ 8 | ebnf --peg --format rb \ 9 | --mod-name ISOEBNFMeta \ 10 | --output meta.rb \ 11 | #{t.prerequisites.first} 12 | } 13 | end 14 | 15 | file 'iso-ebnf.sxp' => "../../etc/iso-ebnf.ebnf" do |t| 16 | sh %{ 17 | ebnf --output iso-ebnf.sxp #{t.prerequisites.first} 18 | } 19 | end 20 | 21 | file 'iso-ebnf.peg.sxp' => "../../etc/iso-ebnf.ebnf" do |t| 22 | sh %{ 23 | ebnf --peg --output iso-ebnf.peg.sxp #{t.prerequisites.first} 24 | } 25 | end 26 | 27 | desc "Generate literal documentation for parser" 28 | task doc: %w(doc/parser.html) 29 | 30 | file "doc/parser.html" => "parser.rb" do 31 | `rocco -t doc/layout.mustache parser.rb -o doc` 32 | end 33 | -------------------------------------------------------------------------------- /examples/calc/README.md: -------------------------------------------------------------------------------- 1 | # Simple Calculator 2 | 3 | This example is based on the 4 | [Wikipedia entry on Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Examples). The parser calculates intermediate expressions and applies operations returning the value of the expression. 5 | 6 | The Grammar is expressed as follows: 7 | 8 | [1] Expr ::= Sum 9 | [2] Sum ::= Product (('+' | '-') Product)* 10 | [3] Product ::= Power (('*' | '/') Power)* 11 | [4] Power ::= Value ('^' Power)? 
12 | [5] Value ::= NUMBER | '(' Expr ')' 13 | [6] NUMBER ::= [0-9]+ 14 | 15 | ## Running the calculator 16 | 17 | The calculator is expressed in `calc.rb` and can be exercised using the `calc` wrapper, but at its simplest can be invoked as follows: 18 | 19 | require 'calc' 20 | calc = Calc.new 21 | result = calc.evaluate('1 + 2 * 3') 22 | #=> 7 23 | 24 | -------------------------------------------------------------------------------- /examples/isoebnf/examples/pascal.isoebnf: -------------------------------------------------------------------------------- 1 | (* a simple program syntax in EBNF − Wikipedia *) 2 | program = 'PROGRAM', white_space, identifier, white_space, 3 | 'BEGIN', white_space, 4 | { assignment, ";", white_space }, 5 | 'END.' ; 6 | identifier = alphabetic_character, { alphabetic_character | digit } ; 7 | number = [ "-" ], digit, { digit } ; 8 | string = '"' , { all_characters - '"' }, '"' ; 9 | assignment = identifier , ":=" , ( number | identifier | string ) ; 10 | alphabetic_character = "A" | "B" | "C" | "D" | "E" | "F" | "G" 11 | | "H" | "I" | "J" | "K" | "L" | "M" | "N" 12 | | "O" | "P" | "Q" | "R" | "S" | "T" | "U" 13 | | "V" | "W" | "X" | "Y" | "Z" ; 14 | digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; 15 | white_space = ? white_space characters ? ; 16 | all_characters = ? all visible characters ? ; 17 | -------------------------------------------------------------------------------- /spec/matchers.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | def normalize(obj) 3 | if obj.is_a?(String) 4 | obj.gsub(/\s+/m, ' '). 5 | gsub(/\s+\)/m, ')'). 6 | gsub(/\(\s+/m, '('). 
7 | strip 8 | else 9 | obj 10 | end 11 | end 12 | 13 | Info = Struct.new(:id, :logger, :action, :result, :format) 14 | 15 | RSpec::Matchers.define :produce do |expected, info| 16 | match do |actual| 17 | @info = if info.is_a?(Logger) 18 | Info.new("", info) 19 | elsif info.is_a?(Hash) 20 | Info.new(info[:id], info[:logger], info[:action], info[:result]) 21 | else 22 | Info.new(info) 23 | end 24 | expect(normalize(actual)).to eq normalize(expected) 25 | end 26 | 27 | failure_message do |actual| 28 | "Expected: #{normalize(expected)}\n" + 29 | "Actual : #{normalize(actual)}\n" + 30 | "Raw : #{expected}\n" + 31 | "Result : #{actual}\n" + 32 | "Processing results:\n#{@info.logger}" 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/ebnf.rb: -------------------------------------------------------------------------------- 1 | require 'sxp' unless defined?(SXP) 2 | 3 | module EBNF 4 | autoload :ABNF, "ebnf/abnf" 5 | autoload :Base, "ebnf/base" 6 | autoload :BNF, "ebnf/bnf" 7 | autoload :ISOEBNF, "ebnf/isoebnf" 8 | autoload :LL1, "ebnf/ll1" 9 | autoload :Native, "ebnf/native" 10 | autoload :Parser, "ebnf/parser" 11 | autoload :PEG, "ebnf/peg" 12 | autoload :Rule, "ebnf/rule" 13 | autoload :Terminals,"ebnf/terminals" 14 | autoload :Unescape, "ebnf/unescape" 15 | autoload :Writer, "ebnf/writer" 16 | autoload :VERSION, "ebnf/version" 17 | 18 | ## 19 | # Parse the given EBNF `input`. 
20 | # 21 | # @example 22 | # ebnf = EBNF.parse(input) 23 | # 24 | # @param [#read, String, #to_s] input 25 | # @param [Hash{Symbol => Object}] options 26 | # @return [EBNF::Base] 27 | # @raise [Exception] on invalid input 28 | def self.parse(input, **options) 29 | ::EBNF::Base.new(input, **options) 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /examples/abnf/Rakefile: -------------------------------------------------------------------------------- 1 | task default: ['abnf.sxp', 'abnf.peg.sxp', "meta.rb", "core.rb", :doc] 2 | 3 | file "meta.rb" => "../../etc/abnf.ebnf" do |t| 4 | sh %{ 5 | ebnf --peg --format rb \ 6 | --mod-name ABNFMeta \ 7 | --output meta.rb \ 8 | #{t.prerequisites.first} 9 | } 10 | end 11 | 12 | file "core.rb" => "../../etc/abnf-core.ebnf" do |t| 13 | sh %{ 14 | ebnf --format rb \ 15 | --mod-name ABNFCore \ 16 | --output core.rb \ 17 | #{t.prerequisites.first} 18 | } 19 | end 20 | 21 | file 'abnf.sxp' => "../../etc/abnf.ebnf" do |t| 22 | sh %{ 23 | ebnf --output abnf.sxp #{t.prerequisites.first} 24 | } 25 | end 26 | 27 | file 'abnf.peg.sxp' => "../../etc/abnf.ebnf" do |t| 28 | sh %{ 29 | ebnf --peg --output abnf.peg.sxp #{t.prerequisites.first} 30 | } 31 | end 32 | 33 | desc "Generate literal documentation for parser" 34 | task doc: %w(doc/parser.html) 35 | 36 | file "doc/parser.html" => "parser.rb" do 37 | `rocco -t doc/layout.mustache parser.rb -o doc` 38 | end 39 | -------------------------------------------------------------------------------- /examples/isoebnf/examples/ebnf.isoebnf: -------------------------------------------------------------------------------- 1 | letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" 2 | | "H" | "I" | "J" | "K" | "L" | "M" | "N" 3 | | "O" | "P" | "Q" | "R" | "S" | "T" | "U" 4 | | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" 5 | | "c" | "d" | "e" | "f" | "g" | "h" | "i" 6 | | "j" | "k" | "l" | "m" | "n" | "o" | "p" 7 | | "q" | "r" | "s" | "t" | "u" | "v" | 
"w" 8 | | "x" | "y" | "z" ; 9 | digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; 10 | symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" 11 | | "'" | '"' | "=" | "|" | "." | "," | ";" ; 12 | character = letter | digit | symbol | "_" ; 13 | 14 | identifier = letter , { letter | digit | "_" } ; 15 | terminal = "'" , character , { character } , "'" 16 | | '"' , character , { character } , '"' ; 17 | 18 | lhs = identifier ; 19 | rhs = identifier 20 | | terminal 21 | | "[" , rhs , "]" 22 | | "{" , rhs , "}" 23 | | "(" , rhs , ")" 24 | | rhs , "|" , rhs 25 | | rhs , "," , rhs ; 26 | 27 | rule = lhs , "=" , rhs , ";" ; 28 | grammar = { rule } ; 29 | -------------------------------------------------------------------------------- /examples/isoebnf/examples/postal-address.isoebnf: -------------------------------------------------------------------------------- 1 | postal_address = name_part, street, zip_part ; 2 | 3 | name_part = {personal_part, SP}, last_name, [SP, suffix], CRLF 4 | | personal_part, CRLF 5 | ; 6 | 7 | personal_part = first_name | (initial, ".") ; 8 | first_name = {ALPHA} ; 9 | initial = ALPHA ; 10 | last_name = {ALPHA} ; 11 | suffix = ("Jr." | "Sr." 
| ("I" | "V" | "X"), {"I" | "V" | "X"}) ; 12 | 13 | street = [apt, SP], house_num, SP, street_name, CRLF ; 14 | apt = DIGIT, [DIGIT, [DIGIT, [DIGIT]]] ; 15 | house_num = (DIGIT | ALPHA), 16 | [(DIGIT | ALPHA), 17 | [(DIGIT | ALPHA), 18 | [(DIGIT | ALPHA), 19 | [(DIGIT | ALPHA), 20 | [(DIGIT | ALPHA), 21 | [(DIGIT | ALPHA), 22 | [(DIGIT | ALPHA)]]]]]]] 23 | ; 24 | street_name = VCHAR, {VCHAR} ; 25 | 26 | zip_part = town_name, ",", SP, state, SP, [SP], zip_code, CRLF ; 27 | town_name = (ALPHA | SP), {ALPHA | SP} ; 28 | state = 2*ALPHA ; 29 | zip_code = 5*DIGIT, ["-", 4*DIGIT] ; -------------------------------------------------------------------------------- /lib/ebnf/peg.rb: -------------------------------------------------------------------------------- 1 | module EBNF 2 | module PEG 3 | autoload :Parser, 'ebnf/peg/parser' 4 | autoload :Rule, 'ebnf/peg/rule' 5 | 6 | ## 7 | # Transform EBNF Rule set for PEG parsing: 8 | # 9 | # * Transform each rule into a set of sub-rules extracting unnamed sequences into new rules, using {Rule#to_peg}. 10 | # @return [EBNF] self 11 | def make_peg 12 | progress("make_peg") {"Start: #{@ast.length} rules"} 13 | new_ast = [] 14 | 15 | ast.each do |rule| 16 | debug("make_peg") {"expand from: #{rule.inspect}"} 17 | new_rules = rule.to_peg 18 | debug(" => ") {new_rules.map(&:sym).join(', ')} 19 | new_ast += new_rules 20 | end 21 | 22 | @ast = new_ast 23 | progress("make_peg") {"End: #{@ast.length} rules"} 24 | self 25 | end 26 | 27 | ## 28 | # Output Ruby parser files for PEG parsing 29 | # 30 | # @param [IO, StringIO] output 31 | def to_ruby_peg(output, **options) 32 | output.puts " RULES = [" 33 | ast.each do |rule| 34 | output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? 
'.extend(EBNF::PEG::Rule)' : '') + ',' 35 | end 36 | output.puts " ]" 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/ebnf/terminals.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Terminal definitions for the EBNF grammar 3 | module EBNF::Terminals 4 | SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze # Word boundaries 5 | SYMBOL = %r((?:#{SYMBOL_BASE}|(?:<#{SYMBOL_BASE}>))(?!\s*::=))u.freeze 6 | HEX = %r(\#x\h+)u.freeze 7 | CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze 8 | R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze 9 | LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*?\s*::=)u.freeze 10 | RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\])u.freeze 11 | RANGE_NOT_LHS = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s*?\s*::=))u.freeze 12 | O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze 13 | STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze 14 | STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze 15 | POSTFIX = %r([?*+])u.freeze 16 | PASS = %r(( 17 | \s 18 | | (?:(?:\#[^x]|//)[^\n\r]*) 19 | | (?:/\*(?:(?:\*[^/])|[^*])*\*/) 20 | | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\)) 21 | )+)xmu.freeze 22 | end 23 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org/> 25 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow runs continuous CI across different versions of ruby on all branches and pull requests to develop. 
2 | 3 | name: CI 4 | on: 5 | push: 6 | branches: [ '**' ] 7 | pull_request: 8 | branches: [ develop ] 9 | workflow_dispatch: 10 | 11 | jobs: 12 | tests: 13 | name: Ruby ${{ matrix.ruby }} 14 | if: "contains(github.event.commits[0].message, '[ci skip]') == false" 15 | runs-on: ubuntu-latest 16 | env: 17 | CI: true 18 | ALLOW_FAILURES: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'jruby' }} 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | ruby: ['3.0', 3.1, 3.2, 3.3, 3.4, ruby-head, jruby] 23 | steps: 24 | - name: Clone repository 25 | uses: actions/checkout@v3 26 | - name: Set up Ruby 27 | uses: ruby/setup-ruby@v1 28 | with: 29 | ruby-version: ${{ matrix.ruby }} 30 | - name: Install dependencies 31 | run: bundle install --jobs 4 --retry 3 32 | - name: Run tests 33 | run: ruby --version; bundle exec rspec spec || $ALLOW_FAILURES 34 | - name: Coveralls GitHub Action 35 | uses: coverallsapp/github-action@v2 36 | if: "matrix.ruby == '3.3'" 37 | with: 38 | github-token: ${{ secrets.GITHUB_TOKEN }} 39 | -------------------------------------------------------------------------------- /lib/ebnf/abnf/core.rb: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by ebnf version 2.0.0 2 | # Derived from etc/abnf-core.ebnf 3 | module ABNFCore 4 | RULES = [ 5 | EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), 6 | EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal), 7 | EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal), 8 | EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal), 9 | EBNF::Rule.new(:CRLF, nil, [:seq, [:opt, :CR], :LF], kind: :terminal), 10 | EBNF::Rule.new(:CTL, nil, [:alt, [:range, "#x00-#x1F"], [:hex, "#x7F"]], kind: :terminal), 11 | EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal), 12 | EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal), 13 | EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, 
[:range, "A-F"]], kind: :terminal), 14 | EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal), 15 | EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal), 16 | EBNF::Rule.new(:LWSP, nil, [:star, [:alt, :WSP, [:seq, :CRLF, :WSP]]], kind: :terminal), 17 | EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal), 18 | EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal), 19 | EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal), 20 | EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal), 21 | ] 22 | end 23 | 24 | -------------------------------------------------------------------------------- /examples/abnf/core.rb: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by ebnf version 2.0.0 2 | # Derived from ../../etc/abnf-core.ebnf 3 | module ABNFCore 4 | RULES = [ 5 | EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), 6 | EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal), 7 | EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal), 8 | EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal), 9 | EBNF::Rule.new(:CRLF, nil, [:seq, [:opt, :CR], :LF], kind: :terminal), 10 | EBNF::Rule.new(:CTL, nil, [:alt, [:range, "#x00-#x1F"], [:hex, "#x7F"]], kind: :terminal), 11 | EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal), 12 | EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal), 13 | EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, [:range, "A-F"]], kind: :terminal), 14 | EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal), 15 | EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal), 16 | EBNF::Rule.new(:LWSP, nil, [:star, [:alt, :WSP, [:seq, :CRLF, :WSP]]], kind: :terminal), 17 | EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal), 18 | EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal), 19 | EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], 
kind: :terminal), 20 | EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal), 21 | ] 22 | end 23 | 24 | -------------------------------------------------------------------------------- /examples/abnf/parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # parse --- Process EBNF to generate AST S-Expression 3 | 4 | $:.unshift(File.expand_path("../../../lib", __FILE__)) 5 | $:.unshift(File.expand_path("..", __FILE__)) 6 | require 'rubygems' 7 | require 'getoptlong' 8 | require 'parser' 9 | require 'sxp' 10 | 11 | out = STDOUT 12 | 13 | OPT_ARGS = [ 14 | ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], 15 | ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], 16 | ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] 17 | ] 18 | def usage 19 | require 'ebnf' 20 | STDERR.puts %{#{$0} Version #{EBNF::VERSION}} 21 | STDERR.puts %{Usage: #{$0} [options] file ...} 22 | width = OPT_ARGS.map do |o| 23 | l = o.first.length 24 | l += o[1].length + 2 if o[1].is_a?(String) 25 | l 26 | end.max 27 | OPT_ARGS.each do |o| 28 | s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] 29 | s += o.last 30 | STDERR.puts s 31 | end 32 | exit(1) 33 | end 34 | 35 | options = {} 36 | input = nil 37 | 38 | opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) 39 | 40 | opts.each do |opt, arg| 41 | case opt 42 | when '--evaluate' then input = arg 43 | when '--trace' then options[:level] = arg.to_i 44 | when '--help' then usage 45 | end 46 | end 47 | 48 | input = File.open(ARGV[0]) if ARGV[0] 49 | 50 | # Collect rules 51 | ebnf = ABNFParser.new(input || STDIN, **options) 52 | 53 | puts ebnf.to_sxp 54 | -------------------------------------------------------------------------------- /examples/isoebnf/parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # parse --- Process EBNF to generate AST S-Expression 3 | 4 | $:.unshift(File.expand_path("../../../lib", __FILE__)) 5 | $:.unshift(File.expand_path("..", __FILE__)) 6 | require 'rubygems' 7 | require 'getoptlong' 8 | require 'parser' 9 | require 'sxp' 10 | 11 | out = STDOUT 12 | 13 | OPT_ARGS = [ 14 | ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], 15 | ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], 16 | ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] 17 | ] 18 | def usage 19 | require 'ebnf' 20 | STDERR.puts %{#{$0} Version #{EBNF::VERSION}} 21 | STDERR.puts %{Usage: #{$0} [options] file ...} 22 | width = OPT_ARGS.map do |o| 23 | l = o.first.length 24 | l += o[1].length + 2 if o[1].is_a?(String) 25 | l 26 | end.max 27 | OPT_ARGS.each do |o| 28 | s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] 29 | s += o.last 30 | STDERR.puts s 31 | end 32 | exit(1) 33 | end 34 | 35 | options = {} 36 | input = nil 37 | 38 | opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) 39 | 40 | opts.each do |opt, arg| 41 | case opt 42 | when '--evaluate' then input = arg 43 | when '--trace' then options[:level] = arg.to_i 44 | when '--help' then usage 45 | end 46 | end 47 | 48 | input = File.open(ARGV[0]) if ARGV[0] 49 | 50 | # Collect rules 51 | ebnf = ISOEBNFPegParser.new(input || STDIN, **options) 52 | 53 | puts ebnf.to_sxp 54 | -------------------------------------------------------------------------------- /examples/ebnf-ll1-parser/parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # parse --- Process EBNF to generate AST S-Expression 3 | 4 | $:.unshift(File.expand_path("../../../lib", __FILE__)) 5 | $:.unshift(File.expand_path("..", __FILE__)) 6 | require 'rubygems' 7 | require 'getoptlong' 8 | require 'parser' 9 | require 'sxp' 10 | 11 | out = STDOUT 12 | 13 | OPT_ARGS = [ 14 | ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], 15 | ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], 16 | ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] 17 | ] 18 | def usage 19 | require 'ebnf' 20 | STDERR.puts %{#{$0} Version #{::EBNF::VERSION}} 21 | STDERR.puts %{Usage: #{$0} [options] file ...} 22 | width = OPT_ARGS.map do |o| 23 | l = o.first.length 24 | l += o[1].length + 2 if o[1].is_a?(String) 25 | l 26 | end.max 27 | OPT_ARGS.each do |o| 28 | s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] 29 | s += o.last 30 | STDERR.puts s 31 | end 32 | exit(1) 33 | end 34 | 35 | options = {} 36 | input = nil 37 | 38 | opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) 39 | 40 | opts.each do |opt, arg| 41 | case opt 42 | when '--evaluate' then input = arg 43 | when '--trace' then options[:level] = arg.to_i 44 | when '--help' then usage 45 | end 46 | end 47 | 48 | input = File.open(ARGV[0]) if ARGV[0] 49 | 50 | # Collect rules 51 | ebnf = EBNFLL1Parser.new(input || STDIN, **options) 52 | 53 | puts ebnf.to_sxp 54 | -------------------------------------------------------------------------------- /examples/ebnf-peg-parser/parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # parse --- Process EBNF to generate AST S-Expression 3 | 4 | $:.unshift(File.expand_path("../../../lib", __FILE__)) 5 | $:.unshift(File.expand_path("..", __FILE__)) 6 | require 'rubygems' 7 | require 'getoptlong' 8 | require 'parser' 9 | require 'sxp' 10 | 11 | out = STDOUT 12 | 13 | OPT_ARGS = [ 14 | ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], 15 | ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], 16 | ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] 17 | ] 18 | def usage 19 | require 'ebnf' 20 | STDERR.puts %{#{$0} Version #{EBNF::VERSION}} 21 | STDERR.puts %{Usage: #{$0} [options] file ...} 22 | width = OPT_ARGS.map do |o| 23 | l = o.first.length 24 | l += o[1].length + 2 if o[1].is_a?(String) 25 | l 26 | end.max 27 | OPT_ARGS.each do |o| 28 | s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] 29 | s += o.last 30 | STDERR.puts s 31 | end 32 | exit(1) 33 | end 34 | 35 | options = {} 36 | input = nil 37 | 38 | opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) 39 | 40 | opts.each do |opt, arg| 41 | case opt 42 | when '--evaluate' then input = arg 43 | when '--trace' then options[:level] = arg.to_i 44 | when '--help' then usage 45 | end 46 | end 47 | 48 | input = File.open(ARGV[0]) if ARGV[0] 49 | 50 | # Collect rules 51 | ebnf = EBNFPegParser.new(input || STDIN, **options) 52 | 53 | puts ebnf.to_sxp 54 | -------------------------------------------------------------------------------- /examples/calc/calc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # parse --- Process EBNF to generate AST S-Expression 3 | 4 | $:.unshift(File.expand_path("../../../lib", __FILE__)) 5 | $:.unshift(File.expand_path("..", __FILE__)) 6 | require 'rubygems' 7 | require 'getoptlong' 8 | require 'calc' 9 | 10 | out = STDOUT 11 | 12 | OPT_ARGS = [ 13 | ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], 14 | ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], 15 | ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] 16 | ] 17 | def usage 18 | require 'ebnf' 19 | STDERR.puts %{#{$0} Version #{EBNF::VERSION}} 20 | STDERR.puts %{Usage: #{$0} [options] file ...} 21 | width = OPT_ARGS.map do |o| 22 | l = o.first.length 23 | l += o[1].length + 2 if o[1].is_a?(String) 24 | l 25 | end.max 26 | OPT_ARGS.each do |o| 27 | s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] 28 | s += o.last 29 | STDERR.puts s 30 | end 31 | exit(1) 32 | end 33 | 34 | options = {} 35 | input = nil 36 | 37 | opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) 38 | 39 | opts.each do |opt, arg| 40 | case opt 41 | when '--evaluate' then input = arg 42 | when '--trace' then options[:trace] = arg.to_i 43 | when '--help' then usage 44 | end 45 | end 46 | 47 | input = File.open(ARGV[1]) if ARGV[1] 48 | 49 | # Collect rules 50 | calculator = Calc.new(**options) 51 | 52 | # Evaluate the input 53 | puts calculator.evaluate(input.respond_to?(:read) ? input.read : input) 54 | -------------------------------------------------------------------------------- /examples/isoebnf/examples/html.isoebnf: -------------------------------------------------------------------------------- 1 | (* from https://tomassetti.me/ebnf/ *) 2 | htmlDocument 3 | = {scriptlet | SEA_WS}, [xml], {scriptlet | SEA_WS}, [dtd], {scriptlet | SEA_WS}, {htmlElements} 4 | ; 5 | 6 | htmlElements 7 | : {htmlMisc}, htmlElement, {htmlMisc} 8 | ; 9 | 10 | htmlElement 11 | : TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE, htmlContent, TAG_OPEN, TAG_SLASH, htmlTagName, TAG_CLOSE 12 | | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_SLASH_CLOSE 13 | | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE 14 | | scriptlet 15 | | script 16 | | style 17 | ; 18 | 19 | htmlContent 20 | : [htmlChardata], {(htmlElement | xhtmlCDATA | htmlComment), [htmlChardata]} 21 | ; 22 | 23 | htmlAttribute 24 | : htmlAttributeName, TAG_EQUALS, htmlAttributeValue 25 | | htmlAttributeName 26 | ; 27 | 28 | htmlAttributeName 29 | : TAG_NAME 30 | ; 31 | 32 | htmlAttributeValue 33 | : ATTVALUE_VALUE 34 | ; 35 | 36 | htmlTagName 37 | : TAG_NAME 38 | ; 39 | 40 | htmlChardata 41 | : HTML_TEXT 42 | | SEA_WS 43 | ; 44 | 45 | htmlMisc 46 | : htmlComment 47 | | SEA_WS 48 | ; 49 | 50 | htmlComment 51 | : HTML_COMMENT 52 | | HTML_CONDITIONAL_COMMENT 53 | ; 54 | 55 | xhtmlCDATA 56 | : CDATA 57 | ; 58 | 59 | dtd 60 | 
: DTD 61 | ; 62 | 63 | xml 64 | : XML_DECLARATION 65 | ; 66 | 67 | scriptlet 68 | : SCRIPTLET 69 | ; 70 | 71 | script 72 | : SCRIPT_OPEN, ( SCRIPT_BODY | SCRIPT_SHORT_BODY) 73 | ; 74 | 75 | style 76 | : STYLE_OPEN, ( STYLE_BODY | STYLE_SHORT_BODY) 77 | ; -------------------------------------------------------------------------------- /etc/abnf-core.ebnf: -------------------------------------------------------------------------------- 1 | # Core terminals available in uses of ABNF 2 | ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z 3 | 4 | BIT ::= '0' | '1' 5 | 6 | CHAR ::= [#x01-#x7F] 7 | # any 7-bit US-ASCII character, 8 | # excluding NUL 9 | CR ::= #x0D 10 | # carriage return 11 | 12 | CRLF ::= CR? LF 13 | # Internet standard newline 14 | 15 | CTL ::= [#x00-#x1F] | #x7F 16 | # controls 17 | 18 | DIGIT ::= [#x30-#x39] 19 | # 0-9 20 | 21 | DQUOTE ::= #x22 22 | # " (Double Quote) 23 | 24 | HEXDIG ::= DIGIT | [A-F] # [0-9A-F] 25 | 26 | HTAB ::= #x09 27 | # horizontal tab 28 | 29 | LF ::= #x0A 30 | # linefeed 31 | 32 | LWSP ::= (WSP | CRLF WSP)* 33 | # Use of this linear-white-space rule 34 | # permits lines containing only white 35 | # space that are no longer legal in 36 | # mail headers and have caused 37 | # interoperability problems in other 38 | # contexts. 39 | # Do not use when defining mail 40 | # headers and use with caution in 41 | # other contexts. 
42 | 43 | OCTET ::= [#x00-#xFF] 44 | # 8 bits of data 45 | 46 | SP ::= #x20 47 | 48 | VCHAR ::= [#x21-#x7E] 49 | # visible (printing) characters 50 | 51 | WSP ::= SP | HTAB 52 | # white space 53 | -------------------------------------------------------------------------------- /examples/JSON/JSON.ebnf: -------------------------------------------------------------------------------- 1 | quoted_string ::= quote ( unescaped_char | escaped_char )+ quote 2 | 3 | escaped_char ::= escape ( '"' | "/" | "b" | "f" | "n" | "r" | "t" | unicode | escape ) 4 | escaped_literal ::= escaped_char | escape "`" 5 | unescaped_char ::= digit | letter | " " | "!" | "#" | "$" | "%" | "&" | "(" | ")" | "*+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "?" | "@" | "[" | "]" | "^" | "_" | "`" | "{" | "|" | "}" | "~" 6 | unescaped_literal ::= digit | letter | " " | "!" | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "?" | "@" | "[" | "]" | "^" | "_" | "{" | "|" | "}" | "~" 7 | 8 | unicode ::= "u" digit digit digit digit 9 | 10 | escape ::= "\" 11 | 12 | digit ::= [0-9] 13 | 14 | letter ::= [A-Z] | [a-z] | "_" 15 | quote ::= '"' 16 | 17 | /* The ``json-value`` is any valid JSON value with the one exception that the */ 18 | /* ``%x60`` character must be escaped. While it's encouraged that implementations */ 19 | /* use any existing JSON parser for this grammar rule (after handling the escaped */ 20 | /* literal characters), the grammar rule is shown below for completeness:: */ 21 | 22 | json_value ::= json_array 23 | | json_boolean 24 | | json_null 25 | | json_number 26 | | json_object 27 | | json_string 28 | 29 | json_null ::= "null" 30 | json_boolean ::= "true" | "false" 31 | json_number ::= "-"? ( "0" | [1-9] [0-9]* ) ( "." [0-9]+ )? ( "e" ( "-" | "+" ) [0-9]+ )? 32 | json_array ::= ws "[" ( ws json_value ws ( "," ws json_value ws )* )? "]" ws 33 | json_object ::= ws "{" ws ( member ws ( "," ws member ws )* )? 
"}" ws 34 | json_string ::= quote ( unescaped_literal | escaped_literal )* quote 35 | 36 | member ::= quoted_string ws ":" ws json_value 37 | ws ::= " "* 38 | -------------------------------------------------------------------------------- /.github/workflows/generate-docs.yml: -------------------------------------------------------------------------------- 1 | name: Build & deploy documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | workflow_dispatch: 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | name: Update gh-pages with docs 11 | steps: 12 | - name: Clone repository 13 | uses: actions/checkout@v3 14 | - name: Set up Ruby 15 | uses: ruby/setup-ruby@v1 16 | with: 17 | ruby-version: "3.1" 18 | - name: Install required gem dependencies 19 | run: gem install yard --no-document 20 | - name: Build YARD Ruby Documentation 21 | run: yardoc 22 | - name: Copy etc files 23 | run: mkdir -p ./doc/yard/etc && cp ./etc/doap.* ./etc/ebnf.* ./doc/yard/etc/ 24 | - name: Copy calc example 25 | run: mkdir -p ./doc/yard/examples/calc/doc && cp ./examples/calc/doc/calc.html ./doc/yard/examples/calc/doc 26 | - name: Copy PEG example 27 | run: mkdir -p ./doc/yard/examples/ebnf-peg-parser/doc && cp ./examples/ebnf-peg-parser/doc/parser.html ./doc/yard/examples/ebnf-peg-parser/doc 28 | - name: Copy LL(1) example 29 | run: mkdir -p ./doc/yard/examples/ebnf-ll1-parser/doc && cp ./examples/ebnf-ll1-parser/doc/parser.html ./doc/yard/examples/ebnf-ll1-parser/doc 30 | - name: Copy ISOEBNF example 31 | run: mkdir -p ./doc/yard/examples/isoebnf/doc && cp ./examples/isoebnf/doc/parser.html ./doc/yard/examples/isoebnf/doc 32 | - name: Copy ABNF example 33 | run: mkdir -p ./doc/yard/examples/abnf/doc && cp ./examples/abnf/doc/parser.html ./doc/yard/examples/abnf/doc 34 | - name: Deploy 35 | uses: peaceiris/actions-gh-pages@v3 36 | with: 37 | github_token: ${{ secrets.GITHUB_TOKEN }} 38 | publish_dir: ./doc/yard 39 | publish_branch: gh-pages 40 | 
-------------------------------------------------------------------------------- /examples/JSON/JSON-ab.ebnf: -------------------------------------------------------------------------------- 1 | ::= ( | )+ 2 | 3 | ::= ( '"' | "/" | "b" | "f" | "n" | "r" | "t" | | ) 4 | ::= | "`" 5 | ::= | | " " | "!" | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "?" | "@" | "[" | "]" | "^" | "_" | "`" | "{" | "|" | "}" | "~" 6 | ::= | | " " | "!" | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "?" | "@" | "[" | "]" | "^" | "_" | "{" | "|" | "}" | "~" 7 | 8 | ::= "u" 9 | 10 | ::= "\" 11 | ::= [0-9] 12 | ::= [A-Z] | [a-z] | "_" 13 | ::= '"' 14 | 15 | /* The ``json-value`` is any valid JSON value with the one exception that the */ 16 | /* ``%x60`` character must be escaped. While it's encouraged that implementations */ 17 | /* use any existing JSON parser for this grammar rule (after handling the escaped */ 18 | /* literal characters), the grammar rule is shown below for completeness:: */ 19 | 20 | ::= 21 | | 22 | | 23 | | 24 | | 25 | | 26 | 27 | ::= "null" 28 | ::= "true" | "false" 29 | ::= "-"? ( "0" | [1-9] [0-9]* ) ( "." [0-9]+ )? ( "e" ( "-" | "+" ) [0-9]+ )? 30 | ::= "[" ( ( "," )* )? "]" 31 | ::= "{" ( ( "," )* )? 
"}" 32 | ::= ( | )* 33 | 34 | ::= ":" 35 | ::= " "* 36 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $:.unshift(File.expand_path("../../lib", __FILE__)) 2 | $:.unshift File.dirname(__FILE__) 3 | 4 | require 'bundler/setup' 5 | require 'amazing_print' 6 | require 'rdf/spec' 7 | require 'rdf/spec/matchers' 8 | require 'rspec' 9 | require 'rspec/matchers' 10 | require 'rspec/its' 11 | require 'matchers' 12 | 13 | begin 14 | require 'simplecov' 15 | require 'simplecov-lcov' 16 | 17 | SimpleCov::Formatter::LcovFormatter.config do |config| 18 | #Coveralls is coverage by default/lcov. Send info results 19 | config.report_with_single_file = true 20 | config.single_report_path = 'coverage/lcov.info' 21 | end 22 | 23 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([ 24 | SimpleCov::Formatter::HTMLFormatter, 25 | SimpleCov::Formatter::LcovFormatter 26 | ]) 27 | SimpleCov.start do 28 | add_filter "/spec/" 29 | end 30 | rescue LoadError => e 31 | STDERR.puts "Coverage Skipped: #{e.message}" 32 | end 33 | 34 | require 'ebnf' 35 | 36 | ::RSpec.configure do |c| 37 | c.filter_run focus: true 38 | c.run_all_when_everything_filtered = true 39 | c.filter_run_excluding ruby: ->(version) do 40 | case version.to_s 41 | when "!jruby" 42 | RUBY_ENGINE == "jruby" 43 | when /^> (.*)/ 44 | !(RUBY_VERSION.to_s > $1) 45 | else 46 | !(RUBY_VERSION.to_s =~ /^#{version.to_s}/) 47 | end 48 | end 49 | end 50 | 51 | RSpec::Matchers.define :be_valid_html do 52 | match do |actual| 53 | return true unless Nokogiri.const_defined?(:HTML5) 54 | root = Nokogiri::HTML5("" + actual, max_parse_errors: 1000) 55 | @errors = Array(root && root.errors.map(&:to_s)) 56 | @errors.empty? 
57 | end 58 | 59 | failure_message do |actual| 60 | "expected no errors, was #{@errors.join("\n")}\n" + actual 61 | end 62 | end 63 | 64 | PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)), format: :native).freeze -------------------------------------------------------------------------------- /etc/ebnf.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule ebnf "1" (star (alt declaration rule))) 3 | (rule declaration "2" (alt '@terminals' pass)) 4 | (rule rule "3" (seq LHS expression)) 5 | (rule expression "4" (seq alt)) 6 | (rule alt "5" (seq seq (star (seq '|' seq)))) 7 | (rule seq "6" (plus diff)) 8 | (rule diff "7" (seq postfix (opt (seq '-' postfix)))) 9 | (rule postfix "8" (seq primary (opt POSTFIX))) 10 | (rule primary "9" 11 | (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 (seq '(' expression ')'))) 12 | (rule pass "10" (seq '@pass' expression)) 13 | (terminals _terminals (seq)) 14 | (terminal LHS "11" (seq (opt (seq '[' SYMBOL ']' (plus ' '))) SYMBOL (star ' ') '::=')) 15 | (terminal SYMBOL "12" (alt (seq '<' O_SYMBOL '>') O_SYMBOL)) 16 | (terminal O_SYMBOL "12a" (plus (alt (range "a-z") (range "A-Z") (range "0-9") '_' '.'))) 17 | (terminal HEX "13" (seq '#x' (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) 18 | (terminal RANGE "14" 19 | (seq '[' (plus (alt (seq R_CHAR '-' R_CHAR) (seq HEX '-' HEX) R_CHAR HEX)) (opt '-') ']')) 20 | (terminal O_RANGE "15" 21 | (seq '[^' (plus (alt (seq R_CHAR '-' R_CHAR) (seq HEX '-' HEX) R_CHAR HEX)) (opt '-') ']')) 22 | (terminal STRING1 "16" (seq '"' (star (diff CHAR '"')) '"')) 23 | (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'")) 24 | (terminal CHAR "18" 25 | (alt 26 | (range "#x9#xA#xD") 27 | (range "#x20-#xD7FF") 28 | (range "#xE000-#xFFFD") 29 | (range "#x10000-#x10FFFF")) ) 30 | (terminal R_CHAR "19" (diff CHAR (alt ']' '-' HEX))) 31 | (terminal POSTFIX "20" (range "?*+")) 32 | (terminal PASS "21" 33 | 
(alt 34 | (range "#x9#xA#xD#x20") 35 | (seq (alt (diff '#' '#x') '//') (star (range "^#xA#xD"))) 36 | (seq '/*' (star (alt (opt (seq '*' (range "^/"))) (range "^*"))) '*/') 37 | (seq '(*' (star (alt (opt (seq '*' (range "^)"))) (range "^*"))) '*)')) ) 38 | (pass _pass (seq PASS))) 39 | -------------------------------------------------------------------------------- /examples/abnf/examples/json.abnf: -------------------------------------------------------------------------------- 1 | JSON-text = ws value ws 2 | 3 | begin-array = ws %x5B ws ; [ left square bracket 4 | 5 | begin-object = ws %x7B ws ; { left curly bracket 6 | 7 | end-array = ws %x5D ws ; ] right square bracket 8 | 9 | end-object = ws %x7D ws ; } right curly bracket 10 | 11 | name-separator = ws %x3A ws ; : colon 12 | 13 | value-separator = ws %x2C ws ; , comma 14 | 15 | ws = *( 16 | %x20 / ; Space 17 | %x09 / ; Horizontal tab 18 | %x0A / ; Line feed or New line 19 | %x0D ) ; Carriage return 20 | 21 | value = false / null / true / object / array / number / string 22 | 23 | false = %x66.61.6c.73.65 ; false 24 | 25 | null = %x6e.75.6c.6c ; null 26 | 27 | true = %x74.72.75.65 ; true 28 | 29 | object = begin-object [ member *( value-separator member ) ] 30 | end-object 31 | 32 | member = string name-separator value 33 | 34 | array = begin-array [ value *( value-separator value ) ] end-array 35 | 36 | number = [ minus ] int [ frac ] [ exp ] 37 | 38 | decimal-point = %x2E ; . 
39 | 40 | digit1-9 = %x31-39 ; 1-9 41 | 42 | e = %x65 / %x45 ; e E 43 | 44 | exp = e [ minus / plus ] 1*DIGIT 45 | 46 | frac = decimal-point 1*DIGIT 47 | int = zero / ( digit1-9 *DIGIT ) 48 | 49 | minus = %x2D ; - 50 | 51 | plus = %x2B ; + 52 | 53 | zero = %x30 ; 0 54 | string = quotation-mark *char quotation-mark 55 | 56 | char = unescaped / 57 | escape ( 58 | %x22 / ; " quotation mark U+0022 59 | %x5C / ; \ reverse solidus U+005C 60 | %x2F / ; / solidus U+002F 61 | %x62 / ; b backspace U+0008 62 | %x66 / ; f form feed U+000C 63 | %x6E / ; n line feed U+000A 64 | %x72 / ; r carriage return U+000D 65 | %x74 / ; t tab U+0009 66 | %x75 4HEXDIG ) ; uXXXX U+XXXX 67 | 68 | escape = %x5C ; \ 69 | 70 | quotation-mark = %x22 ; " 71 | 72 | unescaped = %x20-21 / %x23-5B / %x5D-10FFFF 73 | -------------------------------------------------------------------------------- /etc/ebnf.ebnf: -------------------------------------------------------------------------------- 1 | /* An EBNF grammar for EBNF */ 2 | [1] ebnf ::= (declaration | rule)* 3 | 4 | [2] declaration ::= '@terminals' | pass 5 | 6 | # Use the LHS terminal to match the identifier, rule name and assignment due to 7 | # confusion between the identifier and RANGE. 8 | # The PEG parser has special rules for matching LHS and RANGE 9 | # so that RANGE is not confused with LHS. 10 | [3] rule ::= LHS expression 11 | 12 | [4] expression ::= alt 13 | 14 | [5] alt ::= seq ('|' seq)* 15 | 16 | [6] seq ::= diff+ 17 | 18 | [7] diff ::= postfix ('-' postfix)? 19 | 20 | [8] postfix ::= primary POSTFIX? 21 | 22 | [9] primary ::= HEX 23 | | SYMBOL 24 | | O_RANGE 25 | | RANGE 26 | | STRING1 27 | | STRING2 28 | | '(' expression ')' 29 | 30 | [10] pass ::= '@pass' expression 31 | 32 | @terminals 33 | 34 | [11] LHS ::= ('[' SYMBOL ']' ' '+)? 
SYMBOL ' '* '::=' 35 | 36 | [12] SYMBOL ::= '<' O_SYMBOL '>' | O_SYMBOL 37 | 38 | [12a] O_SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+ 39 | 40 | [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+ 41 | 42 | [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' 43 | 44 | [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' 45 | 46 | # Strings are unescaped Unicode, excepting control characters and hash (#) 47 | [16] STRING1 ::= '"' (CHAR - '"')* '"' 48 | 49 | [17] STRING2 ::= "'" (CHAR - "'")* "'" 50 | 51 | [18] CHAR ::= [#x9#xA#xD] | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] 52 | 53 | [19] R_CHAR ::= CHAR - (']' | '-' | HEX) 54 | 55 | [20] POSTFIX ::= [?*+] 56 | 57 | # Ignore all whitespace and comments between non-terminals 58 | [21] PASS ::= [#x9#xA#xD#x20] 59 | | ( ('#' - '#x') | '//' ) [^#xA#xD]* 60 | | '/*' (( '*' [^/] )? | [^*] )* '*/' 61 | | '(*' (( '*' [^)] )? | [^*] )* '*)' 62 | 63 | @pass PASS 64 | -------------------------------------------------------------------------------- /etc/doap.ttl: -------------------------------------------------------------------------------- 1 | @base . 2 | @prefix rdf: . 3 | @prefix rdfs: . 4 | @prefix dc: . 5 | @prefix earl: . 6 | @prefix foaf: . 7 | @prefix doap: . 8 | @prefix ex: . 9 | @prefix xsd: . 10 | 11 | <> a doap:Project, earl:TestSubject, earl:Software ; 12 | doap:name "ebnf" ; 13 | doap:homepage ; 14 | doap:license ; 15 | doap:shortdesc "EBNF parser and parser generator in Ruby."@en ; 16 | doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). 
Also includes parsing modes for ISO EBNF and ABNF."@en ; 17 | doap:created "2011-08-29"^^xsd:date ; 18 | doap:programming-language "Ruby" ; 19 | doap:implements , 20 | , 21 | , 22 | , 23 | , 24 | , 25 | , 26 | ; 27 | doap:category , 28 | ; 29 | doap:download-page <> ; 30 | doap:mailing-list ; 31 | doap:bug-database ; 32 | doap:blog ; 33 | doap:developer ; 34 | doap:maintainer ; 35 | doap:documenter ; 36 | foaf:maker ; 37 | dc:creator . 38 | -------------------------------------------------------------------------------- /examples/calc/.byebug_history: -------------------------------------------------------------------------------- 1 | c 2 | data 3 | c 4 | data 5 | c 6 | data 7 | context 8 | s 9 | context 10 | c 11 | context 12 | content 13 | data 14 | s 15 | c 16 | n 17 | value 18 | c 19 | input 20 | ARGV 21 | input 22 | exit 23 | ARGV[0] 24 | input 25 | c 26 | val 27 | pow 28 | c 29 | rhs 30 | value 31 | c 32 | value 33 | exit 34 | value 35 | exit 36 | value 37 | c 38 | value 39 | c 40 | operations 41 | exit 42 | value 43 | operations 44 | product 45 | c 46 | operations 47 | n 48 | c 49 | operations 50 | c 51 | product 52 | n 53 | c 54 | value 55 | product 56 | operations 57 | n 58 | c 59 | value 60 | exit 61 | 11*22/3 62 | value 63 | operations.inject(power) {|accumulator, vv| accumulator.send(*vv)} 64 | operations.inject(power) {|accumulator, vv| accumulator.send(*vv)}} 65 | operations 66 | operations.inject(power) {|op, val| power.send(op.to_sym, val)} 67 | operations 68 | c 69 | operations 70 | value 71 | exit 72 | operations = value.last[:_Product_1] 73 | power 74 | value 75 | c 76 | value 77 | exit 78 | c 79 | value 80 | value.map(&:values).flatten 81 | value.map(&:values).flattent 82 | value.map(&:values).compact 83 | value.values.compact 84 | value 85 | c 86 | value 87 | c 88 | value 89 | rhs 90 | c 91 | rhs 92 | exit 93 | c 94 | rhs 95 | c 96 | rhs 97 | value 98 | c 99 | value 100 | c 101 | value 102 | c 103 | value 104 | c 105 | value 106 | c 107 | value 
108 | c 109 | value 110 | c 111 | value 112 | c 113 | value 114 | c 115 | value 116 | c 117 | value 118 | c 119 | value 120 | c 121 | value 122 | c 123 | value 124 | vale 125 | c 126 | exit 127 | c 128 | value 129 | c 130 | value 131 | c 132 | value 133 | c 134 | value 135 | c 136 | value 137 | c 138 | value 139 | c 140 | value 141 | c 142 | value 143 | c 144 | pow 145 | n 146 | c 147 | n 148 | c 149 | rhs 150 | n 151 | c 152 | value 153 | exit 154 | 10.pow(2) 155 | 10.pow(1) 156 | 10.pow(10) 157 | 1.pow(10) 158 | Math.exponent 159 | Math.power 160 | Math.public_methods 161 | Math 162 | Math. 163 | value 164 | c 165 | value 166 | c 167 | value 168 | c 169 | value 170 | exit 171 | value 172 | c 173 | value 174 | exit 175 | c 176 | value 177 | c 178 | value 179 | c 180 | value 181 | c 182 | value 183 | c 184 | value 185 | c 186 | value 187 | c 188 | value 189 | c 190 | value 191 | exit 192 | value 193 | c 194 | value 195 | c 196 | exit 197 | s 198 | input 199 | -------------------------------------------------------------------------------- /spec/unescape_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift ".." 
3 | require 'spec_helper' 4 | require 'ebnf' 5 | 6 | describe EBNF::Unescape do 7 | 8 | describe ".unescape_codepoints" do 9 | # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape 10 | 11 | it "unescapes \\uXXXX codepoint escape sequences" do 12 | inputs = { 13 | %q(\\u0020) => %q( ), 14 | %q() => %Q(), 15 | %q(\\u03B1:a) => %Q(\xCE\xB1:a), 16 | %q(a\\u003Ab) => %Q(a\x3Ab), 17 | } 18 | inputs.each do |input, output| 19 | expect(EBNF::Unescape.unescape_codepoints(input)).to eq output 20 | end 21 | end 22 | 23 | it "unescapes \\UXXXXXXXX codepoint escape sequences" do 24 | inputs = { 25 | %q(\\U00000020) => %q( ), 26 | %q(\\U00010000) => %Q(\xF0\x90\x80\x80), 27 | %q(\\U000EFFFF) => %Q(\xF3\xAF\xBF\xBF), 28 | } 29 | inputs.each do |input, output| 30 | expect(EBNF::Unescape.unescape_codepoints(input)).to eq output 31 | end 32 | end 33 | 34 | context "escaped strings" do 35 | { 36 | 'Dürst' => 'D\\u00FCrst', 37 | "é" => '\\u00E9', 38 | "€" => '\\u20AC', 39 | "resumé" => 'resum\\u00E9', 40 | }.each_pair do |unescaped, escaped| 41 | it "unescapes #{unescaped.inspect}" do 42 | expect(EBNF::Unescape.unescape_codepoints(escaped)).to eq unescaped 43 | end 44 | end 45 | end 46 | end 47 | 48 | describe ".unescape_string" do 49 | # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes 50 | 51 | context "escape sequences" do 52 | EBNF::Unescape::ESCAPE_CHARS.each do |escaped, unescaped| 53 | it "unescapes #{unescaped.inspect}" do 54 | expect(EBNF::Unescape.unescape_string(escaped)).to eq unescaped 55 | end 56 | end 57 | end 58 | 59 | context "escaped strings" do 60 | { 61 | 'simple literal' => 'simple literal', 62 | 'backslash:\\' => 'backslash:\\\\', 63 | 'dquote:"' => 'dquote:\\"', 64 | "newline:\n" => 'newline:\\n', 65 | "return\r" => 'return\\r', 66 | "tab:\t" => 'tab:\\t', 67 | }.each_pair do |unescaped, escaped| 68 | it "unescapes #{unescaped.inspect}" do 69 | expect(EBNF::Unescape.unescape_string(escaped)).to eq unescaped 70 | end 71 | end 72 | end 73 | 
end 74 | end -------------------------------------------------------------------------------- /examples/abnf/abnf.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) 3 | (rule rule (seq rulename defined_as elements c_nl)) 4 | (rule elements (seq alternation (star c_wsp))) 5 | (rule alternation 6 | (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) 7 | (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) 8 | (rule repetition (seq (opt repeat) element)) 9 | (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) 10 | (rule element (alt rulename group option char_val num_val prose_val)) 11 | (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) 12 | (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) 13 | (rule char_val (alt case_insensitive_string case_sensitive_string)) 14 | (rule case_insensitive_string (seq (opt "%i") quoted_string)) 15 | (rule case_sensitive_string (seq "%s" quoted_string)) 16 | (rule num_val (seq "%" (alt bin_val dec_val hex_val))) 17 | (terminals _terminals (seq)) 18 | (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) 19 | (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) 20 | (terminal c_wsp (alt WSP (seq c_nl WSP))) 21 | (terminal c_nl (alt COMMENT CRLF)) 22 | (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) 23 | (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) 24 | (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) 25 | (terminal dec_val 26 | (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) 27 | (terminal hex_val 28 | (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) 29 | (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) 30 | (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) 31 | (terminal BIT (alt "0" "1")) 32 | (terminal CHAR (range "#x01-#x7F")) 33 | (terminal CR (hex "#x0D")) 34 | (terminal CRLF (seq (opt CR) LF)) 35 | (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) 36 | (terminal DIGIT (range "#x30-#x39")) 37 | (terminal DQUOTE (hex "#x22")) 38 | (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) 39 | (terminal HTAB (hex "#x09")) 40 | (terminal LF (hex "#x0A")) 41 | (terminal LWSP (star (alt WSP (seq CRLF WSP)))) 42 | (terminal OCTET (range "#x00-#xFF")) 43 | (terminal SP (hex "#x20")) 44 | (terminal VCHAR (range "#x21-#x7E")) 45 | (terminal WSP (alt SP HTAB))) 46 | -------------------------------------------------------------------------------- /lib/ebnf/unescape.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Unescape strings 3 | module EBNF::Unescape 4 | ESCAPE_CHARS = { 5 | '\\t' => "\t", # \u0009 (tab) 6 | '\\n' => "\n", # \u000A (line feed) 7 | '\\r' => "\r", # \u000D (carriage return) 8 | '\\b' => "\b", # \u0008 (backspace) 9 | '\\f' => "\f", # \u000C (form feed) 10 | '\\"' => '"', # \u0022 (quotation mark, double quote mark) 11 | "\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark) 12 | '\\\\' => '\\' # \u005C (backslash) 13 | }.freeze 14 | ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX 15 | ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX 16 | ECHAR = /\\./u.freeze # More liberal unescaping 17 | UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze 18 | 19 | ## 20 | # Returns a copy of the given `input` string with all `\uXXXX` and 21 | # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their 22 | # unescaped UTF-8 character counterparts. 
23 | # 24 | # @param [String] string 25 | # @return [String] 26 | # @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape 27 | def unescape_codepoints(string) 28 | string = string.dup 29 | string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding) 30 | 31 | # Decode \uXXXX and \UXXXXXXXX code points: 32 | string = string.gsub(UCHAR) do |c| 33 | s = [(c[2..-1]).hex].pack('U*') 34 | s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s 35 | end 36 | 37 | string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) 38 | string 39 | end 40 | module_function :unescape_codepoints 41 | 42 | ## 43 | # Returns a copy of the given `input` string with all string escape 44 | # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8 45 | # character counterparts. 46 | # 47 | # @param [String] input 48 | # @return [String] 49 | # @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes 50 | def unescape_string(input) 51 | input.gsub(ECHAR) {|escaped| ESCAPE_CHARS[escaped] || escaped} 52 | end 53 | module_function :unescape_string 54 | 55 | # Perform string and codepoint unescaping if defined for this terminal 56 | # @param [String] string 57 | # @return [String] 58 | def unescape(string) 59 | unescape_string(unescape_codepoints(string)) 60 | end 61 | module_function :unescape 62 | end -------------------------------------------------------------------------------- /ebnf.gemspec: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -rubygems 2 | # -*- encoding: utf-8 -*- 3 | 4 | Gem::Specification.new do |gem| 5 | gem.version = File.read('VERSION').chomp 6 | gem.date = File.mtime('VERSION').strftime('%Y-%m-%d') 7 | 8 | gem.name = "ebnf" 9 | gem.homepage = "https://github.com/dryruby/ebnf" 10 | gem.license = 'Unlicense' 11 | gem.summary = "EBNF parser and parser generator in Ruby." 
12 | gem.description = %q{EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF.} 13 | gem.metadata = { 14 | "documentation_uri" => "https://dryruby.github.io/ebnf", 15 | "bug_tracker_uri" => "https://github.com/dryruby/ebnf/issues", 16 | "homepage_uri" => "https://github.com/dryruby/ebnf", 17 | "source_code_uri" => "https://github.com/dryruby/ebnf", 18 | } 19 | 20 | gem.authors = ['Gregg Kellogg'] 21 | gem.email = 'public-rdf-ruby@w3.org' 22 | 23 | gem.platform = Gem::Platform::RUBY 24 | gem.files = %w(AUTHORS CREDITS README.md UNLICENSE VERSION) + 25 | Dir.glob('lib/**/*.rb') + 26 | Dir.glob('etc/*') 27 | gem.bindir = %q(bin) 28 | gem.executables = %w(ebnf) 29 | gem.require_paths = %w(lib) 30 | 31 | gem.required_ruby_version = '>= 3.0' 32 | gem.requirements = [] 33 | gem.add_runtime_dependency 'sxp', '~> 2.0' 34 | gem.add_runtime_dependency 'scanf', '~> 1.0' 35 | gem.add_runtime_dependency 'rdf', '~> 3.3' # Required by sxp 36 | gem.add_runtime_dependency 'htmlentities', '~> 4.3' 37 | gem.add_runtime_dependency 'unicode-types', '~> 1.10' 38 | gem.add_runtime_dependency 'base64', '~> 0.2' 39 | gem.add_development_dependency 'amazing_print', '~> 1.6' 40 | gem.add_development_dependency 'rdf-spec', '~> 3.3' 41 | gem.add_development_dependency 'rdf-turtle', '~> 3.3' 42 | gem.add_development_dependency 'nokogiri', '~> 1.16' 43 | gem.add_development_dependency 'erubis', '~> 2.7' 44 | gem.add_development_dependency 'rspec', '~> 3.13' 45 | gem.add_development_dependency 'rspec-its', '~> 1.3' 46 | gem.add_development_dependency 'yard', '~> 0.9' 47 | gem.add_development_dependency 'rake', '~> 13.2' 48 | 49 | gem.post_install_message = nil 50 | end 51 | -------------------------------------------------------------------------------- /etc/abnf.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) 3 | (rule 
rule (seq rulename defined_as elements c_nl)) 4 | (rule elements (seq alternation (star c_wsp))) 5 | (rule alternation 6 | (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) 7 | (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) 8 | (rule repetition (seq (opt repeat) element)) 9 | (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) 10 | (rule element (alt rulename group option char_val num_val prose_val)) 11 | (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) 12 | (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) 13 | (rule char_val (alt case_insensitive_string case_sensitive_string)) 14 | (rule case_insensitive_string (seq (opt "%i") quoted_string)) 15 | (rule case_sensitive_string (seq "%s" quoted_string)) 16 | (rule num_val (seq "%" (alt bin_val dec_val hex_val))) 17 | (terminals _terminals (seq)) 18 | (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) 19 | (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) 20 | (terminal c_wsp (alt WSP (seq c_nl WSP))) 21 | (terminal c_nl (alt COMMENT CRLF)) 22 | (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) 23 | (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) 24 | (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) 25 | (terminal dec_val 26 | (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) 27 | (terminal hex_val 28 | (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) 29 | (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) 30 | (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) 31 | (terminal BIT (alt '0' '1')) 32 | (terminal CHAR (range "#x01-#x7F")) 33 | (terminal CR (hex "#x0D")) 34 | (terminal CRLF (seq (opt CR) LF)) 35 | (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) 36 | (terminal DIGIT (range "#x30-#x39")) 37 | (terminal DQUOTE (hex "#x22")) 38 | (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) 39 | (terminal HTAB (hex "#x09")) 40 | (terminal LF (hex "#x0A")) 41 | (terminal LWSP (star (alt WSP (seq CRLF WSP)))) 42 | (terminal OCTET (range "#x00-#xFF")) 43 | (terminal SP (hex "#x20")) 44 | (terminal VCHAR (range "#x21-#x7E")) 45 | (terminal WSP (alt SP HTAB))) 46 | -------------------------------------------------------------------------------- /spec/bnf_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | require 'sxp' 6 | 7 | describe EBNF::BNF do 8 | describe "#make_bnf" do 9 | { 10 | %{[2] Prolog ::= BaseDecl? 
PrefixDecl*} => 11 | %{((rule _empty "0" (seq)) 12 | (rule Prolog "2" (seq _Prolog_1 _Prolog_2)) 13 | (rule _Prolog_1 "2.1" (cleanup opt) (alt _empty BaseDecl)) 14 | (rule _Prolog_2 "2.2" (cleanup star) (alt _empty _Prolog_3)) 15 | (rule _Prolog_3 "2.3" (cleanup merge) (seq PrefixDecl _Prolog_2)))}, 16 | %{ 17 | [9] primary ::= HEX 18 | | RANGE 19 | | O_RANGE 20 | | STRING1 21 | | STRING2 22 | | '(' expression ')' 23 | 24 | } => 25 | %{((rule _empty "0" (seq)) 26 | (rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1 )) 27 | (rule _primary_1 "9.1" (seq '(' expression ')')))}, 28 | %{ 29 | [1] R1 ::= 1 2 30 | [2] R2 ::= 1 2 31 | } => 32 | %{((rule _empty "0" (seq)) 33 | (terminal R1 "1" (seq 1 2)) 34 | (terminal R2 "2" (seq 1 2)))} 35 | }.each do |input, expected| 36 | it "parses #{input.inspect}" do 37 | expect(parse(input).make_bnf.ast.to_sxp).to produce(expected, @debug) 38 | end 39 | end 40 | 41 | context "EBNF Grammar" do 42 | subject {parse(File.read(File.expand_path("../../etc/ebnf.ebnf", __FILE__))).make_bnf} 43 | it "rule expressions should be flat, terminal or alt/seq" do 44 | subject.ast.each do |rule| 45 | case 46 | when !rule.rule? then true 47 | when !rule.expr.is_a?(Array) then true 48 | else 49 | expect("#{rule.sym}: #{rule.expr.first}").to match(/#{rule.sym}: (alt|seq)/) 50 | end 51 | end 52 | end 53 | end 54 | 55 | context "Turtle Grammar" do 56 | subject {parse(File.read(File.expand_path("../../etc/turtle.ebnf", __FILE__))).make_bnf} 57 | it "rule expressions should be flat, terminal or alt/seq" do 58 | subject.ast.each do |rule| 59 | case 60 | when rule.terminal? 
then true 61 | when !rule.expr.is_a?(Array) then true 62 | else 63 | expect("#{rule.sym}: #{rule.expr.first}").to match(/#{rule.sym}: (alt|seq)/) 64 | end 65 | end 66 | end 67 | end 68 | end 69 | 70 | def parse(value, **options) 71 | @debug = [] 72 | options = {debug: @debug, format: :native}.merge(options) 73 | EBNF::Base.new(value, **options) 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /spec/ll1/parser_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift ".." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | 6 | describe EBNF::LL1::Parser do 7 | class LL1ParserTest 8 | include EBNF::LL1::Parser 9 | end 10 | 11 | before(:all) { 12 | LL1ParserTest.start_production(:term) {"foo"} 13 | LL1ParserTest.production(:term) {"foo"} 14 | LL1ParserTest.terminal(:escape, /escape/) {"foo"} 15 | LL1ParserTest.terminal(:unescape, /unescape/, unescape: true) {"foo"} 16 | } 17 | let(:logger) {RDF::Spec.logger} 18 | after(:each) do |example| 19 | puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) 20 | end 21 | 22 | describe "ClassMethods" do 23 | describe "production" do 24 | it "adds as a start_handler" do 25 | expect(LL1ParserTest.start_handlers.keys).to eq [:term] 26 | expect(LL1ParserTest.start_handlers[:term]).to be_a(Proc) 27 | end 28 | it "adds as a production_handler" do 29 | expect(LL1ParserTest.production_handlers.keys).to eq [:term] 30 | expect(LL1ParserTest.production_handlers[:term]).to be_a(Proc) 31 | end 32 | end 33 | 34 | describe "terminal" do 35 | it "adds as a terminal_handler" do 36 | expect(LL1ParserTest.terminal_handlers.keys).to include(:escape, :unescape) 37 | expect(LL1ParserTest.terminal_handlers[:escape]).to be_a(Proc) 38 | expect(LL1ParserTest.terminal_handlers[:unescape]).to be_a(Proc) 39 | end 40 | 41 | it "adds patterns" do 42 | expect(LL1ParserTest.patterns).to 
include( 43 | EBNF::LL1::Lexer::Terminal.new(:escape, /escape/), 44 | EBNF::LL1::Lexer::Terminal.new(:unescape, /unescape/, unescape: true) 45 | ) 46 | end 47 | end 48 | end 49 | 50 | describe "#parse" do 51 | subject {LL1ParserTest.new} 52 | it "raises error if no branch table defined" do 53 | expect {subject.parse("foo")}.to raise_error(EBNF::LL1::Parser::Error, "Branch table not defined") 54 | end 55 | 56 | it "raises error if starting production not defined" do 57 | expect { 58 | subject.parse("foo", nil, branch: {a: {b: ["c"]}}) 59 | }.to raise_error(EBNF::LL1::Parser::Error, "Starting production not defined") 60 | end 61 | 62 | it "raises error on invalid input" do 63 | expect { 64 | subject.parse("bar", :foo, branch: {foo: {bar: ["baz"]}}) 65 | }.to raise_error(EBNF::LL1::Parser::Error, /Invalid token "bar"/) 66 | end 67 | end 68 | 69 | require_relative "data/parser" 70 | 71 | describe EBNFParser do 72 | before {logger.level = Logger::INFO} 73 | let(:input) {File.expand_path("../../../etc/ebnf.ebnf", __FILE__)} 74 | let(:sxp) {File.read File.expand_path("../../../etc/ebnf.sxp", __FILE__)} 75 | let(:parser) {EBNFParser.new(File.open(input), debug: true, logger: logger)} 76 | 77 | it "parses EBNF Grammar" do 78 | expect(parser.to_sxp).to produce(sxp, logger) 79 | end 80 | end 81 | end -------------------------------------------------------------------------------- /spec/peg_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | require 'sxp' 6 | 7 | describe EBNF::PEG do 8 | describe "#make_peg" do 9 | { 10 | %{[2] Prolog ::= BaseDecl? 
PrefixDecl*} => 11 | %{((rule Prolog "2" (seq _Prolog_1 _Prolog_2)) 12 | (rule _Prolog_1 "2.1" (opt BaseDecl)) 13 | (rule _Prolog_2 "2.2" (star PrefixDecl)))}, 14 | %{ 15 | [9] primary ::= HEX 16 | | RANGE 17 | | O_RANGE 18 | | STRING1 19 | | STRING2 20 | | '(' expression ')' 21 | 22 | } => 23 | %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1)) 24 | (rule _primary_1 "9.1" (seq '(' expression ')')))}, 25 | %{ 26 | primary ::= HEX 27 | | RANGE 28 | | O_RANGE 29 | | STRING1 30 | | STRING2 31 | | '(' expression ')' 32 | } => 33 | %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1)) 34 | (rule _primary_1 (seq '(' expression ')')))}, 35 | %{ 36 | <primary> ::= <HEX> 37 | | <RANGE> 38 | | <O_RANGE> 39 | | <STRING1> 40 | | <STRING2> 41 | | '(' <expression> ')' 42 | } => 43 | %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1)) 44 | (rule _primary_1 (seq '(' expression ')')))}, 45 | %{[1] start ::= A B C} => 46 | %{((rule start "1" (seq A B C)))}, 47 | %{[1] start ::= A B? C* D+} => 48 | %{((rule start "1" (seq A _start_1 _start_2 _start_3)) 49 | (rule _start_1 "1.1" (opt B)) 50 | (rule _start_2 "1.2" (star C)) 51 | (rule _start_3 "1.3" (plus D)))}, 52 | %{[1] start ::= A (B C) D} => 53 | %{((rule start "1" (seq A _start_1 D)) 54 | (rule _start_1 "1.1" (seq B C)))}, 55 | %{[1] start ::= A (B) C} => 56 | %{((rule start "1" (seq A B C)))}, 57 | %{[1] start ::= A (B (C D)) (E F)} => 58 | %{((rule start "1" (seq A _start_1 _start_2)) 59 | (rule _start_1 "1.1" (seq B _start_3)) 60 | (rule _start_3 "1.3" (seq C D)) 61 | (rule _start_2 "1.2" (seq E F)))}, 62 | %{[1] r1 ::= (A B) C 63 | [2] r2 ::= (A B) E} => 64 | %{((rule r1 "1" (seq _r1_1 C)) 65 | (rule _r1_1 "1.1" (seq A B)) 66 | (rule r2 "2" (seq _r2_1 E)) 67 | (rule _r2_1 "2.1" (seq A B)))} 68 | }.each do |input, expected| 69 | it "parses #{input.inspect}" do 70 | expect(parse(input).make_peg.ast.to_sxp).to produce(expected, @debug) 71 | end 72 | end 73 | end 74 | 75 | def parse(value, **options) 76 | @debug = [] 77 | options = {debug: 
@debug, format: :native}.merge(options) 78 | EBNF::Base.new(value, **options) 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /examples/abnf/examples/uri.abnf: -------------------------------------------------------------------------------- 1 | URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 2 | 3 | hier-part = "//" authority path-abempty 4 | / path-absolute 5 | / path-rootless 6 | / path-empty 7 | 8 | URI-reference = URI / relative-ref 9 | 10 | absolute-URI = scheme ":" hier-part [ "?" query ] 11 | 12 | relative-ref = relative-part [ "?" query ] [ "#" fragment ] 13 | 14 | relative-part = "//" authority path-abempty 15 | / path-absolute 16 | / path-noscheme 17 | / path-empty 18 | 19 | scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 20 | 21 | authority = [ userinfo "@" ] host [ ":" port ] 22 | userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 23 | host = IP-literal / IPv4address / reg-name 24 | port = *DIGIT 25 | 26 | IP-literal = "[" ( IPv6address / IPvFuture ) "]" 27 | 28 | IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) 29 | 30 | IPv6address = 6( h16 ":" ) ls32 31 | / "::" 5( h16 ":" ) ls32 32 | / [ h16 ] "::" 4( h16 ":" ) ls32 33 | / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 34 | / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 35 | / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 36 | / [ *4( h16 ":" ) h16 ] "::" ls32 37 | / [ *5( h16 ":" ) h16 ] "::" h16 38 | / [ *6( h16 ":" ) h16 ] "::" 39 | 40 | h16 = 1*4HEXDIG 41 | ls32 = ( h16 ":" h16 ) / IPv4address 42 | IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 43 | dec-octet = DIGIT ; 0-9 44 | / %x31-39 DIGIT ; 10-99 45 | / "1" 2DIGIT ; 100-199 46 | / "2" %x30-34 DIGIT ; 200-249 47 | / "25" %x30-35 ; 250-255 48 | reg-name = *( unreserved / pct-encoded / sub-delims ) 49 | path = path-abempty ; begins with "/" or is empty 50 | / path-absolute ; begins with "/" but not "//" 51 | / path-noscheme ; begins with a non-colon segment 52 | / path-rootless ; begins with a segment 53 | / path-empty ; zero characters 54 | path-abempty = *( "/" segment ) 55 | path-absolute = "/" [ segment-nz *( "/" segment ) ] 56 | path-noscheme = segment-nz-nc *( "/" segment ) 57 | path-rootless = segment-nz *( "/" segment ) 58 | path-empty = 0<pchar> 59 | segment = *pchar 60 | segment-nz = 1*pchar 61 | segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 62 | ; non-zero-length segment without any colon ":" 63 | pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 64 | query = *( pchar / "/" / "?" ) 65 | fragment = *( pchar / "/" / "?" ) 66 | pct-encoded = "%" HEXDIG HEXDIG 67 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 68 | reserved = gen-delims / sub-delims 69 | gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 70 | sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 71 | / "*" / "+" / "," / ";" / "=" 72 | -------------------------------------------------------------------------------- /examples/isoebnf/iso-ebnf.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule syntax (star syntax_rule)) 3 | (rule syntax_rule 4 | (seq meta_identifier defining_symbol definitions_list terminator_symbol)) 5 | (rule definitions_list 6 | (seq single_definition (star (seq definition_separator_symbol definitions_list)))) 7 | (rule single_definition (seq term (star (seq "," term)))) 8 | (rule term (seq factor (opt (seq "-" exception)))) 9 | (rule exception (seq factor)) 10 | (rule factor (seq (opt (seq integer "*")) primary)) 11 | (rule primary 12 | (alt optional_sequence repeated_sequence special_sequence grouped_sequence 13 | meta_identifier terminal_string empty )) 14 | (rule optional_sequence 15 | (seq start_option_symbol definitions_list end_option_symbol)) 16 | (rule repeated_sequence 17 | (seq start_repeat_symbol definitions_list end_repeat_symbol)) 18 | (rule grouped_sequence (seq "(" definitions_list ")")) 19 | (terminals _terminals (seq)) 20 | (terminal terminal_string 21 | (alt 22 | (seq "'" (plus first_terminal_character) "'") 23 | (seq "\"" (plus second_terminal_character) "\"")) ) 24 | (terminal meta_identifier (seq letter (star meta_identifier_character))) 25 | (terminal integer (plus decimal_digit)) 26 | (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) 27 | (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) 28 | (terminal comment_symbol (alt comment commentless_symbol other_character)) 29 | (terminal commentless_symbol 30 | (alt terminal_character meta_identifier integer terminal_string special_sequence)) 31 | (terminal letter (range "a-zA-Z")) 32 | (terminal decimal_digit (range "0-9")) 33 | (terminal meta_identifier_character (alt letter decimal_digit "_")) 34 | (terminal first_terminal_character (diff terminal_character "'")) 35 | (terminal second_terminal_character (diff terminal_character "\"")) 36 | (terminal special_sequence_character (diff terminal_character "?")) 37 | (terminal terminal_character 38 | (alt letter decimal_digit concatenate_symbol defining_symbol 39 | definition_separator_symbol end_comment_symbol end_group_symbol 40 | end_option_symbol end_repeat_symbol except_symbol first_quote_symbol 41 | repetition_symbol second_quote_symbol special_sequence_symbol 42 | start_comment_symbol start_group_symbol start_option_symbol 43 | start_repeat_symbol terminator_symbol other_character )) 44 | (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) 45 | (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) 46 | (pass _pass (alt (plus gap_separator) comment)) 47 | (terminal empty (seq "")) 48 | (terminal concatenate_symbol (seq ",")) 49 | (terminal repetition_symbol (seq "*")) 50 | (terminal except_symbol (seq "-")) 51 | (terminal first_quote_symbol (seq "'")) 52 | (terminal second_quote_symbol (seq "\"")) 53 | (terminal start_comment_symbol (seq "(*")) 54 | (terminal end_comment_symbol (seq "*)")) 55 | (terminal start_group_symbol (seq "(")) 56 | (terminal end_group_symbol (seq ")")) 57 | (terminal special_sequence_symbol (seq "?")) 58 | (terminal defining_symbol (alt "=" ":")) 59 | (terminal definition_separator_symbol (alt "|" "/" "!")) 60 | (terminal terminator_symbol (alt ";" ".")) 61 | (terminal 
start_option_symbol (seq "[")) 62 | (terminal end_option_symbol (seq "]")) 63 | (terminal start_repeat_symbol (alt "{" "(:")) 64 | (terminal end_repeat_symbol (alt "}" ":)")) 65 | (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string))) 66 | -------------------------------------------------------------------------------- /spec/ebnf_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | require 'sxp' 6 | 7 | describe EBNF do 8 | describe ".parse" do 9 | { 10 | %{[2] Prolog ::= BaseDecl? PrefixDecl*} => 11 | %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}, 12 | %{ 13 | @terminals 14 | [3] terminal ::= [A-Z]+ 15 | } => %{((terminals _terminals (seq)) 16 | (terminal terminal "3" (plus (range "A-Z"))))}, 17 | %{ 18 | [9] primary ::= HEX 19 | | RANGE 20 | | O_RANGE 21 | | STRING1 22 | | STRING2 23 | | '(' expression ')' 24 | } => %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 (seq '(' expression ')'))))}, 25 | %{ 26 | primary ::= HEX 27 | | RANGE 28 | | O_RANGE 29 | | STRING1 30 | | STRING2 31 | | '(' expression ')' 32 | } => %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 (seq '(' expression ')'))))}, 33 | %{ 34 | <primary> ::= <HEX> 35 | | <RANGE> 36 | | <O_RANGE> 37 | | <STRING1> 38 | | <STRING2> 39 | | '(' <expression> ')' 40 | } => %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 (seq '(' expression ')'))))}, 41 | }.each do |input, expected| 42 | context input do 43 | subject {EBNF.parse(input)} 44 | it "creates ast" do 45 | expect(subject.ast.to_sxp).to produce(expected, []) 46 | end 47 | 48 | it "#to_sxp" do 49 | expect(subject.to_sxp).to produce(expected) 50 | end 51 | 52 | it "#to_ttl" do 53 | expect(subject.to_ttl("ex", "http://example.org/")).not_to be_empty 54 | end 55 | 56 | it "#to_html" do 57 | expect(subject.to_html).not_to be_empty 58 | end 59 | 60 | it "#to_s" do 61 | expect(subject.to_s).not_to be_empty 62 | end 63 | end 64 
| end 65 | 66 | context "README" do 67 | let(:ebnf) {PARSED_EBNF_GRAMMAR.dup} 68 | subject {ebnf} 69 | 70 | it "creates ast" do 71 | expect(subject.ast.to_sxp).not_to be_empty 72 | end 73 | 74 | it "#to_sxp" do 75 | expect(subject.to_sxp).not_to be_empty 76 | end 77 | 78 | it "#to_ttl" do 79 | expect(subject.to_ttl("ex", "http://example.org/")).not_to be_empty 80 | end 81 | 82 | it "#to_html" do 83 | expect(subject.to_html).not_to be_empty 84 | end 85 | 86 | it "#to_s" do 87 | expect(subject.to_s).not_to be_empty 88 | end 89 | 90 | context "LL1" do 91 | before {subject.make_bnf} 92 | 93 | before do 94 | subject.first_follow(:ebnf) 95 | subject.build_tables 96 | end 97 | 98 | it "#to_ruby" do 99 | expect {subject.to_ruby}.to write(:something).to(:output) 100 | end 101 | end 102 | 103 | context "PEG" do 104 | before {subject.make_peg} 105 | 106 | it "#to_ruby" do 107 | expect {subject.to_ruby}.to write(:something).to(:output) 108 | end 109 | end 110 | end 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /etc/iso-ebnf.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule syntax (star syntax_rule)) 3 | (rule syntax_rule 4 | (seq meta_identifier defining_symbol definitions_list terminator_symbol)) 5 | (rule definitions_list 6 | (seq single_definition (star (seq definition_separator_symbol definitions_list)))) 7 | (rule single_definition (seq term (star (seq ',' term)))) 8 | (rule term (seq factor (opt (seq '-' exception)))) 9 | (rule exception (seq factor)) 10 | (rule factor (seq (opt (seq integer '*')) primary)) 11 | (rule primary 12 | (alt optional_sequence repeated_sequence special_sequence grouped_sequence 13 | meta_identifier terminal_string empty )) 14 | (rule optional_sequence 15 | (seq start_option_symbol definitions_list end_option_symbol)) 16 | (rule repeated_sequence 17 | (seq start_repeat_symbol definitions_list end_repeat_symbol)) 18 | (rule 
grouped_sequence (seq '(' definitions_list ')')) 19 | (terminals _terminals (seq)) 20 | (terminal terminal_string 21 | (alt 22 | (seq "'" (plus first_terminal_character) "'") 23 | (seq '"' (plus second_terminal_character) '"')) ) 24 | (terminal meta_identifier (seq letter (star meta_identifier_character))) 25 | (terminal integer (plus decimal_digit)) 26 | (terminal special_sequence (seq '?' (star special_sequence_character) '?')) 27 | (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) 28 | (terminal comment_symbol (alt comment commentless_symbol other_character)) 29 | (terminal commentless_symbol 30 | (alt terminal_character meta_identifier integer terminal_string special_sequence)) 31 | (terminal letter (range "a-zA-Z")) 32 | (terminal decimal_digit (range "0-9")) 33 | (terminal meta_identifier_character (alt letter decimal_digit '_')) 34 | (terminal first_terminal_character (diff terminal_character "'")) 35 | (terminal second_terminal_character (diff terminal_character '"')) 36 | (terminal special_sequence_character (diff terminal_character '?')) 37 | (terminal terminal_character 38 | (alt letter decimal_digit concatenate_symbol defining_symbol 39 | definition_separator_symbol end_comment_symbol end_group_symbol 40 | end_option_symbol end_repeat_symbol except_symbol first_quote_symbol 41 | repetition_symbol second_quote_symbol special_sequence_symbol 42 | start_comment_symbol start_group_symbol start_option_symbol 43 | start_repeat_symbol terminator_symbol other_character )) 44 | (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") '\\')) 45 | (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) 46 | (pass _pass (alt (plus gap_separator) comment)) 47 | (terminal empty (seq '')) 48 | (terminal concatenate_symbol (seq ',')) 49 | (terminal repetition_symbol (seq '*')) 50 | (terminal except_symbol (seq '-')) 51 | (terminal first_quote_symbol (seq "'")) 52 | (terminal second_quote_symbol (seq '"')) 53 | (terminal 
start_comment_symbol (seq '(*')) 54 | (terminal end_comment_symbol (seq '*)')) 55 | (terminal start_group_symbol (seq '(')) 56 | (terminal end_group_symbol (seq ')')) 57 | (terminal special_sequence_symbol (seq '?')) 58 | (terminal defining_symbol (alt '=' ':')) 59 | (terminal definition_separator_symbol (alt '|' '/' '!')) 60 | (terminal terminator_symbol (alt ';' '.')) 61 | (terminal start_option_symbol (seq '[')) 62 | (terminal end_option_symbol (seq ']')) 63 | (terminal start_repeat_symbol (alt '{' '(:')) 64 | (terminal end_repeat_symbol (alt '}' ':)')) 65 | (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string))) 66 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), 'lib'))) 3 | require 'rubygems' 4 | 5 | namespace :gem do 6 | desc "Build the ebnf-#{File.read('VERSION').chomp}.gem file" 7 | task :build do 8 | sh "gem build ebnf.gemspec && mv ebnf-#{File.read('VERSION').chomp}.gem pkg/" 9 | end 10 | 11 | desc "Release the ebnf-#{File.read('VERSION').chomp}.gem file" 12 | task :release do 13 | sh "gem push pkg/ebnf-#{File.read('VERSION').chomp}.gem" 14 | end 15 | end 16 | 17 | desc 'Default: run specs.' 
18 | task default: :spec 19 | task specs: :spec 20 | 21 | require 'rspec/core/rake_task' 22 | desc 'Run specifications' 23 | RSpec::Core::RakeTask.new do |spec| 24 | spec.rspec_opts = %w(--options spec/spec.opts) if File.exists?('spec/spec.opts') 25 | end 26 | 27 | desc "Run specs through RCov" 28 | RSpec::Core::RakeTask.new("spec:rcov") do |spec| 29 | spec.rcov = true 30 | spec.rcov_opts = %q[--exclude "spec"] 31 | end 32 | 33 | desc "Generate HTML report specs" 34 | RSpec::Core::RakeTask.new("doc:spec") do |spec| 35 | spec.rspec_opts = ["--format", "html", "-o", "doc/spec.html"] 36 | end 37 | 38 | require 'yard' 39 | namespace :doc do 40 | YARD::Rake::YardocTask.new 41 | end 42 | 43 | namespace :etc do 44 | ETC_FILES = %w{ 45 | etc/abnf.sxp etc/iso-ebnf.sxp 46 | etc/ebnf.sxp etc/ebnf.ll1.sxp etc/ebnf.peg.sxp etc/ebnf.html etc/ebnf.ll1.rb etc/ebnf.peg.rb 47 | etc/turtle.sxp 48 | etc/sparql.sxp 49 | } 50 | desc 'Remove generated files in etc' 51 | task :clean do 52 | %x(rm #{ETC_FILES.join(' ')}) 53 | end 54 | 55 | desc 'Create versions of ebnf files in etc' 56 | task build: ETC_FILES 57 | end 58 | 59 | desc "Build meta files for ABNF, EBNF and ISO EBNF" 60 | task :meta => %w{lib/ebnf/ebnf/meta.rb lib/ebnf/isoebnf/meta.rb lib/ebnf/abnf/meta.rb lib/ebnf/abnf/core.rb} 61 | 62 | file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do 63 | %x(bin/ebnf --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) 64 | end 65 | 66 | file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do 67 | %x(bin/ebnf -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) 68 | end 69 | 70 | file "lib/ebnf/ebnf/meta.rb" => "etc/ebnf.peg.rb" do 71 | %x(cp etc/ebnf.peg.rb lib/ebnf/ebnf/meta.rb) 72 | end 73 | 74 | file "lib/ebnf/isoebnf/meta.rb" => "etc/iso-ebnf.ebnf" do 75 | %x(bin/ebnf --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) 76 | end 77 | 78 | 79 | # Build SXP output with leading space to allow for Markdown formatting. 
80 | rule ".sxp" => %w{.ebnf} do |t| 81 | puts "build #{t.name}" 82 | File.open(t.name, "w") do |f| 83 | IO.popen(%(bin/ebnf #{t.source})).each_line do |line| 84 | f.puts ' ' + line 85 | end 86 | end 87 | end 88 | 89 | rule ".peg.sxp" => %w{.ebnf} do |t| 90 | puts "build #{t.name}" 91 | File.open(t.name, "w") do |f| 92 | IO.popen(%(bin/ebnf --peg #{t.source})).each_line do |line| 93 | f.puts ' ' + line 94 | end 95 | end 96 | end 97 | 98 | rule ".html" => %w{.ebnf} do |t| 99 | puts "build #{t.name}" 100 | %x(bin/ebnf --format html -o #{t.name} #{t.source}) 101 | end 102 | 103 | file "etc/ebnf.ll1.sxp" => "etc/ebnf.ebnf" do |t| 104 | puts "build #{t.name}" 105 | File.open(t.name, "w") do |f| 106 | IO.popen(%(bin/ebnf --ll1 ebnf #{t.source})).each_line do |line| 107 | f.puts ' ' + line 108 | end 109 | end 110 | end 111 | 112 | file "etc/ebnf.peg.rb" => "etc/ebnf.ebnf" do |t| 113 | puts "build #{t.name}" 114 | %x(bin/ebnf --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) 115 | end 116 | 117 | file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| 118 | puts "build #{t.name}" 119 | %x(bin/ebnf --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) 120 | end 121 | -------------------------------------------------------------------------------- /etc/abnf.abnf: -------------------------------------------------------------------------------- 1 | rulelist = 1*( rule / (*c-wsp c-nl) ) 2 | 3 | rule = rulename defined-as elements c-nl 4 | ; continues if next line starts 5 | ; with white space 6 | 7 | rulename = ALPHA *(ALPHA / DIGIT / "-") 8 | 9 | defined-as = *c-wsp ("=" / "=/") *c-wsp 10 | ; basic rules definition and 11 | ; incremental alternatives 12 | 13 | elements = alternation *c-wsp 14 | 15 | c-wsp = WSP / (c-nl WSP) 16 | 17 | c-nl = comment / CRLF 18 | ; comment or newline 19 | 20 | comment = ";" *(WSP / VCHAR) CRLF 21 | 22 | alternation = concatenation 23 | *(*c-wsp "/" *c-wsp concatenation) 24 | 25 | concatenation = repetition *(1*c-wsp repetition) 26 | 27 | 
repetition = [repeat] element 28 | 29 | repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT 30 | 31 | element = rulename / group / option / 32 | char-val / num-val / prose-val 33 | 34 | group = "(" *c-wsp alternation *c-wsp ")" 35 | 36 | option = "[" *c-wsp alternation *c-wsp "]" 37 | 38 | char-val = case-insensitive-string / 39 | case-sensitive-string 40 | 41 | case-insensitive-string = 42 | [ "%i" ] quoted-string 43 | 44 | case-sensitive-string = 45 | "%s" quoted-string 46 | 47 | quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE 48 | ; quoted string of SP and VCHAR 49 | ; without DQUOTE 50 | 51 | num-val = "%" (bin-val / dec-val / hex-val) 52 | 53 | bin-val = "b" 1*BIT 54 | [ 1*("." 1*BIT) / ("-" 1*BIT) ] 55 | ; series of concatenated bit values 56 | ; or single ONEOF range 57 | 58 | dec-val = "d" 1*DIGIT 59 | [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] 60 | 61 | hex-val = "x" 1*HEXDIG 62 | [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] 63 | 64 | prose-val = "<" *(%x20-3D / %x3F-7E) ">" 65 | ; bracketed string of SP and VCHAR 66 | ; without angles 67 | ; prose description, to be used as 68 | ; last resort 69 | 70 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 71 | 72 | BIT = "0" / "1" 73 | 74 | CHAR = %x01-7F 75 | ; any 7-bit US-ASCII character, 76 | ; excluding NUL 77 | CR = %x0D 78 | ; carriage return 79 | 80 | CRLF = [CR] LF 81 | ; Internet standard newline 82 | ; Extended to allow only newline 83 | 84 | CTL = %x00-1F / %x7F 85 | ; controls 86 | 87 | DIGIT = %x30-39 88 | ; 0-9 89 | 90 | DQUOTE = %x22 91 | ; " (Double Quote) 92 | 93 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 94 | 95 | HTAB = %x09 96 | ; horizontal tab 97 | 98 | LF = %x0A 99 | ; linefeed 100 | 101 | LWSP = *(WSP / CRLF WSP) 102 | ; Use of this linear-white-space rule 103 | ; permits lines containing only white 104 | ; space that are no longer legal in 105 | ; mail headers and have caused 106 | ; interoperability problems in other 107 | ; contexts. 
108 | ; Do not use when defining mail 109 | ; headers and use with caution in 110 | ; other contexts. 111 | 112 | OCTET = %x00-FF 113 | ; 8 bits of data 114 | 115 | SP = %x20 116 | 117 | VCHAR = %x21-7E 118 | ; visible (printing) characters 119 | 120 | WSP = SP / HTAB 121 | ; white space 122 | -------------------------------------------------------------------------------- /etc/turtle.ebnf: -------------------------------------------------------------------------------- 1 | turtleDoc ::= statement* 2 | statement ::= directive | triples '.' 3 | directive ::= prefixID | base | sparqlPrefix | sparqlBase 4 | prefixID ::= '@prefix' PNAME_NS IRIREF '.' 5 | base ::= '@base' IRIREF '.' 6 | sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF 7 | sparqlBase ::= "BASE" IRIREF 8 | triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList? 9 | predicateObjectList ::= verb objectList (';' (verb objectList)? )* 10 | objectList ::= object ( ',' object )* 11 | verb ::= predicate | 'a' 12 | subject ::= iri | BlankNode | collection 13 | predicate ::= iri 14 | object ::= iri | BlankNode | collection | blankNodePropertyList | literal 15 | literal ::= RDFLiteral | NumericLiteral | BooleanLiteral 16 | blankNodePropertyList ::= '[' predicateObjectList ']' 17 | collection ::= '(' object* ')' 18 | NumericLiteral ::= INTEGER | DECIMAL | DOUBLE 19 | RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )? 20 | BooleanLiteral ::= 'true' | 'false' 21 | String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE 22 | | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE 23 | iri ::= IRIREF | PrefixedName 24 | PrefixedName ::= PNAME_LN | PNAME_NS 25 | BlankNode ::= BLANK_NODE_LABEL | ANON 26 | 27 | @terminals 28 | 29 | IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' 30 | PNAME_NS ::= PN_PREFIX? ':' 31 | PNAME_LN ::= PNAME_NS PN_LOCAL 32 | BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? 
33 | LANGTAG ::= '@' [a-zA-Z]+ ( '-' [a-zA-Z0-9]+ )* 34 | INTEGER ::= [+-]? [0-9]+ 35 | DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ ) 36 | DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT ) 37 | EXPONENT ::= [eE] [+-]? [0-9]+ 38 | STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */ 39 | STRING_LITERAL_SINGLE_QUOTE ::= "'" ( [^#x27#x5C#xA#xD] | ECHAR | UCHAR )* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */ 40 | STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR | UCHAR ) )* "'''" 41 | STRING_LITERAL_LONG_QUOTE ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR | UCHAR ) )* '"""' 42 | UCHAR ::= ( '\u' HEX HEX HEX HEX ) | ( '\U' HEX HEX HEX HEX HEX HEX HEX HEX ) 43 | ECHAR ::= ('\' [tbnrf\"']) 44 | NIL ::= '(' WS* ')' 45 | WS ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ 46 | ANON ::= '[' WS* ']' 47 | PN_CHARS_BASE ::= ([A-Z] 48 | | [a-z] 49 | | [#x00C0-#x00D6] 50 | | [#x00D8-#x00F6] 51 | | [#x00F8-#x02FF] 52 | | [#x0370-#x037D] 53 | | [#x037F-#x1FFF] 54 | | [#x200C-#x200D] 55 | | [#x2070-#x218F] 56 | | [#x2C00-#x2FEF] 57 | | [#x3001-#xD7FF] 58 | | [#xF900-#xFDCF] 59 | | [#xFDF0-#xFFFD] 60 | | [#x10000-#xEFFFF]) 61 | PN_CHARS_U ::= PN_CHARS_BASE | '_' 62 | PN_CHARS ::= (PN_CHARS_U 63 | | '-' 64 | | [0-9] 65 | | #x00B7 66 | | [#x0300-#x036F] 67 | | [#x203F-#x2040]) 68 | PN_PREFIX ::= PN_CHARS_BASE ( ( PN_CHARS | '.' )* PN_CHARS )? 69 | PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ? 70 | PLX ::= PERCENT | PN_LOCAL_ESC 71 | PERCENT ::= '%' HEX HEX 72 | HEX ::= ([0-9] | [A-F] | [a-f]) 73 | PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' 
| '#' | '@' | '%' ) 74 | -------------------------------------------------------------------------------- /etc/abnf.ebnf: -------------------------------------------------------------------------------- 1 | rulelist ::= ( rule | (c_wsp* c_nl) )+ 2 | 3 | rule ::= rulename defined_as elements c_nl 4 | # continues if next line starts 5 | # with white space 6 | 7 | elements ::= alternation c_wsp* 8 | 9 | alternation ::= concatenation 10 | (c_wsp* "/" c_wsp* concatenation)* 11 | 12 | concatenation::= repetition (c_wsp+ repetition)* 13 | 14 | repetition ::= repeat? element 15 | 16 | repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+ 17 | 18 | element ::= rulename | group | option | 19 | char_val | num_val | prose_val 20 | 21 | group ::= "(" c_wsp* alternation c_wsp* ")" 22 | 23 | option ::= "[" c_wsp* alternation c_wsp* "]" 24 | 25 | char_val ::= case_insensitive_string | 26 | case_sensitive_string 27 | 28 | case_insensitive_string ::= 29 | "%i"? quoted_string 30 | 31 | case_sensitive_string ::= 32 | "%s" quoted_string 33 | 34 | num_val ::= "%" (bin_val | dec_val | hex_val) 35 | 36 | @terminals 37 | 38 | # Terminals used in ABNF, itself 39 | rulename ::= ALPHA (ALPHA | DIGIT | "-")* 40 | 41 | defined_as ::= c_wsp* ("=" | "=/") c_wsp* 42 | # basic rules definition and 43 | # incremental alternatives 44 | 45 | c_wsp ::= WSP | (c_nl WSP) 46 | 47 | c_nl ::= COMMENT | CRLF 48 | # comment or newline 49 | 50 | comment ::= ";" (WSP | VCHAR)* CRLF 51 | 52 | quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE 53 | # quoted string of SP and VCHAR 54 | # without DQUOTE 55 | 56 | bin_val ::= "b" BIT+ 57 | (("." BIT+)+ | ("-" BIT+))? 58 | # series of concatenated bit values 59 | # or single ONEOF range 60 | 61 | dec_val ::= "d" DIGIT+ 62 | (("." DIGIT+)+ | ("-" DIGIT+))? 63 | 64 | hex_val ::= "x" HEXDIG+ 65 | (("." HEXDIG+)+ | ("-" HEXDIG+))? 
66 | 67 | prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" 68 | # bracketed string of SP and VCHAR 69 | # without angles 70 | # prose description, to be used as 71 | # last resort 72 | 73 | # Core terminals available in uses of ABNF 74 | ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z 75 | 76 | BIT ::= '0' | '1' 77 | 78 | CHAR ::= [#x01-#x7F] 79 | # any 7-bit US-ASCII character, 80 | # excluding NUL 81 | CR ::= #x0D 82 | # carriage return 83 | 84 | CRLF ::= CR? LF 85 | # Internet standard newline 86 | 87 | CTL ::= [#x00-#x1F] | #x7F 88 | # controls 89 | 90 | DIGIT ::= [#x30-#x39] 91 | # 0-9 92 | 93 | DQUOTE ::= #x22 94 | # " (Double Quote) 95 | 96 | HEXDIG ::= DIGIT | "A" | "B" | "C" | "D" | "E" | "F" 97 | 98 | HTAB ::= #x09 99 | # horizontal tab 100 | 101 | LF ::= #x0A 102 | # linefeed 103 | 104 | LWSP ::= (WSP | CRLF WSP)* 105 | # Use of this linear-white-space rule 106 | # permits lines containing only white 107 | # space that are no longer legal in 108 | # mail headers and have caused 109 | # interoperability problems in other 110 | # contexts. 111 | # Do not use when defining mail 112 | # headers and use with caution in 113 | # other contexts. 114 | 115 | OCTET ::= [#x00-#xFF] 116 | # 8 bits of data 117 | 118 | SP ::= #x20 119 | 120 | VCHAR ::= [#x21-#x7E] 121 | # visible (printing) characters 122 | 123 | WSP ::= SP | HTAB 124 | # white space 125 | -------------------------------------------------------------------------------- /examples/abnf/examples/http.abnf: -------------------------------------------------------------------------------- 1 | BWS = OWS 2 | 3 | Connection = *( "," OWS ) connection-option *( OWS "," [ OWS connection-option ] ) 4 | 5 | Content-Length = 1*DIGIT 6 | 7 | HTTP-message = start-line *( header-field CRLF ) CRLF [ message-body] 8 | HTTP-name = %x48.54.54.50 ; HTTP 9 | HTTP-version = HTTP-name "/" DIGIT "." 
DIGIT 10 | Host = uri-host [ ":" port ] 11 | 12 | OWS = *( SP / HTAB ) 13 | 14 | RWS = 1*( SP / HTAB ) 15 | 16 | TE = [ ( "," / t-codings ) *( OWS "," [ OWS t-codings ] ) ] 17 | Trailer = *( "," OWS ) field-name *( OWS "," [ OWS field-name ] ) 18 | Transfer-Encoding = *( "," OWS ) transfer-coding *( OWS "," [ OWS transfer-coding ] ) 19 | 20 | URI-reference = 21 | Upgrade = *( "," OWS ) protocol *( OWS "," [ OWS protocol ] ) 22 | 23 | Via = *( "," OWS ) 24 | ( received-protocol RWS received-by [ RWS comment ] ) 25 | *( OWS "," [ OWS ( received-protocol RWS received-by [ RWS comment ] ) ] ) 26 | 27 | absolute-URI = 28 | absolute-form = absolute-URI 29 | absolute-path = 1*( "/" segment ) 30 | asterisk-form = "*" 31 | authority = 32 | authority-form = authority 33 | chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF 34 | chunk-data = 1*OCTET 35 | chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) 36 | chunk-ext-name = token 37 | chunk-ext-val = token / quoted-string 38 | chunk-size = 1*HEXDIG 39 | chunked-body = *chunk last-chunk trailer-part CRLF 40 | comment = "(" *( ctext / quoted-pair / comment ) ")" 41 | connection-option = token 42 | ctext = HTAB / SP / %x21-27 ; '!'-''' 43 | / %x2A-5B ; '*'-'[' 44 | / %x5D-7E ; ']'-'~' 45 | / obs-text 46 | 47 | field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] 48 | field-name = token 49 | field-value = *( field-content / obs-fold ) 50 | field-vchar = VCHAR / obs-text 51 | fragment = 52 | 53 | header-field = field-name ":" OWS field-value OWS 54 | http-URI = "http://" authority path-abempty [ "?" query ] [ "#" fragment ] 55 | https-URI = "https://" authority path-abempty [ "?" query ] [ "#" fragment ] 56 | 57 | last-chunk = 1*"0" [ chunk-ext ] CRLF 58 | 59 | message-body = *OCTET 60 | method = token 61 | 62 | obs-fold = CRLF 1*( SP / HTAB ) 63 | obs-text = %x80-FF 64 | origin-form = absolute-path [ "?" query ] 65 | 66 | partial-URI = relative-part [ "?" 
query ] 67 | path-abempty = 68 | port = 69 | protocol = protocol-name [ "/" protocol-version ] 70 | protocol-name = token 71 | protocol-version = token 72 | pseudonym = token 73 | 74 | qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' 75 | / %x5D-7E ; ']'-'~' 76 | / obs-text 77 | query = 78 | quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) 79 | quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE 80 | 81 | rank = ( "0" [ "." *3DIGIT ] ) / ( "1" [ "." *3"0" ] ) 82 | reason-phrase = *( HTAB / SP / VCHAR / obs-text ) 83 | received-by = ( uri-host [ ":" port ] ) / pseudonym 84 | received-protocol = [ protocol-name "/" ] protocol-version 85 | relative-part = 86 | request-line = method SP request-target SP HTTP-version CRLF 87 | request-target = origin-form / absolute-form / authority-form / asterisk-form 88 | 89 | scheme = 90 | segment = 91 | start-line = request-line / status-line 92 | status-code = 3DIGIT 93 | status-line = HTTP-version SP status-code SP reason-phrase CRLF 94 | 95 | t-codings = "trailers" / ( transfer-coding [ t-ranking ] ) 96 | t-ranking = OWS ";" OWS "q=" rank 97 | tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." 
/ "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 98 | token = 1*tchar 99 | trailer-part = *( header-field CRLF ) 100 | transfer-coding = "chunked" / "compress" / "deflate" / "gzip" / transfer-extension 101 | transfer-extension = token *( OWS ";" OWS transfer-parameter ) 102 | transfer-parameter = token BWS "=" BWS ( token / quoted-string ) 103 | 104 | uri-host = 105 | -------------------------------------------------------------------------------- /examples/isoebnf/iso-ebnf.peg.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule syntax (star syntax_rule)) 3 | (rule syntax_rule 4 | (seq meta_identifier defining_symbol definitions_list terminator_symbol)) 5 | (rule definitions_list (seq single_definition _definitions_list_1)) 6 | (rule _definitions_list_1 (star _definitions_list_2)) 7 | (rule _definitions_list_2 (seq definition_separator_symbol definitions_list)) 8 | (rule single_definition (seq term _single_definition_1)) 9 | (rule _single_definition_1 (star _single_definition_2)) 10 | (rule _single_definition_2 (seq "," term)) 11 | (rule term (seq factor _term_1)) 12 | (rule _term_1 (opt _term_2)) 13 | (rule _term_2 (seq "-" exception)) 14 | (rule exception (seq factor)) 15 | (rule factor (seq _factor_1 primary)) 16 | (rule _factor_1 (opt _factor_2)) 17 | (rule _factor_2 (seq integer "*")) 18 | (rule primary 19 | (alt optional_sequence repeated_sequence special_sequence grouped_sequence 20 | meta_identifier terminal_string empty )) 21 | (rule optional_sequence 22 | (seq start_option_symbol definitions_list end_option_symbol)) 23 | (rule repeated_sequence 24 | (seq start_repeat_symbol definitions_list end_repeat_symbol)) 25 | (rule grouped_sequence (seq "(" definitions_list ")")) 26 | (terminals _terminals (seq)) 27 | (terminal terminal_string (alt _terminal_string_1 _terminal_string_2)) 28 | (rule _terminal_string_1 (seq "'" _terminal_string_3 "'")) 29 | (rule _terminal_string_3 (plus first_terminal_character)) 30 | 
(rule _terminal_string_2 (seq "\"" _terminal_string_4 "\"")) 31 | (rule _terminal_string_4 (plus second_terminal_character)) 32 | (terminal meta_identifier (seq letter _meta_identifier_1)) 33 | (rule _meta_identifier_1 (star meta_identifier_character)) 34 | (terminal integer (plus decimal_digit)) 35 | (terminal special_sequence (seq "?" _special_sequence_1 "?")) 36 | (rule _special_sequence_1 (star special_sequence_character)) 37 | (terminal comment (seq start_comment_symbol _comment_1 end_comment_symbol)) 38 | (rule _comment_1 (star comment_symbol)) 39 | (terminal comment_symbol (alt comment commentless_symbol other_character)) 40 | (terminal commentless_symbol 41 | (alt terminal_character meta_identifier integer terminal_string special_sequence)) 42 | (terminal letter (range "a-zA-Z")) 43 | (terminal decimal_digit (range "0-9")) 44 | (terminal meta_identifier_character (alt letter decimal_digit "_")) 45 | (terminal first_terminal_character (diff terminal_character "'")) 46 | (terminal second_terminal_character (diff terminal_character "\"")) 47 | (terminal special_sequence_character (diff terminal_character "?")) 48 | (terminal terminal_character 49 | (alt letter decimal_digit concatenate_symbol defining_symbol 50 | definition_separator_symbol end_comment_symbol end_group_symbol 51 | end_option_symbol end_repeat_symbol except_symbol first_quote_symbol 52 | repetition_symbol second_quote_symbol special_sequence_symbol 53 | start_comment_symbol start_group_symbol start_option_symbol 54 | start_repeat_symbol terminator_symbol other_character )) 55 | (terminal other_character (alt _other_character_1 "\\")) 56 | (terminal _other_character_1 (range ":+_%@&$<>^` ̃#x20#x23")) 57 | (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) 58 | (pass _pass (alt __pass_1 comment)) 59 | (rule __pass_1 (plus gap_separator)) 60 | (terminal empty (seq "")) 61 | (terminal concatenate_symbol (seq ",")) 62 | (terminal repetition_symbol (seq "*")) 63 | (terminal except_symbol (seq 
"-")) 64 | (terminal first_quote_symbol (seq "'")) 65 | (terminal second_quote_symbol (seq "\"")) 66 | (terminal start_comment_symbol (seq "(*")) 67 | (terminal end_comment_symbol (seq "*)")) 68 | (terminal start_group_symbol (seq "(")) 69 | (terminal end_group_symbol (seq ")")) 70 | (terminal special_sequence_symbol (seq "?")) 71 | (terminal defining_symbol (alt "=" ":")) 72 | (terminal definition_separator_symbol (alt "|" "/" "!")) 73 | (terminal terminator_symbol (alt ";" ".")) 74 | (terminal start_option_symbol (seq "[")) 75 | (terminal end_option_symbol (seq "]")) 76 | (terminal start_repeat_symbol (alt "{" "(:")) 77 | (terminal end_repeat_symbol (alt "}" ":)")) 78 | (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) 79 | (rule _gap_free_symbol_1 (seq _gap_free_symbol_3 terminal_character)) 80 | (rule _gap_free_symbol_3 (not _gap_free_symbol_2)) 81 | (terminal _gap_free_symbol_2 (range "'\""))) 82 | -------------------------------------------------------------------------------- /etc/ebnf.peg.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule ebnf "1" (star _ebnf_1)) 3 | (rule _ebnf_1 "1.1" (alt declaration rule)) 4 | (rule declaration "2" (alt '@terminals' pass)) 5 | (rule rule "3" (seq LHS expression)) 6 | (rule expression "4" (seq alt)) 7 | (rule alt "5" (seq seq _alt_1)) 8 | (rule _alt_1 "5.1" (star _alt_2)) 9 | (rule _alt_2 "5.2" (seq '|' seq)) 10 | (rule seq "6" (plus diff)) 11 | (rule diff "7" (seq postfix _diff_1)) 12 | (rule _diff_1 "7.1" (opt _diff_2)) 13 | (rule _diff_2 "7.2" (seq '-' postfix)) 14 | (rule postfix "8" (seq primary _postfix_1)) 15 | (rule _postfix_1 "8.1" (opt POSTFIX)) 16 | (rule primary "9" (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 _primary_1)) 17 | (rule _primary_1 "9.1" (seq '(' expression ')')) 18 | (rule pass "10" (seq '@pass' expression)) 19 | (terminals _terminals (seq)) 20 | (terminal LHS "11" (seq _LHS_1 SYMBOL _LHS_2 '::=')) 21 | (terminal _LHS_1 "11.1" 
(opt _LHS_3)) 22 | (terminal _LHS_3 "11.3" (seq '[' SYMBOL ']' _LHS_4)) 23 | (terminal _LHS_4 "11.4" (plus ' ')) 24 | (terminal _LHS_2 "11.2" (star ' ')) 25 | (terminal SYMBOL "12" (alt _SYMBOL_1 O_SYMBOL)) 26 | (terminal _SYMBOL_1 "12.1" (seq '<' O_SYMBOL '>')) 27 | (terminal O_SYMBOL "12a" (plus _O_SYMBOL_1)) 28 | (terminal _O_SYMBOL_1 "12a.1" (alt _O_SYMBOL_2 _O_SYMBOL_3 _O_SYMBOL_4 '_' '.')) 29 | (terminal _O_SYMBOL_2 "12a.2" (range "a-z")) 30 | (terminal _O_SYMBOL_3 "12a.3" (range "A-Z")) 31 | (terminal _O_SYMBOL_4 "12a.4" (range "0-9")) 32 | (terminal HEX "13" (seq '#x' _HEX_1)) 33 | (terminal _HEX_1 "13.1" (plus _HEX_2)) 34 | (terminal _HEX_2 "13.2" (alt _HEX_3 _HEX_4 _HEX_5)) 35 | (terminal _HEX_3 "13.3" (range "a-f")) 36 | (terminal _HEX_4 "13.4" (range "A-F")) 37 | (terminal _HEX_5 "13.5" (range "0-9")) 38 | (terminal RANGE "14" (seq '[' _RANGE_1 _RANGE_2 ']')) 39 | (terminal _RANGE_1 "14.1" (plus _RANGE_3)) 40 | (terminal _RANGE_3 "14.3" (alt _RANGE_4 _RANGE_5 R_CHAR HEX)) 41 | (terminal _RANGE_4 "14.4" (seq R_CHAR '-' R_CHAR)) 42 | (terminal _RANGE_5 "14.5" (seq HEX '-' HEX)) 43 | (terminal _RANGE_2 "14.2" (opt '-')) 44 | (terminal O_RANGE "15" (seq '[^' _O_RANGE_1 _O_RANGE_2 ']')) 45 | (terminal _O_RANGE_1 "15.1" (plus _O_RANGE_3)) 46 | (terminal _O_RANGE_3 "15.3" (alt _O_RANGE_4 _O_RANGE_5 R_CHAR HEX)) 47 | (terminal _O_RANGE_4 "15.4" (seq R_CHAR '-' R_CHAR)) 48 | (terminal _O_RANGE_5 "15.5" (seq HEX '-' HEX)) 49 | (terminal _O_RANGE_2 "15.2" (opt '-')) 50 | (terminal STRING1 "16" (seq '"' _STRING1_1 '"')) 51 | (terminal _STRING1_1 "16.1" (star _STRING1_2)) 52 | (terminal _STRING1_2 "16.2" (diff CHAR '"')) 53 | (terminal STRING2 "17" (seq "'" _STRING2_1 "'")) 54 | (terminal _STRING2_1 "17.1" (star _STRING2_2)) 55 | (terminal _STRING2_2 "17.2" (diff CHAR "'")) 56 | (terminal CHAR "18" (alt _CHAR_1 _CHAR_2 _CHAR_3 _CHAR_4)) 57 | (terminal _CHAR_1 "18.1" (range "#x9#xA#xD")) 58 | (terminal _CHAR_2 "18.2" (range "#x20-#xD7FF")) 59 | (terminal _CHAR_3 
"18.3" (range "#xE000-#xFFFD")) 60 | (terminal _CHAR_4 "18.4" (range "#x10000-#x10FFFF")) 61 | (terminal R_CHAR "19" (diff CHAR _R_CHAR_1)) 62 | (terminal _R_CHAR_1 "19.1" (alt ']' '-' HEX)) 63 | (terminal POSTFIX "20" (range "?*+")) 64 | (terminal PASS "21" (alt _PASS_1 _PASS_2 _PASS_3 _PASS_4)) 65 | (terminal _PASS_1 "21.1" (range "#x9#xA#xD#x20")) 66 | (terminal _PASS_2 "21.2" (seq _PASS_5 _PASS_6)) 67 | (terminal _PASS_5 "21.5" (alt _PASS_7 '//')) 68 | (terminal _PASS_7 "21.7" (diff '#' '#x')) 69 | (terminal _PASS_6 "21.6" (star _PASS_8)) 70 | (terminal _PASS_8 "21.8" (range "^#xA#xD")) 71 | (terminal _PASS_3 "21.3" (seq '/*' _PASS_9 '*/')) 72 | (terminal _PASS_9 "21.9" (star _PASS_10)) 73 | (terminal _PASS_10 "21.10" (alt _PASS_11 _PASS_12)) 74 | (terminal _PASS_11 "21.11" (opt _PASS_13)) 75 | (terminal _PASS_13 "21.13" (seq '*' _PASS_14)) 76 | (terminal _PASS_14 "21.14" (range "^/")) 77 | (terminal _PASS_12 "21.12" (range "^*")) 78 | (terminal _PASS_4 "21.4" (seq '(*' _PASS_15 '*)')) 79 | (terminal _PASS_15 "21.15" (star _PASS_16)) 80 | (terminal _PASS_16 "21.16" (alt _PASS_17 _PASS_18)) 81 | (terminal _PASS_17 "21.17" (opt _PASS_19)) 82 | (terminal _PASS_19 "21.19" (seq '*' _PASS_20)) 83 | (terminal _PASS_20 "21.20" (range "^)")) 84 | (terminal _PASS_18 "21.18" (range "^*")) 85 | (pass _pass (seq PASS))) 86 | -------------------------------------------------------------------------------- /examples/abnf/abnf.peg.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule rulelist (plus _rulelist_1)) 3 | (rule _rulelist_1 (alt rule _rulelist_2)) 4 | (rule _rulelist_2 (seq _rulelist_3 c_nl)) 5 | (rule _rulelist_3 (star c_wsp)) 6 | (rule rule (seq rulename defined_as elements c_nl)) 7 | (rule elements (seq alternation _elements_1)) 8 | (rule _elements_1 (star c_wsp)) 9 | (rule alternation (seq concatenation _alternation_1)) 10 | (rule _alternation_1 (star _alternation_2)) 11 | (rule _alternation_2 (seq _alternation_3 
"/" _alternation_4 concatenation)) 12 | (rule _alternation_3 (star c_wsp)) 13 | (rule _alternation_4 (star c_wsp)) 14 | (rule concatenation (seq repetition _concatenation_1)) 15 | (rule _concatenation_1 (star _concatenation_2)) 16 | (rule _concatenation_2 (seq _concatenation_3 repetition)) 17 | (rule _concatenation_3 (plus c_wsp)) 18 | (rule repetition (seq _repetition_1 element)) 19 | (rule _repetition_1 (opt repeat)) 20 | (rule repeat (alt _repeat_1 _repeat_2)) 21 | (rule _repeat_1 (seq _repeat_3 "*" _repeat_4)) 22 | (rule _repeat_3 (star DIGIT)) 23 | (rule _repeat_4 (star DIGIT)) 24 | (rule _repeat_2 (plus DIGIT)) 25 | (rule element (alt rulename group option char_val num_val prose_val)) 26 | (rule group (seq "(" _group_1 alternation _group_2 ")")) 27 | (rule _group_1 (star c_wsp)) 28 | (rule _group_2 (star c_wsp)) 29 | (rule option (seq "[" _option_1 alternation _option_2 "]")) 30 | (rule _option_1 (star c_wsp)) 31 | (rule _option_2 (star c_wsp)) 32 | (rule char_val (alt case_insensitive_string case_sensitive_string)) 33 | (rule case_insensitive_string (seq _case_insensitive_string_1 quoted_string)) 34 | (rule _case_insensitive_string_1 (opt "%i")) 35 | (rule case_sensitive_string (seq "%s" quoted_string)) 36 | (rule num_val (seq "%" _num_val_1)) 37 | (rule _num_val_1 (alt bin_val dec_val hex_val)) 38 | (terminals _terminals (seq)) 39 | (terminal rulename (seq ALPHA _rulename_1)) 40 | (rule _rulename_1 (star _rulename_2)) 41 | (rule _rulename_2 (alt ALPHA DIGIT "-")) 42 | (terminal defined_as (seq _defined_as_1 _defined_as_2 _defined_as_3)) 43 | (rule _defined_as_1 (star c_wsp)) 44 | (rule _defined_as_2 (alt "=" "=/")) 45 | (rule _defined_as_3 (star c_wsp)) 46 | (terminal c_wsp (alt WSP _c_wsp_1)) 47 | (rule _c_wsp_1 (seq c_nl WSP)) 48 | (terminal c_nl (alt COMMENT CRLF)) 49 | (terminal comment (seq ";" _comment_1 CRLF)) 50 | (rule _comment_1 (star _comment_2)) 51 | (rule _comment_2 (alt WSP VCHAR)) 52 | (terminal quoted_string (seq DQUOTE _quoted_string_1 
DQUOTE)) 53 | (rule _quoted_string_1 (star _quoted_string_2)) 54 | (terminal _quoted_string_2 (range "#x20-#x21#x23-#x7E")) 55 | (terminal bin_val (seq "b" _bin_val_1 _bin_val_2)) 56 | (rule _bin_val_1 (plus BIT)) 57 | (rule _bin_val_2 (opt _bin_val_3)) 58 | (rule _bin_val_3 (alt _bin_val_4 _bin_val_5)) 59 | (rule _bin_val_4 (plus _bin_val_6)) 60 | (rule _bin_val_6 (seq "." _bin_val_7)) 61 | (rule _bin_val_7 (plus BIT)) 62 | (rule _bin_val_5 (seq "-" _bin_val_8)) 63 | (rule _bin_val_8 (plus BIT)) 64 | (terminal dec_val (seq "d" _dec_val_1 _dec_val_2)) 65 | (rule _dec_val_1 (plus DIGIT)) 66 | (rule _dec_val_2 (opt _dec_val_3)) 67 | (rule _dec_val_3 (alt _dec_val_4 _dec_val_5)) 68 | (rule _dec_val_4 (plus _dec_val_6)) 69 | (rule _dec_val_6 (seq "." _dec_val_7)) 70 | (rule _dec_val_7 (plus DIGIT)) 71 | (rule _dec_val_5 (seq "-" _dec_val_8)) 72 | (rule _dec_val_8 (plus DIGIT)) 73 | (terminal hex_val (seq "x" _hex_val_1 _hex_val_2)) 74 | (rule _hex_val_1 (plus HEXDIG)) 75 | (rule _hex_val_2 (opt _hex_val_3)) 76 | (rule _hex_val_3 (alt _hex_val_4 _hex_val_5)) 77 | (rule _hex_val_4 (plus _hex_val_6)) 78 | (rule _hex_val_6 (seq "." 
_hex_val_7)) 79 | (rule _hex_val_7 (plus HEXDIG)) 80 | (rule _hex_val_5 (seq "-" _hex_val_8)) 81 | (rule _hex_val_8 (plus HEXDIG)) 82 | (terminal prose_val (seq "<" _prose_val_1 ">")) 83 | (rule _prose_val_1 (star _prose_val_2)) 84 | (terminal _prose_val_2 (range "#x20-#x3D#x3F-#x7E")) 85 | (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) 86 | (terminal BIT (alt "0" "1")) 87 | (terminal CHAR (range "#x01-#x7F")) 88 | (terminal CR (hex "#x0D")) 89 | (terminal CRLF (seq _CRLF_1 LF)) 90 | (terminal _CRLF_1 (opt CR)) 91 | (terminal CTL (alt _CTL_1 _CTL_2)) 92 | (terminal _CTL_1 (range "#x00-#x1F")) 93 | (terminal _CTL_2 (hex "#x7F")) 94 | (terminal DIGIT (range "#x30-#x39")) 95 | (terminal DQUOTE (hex "#x22")) 96 | (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) 97 | (terminal HTAB (hex "#x09")) 98 | (terminal LF (hex "#x0A")) 99 | (terminal LWSP (star _LWSP_1)) 100 | (terminal _LWSP_1 (alt WSP _LWSP_2)) 101 | (terminal _LWSP_2 (seq CRLF WSP)) 102 | (terminal OCTET (range "#x00-#xFF")) 103 | (terminal SP (hex "#x20")) 104 | (terminal VCHAR (range "#x21-#x7E")) 105 | (terminal WSP (alt SP HTAB))) 106 | -------------------------------------------------------------------------------- /etc/turtle.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule turtleDoc (star statement)) 3 | (rule statement (alt directive (seq triples '.'))) 4 | (rule directive (alt prefixID base sparqlPrefix sparqlBase)) 5 | (rule prefixID (seq '@prefix' PNAME_NS IRIREF '.')) 6 | (rule base (seq '@base' IRIREF '.')) 7 | (rule sparqlPrefix (seq "PREFIX" PNAME_NS IRIREF)) 8 | (rule sparqlBase (seq "BASE" IRIREF)) 9 | (rule triples 10 | (alt 11 | (seq subject predicateObjectList) 12 | (seq blankNodePropertyList (opt predicateObjectList))) ) 13 | (rule predicateObjectList 14 | (seq verb objectList (star (seq ';' (opt (seq verb objectList)))))) 15 | (rule objectList (seq object (star (seq ',' object)))) 16 | (rule verb (alt predicate 'a')) 17 | (rule 
subject (alt iri BlankNode collection)) 18 | (rule predicate (seq iri)) 19 | (rule object (alt iri BlankNode collection blankNodePropertyList literal)) 20 | (rule literal (alt RDFLiteral NumericLiteral BooleanLiteral)) 21 | (rule blankNodePropertyList (seq '[' predicateObjectList ']')) 22 | (rule collection (seq '(' (star object) ')')) 23 | (rule NumericLiteral (alt INTEGER DECIMAL DOUBLE)) 24 | (rule RDFLiteral (seq String (opt (alt LANGTAG (seq '^^' iri))))) 25 | (rule BooleanLiteral (alt 'true' 'false')) 26 | (rule String 27 | (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE 28 | STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE )) 29 | (rule iri (alt IRIREF PrefixedName)) 30 | (rule PrefixedName (alt PNAME_LN PNAME_NS)) 31 | (rule BlankNode (alt BLANK_NODE_LABEL ANON)) 32 | (terminals _terminals (seq)) 33 | (terminal IRIREF (seq '<' (star (alt (range "^#x00-#x20<>\"{}|^`\\") UCHAR)) '>')) 34 | (terminal PNAME_NS (seq (opt PN_PREFIX) ':')) 35 | (terminal PNAME_LN (seq PNAME_NS PN_LOCAL)) 36 | (terminal BLANK_NODE_LABEL 37 | (seq '_:' (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS '.')) PN_CHARS)))) 38 | (terminal LANGTAG (seq '@' (plus (range "a-zA-Z")) (star (seq '-' (plus (range "a-zA-Z0-9")))))) 39 | (terminal INTEGER (seq (opt (range "+-")) (plus (range "0-9")))) 40 | (terminal DECIMAL (seq (opt (range "+-")) (seq (star (range "0-9")) '.' (plus (range "0-9"))))) 41 | (terminal DOUBLE 42 | (seq 43 | (opt (range "+-")) 44 | (alt 45 | (seq (plus (range "0-9")) '.' (star (range "0-9")) EXPONENT) 46 | (seq '.' 
(plus (range "0-9")) EXPONENT) 47 | (seq (plus (range "0-9")) EXPONENT)) )) 48 | (terminal EXPONENT (seq (range "eE") (opt (range "+-")) (plus (range "0-9")))) 49 | (terminal STRING_LITERAL_QUOTE 50 | (seq '"' (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) '"')) 51 | (terminal STRING_LITERAL_SINGLE_QUOTE 52 | (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'")) 53 | (terminal STRING_LITERAL_LONG_SINGLE_QUOTE 54 | (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''")) 55 | (terminal STRING_LITERAL_LONG_QUOTE 56 | (seq '"""' (star (seq (opt (alt '"' '""')) (alt (range "^\"\\") ECHAR UCHAR))) '"""')) 57 | (terminal UCHAR 58 | (alt (seq '\\u' HEX HEX HEX HEX) (seq '\\U' HEX HEX HEX HEX HEX HEX HEX HEX))) 59 | (terminal ECHAR (seq '\\' (range "tbnrf\\\"'"))) 60 | (terminal NIL (seq '(' (star WS) ')')) 61 | (terminal WS (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))) 62 | (terminal ANON (seq '[' (star WS) ']')) 63 | (terminal PN_CHARS_BASE 64 | (alt 65 | (range "A-Z") 66 | (range "a-z") 67 | (range "#x00C0-#x00D6") 68 | (range "#x00D8-#x00F6") 69 | (range "#x00F8-#x02FF") 70 | (range "#x0370-#x037D") 71 | (range "#x037F-#x1FFF") 72 | (range "#x200C-#x200D") 73 | (range "#x2070-#x218F") 74 | (range "#x2C00-#x2FEF") 75 | (range "#x3001-#xD7FF") 76 | (range "#xF900-#xFDCF") 77 | (range "#xFDF0-#xFFFD") 78 | (range "#x10000-#xEFFFF")) ) 79 | (terminal PN_CHARS_U (alt PN_CHARS_BASE '_')) 80 | (terminal PN_CHARS 81 | (alt PN_CHARS_U '-' 82 | (range "0-9") 83 | (hex "#x00B7") 84 | (range "#x0300-#x036F") 85 | (range "#x203F-#x2040")) ) 86 | (terminal PN_PREFIX (seq PN_CHARS_BASE (opt (seq (star (alt PN_CHARS '.')) PN_CHARS)))) 87 | (terminal PN_LOCAL 88 | (seq 89 | (alt PN_CHARS_U ':' (range "0-9") PLX) 90 | (opt (seq (star (alt PN_CHARS '.' 
#!/usr/bin/env ruby
# ebnf --- Process EBNF to generate the following:
#  * S-Expression
#  * Turtle
#  * Either of the above, transformed to BNF
#  * And with First/Follow rules

$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), "..", 'lib')))
require 'rubygems'
require 'getoptlong'
# Logger is used below; require it explicitly rather than relying on a transitive load.
require 'logger'
require 'ebnf'
require 'rdf/spec'

# Defaults; individual entries are overridden by the command-line switches below.
# `level` is used as the Logger severity threshold (4 == Logger::FATAL).
options = {
  output_format: :sxp,
  prefix: "ttl",
  namespace: "http://www.w3.org/ns/formats/Turtle#",
  level: 4
}

# `input` stays nil unless --evaluate or a file argument supplies it (STDIN is then used);
# `out` is replaced by an opened file when --output is given.
input, out = nil, STDOUT

# Option table. Each entry is [long name, (optional short name), argument kind, help text].
# Everything but the trailing help text is handed to GetoptLong.
OPT_ARGS = [
  ["--debug",             GetoptLong::NO_ARGUMENT,      "Turn on debugging output"],
  ["--bnf",               GetoptLong::NO_ARGUMENT,      "Transform EBNF to BNF"],
  ["--evaluate", "-e",    GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument as an EBNF document"],
  ["--format", "-f",      GetoptLong::REQUIRED_ARGUMENT, "Specify output format one of abnf, abnfh, ebnf, html, isoebnf, isoebnfh, ttl, sxp, or rb"],
  ["--input-format",      GetoptLong::REQUIRED_ARGUMENT, "Specify input format one of abnf, ebnf, isoebnf, native, or sxp"],
  ["--ll1",               GetoptLong::REQUIRED_ARGUMENT, "Generate First/Follow rules, argument is start symbol"],
  ["--mod-name",          GetoptLong::REQUIRED_ARGUMENT, "Module name used when creating ruby tables"],
  ["--namespace", "-n",   GetoptLong::REQUIRED_ARGUMENT, "Namespace to use when generating Turtle"],
  ["--output", "-o",      GetoptLong::REQUIRED_ARGUMENT, "Output to the specified file path"],
  ["--peg",               GetoptLong::NO_ARGUMENT,      "Transform EBNF to PEG"],
  ["--prefix", "-p",      GetoptLong::REQUIRED_ARGUMENT, "Prefix to use when generating Turtle"],
  ["--progress", "-v",    GetoptLong::NO_ARGUMENT,      "Detail on execution"],
  ["--renumber",          GetoptLong::NO_ARGUMENT,      "Renumber parsed rules"],
  ["--validate",          GetoptLong::NO_ARGUMENT,      "Validate grammar and any generated HTML"],
  ["--help", "-?",        GetoptLong::NO_ARGUMENT,      "This message"]
]

# Print the version banner and an aligned option summary to STDERR, then exit with status 1.
def usage
  STDERR.puts %{#{$0} Version #{EBNF::VERSION}}
  STDERR.puts %{Usage: #{$0} [options] file ...}
  # Column width: the long name, plus ", " and the short name when one is present.
  width = OPT_ARGS.map do |o|
    l = o.first.length
    l += o[1].length + 2 if o[1].is_a?(String)
    l
  end.max
  OPT_ARGS.each do |o|
    s = " %-*s " % [width, (o[1].is_a?(String) ? o[0, 2].join(', ') : o[0])]
    s += o.last
    STDERR.puts s
  end
  exit(1)
end

# Drop the help text (last element) from each entry before handing it to GetoptLong.
opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]})

opts.each do |opt, arg|
  case opt
  when '--debug'        then options[:level] = 0
  when '--bnf'          then options[:bnf] = true
  when '--evaluate'     then input = arg
  when '--input-format'
    unless %w(abnf ebnf isoebnf native sxp).include?(arg)
      STDERR.puts("unrecognized input format #{arg}")
      usage
    end
    options[:format] = arg.to_sym
  when '--format'
    unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp ttl).include?(arg)
      STDERR.puts("unrecognized output format #{arg}")
      usage
    end
    options[:output_format] = arg.to_sym
  when '--ll1'          then (options[:ll1] ||= []) << arg.to_sym
  when '--mod-name'     then options[:mod_name] = arg
  when '--output'       then out = File.open(arg, "w")
  when '--peg'          then options[:peg] = true
  when '--prefix'       then options[:prefix] = arg
  when '--renumber'     then options[:renumber] = true
  when '--namespace'    then options[:namespace] = arg
  # --debug wins over --progress regardless of the order they were given in.
  when '--progress'     then options[:level] = 1 unless options[:level] == 0
  when '--validate'     then options[:validate] = true
  when '--help'         then usage
  end
end

begin
  # A file argument takes precedence over any --evaluate text.
  input = File.open(ARGV[0]) if ARGV[0]

  logger = Logger.new(STDERR)
  logger.level = options[:level] || Logger::ERROR
  logger.formatter = lambda {|severity, datetime, progname, msg| "%5s %s\n" % [severity, msg]}

  ebnf = EBNF.parse(input || STDIN, logger: logger, **options)
  # LL(1) table generation operates on the BNF form, so --ll1 implies --bnf.
  ebnf.make_bnf if options[:bnf] || options[:ll1]
  ebnf.make_peg if options[:peg]
  if options[:ll1]
    ebnf.first_follow(*options[:ll1])
    ebnf.build_tables
  end

  ebnf.renumber! if options[:renumber]

  res = case options[:output_format]
  when :abnf     then ebnf.to_s(format: :abnf)
  when :abnfh    then ebnf.to_html(format: :abnf, validate: options[:validate])
  when :ebnf     then ebnf.to_s
  when :html     then ebnf.to_html(validate: options[:validate])
  when :isoebnf  then ebnf.to_s(format: :isoebnf)
  when :isoebnfh then ebnf.to_html(format: :isoebnf, validate: options[:validate])
  when :sxp      then ebnf.to_sxp
  when :ttl      then ebnf.to_ttl(options[:prefix], options[:namespace])
  when :rb       then ebnf.to_ruby(out, grammarFile: ARGV[0], **options)
  else                ebnf.ast.inspect
  end

  out.puts res
ensure
  # Release the handles this script opened. `input` may be a String (from --evaluate)
  # or nil, and STDOUT is never closed here.
  input.close if input.respond_to?(:close) && !input.closed?
  out.close unless out.equal?(STDOUT) || out.closed?
end
#
# Produces an Abstract Syntax Tree in S-Expression form for the input grammar file
require 'ebnf/rule'
require 'ebnf/terminals'
require 'ebnf/peg/parser'
require 'sxp'

# PEG-based parser for EBNF grammars, used as spec data.
#
# Terminal and production handlers are registered with the class-level
# `terminal`/`production`/`start_production` DSL; `#initialize` runs the
# parse and collects the resulting rules in {#ast}.
class EBNFPegParser
  include EBNF::PEG::Parser
  include EBNF::Terminals

  # Abstract syntax tree from parse
  #
  # @return [Array]
  attr_reader :ast

  terminal(:LHS, LHS) do |value|
    # Extract the `[id, symbol]` pair from text shaped like "[id] symbol ::="
    value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first
  end

  terminal(:SYMBOL, SYMBOL) do |value|
    value.to_sym
  end

  terminal(:HEX, HEX)

  terminal(:RANGE, RANGE_NOT_LHS, unescape: true) do |value|
    # Strip the first and last characters (the enclosing delimiters)
    [:range, value[1..-2]]
  end

  terminal(:O_RANGE, O_RANGE, unescape: true) do |value|
    [:range, value[1..-2]]
  end

  terminal(:STRING1, STRING1, unescape: true) do |value|
    # Drop the surrounding quotes and remember the quoting style
    value[1..-2].tap {|s| s.quote_style = :dquote}
  end

  terminal(:STRING2, STRING2, unescape: true) do |value|
    value[1..-2].tap {|s| s.quote_style = :squote}
  end

  terminal(:POSTFIX, POSTFIX)

  production(:ebnf) do |input|
    # Cause method_missing to invoke something in our context
    to_sxp
  end

  production(:declaration, clear_packrat: true) do |value, data, callback|
    # current contains a declaration.
    # Invoke callback (only '@terminals' is reported from here)
    callback.call(:terminals) if value == '@terminals'
  end

  start_production(:rule, as_hash: true)
  production(:rule, clear_packrat: true) do |value, data, callback|
    # current contains an expression.
    # Invoke callback with a new rule built from the LHS and the expression
    id, sym = value[:LHS]
    expression = value[:expression]
    callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
  end

  production(:expression) do |value|
    value.first[:alt]
  end

  production(:alt) do |value|
    if value.last[:_alt_1].length > 0
      [:alt, value.first[:seq]] + value.last[:_alt_1]
    else
      # A single alternative collapses to its sequence
      value.first[:seq]
    end
  end

  production(:_alt_1) do |value|
    value.map {|a1| a1.last[:seq]}.compact # Get rid of '|'
  end

  production(:seq) do |value|
    # A one-element sequence collapses to the element itself
    value.length == 1 ? value.first : ([:seq] + value)
  end

  start_production(:diff, as_hash: true)
  production(:diff) do |value|
    if value[:_diff_1]
      [:diff, value[:postfix], value[:_diff_1]]
    else
      value[:postfix]
    end
  end

  production(:_diff_1) do |value|
    value.last[:postfix] if value
  end

  start_production(:postfix, as_hash: true)
  production(:postfix) do |value|
    # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
    case value[:_postfix_1]
    when "*" then [:star, value[:primary]]
    when "+" then [:plus, value[:primary]]
    when "?" then [:opt, value[:primary]]
    else value[:primary]
    end
  end

  production(:primary) do |value|
    # More than two elements means a three-part form; keep the middle expression
    Array(value).length > 2 ? value[1][:expression] : value
  end

  start_production(:pass, as_hash: true)
  production(:pass) do |value, data, callback|
    # Invoke callback
    callback.call(:pass, value[:expression])
  end

  # ## Parser invocation.
  # Parses `input` against the PEG form of `etc/ebnf.ebnf` and collects the
  # resulting rules in {#ast}.
  #
  # The `block` parameter is accepted for interface compatibility; this
  # implementation does not call it.
  #
  # @param  [#read, #to_s] input
  # @param  [Hash{Symbol => Object}] options
  #   Also passed through to `parse`.
  # @option options [Boolean] :progress
  #   Show progress of parser productions
  # @return [EBNFPegParser]
  def initialize(input, **options, &block)
    # Instantiate grammar from ebnf.ebnf. The block form of File.open closes
    # the handle once the grammar has been parsed.
    ebnf = File.expand_path("../../../../etc/ebnf.ebnf", __FILE__)
    grammar = File.open(ebnf) {|file| EBNF.parse(file)}
    rules = grammar.make_peg.ast

    @options = options.dup
    @input = input.respond_to?(:read) ? input.read : input.to_s

    # Becomes true once '@terminals' is seen; later rules are marked as terminals
    parsing_terminals = false
    @ast = []
    parse(@input, :ebnf, rules,
          whitespace: EBNF::Terminals::PASS,
          **options
    ) do |context, *data|
      rule = case context
      when :terminals
        parsing_terminals = true
        EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
      when :pass
        EBNF::Rule.new(nil, nil, data.first, kind: :pass)
      when :rule
        data.first.tap {|r| r.kind = :terminal if parsing_terminals}
      end
      @ast << rule if rule
    end
    @ast
  end

  # Output formatted S-Expression of grammar
  #
  # @param  [Hash{Symbol => Object}] options (currently unused)
  # @return [String]
  def to_sxp(**options)
    require 'sxp' unless defined?(SXP)
    # Output rules as a formatted S-Expression
    SXP::Generator.string(@ast.map(&:for_sxp))
  end
end
3 | require 'spec_helper' 4 | require 'ebnf' 5 | require 'sxp' 6 | 7 | describe EBNF::Native do 8 | let(:logger) {RDF::Spec.logger} 9 | after(:each) do |example| 10 | puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) 11 | end 12 | 13 | context "rule variations" do 14 | { 15 | "legal rule name": [ 16 | 'rulename ::= "foo"', 17 | %{((rule rulename (seq "foo")))} 18 | ], 19 | "prolog": [ 20 | %{[2] Prolog ::= BaseDecl? PrefixDecl*}, 21 | %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))} 22 | ], 23 | "aliteration": [ 24 | %{[2] declaration ::= '@terminals' | '@pass'}, 25 | %{((rule declaration "2" (alt '@terminals' '@pass')))}, 26 | ], 27 | "posfix": [ 28 | %{[9] postfix ::= primary ( [?*+] )?}, 29 | %{((rule postfix "9" (seq primary (opt (range "?*+")))))}, 30 | ], 31 | "diff": [ 32 | %{[18] STRING2 ::= "'" (CHAR - "'")* "'"}, 33 | %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))}, 34 | ], 35 | "IRIREF": [ 36 | %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), 37 | %{((terminal IRIREF "18" 38 | (seq '<' 39 | (star 40 | (alt 41 | (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) 42 | UCHAR)) 43 | '>')))}, 44 | ], 45 | "UCHAR": [ 46 | %(UCHAR ::= ( '\\u' HEX HEX HEX HEX ) | ( '\\U' HEX HEX HEX HEX HEX HEX HEX HEX )), 47 | %{( 48 | (terminal UCHAR 49 | (alt (seq '\\\\u' HEX HEX HEX HEX) (seq '\\\\U' HEX HEX HEX HEX HEX HEX HEX HEX))) )} 50 | ] 51 | }.each do |title, (input, expect)| 52 | it title do 53 | expect(parse(input).to_sxp).to produce(expect, logger) 54 | end 55 | end 56 | 57 | context "without rule identifiers" do 58 | { 59 | "prolog": [ 60 | %{Prolog ::= BaseDecl? PrefixDecl*}, 61 | %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))} 62 | ], 63 | "prolog (with brackets)": [ 64 | %{ ::= ? 
*}, 65 | %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))} 66 | ], 67 | "aliteration": [ 68 | %{declaration ::= '@terminals' | '@pass'}, 69 | %{((rule declaration (alt '@terminals' '@pass')))}, 70 | ], 71 | "posfix": [ 72 | %{postfix ::= primary ( [?*+] )?}, 73 | %{((rule postfix (seq primary (opt (range "?*+")))))}, 74 | ], 75 | "diff": [ 76 | %{STRING2 ::= "'" (CHAR - "'")* "'"}, 77 | %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))}, 78 | ], 79 | "IRIREF": [ 80 | %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), 81 | %{((terminal IRIREF 82 | (seq '<' 83 | (star 84 | (alt 85 | (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) 86 | UCHAR)) 87 | '>')))}, 88 | ], 89 | }.each do |title, (input, expect)| 90 | it title do 91 | expect(parse(input).to_sxp).to produce(expect, logger) 92 | end 93 | end 94 | end 95 | end 96 | 97 | describe "#expression" do 98 | { 99 | "'abc' def" => %{(seq 'abc' def)}, 100 | %{[0-9]} => %{(range "0-9")}, 101 | %{#x00B7} => %{(hex "#x00B7")}, 102 | %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")}, 103 | %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))}, 104 | %{a b c} => %{(seq a b c)}, 105 | %{a? b c} => %{(seq (opt a) b c)}, 106 | %{a - b} => %{(diff a b)}, 107 | %{(a - b) - c} => %{(diff (diff a b) c)}, 108 | %{a b? c} => %{(seq a (opt b) c)}, 109 | %{a | b | c} => %{(alt a b c)}, 110 | %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))}, 111 | %{foo | x xlist} => %{(alt foo (seq x xlist))}, 112 | %{a | (b - c)} => %{(alt a (diff b c))}, 113 | %{a b | c d} => %{(alt (seq a b) (seq c d))}, 114 | %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, 115 | %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => 116 | %{(alt NCCHAR1 '-' (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))}, 117 | %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} => 118 | %{(seq '<' (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) '>')}, 119 | }.each do |input, expected| 120 | it "given #{input.inspect} produces #{expected}" do 121 | rule = parse("rule ::= #{input}").ast.first 122 | expect(rule.expr.to_sxp).to produce(expected, @debug) 123 | end 124 | end 125 | end 126 | 127 | context "illegal syntax" do 128 | { 129 | "diff missing second operand": %{rule ::= a -}, 130 | "unrecognized terminal" => %{rule ::= %foo%}, 131 | }.each do |title, input| 132 | it title do 133 | expect {parse(input)}.to raise_error(SyntaxError) 134 | end 135 | end 136 | end 137 | 138 | it "parses EBNF grammar" do 139 | gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) 140 | expect(gram).to be_valid 141 | end 142 | 143 | def parse(input, **options) 144 | @debug = [] 145 | EBNF.parse(input, debug: @debug, format: :native, **options) 146 | end 147 | end 148 | -------------------------------------------------------------------------------- /spec/parser_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift "." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | require 'sxp' 6 | 7 | describe EBNF::Parser do 8 | let(:logger) {RDF::Spec.logger} 9 | after(:each) do |example| 10 | puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) 11 | end 12 | 13 | context "rule variations" do 14 | { 15 | "legal rule name": [ 16 | 'rulename ::= "foo"', 17 | %{((rule rulename (seq "foo")))} 18 | ], 19 | "prolog": [ 20 | %{[2] Prolog ::= BaseDecl? 
PrefixDecl*}, 21 | %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))} 22 | ], 23 | "aliteration": [ 24 | %{[2] declaration ::= '@terminals' | '@pass'}, 25 | %{((rule declaration "2" (alt '@terminals' '@pass')))}, 26 | ], 27 | "posfix": [ 28 | %{[9] postfix ::= primary ( [?*+] )?}, 29 | %{((rule postfix "9" (seq primary (opt (range "?*+")))))}, 30 | ], 31 | "diff": [ 32 | %{[18] STRING2 ::= "'" (CHAR - "'")* "'"}, 33 | %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))}, 34 | ], 35 | "IRIREF": [ 36 | %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), 37 | %{((terminal IRIREF "18" 38 | (seq '<' 39 | (star 40 | (alt 41 | (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) 42 | UCHAR)) 43 | '>')))}, 44 | ], 45 | "minimal whitespace": [ 46 | %{[xx]minimal::=whitespace[yy]whitespace::=" "}, 47 | %{((rule minimal "xx" (seq whitespace)) 48 | (rule whitespace "yy" (seq " ")))} 49 | ] 50 | }.each do |title, (input, expect)| 51 | it title do 52 | expect(parse(input).to_sxp).to produce(expect, logger) 53 | end 54 | end 55 | 56 | context "without rule identifiers" do 57 | { 58 | "prolog": [ 59 | %{Prolog ::= BaseDecl? 
PrefixDecl*}, 60 | %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))} 61 | ], 62 | "aliteration": [ 63 | %{declaration ::= '@terminals' | '@pass'}, 64 | %{((rule declaration (alt '@terminals' '@pass')))}, 65 | ], 66 | "posfix": [ 67 | %{postfix ::= primary ( [?*+] )?}, 68 | %{((rule postfix (seq primary (opt (range "?*+")))))}, 69 | ], 70 | "diff": [ 71 | %{STRING2 ::= "'" (CHAR - "'")* "'"}, 72 | %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))}, 73 | ], 74 | "IRIREF": [ 75 | %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), 76 | %{((terminal IRIREF 77 | (seq '<' 78 | (star 79 | (alt 80 | (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) 81 | UCHAR)) 82 | '>')))}, 83 | ], 84 | }.each do |title, (input, expect)| 85 | it title do 86 | expect(parse(input).to_sxp).to produce(expect, logger) 87 | end 88 | end 89 | end 90 | end 91 | 92 | describe "#expression" do 93 | { 94 | "'abc' def" => %{(seq 'abc' def)}, 95 | %{[0-9]} => %{(range "0-9")}, 96 | %{#x00B7} => %{(hex "#x00B7")}, 97 | %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")}, 98 | %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))}, 99 | %{a b c} => %{(seq a b c)}, 100 | %{a? b c} => %{(seq (opt a) b c)}, 101 | %{a - b} => %{(diff a b)}, 102 | %{(a - b) - c} => %{(diff (diff a b) c)}, 103 | %{a b? c} => %{(seq a (opt b) c)}, 104 | %{a | b | c} => %{(alt a b c)}, 105 | %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))}, 106 | %{foo | x xlist} => %{(alt foo (seq x xlist))}, 107 | %{a | (b - c)} => %{(alt a (diff b c))}, 108 | %{a b | c d} => %{(alt (seq a b) (seq c d))}, 109 | %{[a-z]} => %{(range "a-z")}, 110 | %{[a-zA-Z]} => %{(range "a-zA-Z")}, 111 | %{[#x20-#x22]} => %{(range "#x20-#x22")}, 112 | %{[abc]} => %{(range "abc")}, 113 | %{[abc-]} => %{(range "abc-")}, 114 | %{[#x20#x21#x22]} => %{(range "#x20#x21#x22")}, 115 | %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, 116 | %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => 117 | %{(alt NCCHAR1 '-' (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))}, 118 | %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} => 119 | %{(seq '<' (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) '>')}, 120 | }.each do |input, expected| 121 | it "given #{input.inspect} produces #{expected}" do 122 | rule = parse("rule ::= #{input}").ast.first 123 | expect(rule.expr.to_sxp).to produce(expected, @debug) 124 | end 125 | end 126 | end 127 | 128 | context "illegal syntax" do 129 | { 130 | "illegal rule name": %{$rule.name ::= foo}, 131 | "diff missing second operand": %{rule ::= a -}, 132 | "unrecognized terminal" => %{rule ::= %foo%}, 133 | "unopened paren" => %{rule ::= a) b c} 134 | }.each do |title, input| 135 | it title do 136 | expect {parse(input)}.to raise_error(SyntaxError) 137 | end 138 | end 139 | end 140 | 141 | it "parses EBNF grammar" do 142 | gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) 143 | expect(gram).to be_valid 144 | end 145 | 146 | def parse(input, **options) 147 | @debug = [] 148 | EBNF.parse(input, debug: @debug, format: :ebnf, **options) 149 | end 150 | end 151 | -------------------------------------------------------------------------------- /examples/calc/calc.rb: -------------------------------------------------------------------------------- 1 | # # EBNF Parser for EBNF. 
#
# Example calculator: parses arithmetic expressions with a PEG parser built from `calc.ebnf` and prints the evaluated result.
require 'ebnf'
require 'ebnf/terminals'
require 'ebnf/peg/parser'
require 'sxp'
require 'logger'

class Calc
  include EBNF::PEG::Parser

  # Abstract syntax tree from parse
  #
  # @return [Array]
  attr_reader :ast

  ##
  # The calculator grammar comes from a [Wikipedia entry on Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Examples), with some small concessions.
  #
  #     [1] Expr    ::= Sum
  #     [2] Sum     ::= Product (('+' | '-') Product)*
  #     [3] Product ::= Power (('*' | '/') Power)*
  #     [4] Power   ::= Value ('^' Power)?
  #     [5] Value   ::= NUMBER | '(' Expr ')'
  #     [6] NUMBER  ::= [0-9]+
  #
  # This, in turn, is turned into S-Expression with sub-rules added for embedded rules, which allow them to be accessed independently:
  #
  #     (
  #      (rule Expr "1" (seq Sum))
  #      (rule Sum "2" (seq Product _Sum_1))
  #      (rule _Sum_1 "2.1" (star _Sum_2))
  #      (rule _Sum_2 "2.2" (seq _Sum_3 Product))
  #      (rule _Sum_3 "2.3" (alt "+" "-"))
  #      (rule Product "3" (seq Power _Product_1))
  #      (rule _Product_1 "3.1" (star _Product_2))
  #      (rule _Product_2 "3.2" (seq _Product_3 Power))
  #      (rule _Product_3 "3.3" (alt "*" "/"))
  #      (rule Power "4" (seq Value _Power_1))
  #      (rule _Power_1 "4.1" (opt _Power_2))
  #      (rule _Power_2 "4.2" (seq "^" Power))
  #      (rule Value "5" (alt NUMBER _Value_1))
  #      (rule _Value_1 "5.1" (seq "(" Expr ")"))
  #      (terminal NUMBER "6" (plus _NUMBER_1))
  #      (terminal _NUMBER_1 "6.1" (range "0-9")))

  ##
  # The calculator evaluates values from each rule and applies operators resulting in the calculated result.

  # [1] Expr := Sum
  #
  #     (rule Expr "1" (seq Sum))
  production(:Expr, clear_packrat: true) do |value|
    value.first[:Sum]
  end

  # [2] Sum := Product (('+' | '-') Product)\*
  #
  #     (rule Sum "2" (seq Product _Sum_1))
  #     (rule _Sum_1 "2.1" (star _Sum_2))
  production(:Sum, clear_packrat: true) do |value|
    product, operations = value.first[:Product], value.last[:_Sum_1]
    # Operations are an array of tuples: [['+', 2], ['-', 3]]
    # Each tuple is applied left to right as `accumulator <op> operand`.
    operations.inject(product) {|accumulator, vv| accumulator.public_send(*vv)}
  end

  # (('+' | '-') Product)\*
  #
  #     (rule _Sum_2 "2.2" (seq _Sum_3 Product))
  #     (rule _Sum_3 "2.3" (alt "+" "-"))
  #
  # Turn [{_Sum_3: "+"}, {Product: N}] into ["+" N]
  production(:_Sum_2) do |value|
    value.map(&:values).flatten
  end

  # [3] Product := Power (('\*' | '/') Power)\*
  #
  #     (rule Product "3" (seq Power _Product_1))
  #     (rule _Product_1 "3.1" (star _Product_2))
  production(:Product, clear_packrat: true) do |value|
    power, operations = value.first[:Power], value.last[:_Product_1]
    # Operations are an array of tuples: [['*', 2], ['/', 3]]
    operations.inject(power) {|accumulator, vv| accumulator.public_send(*vv)}
  end

  # (('\*' | '/') Power)\*
  #
  #     (rule _Product_2 "3.2" (seq _Product_3 Power))
  #     (rule _Product_3 "3.3" (alt "*" "/"))
  #
  # Turn [{_Product_3: "*"}, {Power: N}] into ["*" N]
  production(:_Product_2) do |value|
    value.map(&:values).flatten
  end

  # [4] Power := Value ('^' Power)?
  #
  #     (rule Power "4" (seq Value _Power_1))
  production(:Power, clear_packrat: true) do |value|
    val, pow = value.first[:Value], value.last[:_Power_1]
    # The exponent is optional; without it the value passes through unchanged
    pow ? val.pow(pow) : val
  end

  # ('^' Power)?
  #
  #     (rule _Power_2 "4.2" (seq "^" Power))
  production(:_Power_2) {|value| value.last[:Power]}

  # [5] Value := [0-9]+ | '(' Expr ')'
  #
  #     (rule Value "5" (alt NUMBER _Value_1))
  #     (rule _Value_1 "5.1" (seq "(" Expr ")"))
  production(:Value, clear_packrat: true) do |value|
    case value
    when String then value.to_i          # NUMBER
    when Array  then value[1][:Expr]     # '(' Expr ')': keep the middle element
    end
  end

  # Terminals don't require any special processing, but we could optimize by creating a regular expression such as `/\d+/`.
  #     (terminal NUMBER "6" (plus _NUMBER_1))
  #     (terminal _NUMBER_1 "6.1" (range "0-9")))

  # Instantiate the calculator using the EBNF grammar.
  #
  # @param  [Hash{Symbol => Object}] options
  #   Also passed through to `parse` by {#evaluate}.
  # @option options [Integer] :trace
  #   Trace level. 0(debug), 1(info), 2(warn), 3(error).
  def initialize(**options)
    # Instantiate grammar from calc.ebnf
    ebnf = File.expand_path("../calc.ebnf", __FILE__)

    # Perform PEG-specific transformation to the associated rules, which will be passed directly to the parser.
    # The block form of File.open closes the grammar file once it has been parsed.
    @rules = File.open(ebnf) {|file| EBNF.parse(file).make_peg.ast}

    @options = options.dup

    # If the `trace` option is set, instantiate a logger for collecting trace information.
    if @options.key?(:trace)
      @options[:logger] = Logger.new(STDERR)
      @options[:logger].level = @options[:trace]
      @options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
    end
  end

  # Evaluate an expression
  #
  # Parses `input` starting from the `Expr` rule and writes the result to standard output.
  #
  # @param [String] input
  # @return [nil]
  def evaluate(input)
    result = parse(input, :Expr, @rules, **@options)
    # `result` is the value produced by the Expr production
    puts result
  end
end
44 | %q{((rule shexDoc "1" (star directive)))}, 45 | %q{[1] shexDoc ::= directive* // leading CODE} => 46 | %q{((rule shexDoc "1" (star directive)))}, 47 | %q{[1] shexDoc ::= /* leading CODE */ directive*} => 48 | %q{((rule shexDoc "1" (star directive)))}, 49 | %q{[1] shexDoc ::= (* leading CODE *) directive*} => 50 | %q{((rule shexDoc "1" (star directive)))}, 51 | %q{[156s] STRING_LITERAL1 ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */} => 52 | %q{((terminal STRING_LITERAL1 "156s" 53 | (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'")) )} 54 | }.each do |input, expected| 55 | it "parses #{input.inspect}" do 56 | expect(parse(input).to_sxp).to produce(expected, @debug) 57 | end 58 | 59 | it "parses generated SXP for #{input.inspect}" do 60 | ast = parse(expected, format: :sxp).ast 61 | ast.each {|r| expect(r).to be_a(EBNF::Rule)} 62 | ast.to_sxp 63 | expect(ast.to_sxp).to produce(expected, @debug) 64 | end 65 | end 66 | 67 | it "rejects unknown format" do 68 | expect {parse("foo", format: :unknown)}.to raise_error "unknown input format :unknown" 69 | end 70 | end 71 | 72 | describe "#renumber!" do 73 | it "creates identifiers for grammars without identifiers" do 74 | gram = EBNF.parse("a ::= b d ::= e") 75 | gram.renumber! 76 | expect(gram.ast.map(&:id)).to eq %w{1 2} 77 | end 78 | 79 | it "renumbers grammars with identifiers" do 80 | gram = EBNF.parse("[10] a ::= b [20] d ::= e") 81 | gram.renumber! 82 | expect(gram.ast.map(&:id)).to eq %w{1 2} 83 | end 84 | end 85 | 86 | describe "#validate!" do 87 | let(:simple) {EBNF.parse("a ::= b", format: :native)} 88 | it "notes invalid grammar" do 89 | expect do 90 | expect {simple.validate!}.to raise_error SyntaxError, "In rule a: No rule found for b" 91 | end.to write(:something).to(:error) 92 | end 93 | 94 | it "validates EBNF" do 95 | expect {subject.validate!}.not_to raise_error 96 | end 97 | end 98 | 99 | describe "#valid?" 
do 100 | let(:simple) {EBNF.parse("a ::= b", format: :native)} 101 | it "notes invalid grammar" do 102 | expect do 103 | expect(simple.valid?).to be_falsey 104 | end.to write(:something).to(:error) 105 | end 106 | 107 | it "validates EBNF" do 108 | expect(subject).to be_valid 109 | end 110 | end 111 | 112 | describe "#each" do 113 | it "yields each rule" do 114 | rules = subject.ast.select {|r| r.rule?} 115 | expect {|b| subject.each(:rule, &b)}.to yield_control.exactly(rules.length).times 116 | end 117 | it "yields each terminal" do 118 | terminals = subject.ast.select {|r| r.terminal?} 119 | expect {|b| subject.each(:terminal, &b)}.to yield_control.exactly(terminals.length).times 120 | end 121 | end 122 | 123 | describe "#to_sxp" do 124 | specify {expect(subject.to_sxp).to include("(rule ebnf")} 125 | end 126 | 127 | describe "#to_s" do 128 | specify {expect(subject.to_s).to include("ebnf")} 129 | end 130 | 131 | describe "#to_html" do 132 | specify {expect(subject.to_s).to include("ebnf")} 133 | end 134 | 135 | describe "#to_ruby" do 136 | specify {expect {subject.to_ruby}.to write(:something).to(:output)} 137 | end 138 | 139 | describe "#to_ttl" do 140 | let(:reader) {RDF::Turtle::Reader.new(subject.to_ttl, base_uri: 'http://example.org/')} 141 | specify {expect(reader).to be_valid} 142 | end 143 | 144 | describe "#dup" do 145 | specify {expect(parse(%{[2] Prolog ::= BaseDecl? 
# coding: utf-8
require 'strscan' unless defined?(StringScanner)

# NOTE(review): the namespace is written as nested modules so this file can
# be loaded on its own; this assumes `EBNF` is a module — confirm.
module EBNF
  module LL1
    ##
    # Overload StringScanner with file operations and line counting
    #
    # * Reloads scanner as required until EOF.
    # * Loads to a high-water and reloads when remaining size reaches a low-water.
    #
    # FIXME: Only implements the subset required by the Lexer for now.
    class Scanner < StringScanner
      HIGH_WATER = 512 * 1024 # Hopefully large enough to deal with long multi-line comments
      LOW_WATER  = 4 * 1024

      ##
      # @return [String, IO, StringIO]
      attr_reader :input

      ##
      # The current line number (one-based).
      #
      # @return [Integer]
      attr_accessor :lineno

      ##
      # Create a scanner, from an IO
      #
      # @param [String, IO, #read] input
      # @param [Hash{Symbol => Object}] options
      # @option options[Integer] :high_water (HIGH_WATER)
      # @option options[Integer] :low_water (LOW_WATER)
      # @return [Scanner]
      def initialize(input, **options)
        # Defaults come first so that caller-supplied water marks take effect.
        @options = {high_water: HIGH_WATER, low_water: LOW_WATER}.merge(options)

        @previous_lineno = @lineno = 1
        @input = input.is_a?(String) ? encode_utf8(input) : input
        # For IO-like input start from an empty, mutable buffer that `<<` can grow.
        super(input.is_a?(String) ? @input : +"")
        feed_me
      end

      ##
      # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
      #
      # Does nothing for String input, at end of file, or when the buffer
      # already holds at least `:high_water` unread bytes.
      def ensure_buffer_full
        return unless @input.respond_to?(:eof?) && !@input.eof?

        # Read up to high-water mark ensuring we're at an end of line
        diff = @options[:high_water] - rest_size
        return unless diff > 0 # already full; a negative length would make #read raise

        chunk = @input.read(diff)
        return if chunk.nil?

        chunk = encode_utf8(chunk)
        unless @input.eof?
          # Complete the current line so the buffer ends on a line boundary
          line = @input.gets
          chunk << encode_utf8(line) if line
        end
        self << chunk
      end

      ##
      # Returns true if the scan pointer is at the end of the string
      #
      # @return [Boolean]
      def eos?
        feed_me
        super
      end

      ##
      # Returns the unscanned remainder of the buffer (i.e. everything after the scan pointer),
      # refilling the buffer first when it is below the low-water mark.
      # If there is no more data (eos? = true), it returns "".
      #
      # @return [String]
      def rest
        feed_me
        encode_utf8 super
      end

      ##
      # Tries to match with `pattern` at the current position.
      #
      # If there is a match, the scanner advances the "scan pointer" and returns the matched string.
      # Otherwise, the scanner returns nil.
      #
      # @example
      #     s = StringScanner.new('test string')
      #     p s.scan(/\w+/)   # -> "test"
      #     p s.scan(/\w+/)   # -> nil
      #     p s.scan(/\s+/)   # -> " "
      #     p s.scan(/\w+/)   # -> "string"
      #     p s.scan(/./)     # -> nil
      #
      # @param [Regexp] pattern
      # @return [String, nil]
      def scan(pattern)
        feed_me
        track_lines(encode_utf8(super))
      end

      ##
      # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
      #
      # @example
      #     s = StringScanner.new("Fri Dec 12 1975 14:39")
      #     s.scan_until(/1/)        # -> "Fri Dec 1"
      #     s.pre_match              # -> "Fri Dec "
      #     s.scan_until(/XYZ/)      # -> nil
      #
      # @param [Regexp] pattern
      # @return [String, nil]
      def scan_until(pattern)
        feed_me
        track_lines(encode_utf8(super))
      end

      ##
      # Attempts to skip over the given `pattern` beginning with the scan pointer.
      # If it matches, the scan pointer is advanced to the end of the match,
      # and the length of the match (in characters) is returned. Otherwise, `nil` is returned.
      #
      # similar to `scan`, but without returning the matched string.
      # @param [Regexp] pattern
      # @return [Integer, nil]
      def skip(pattern)
        (matched = scan(pattern)) && matched.length
      end

      ##
      # Advances the scan pointer until pattern is matched and consumed.
      #
      # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
      #
      # It’s similar to scan_until, but without returning the intervening string.
      # @param [Regexp] pattern
      # @return [Integer, nil]
      def skip_until(pattern)
        (matched = scan_until(pattern)) && matched.length
      end

      ##
      # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
      def unscan
        @lineno = @previous_lineno
        super
      end

      ##
      # Set the scan pointer to the end of the string and clear matching data
      def terminate
        feed_me
        super
      end

    private
      # Maintain low-water mark
      def feed_me
        ensure_buffer_full if rest_size < @options[:low_water]
      end

      # Remember the line number before this scanning operation (for #unscan)
      # and advance it by the newlines in `matched`.
      #
      # @param [String, nil] matched
      # @return [String, nil] `matched`, unchanged
      def track_lines(matched)
        @previous_lineno = @lineno
        @lineno += matched.count("\n") if matched
        matched
      end

      # Perform UTF-8 encoding of input
      #
      # Bytes are relabelled (not transcoded); frozen strings are duplicated first.
      def encode_utf8(string)
        if string && string.encoding != Encoding::UTF_8
          string = string.dup if string.frozen?
          string.force_encoding(Encoding::UTF_8)
        end
        string
      end
    end
  end
end
39 | repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol 40 | (* The brackets { and } enclose symbols 41 | which may be repeated any number of times *); 42 | 43 | grouped_sequence = '(', definitions_list, ')' 44 | (* The brackets ( and ) allow any to be a *); 45 | 46 | terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") 47 | | ('"', second_terminal_character, {second_terminal_character}, '"') 48 | (* A represents the 49 | between the quote symbols '_' or "_" *); 50 | 51 | meta_identifier = letter, {meta_identifier_character} 52 | (* A is the name of a syntactic element of the language being defined *); 53 | 54 | integer = decimal_digit, {decimal_digit} ; 55 | 56 | special_sequence = '?', {special_sequence_character}, '?' 57 | (* The meaning of a is not defined in the standard metalanguage. *); 58 | 59 | comment = '(*', {comment_symbol}, '*)' 60 | (* A comment is allowed anywhere outside a 61 | , , 62 | or *); 63 | 64 | comment_symbol = comment | commentless_symbol | other_character ; 65 | 66 | commentless_symbol = terminal_character | meta_identifier | integer 67 | | terminal_string | special_sequence 68 | ; 69 | 70 | letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" 71 | | "H" | "I" | "J" | "K" | "L" | "M" | "N" 72 | | "O" | "P" | "Q" | "R" | "S" | "T" | "U" 73 | | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" 74 | | "c" | "d" | "e" | "f" | "g" | "h" | "i" 75 | | "j" | "k" | "l" | "m" | "n" | "o" | "p" 76 | | "q" | "r" | "s" | "t" | "u" | "v" | "w" 77 | | "x" | "y" | "z" 78 | ; 79 | 80 | decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; 81 | 82 | (* Extended to allow '_' *) 83 | meta_identifier_character = letter | decimal_digit | '_' ; 84 | 85 | first_terminal_character = terminal_character - "'" ; 86 | 87 | second_terminal_character = terminal_character - '"' ; 88 | 89 | special_sequence_character = terminal_character - '?' 
; 90 | 91 | terminal_character = letter 92 | | decimal_digit 93 | | concatenate_symbol 94 | | defining_symbol 95 | | definition_separator_symbol 96 | | end_comment_symbol 97 | | end_group_symbol 98 | | end_option_symbol 99 | | end_repeat_symbol 100 | | except_symbol 101 | | first_quote_symbol 102 | | repetition_symbol 103 | | second_quote_symbol 104 | | special_sequence_symbol 105 | | start_comment_symbol 106 | | start_group_symbol 107 | | start_option_symbol 108 | | start_repeat_symbol 109 | | terminator_symbol 110 | | other_character 111 | ; 112 | 113 | other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&' 114 | | '#' | '$' | '<' | '>' | '\' | '^' | '`' 115 | | '~' ; 116 | 117 | empty = ; 118 | 119 | concatenate_symbol = ',' ; 120 | repetition_symbol = '*' ; 121 | except_symbol = '-' ; 122 | first_quote_symbol = "'" ; 123 | second_quote_symbol = '"' ; 124 | start_comment_symbol = '(*' ; 125 | end_comment_symbol = '*)' ; 126 | start_group_symbol = '(' ; 127 | end_group_symbol = ')' ; 128 | special_sequence_symbol = '?' ; 129 | 130 | (* Simple terminals that are often extended *) 131 | defining_symbol = '=' | ':' ; 132 | definition_separator_symbol = '|' | '/' | '!' ; 133 | terminator_symbol = ';' | '.' ; 134 | start_option_symbol = '[' ; 135 | end_option_symbol = ']' ; 136 | start_repeat_symbol = '{' | '(:' ; 137 | end_repeat_symbol = '}' | ':)' ; 138 | -------------------------------------------------------------------------------- /etc/iso-ebnf.ebnf: -------------------------------------------------------------------------------- 1 | # W3C EBNF for ISO/IEC 14977 : 1996 EBNF 2 | # (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) 3 | 4 | # Extended to allow no syntax_rule to be valid. 
5 | syntax ::= syntax_rule* 6 | 7 | syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol 8 | (* A <syntax rule> defines the sequences of 9 | symbols represented by a <meta identifier> *) 10 | 11 | definitions_list ::= single_definition (definition_separator_symbol definitions_list)* 12 | (* | separates alternative <single definitions> *) 13 | 14 | single_definition ::= term (',' term)* 15 | (* , separates successive <terms> *) 16 | 17 | term ::= factor ('-' exception)? 18 | (* A <term> represents any sequence of symbols that is defined by the <factor> but 19 | not defined by the <exception> *) 20 | 21 | exception ::= factor 22 | (* A <factor> may be used as an <exception> 23 | if it could be replaced by a <factor> 24 | containing no <meta identifiers> *) 25 | 26 | factor ::= (integer '*')? primary 27 | (* The <integer> specifies the number of repetitions of the <primary> *) 28 | 29 | primary ::= optional_sequence 30 | | repeated_sequence 31 | | special_sequence 32 | | grouped_sequence 33 | | meta_identifier 34 | | terminal_string 35 | | empty 36 | 37 | optional_sequence ::= start_option_symbol definitions_list end_option_symbol 38 | (* The brackets [ and ] enclose symbols which are optional *) 39 | 40 | repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol 41 | (* The brackets { and } enclose symbols 42 | which may be repeated any number of times *) 43 | 44 | grouped_sequence ::= '(' definitions_list ')' 45 | (* The brackets ( and ) allow any <definitions list> to be a <primary> *) 46 | 47 | # Note, the following are nominally terminal rules, 48 | # although ISO EBNF does not really distinguish between non-terminal and terminal rules. 49 | 50 | @terminals 51 | 52 | terminal_string ::= ("'" first_terminal_character+ "'") 53 | | ('"' second_terminal_character+ '"') 54 | (* A <terminal string> represents the <characters> 55 | between the quote symbols '_' or "_" *) 56 | 57 | meta_identifier ::= letter meta_identifier_character* 58 | (* A <meta identifier> is the name of a syntactic element of the language being defined *) 59 | 60 | integer ::= decimal_digit+ 61 | 62 | special_sequence ::= '?' special_sequence_character* '?' 
63 | (* The meaning of a <special sequence> is not defined in the standard metalanguage. *) 64 | 65 | comment ::= start_comment_symbol comment_symbol* end_comment_symbol 66 | (* A comment is allowed anywhere outside a 67 | <terminal string>, <meta identifier>, 68 | <integer> or <special sequence> *) 69 | 70 | comment_symbol ::= comment | commentless_symbol | other_character 71 | 72 | commentless_symbol ::= terminal_character | meta_identifier | integer 73 | | terminal_string | special_sequence 74 | 75 | letter ::= [a-zA-Z] 76 | # gratuitous comment 77 | decimal_digit ::= [0-9] 78 | 79 | # Extended to allow '_' 80 | meta_identifier_character ::= letter | decimal_digit | '_' 81 | 82 | first_terminal_character ::= terminal_character - "'" 83 | 84 | second_terminal_character ::= terminal_character - '"' 85 | 86 | special_sequence_character ::= terminal_character - '?' 87 | 88 | terminal_character ::= letter 89 | | decimal_digit 90 | | concatenate_symbol 91 | | defining_symbol 92 | | definition_separator_symbol 93 | | end_comment_symbol 94 | | end_group_symbol 95 | | end_option_symbol 96 | | end_repeat_symbol 97 | | except_symbol 98 | | first_quote_symbol 99 | | repetition_symbol 100 | | second_quote_symbol 101 | | special_sequence_symbol 102 | | start_comment_symbol 103 | | start_group_symbol 104 | | start_option_symbol 105 | | start_repeat_symbol 106 | | terminator_symbol 107 | | other_character 108 | 109 | other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' 110 | 111 | gap_separator ::= [#x9#xa#xb#xc#xd#x20] 112 | 113 | @pass gap_separator+ | comment 114 | 115 | empty ::= '' 116 | 117 | concatenate_symbol ::= ',' 118 | repetition_symbol ::= '*' 119 | except_symbol ::= '-' 120 | first_quote_symbol ::= "'" 121 | second_quote_symbol ::= '"' 122 | start_comment_symbol ::= '(*' 123 | end_comment_symbol ::= '*)' 124 | start_group_symbol ::= '(' 125 | end_group_symbol ::= ')' 126 | special_sequence_symbol ::= '?' 127 | 128 | # Simple terminals that are often extended 129 | defining_symbol ::= '=' | ':' 130 | definition_separator_symbol ::= '|' | '/' | '!' 
131 | terminator_symbol ::= ';' | '.' 132 | start_option_symbol ::= '[' 133 | end_option_symbol ::= ']' 134 | start_repeat_symbol ::= '{' | '(:' 135 | end_repeat_symbol ::= '}' | ':)' 136 | 137 | # Symbols described, but not actually used. 138 | 139 | gap_free_symbol ::= (terminal_character - ['"]) 140 | | terminal_string 141 | -------------------------------------------------------------------------------- /spec/ll1/scanner_spec.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift ".." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | 6 | describe EBNF::LL1::Scanner do 7 | describe ".new" do 8 | it "initializes with a StringIO" do 9 | scanner = EBNF::LL1::Scanner.new(StringIO.new("line1\nline2\n")) 10 | expect(scanner.rest).to eq "line1\nline2\n" 11 | expect(scanner).not_to be_eos 12 | end 13 | 14 | it "initializes with a string" do 15 | scanner = EBNF::LL1::Scanner.new(StringIO.new "line1\nline2\n") 16 | expect(scanner.rest).to eq "line1\nline2\n" 17 | expect(scanner).not_to be_eos 18 | end 19 | 20 | it "encodes input to UTF-8", pending: !"".respond_to?(:force_encoding) do 21 | f = double("input") 22 | expect(f).to receive(:read).and_return("ascii".force_encoding(Encoding::ASCII_8BIT)) 23 | expect(f).to receive(:gets).and_return("utf8".force_encoding(Encoding::UTF_8)) 24 | expect(f).to receive(:eof?).and_return(false, false, true) 25 | scanner = EBNF::LL1::Scanner.new(f) 26 | s = scanner.rest 27 | expect(s).to eq "asciiutf8" 28 | expect(s.encoding).to eq Encoding::UTF_8 29 | end 30 | end 31 | 32 | describe "#eos?" 
do 33 | it "returns true if at both eos and eof" do 34 | scanner = EBNF::LL1::Scanner.new(StringIO.new("")) 35 | expect(scanner).to be_eos 36 | end 37 | end 38 | 39 | describe "#lineno" do 40 | context "STRING_LITERAL_LONG_QUOTE" do 41 | subject { 42 | EBNF::LL1::Scanner.new( %( 43 | :Test a rdfs:Class ; 44 | rdfs:subClassOf mf:ManifestEntry; 45 | rdfs:label "Superclass of all CSVW tests" ; 46 | rdfs:comment """ 47 | All CSVW tests have an input file referenced using `mf:action`. Positive 48 | and Negative Evaluation Tests also have a result file referenced using 49 | `mf:result` . Other tests may take different inputs and options as defined 50 | for each test class. 51 | """ ; 52 | :b :c . 53 | )) 54 | } 55 | it "tracks line numbers" do 56 | subject.scan_until(/:Test/) 57 | expect(subject.lineno).to eq 2 58 | 59 | subject.scan_until(/rdfs:subClassOf/) 60 | expect(subject.lineno).to eq 3 61 | 62 | subject.scan(/\w+/) 63 | expect(subject.lineno).to eq 3 64 | 65 | subject.skip_until(/"""/) 66 | expect(subject.lineno).to eq 5 67 | 68 | subject.skip_until(/"""/) 69 | expect(subject.lineno).to eq 10 70 | end 71 | end 72 | end 73 | 74 | describe "#rest" do 75 | it "returns remaining scanner contents if not at eos" do 76 | scanner = EBNF::LL1::Scanner.new(StringIO.new("foo\n")) 77 | expect(scanner.rest).to eq "foo\n" 78 | expect(scanner.lineno).to eq 1 79 | end 80 | 81 | it "returns next line from file if at eos" do 82 | scanner = EBNF::LL1::Scanner.new(StringIO.new("\nfoo\n")) 83 | expect(scanner.rest).to eq "\nfoo\n" 84 | scanner.scan(/\s*/m) 85 | expect(scanner.rest).to eq "foo\n" 86 | expect(scanner.lineno).to eq 2 87 | end 88 | 89 | it "returns \"\" if at eos and eof" do 90 | scanner = EBNF::LL1::Scanner.new(StringIO.new("")) 91 | expect(scanner.rest).to eq "" 92 | end 93 | end 94 | 95 | describe "#scan" do 96 | context "simple terminals" do 97 | it "returns a word" do 98 | scanner = EBNF::LL1::Scanner.new(StringIO.new("foo bar")) 99 | expect(scanner.scan(/\w+/)).to 
eq "foo" 100 | expect(scanner.lineno).to eq 1 101 | end 102 | 103 | it "returns a STRING_LITERAL_QUOTE" do 104 | scanner = EBNF::LL1::Scanner.new(StringIO.new("'string' foo")) 105 | expect(scanner.scan(/'((?:[^\x27\x5C\x0A\x0D])*)'/)).to eq "'string'" 106 | expect(scanner.lineno).to eq 1 107 | end 108 | 109 | it "returns a STRING_LITERAL_LONG_SINGLE_QUOTE" do 110 | scanner = EBNF::LL1::Scanner.new(StringIO.new("'''\nstring\nstring''' foo")) 111 | expect(scanner.scan(/'''((?:(?:'|'')?(?:[^'\\])+)*)'''/m)).to eq "'''\nstring\nstring'''" 112 | expect(scanner.lineno).to eq 3 113 | end 114 | 115 | it "scans a multi-line string" do 116 | string = %q(''' 117 | 118 | ''' 119 | ) 120 | scanner = EBNF::LL1::Scanner.new(StringIO.new(string)) 121 | expect(scanner.scan(/'''((?:(?:'|'')?(?:[^'\\])+)*)'''/m)).not_to be_empty 122 | expect(scanner.lineno).to eq 3 123 | end 124 | 125 | it "scans a longer multi-line string" do 126 | string = %q(''' 127 | 128 | ''' 129 | ) 130 | scanner = EBNF::LL1::Scanner.new(StringIO.new(string)) 131 | expect(scanner.scan(/'''((?:(?:'|'')?(?:[^'\\])+)*)'''/m)).not_to be_empty 132 | expect(scanner.lineno).to eq 3 133 | end 134 | end 135 | end 136 | 137 | describe "#scan_until" do 138 | context "simple terminals" do 139 | it "returns a word" do 140 | scanner = EBNF::LL1::Scanner.new(StringIO.new("foo bar")) 141 | expect(scanner.scan_until(/\w+/)).to eq "foo" 142 | expect(scanner.lineno).to eq 1 143 | end 144 | 145 | it "returns a STRING_LITERAL_QUOTE" do 146 | scanner = EBNF::LL1::Scanner.new(StringIO.new("prefix 'string' foo")) 147 | expect(scanner.scan_until(/'((?:[^\x27\x5C\x0A\x0D])*)'/)).to eq "prefix 'string'" 148 | expect(scanner.lineno).to eq 1 149 | end 150 | 151 | it "returns a STRING_LITERAL_LONG_SINGLE_QUOTE" do 152 | scanner = EBNF::LL1::Scanner.new(StringIO.new("prefix '''\nstring\nstring''' foo")) 153 | expect(scanner.scan_until(/'''((?:(?:'|'')?(?:[^'\\])+)*)'''/m)).to eq "prefix '''\nstring\nstring'''" 154 | expect(scanner.lineno).to 
eq 3 155 | end 156 | end 157 | end 158 | 159 | describe "#skip" do 160 | it "skips input" do 161 | scanner = EBNF::LL1::Scanner.new(StringIO.new("foo\n")) 162 | scanner.skip(/^f/) 163 | expect(scanner.rest).to eq "oo\n" 164 | expect(scanner.lineno).to eq 1 165 | end 166 | end 167 | 168 | describe "#skip_until" do 169 | it "skips input" do 170 | scanner = EBNF::LL1::Scanner.new(StringIO.new("prefix\nfoo\n")) 171 | scanner.skip_until(/^f/) 172 | expect(scanner.rest).to eq "oo\n" 173 | expect(scanner.lineno).to eq 2 174 | end 175 | end 176 | 177 | describe "#terminate" do 178 | it "skips to end of input" do 179 | scanner = EBNF::LL1::Scanner.new(StringIO.new("foo\n")) 180 | scanner.terminate 181 | expect(scanner).to be_eos 182 | end 183 | end 184 | end -------------------------------------------------------------------------------- /etc/ebnf.ll1.sxp: -------------------------------------------------------------------------------- 1 | ( 2 | (rule _empty "0" (first _eps) (seq)) 3 | (rule ebnf "1" 4 | (start #t) 5 | (first '@pass' '@terminals' LHS _eps) 6 | (follow _eof) 7 | (cleanup star) 8 | (alt _empty _ebnf_2)) 9 | (rule _ebnf_2 "1.2" 10 | (first '@pass' '@terminals' LHS) 11 | (follow _eof) 12 | (cleanup merge) 13 | (seq _ebnf_1 ebnf)) 14 | (rule _ebnf_1 "1.1" 15 | (first '@pass' '@terminals' LHS) 16 | (follow '@pass' '@terminals' LHS _eof) 17 | (alt declaration rule)) 18 | (rule declaration "2" 19 | (first '@pass' '@terminals') 20 | (follow '@pass' '@terminals' LHS _eof) 21 | (alt '@terminals' pass)) 22 | (rule rule "3" (first LHS) (follow '@pass' '@terminals' LHS _eof) (seq LHS expression)) 23 | (rule expression "4" 24 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 25 | (follow ')' '@pass' '@terminals' LHS _eof) 26 | (seq alt)) 27 | (rule alt "5" 28 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 29 | (follow ')' '@pass' '@terminals' LHS _eof) 30 | (seq seq _alt_1)) 31 | (rule _alt_1 "5.1" 32 | (first _eps '|') 33 | (follow ')' '@pass' '@terminals' 
LHS _eof) 34 | (cleanup star) 35 | (alt _empty _alt_3)) 36 | (rule _alt_3 "5.3" 37 | (first '|') 38 | (follow ')' '@pass' '@terminals' LHS _eof) 39 | (cleanup merge) 40 | (seq _alt_2 _alt_1)) 41 | (rule _alt_2 "5.2" 42 | (first '|') 43 | (follow ')' '@pass' '@terminals' LHS _eof '|') 44 | (seq '|' seq)) 45 | (rule seq "6" 46 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 47 | (follow ')' '@pass' '@terminals' LHS _eof '|') 48 | (cleanup plus) 49 | (seq diff _seq_1)) 50 | (rule _seq_1 "6.1" 51 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) 52 | (follow ')' '@pass' '@terminals' LHS _eof '|') 53 | (cleanup star) 54 | (alt _empty _seq_2)) 55 | (rule _seq_2 "6.2" 56 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 57 | (follow ')' '@pass' '@terminals' LHS _eof '|') 58 | (cleanup merge) 59 | (seq diff _seq_1)) 60 | (rule diff "7" 61 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 62 | (follow '(' ')' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 STRING2 63 | SYMBOL _eof '|' ) 64 | (seq postfix _diff_1)) 65 | (rule _diff_1 "7.1" 66 | (first '-' _eps) 67 | (follow '(' ')' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 STRING2 68 | SYMBOL _eof '|' ) 69 | (cleanup opt) 70 | (alt _empty _diff_2)) 71 | (rule _diff_2 "7.2" 72 | (first '-') 73 | (follow '(' ')' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 STRING2 74 | SYMBOL _eof '|' ) 75 | (seq '-' postfix)) 76 | (rule postfix "8" 77 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 78 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 79 | STRING2 SYMBOL _eof '|' ) 80 | (seq primary _postfix_1)) 81 | (rule _postfix_1 "8.1" 82 | (first POSTFIX _eps) 83 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 84 | STRING2 SYMBOL _eof '|' ) 85 | (cleanup opt) 86 | (alt _empty POSTFIX)) 87 | (rule primary "9" 88 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 89 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE POSTFIX RANGE 
90 | STRING1 STRING2 SYMBOL _eof '|' ) 91 | (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 _primary_1)) 92 | (rule _primary_1 "9.1" 93 | (first '(') 94 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE POSTFIX RANGE 95 | STRING1 STRING2 SYMBOL _eof '|' ) 96 | (seq '(' expression ')')) 97 | (rule pass "10" 98 | (first '@pass') 99 | (follow '@pass' '@terminals' LHS _eof) 100 | (seq '@pass' expression)) 101 | (terminals _terminals (seq)) 102 | (terminal LHS "11" (seq (opt (seq '[' SYMBOL ']' (plus ' '))) SYMBOL (star ' ') '::=')) 103 | (terminal SYMBOL "12" (alt (seq '<' O_SYMBOL '>') O_SYMBOL)) 104 | (terminal O_SYMBOL "12a" (plus (alt (range "a-z") (range "A-Z") (range "0-9") '_' '.'))) 105 | (terminal HEX "13" (seq '#x' (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) 106 | (terminal RANGE "14" 107 | (seq '[' (plus (alt (seq R_CHAR '-' R_CHAR) (seq HEX '-' HEX) R_CHAR HEX)) (opt '-') ']')) 108 | (terminal O_RANGE "15" 109 | (seq '[^' (plus (alt (seq R_CHAR '-' R_CHAR) (seq HEX '-' HEX) R_CHAR HEX)) (opt '-') ']')) 110 | (terminal STRING1 "16" (seq '"' (star (diff CHAR '"')) '"')) 111 | (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'")) 112 | (terminal CHAR "18" 113 | (alt 114 | (range "#x9#xA#xD") 115 | (range "#x20-#xD7FF") 116 | (range "#xE000-#xFFFD") 117 | (range "#x10000-#x10FFFF")) ) 118 | (terminal R_CHAR "19" (diff CHAR (alt ']' '-' HEX))) 119 | (terminal POSTFIX "20" (range "?*+")) 120 | (terminal PASS "21" 121 | (alt 122 | (range "#x9#xA#xD#x20") 123 | (seq (alt (diff '#' '#x') '//') (star (range "^#xA#xD"))) 124 | (seq '/*' (star (alt (opt (seq '*' (range "^/"))) (range "^*"))) '*/') 125 | (seq '(*' (star (alt (opt (seq '*' (range "^)"))) (range "^*"))) '*)')) ) 126 | (pass _pass (seq PASS)) 127 | (rule _ebnf_3 "1.3" (first '@pass' '@terminals' LHS _eps) (follow _eof) (seq ebnf)) 128 | (rule _rule_1 "3.1" 129 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 130 | (follow '@pass' '@terminals' LHS _eof) 131 | (seq 
expression)) 132 | (rule _alt_4 "5.4" 133 | (first _eps '|') 134 | (follow ')' '@pass' '@terminals' LHS _eof) 135 | (seq _alt_1)) 136 | (rule _alt_5 "5.5" 137 | (first _eps '|') 138 | (follow ')' '@pass' '@terminals' LHS _eof) 139 | (seq _alt_1)) 140 | (rule _alt_6 "5.6" 141 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 142 | (follow ')' '@pass' '@terminals' LHS _eof '|') 143 | (seq seq)) 144 | (rule _seq_3 "6.3" 145 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) 146 | (follow ')' '@pass' '@terminals' LHS _eof '|') 147 | (seq _seq_1)) 148 | (rule _seq_4 "6.4" 149 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) 150 | (follow ')' '@pass' '@terminals' LHS _eof '|') 151 | (seq _seq_1)) 152 | (rule _diff_3 "7.3" 153 | (first '-' _eps) 154 | (follow '(' ')' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 STRING2 155 | SYMBOL _eof '|' ) 156 | (seq _diff_1)) 157 | (rule _diff_4 "7.4" 158 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 159 | (follow '(' ')' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 STRING2 160 | SYMBOL _eof '|' ) 161 | (seq postfix)) 162 | (rule _postfix_2 "8.2" 163 | (first POSTFIX _eps) 164 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE RANGE STRING1 165 | STRING2 SYMBOL _eof '|' ) 166 | (seq _postfix_1)) 167 | (rule _primary_2 "9.2" 168 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 169 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE POSTFIX RANGE 170 | STRING1 STRING2 SYMBOL _eof '|' ) 171 | (seq expression ')')) 172 | (rule _pass_1 "10.1" 173 | (first '(' HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) 174 | (follow '@pass' '@terminals' LHS _eof) 175 | (seq expression)) 176 | (rule _primary_3 "9.3" 177 | (first ')') 178 | (follow '(' ')' '-' '@pass' '@terminals' HEX LHS O_RANGE POSTFIX RANGE 179 | STRING1 STRING2 SYMBOL _eof '|' ) 180 | (seq ')')) ) 181 | -------------------------------------------------------------------------------- /spec/peg/parser_spec.rb: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $:.unshift ".." 3 | require 'spec_helper' 4 | require 'ebnf' 5 | 6 | describe EBNF::PEG::Parser do 7 | class PegParserTest 8 | include EBNF::PEG::Parser 9 | end 10 | 11 | before(:all) { 12 | PegParserTest.start_production(:term) {"foo"} 13 | PegParserTest.production(:term) {"foo"} 14 | PegParserTest.start_production(:toLower, insensitive_strings: :lower) {|value| value} 15 | PegParserTest.start_production(:toUpper, insensitive_strings: :upper) {|value| value} 16 | PegParserTest.terminal(:escape, /escape/) {"foo"} 17 | PegParserTest.terminal(:unescape, /unescape/, unescape: true) {"foo"} 18 | } 19 | let(:logger) {RDF::Spec.logger} 20 | after(:each) do |example| 21 | puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) 22 | end 23 | 24 | describe "ClassMethods" do 25 | describe "production" do 26 | it "adds as a start_handler" do 27 | expect(PegParserTest.start_handlers.keys).to eq [:term, :toLower, :toUpper] 28 | expect(PegParserTest.start_handlers[:term]).to be_a(Proc) 29 | end 30 | it "adds as a production_handler" do 31 | expect(PegParserTest.production_handlers.keys).to eq [:term] 32 | expect(PegParserTest.production_handlers[:term]).to include(Proc, FalseClass) 33 | end 34 | end 35 | 36 | describe "terminal" do 37 | it "adds as a terminal_handler" do 38 | expect(PegParserTest.terminal_handlers.keys).to include(:escape, :unescape) 39 | expect(PegParserTest.terminal_handlers[:escape]).to be_a(Proc) 40 | expect(PegParserTest.terminal_handlers[:unescape]).to be_a(Proc) 41 | end 42 | end 43 | end 44 | 45 | describe "#parse" do 46 | subject {PegParserTest.new} 47 | it "raises error if starting production not defined" do 48 | rule = EBNF::Rule.new(:rule, "0", [:seq, "foo"], kind: :terminal).extend(EBNF::PEG::Rule) 49 | expect { 50 | subject.parse("foo", "none", [rule]) 51 | }.to 
raise_error(EBNF::PEG::Parser::Error, "Starting production :none not defined") 52 | end 53 | 54 | context "simplest grammar" do 55 | let(:start) {:expression} 56 | let(:grammar) {%{( 57 | (rule expression "1" (alt sum integer)) 58 | (rule sum "2" (seq integer operator expression)) 59 | (terminal operator "3" (seq "+")) 60 | (terminal integer "4" (plus (range "0-9"))) 61 | )}} 62 | let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast} 63 | 64 | { 65 | "1" => "1", 66 | "10" => "10", 67 | "1+1" => [{integer: "1"}, {operator: "+"}, {expression: "1"}], 68 | " 1 + 2 " => [{integer: "1"}, {operator: "+"}, {expression: "2"}], 69 | "1 + 2 + 3" => [ 70 | {integer: "1"}, 71 | {operator: "+"}, 72 | {expression: [ 73 | {integer: "2"}, 74 | {operator: "+"}, 75 | {expression: "3"} 76 | ]}] 77 | }.each do |input, expected| 78 | it "parses #{input.inspect} to #{expected.inspect}" do 79 | output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger) 80 | expect(output).to produce(expected, logger) 81 | end 82 | end 83 | 84 | { 85 | "" => %r{syntax error, expecting "0-9", :integer }, 86 | "10 x 1" => %r{syntax error, expecting "\+", :operator}, 87 | "1-1" => %r{syntax error, expecting "0-9", "\+", :operator}, 88 | "foo" => %r{syntax error, expecting "0-9", :integer}, 89 | "3 1 + 2" => %r{syntax error, expecting "\+", :operator} 90 | }.each do |input, expected| 91 | it "fails to parse #{input.inspect} to #{expected.inspect}" do 92 | expect { 93 | PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger) 94 | }.to raise_error(EBNF::PEG::Parser::Error, expected) 95 | end 96 | end 97 | end 98 | 99 | context "case insensitive string matching" do 100 | let(:start) {:expression} 101 | let(:grammar) {%{( 102 | (rule expression "1" (alt toUpper toLower)) 103 | (rule toUpper "2" (seq "uPpEr")) 104 | (rule toLower "3" (seq "LoWeR")) 105 | )}} 106 | let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast} 107 | 108 | { 109 | "UPPER" => [{uPpEr: 
"UPPER"}], 110 | "upper" => [{uPpEr: "UPPER"}], 111 | "LOWER" => [{LoWeR: "lower"}], 112 | "lower" => [{LoWeR: "lower"}], 113 | }.each do |input, expected| 114 | it "parses #{input.inspect} to #{expected.inspect}" do 115 | output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger) 116 | expect(output).to produce(expected, logger) 117 | end 118 | end 119 | end 120 | 121 | context "with backtracking" do 122 | let(:start) {:expression} 123 | let(:grammar) {%{( 124 | (rule expression "1" (alt (seq integer "+" integer) (seq integer "*" integer))) 125 | (terminal integer "2" (plus (range "0-9"))) 126 | )}} 127 | let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast} 128 | 129 | { 130 | "1+1" => [{integer: "1"}, {"+": "+"}, {integer: "1"}], 131 | # The following will memoize the first position 132 | "1*1" => [{integer: "1"}, {"*": "*"}, {integer: "1"}], 133 | }.each do |input, expected| 134 | it "parses #{input.inspect} to #{expected.inspect}" do 135 | output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger) 136 | expect(output).to produce(expected, logger) 137 | end 138 | end 139 | end 140 | 141 | context "turtle grammar" do 142 | let(:start) {:turtleDoc} 143 | let(:grammar) {File.read File.expand_path("../../../etc/turtle.sxp", __FILE__)} 144 | let(:rules) {EBNF.parse(grammar, format: :sxp).make_peg.ast} 145 | 146 | { 147 | ":a :b :c ." => [[ 148 | {:triples=>[ 149 | {:subject=>":a"}, 150 | {:predicateObjectList=>[ 151 | {:verb=>[{:iri=>":b"}]}, 152 | {:objectList=>[ 153 | {:object=>":c"}, 154 | {:_objectList_1=>[]} 155 | ]}, 156 | {:_predicateObjectList_1=>[]} 157 | ]} 158 | ]}, 159 | {:"."=>"."} 160 | ]], 161 | "[:b :c] ." 
=> [[ 162 | {:triples=>[ 163 | {:blankNodePropertyList=>[ 164 | {:"["=>"["}, 165 | {:predicateObjectList=>[ 166 | {:verb=>[{:iri=>":b"}]}, 167 | {:objectList=>[ 168 | {:object=>":c"}, 169 | {:_objectList_1=>[]} 170 | ]}, 171 | {:_predicateObjectList_1=>[]}]}, 172 | {:"]"=>"]"} 173 | ]}, 174 | {:_triples_3=>nil} 175 | ]}, 176 | {:"."=>"."} 177 | ]] 178 | }.each do |input, expected| 179 | it "parses #{input.inspect} to #{expected.inspect}" do 180 | output = PegParserTest.new.parse(input, start, rules, debug: 3, logger: logger) 181 | expect(output).to produce(expected, logger) 182 | end 183 | end 184 | end 185 | end 186 | 187 | require_relative "data/parser" 188 | 189 | describe EBNFPegParser do 190 | let(:input) {File.expand_path("../../../etc/ebnf.ebnf", __FILE__)} 191 | let(:sxp) {File.read File.expand_path("../../../etc/ebnf.sxp", __FILE__)} 192 | let(:parser) {EBNFPegParser.new(File.open(input), debug: 3, logger: logger)} 193 | 194 | it "parses EBNF Grammar" do 195 | expect(parser.to_sxp).to produce(sxp, logger) 196 | end 197 | end 198 | end -------------------------------------------------------------------------------- /spec/ll1/data/parser.rb: -------------------------------------------------------------------------------- 1 | # # EBNF Parser for EBNF. 
2 | # 3 | # Produces an Abstract Synatx Tree in S-Expression form for the input grammar file 4 | require 'ebnf/rule' 5 | require 'ebnf/terminals' 6 | require 'ebnf/ll1/parser' 7 | require_relative 'meta' 8 | require 'sxp' 9 | 10 | class EBNFParser 11 | include EBNF::LL1::Parser 12 | include EBNFParserMeta 13 | include EBNF::Terminals 14 | 15 | # Abstract syntax tree from parse 16 | # 17 | # @return [Array] 18 | attr_reader :ast 19 | 20 | terminal(:LHS, LHS) do |prod, token, input| 21 | input[:id], input[:symbol] = token.value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first 22 | end 23 | 24 | terminal(:SYMBOL, SYMBOL) do |prod, token, input| 25 | input[:terminal] = token.value.to_sym 26 | end 27 | 28 | terminal(:HEX, HEX) do |prod, token, input| 29 | input[:terminal] = token.value 30 | end 31 | 32 | terminal(:RANGE, RANGE, unescape: true) do |prod, token, input| 33 | input[:terminal] = [:range, token.value[1..-2]] 34 | end 35 | 36 | terminal(:O_RANGE, O_RANGE, unescape: true) do |prod, token, input| 37 | input[:terminal] = [:range, token.value[1..-2]] 38 | end 39 | 40 | terminal(:STRING1, STRING1, unescape: true) do |prod, token, input| 41 | input[:terminal] = token.value[1..-2].tap {|s| s.quote_style = :dquote} 42 | end 43 | 44 | terminal(:STRING2, STRING2, unescape: true) do |prod, token, input| 45 | input[:terminal] = token.value[1..-2].tap {|s| s.quote_style = :squote} 46 | end 47 | 48 | terminal(:POSTFIX, POSTFIX) do |prod, token, input| 49 | input[:postfix] = token.value 50 | end 51 | 52 | terminal(nil, %r(@terminals|@pass|[\[\]|\-\(\)])) do |prod, token, input| 53 | input[:terminal] = token.value 54 | end 55 | 56 | production(:ebnf) do |input, current, callback| 57 | # Cause method_missing to invoke something in our context 58 | to_sxp 59 | end 60 | 61 | production(:declaration) do |input, current, callback| 62 | # current contains a declaration. 
63 | # Invoke callback 64 | callback.call(:terminals) if current[:terminal] == '@terminals' 65 | end 66 | 67 | production(:rule) do |input, current, callback| 68 | # current contains an expression. 69 | # Invoke callback 70 | callback.call(:rule, EBNF::Rule.new(current[:symbol].to_sym, current[:id], current[:expression].last)) 71 | end 72 | 73 | production(:expression) do |input, current, callback| 74 | alt = current[:alt] 75 | (input[:expression] ||= [:expression]) << (alt.length > 2 ? alt : alt.last) 76 | end 77 | 78 | production(:alt) do |input, current, callback| 79 | input[:alt] = if current[:alt] 80 | current[:alt] 81 | elsif seq = current[:seq] 82 | [:alt] << (seq.length > 2 ? seq : seq.last) 83 | end 84 | end 85 | 86 | start_production(:_alt_1) do |input, current, callback| 87 | seq = Array(input[:seq]) 88 | (input[:alt] = [:alt]) << (seq.length > 2 ? seq : seq.last) 89 | input.delete(:seq) 90 | end 91 | 92 | production(:_alt_1) do |input, current, callback| 93 | input[:alt] ||= [:alt] 94 | 95 | # Add optimized value of `seq,` if any 96 | if seq = current[:seq] 97 | input[:alt] << (seq.length == 2 ? seq.last : seq) 98 | end 99 | 100 | # Also recursive call to `_alt_1` 101 | input[:alt] += current[:alt][1..-1] if current[:alt] 102 | end 103 | 104 | production(:seq) do |input, current, callback| 105 | input[:seq] = if current[:seq] 106 | current[:seq] 107 | elsif diff = current[:diff] 108 | [:seq] << (diff.length > 2 ? diff : diff.last) 109 | end 110 | end 111 | 112 | start_production(:_seq_1) do |input, current, callback| 113 | diff = Array(input[:diff]) 114 | (input[:seq] = [:seq]) << (diff.length > 2 ? diff : diff.last) 115 | input.delete(:diff) 116 | end 117 | 118 | production(:_seq_1) do |input, current, callback| 119 | input[:seq] ||= [:seq] 120 | 121 | # Add optimized value of `diff`, if any 122 | if diff = current[:diff] 123 | input[:seq] << (diff.length > 2 ? 
diff : diff.last) 124 | end 125 | 126 | # Also recursive call to `_seq_1` 127 | input[:seq] += current[:seq][1..-1] if current[:seq] 128 | end 129 | 130 | production(:diff) do |input, current, callback| 131 | input[:diff] = if current[:diff] 132 | current[:diff] 133 | elsif postfix = current[:postfix] 134 | [:diff] << postfix 135 | end 136 | end 137 | 138 | start_production(:_diff_1) do |input, current, callback| 139 | postfix = Array(input[:postfix]) 140 | (input[:diff] = [:diff]) << (postfix.length > 2 ? postfix : postfix.last) 141 | input.delete(:postfix) 142 | end 143 | 144 | production(:_diff_1) do |input, current, callback| 145 | # Gratuitous call to exercise method 146 | add_prod_data(:_diff_1, "foo") 147 | input[:diff] ||= [:diff] 148 | 149 | # Add optimized value of `postfix`, if any 150 | input[:diff] << current[:postfix] if current[:postfix] 151 | end 152 | 153 | production(:postfix) do |input, current, callback| 154 | # Gratuitous call to exercise method 155 | add_prod_datum(:postfix, "foo") 156 | # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively 157 | input[:postfix] = case current[:postfix] 158 | when "*" then [:star, current[:primary]] 159 | when "+" then [:plus, current[:primary]] 160 | when "?" then [:opt, current[:primary]] 161 | else current[:primary] 162 | end 163 | end 164 | 165 | production(:primary) do |input, current, callback| 166 | # Gratuitous call to exercise method 167 | add_prod_datum(:primary, ["foo"]) 168 | input[:primary] = if current[:expression] 169 | v = current[:expression][1..-1] 170 | v = v.first if v.length == 1 171 | else 172 | current[:terminal] 173 | end 174 | end 175 | 176 | production(:pass) do |input, current, callback| 177 | # Invoke callback 178 | callback.call(:pass, current[:expression].last) 179 | end 180 | 181 | # ## Parser invocation. 
182 | # On start, yield ourselves if a block is given, otherwise, return this parser instance 183 | # 184 | # @param [#read, #to_s] input 185 | # @param [Hash{Symbol => Object}] options 186 | # @option options [Hash] :prefixes (Hash.new) 187 | # the prefix mappings to use (for acessing intermediate parser productions) 188 | # @option options [Boolean] :progress 189 | # Show progress of parser productions 190 | # @return [EBNFParser] 191 | def initialize(input, **options, &block) 192 | @options = options.dup 193 | @input = input.respond_to?(:read) ? input.read : input.to_s 194 | 195 | parsing_terminals = false 196 | @ast = [] 197 | parse(@input, START.to_sym, branch: BRANCH, 198 | first: FIRST, 199 | follow: FOLLOW, 200 | whitespace: EBNF::Terminals::PASS, 201 | reset_on_true: true, 202 | **options 203 | ) do |context, *data| 204 | rule = case context 205 | when :terminals 206 | parsing_terminals = true 207 | rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) 208 | when :pass 209 | rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass) 210 | when :rule 211 | rule = data.first 212 | rule.kind = :terminal if parsing_terminals 213 | rule 214 | end 215 | @ast << rule if rule 216 | end 217 | @ast 218 | end 219 | 220 | # Output formatted S-Expression of grammar 221 | # 222 | # @return [String] 223 | def to_sxp(**options) 224 | require 'sxp' unless defined?(SXP) 225 | # Output rules as a formatted S-Expression 226 | SXP::Generator.string(@ast.map(&:for_sxp)) 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /lib/ebnf/isoebnf/meta.rb: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by ebnf version 2.0.0 2 | # Derived from etc/iso-ebnf.ebnf 3 | module ISOEBNFMeta 4 | RULES = [ 5 | EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), 6 | EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, 
:defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), 7 | EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), 8 | EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), 9 | EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), 10 | EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), 11 | EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), 12 | EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule), 13 | EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), 14 | EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule), 15 | EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule), 16 | EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule), 17 | EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), 18 | EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule), 19 | EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), 20 | EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), 21 | EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), 22 | EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), 23 | EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), 24 | EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), 25 | 
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), 26 | EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), 27 | EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), 28 | EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), 29 | EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), 30 | EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), 31 | EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), 32 | EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), 33 | EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 34 | EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), 35 | EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), 36 | EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), 37 | EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), 38 | EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule), 39 | EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), 40 | EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), 41 | EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: 
:terminal).extend(EBNF::PEG::Rule), 42 | EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), 43 | EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), 44 | EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 45 | EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), 46 | EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), 47 | EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), 48 | EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), 49 | EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), 50 | EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), 51 | EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule), 52 | EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), 53 | EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), 54 | EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), 55 | EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), 56 | EBNF::Rule.new(:second_quote_symbol, 
nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), 57 | EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), 58 | EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), 59 | EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), 60 | EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), 61 | EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 62 | EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), 63 | EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), 64 | EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), 65 | EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule), 66 | EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule), 67 | EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), 68 | EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), 69 | EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), 70 | EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), 71 | EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), 72 | EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), 73 | ] 74 | end 75 | 76 | -------------------------------------------------------------------------------- /examples/isoebnf/meta.rb: -------------------------------------------------------------------------------- 1 | # 
This file is automatically generated by ebnf version 2.0.0 2 | # Derived from ../../etc/iso-ebnf.ebnf 3 | module ISOEBNFMeta 4 | RULES = [ 5 | EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), 6 | EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), 7 | EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), 8 | EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), 9 | EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), 10 | EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), 11 | EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), 12 | EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule), 13 | EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), 14 | EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule), 15 | EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule), 16 | EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule), 17 | EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), 18 | EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule), 19 | EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), 20 | EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), 21 | EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), 22 | EBNF::Rule.new(:repeated_sequence, nil, [:seq, 
:start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), 23 | EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), 24 | EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), 25 | EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), 26 | EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), 27 | EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), 28 | EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), 29 | EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), 30 | EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), 31 | EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), 32 | EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), 33 | EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 34 | EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), 35 | EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), 36 | EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), 37 | EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), 38 | EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule), 39 | 
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), 40 | EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), 41 | EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), 42 | EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), 43 | EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), 44 | EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 45 | EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), 46 | EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), 47 | EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), 48 | EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), 49 | EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), 50 | EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), 51 | EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule), 52 | EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), 53 | EBNF::Rule.new(:repetition_symbol, nil, 
[:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), 54 | EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), 55 | EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), 56 | EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), 57 | EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), 58 | EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), 59 | EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), 60 | EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), 61 | EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), 62 | EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), 63 | EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), 64 | EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), 65 | EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule), 66 | EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule), 67 | EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), 68 | EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), 69 | EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), 70 | EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), 71 | EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), 72 | 
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), 73 | ] 74 | end 75 | 76 | --------------------------------------------------------------------------------