├── .gitignore ├── test ├── resources │ ├── RFC7405.abnf │ ├── path.abnf │ ├── bad_line_endings.abnf │ ├── basic.abnf │ ├── sdp1.txt │ ├── sip1.txt │ ├── ipv4.abnf │ ├── reduce.abnf │ ├── module_code.abnf │ ├── ipv6.abnf │ ├── RFC3966.abnf │ ├── RFC3986.abnf │ ├── RFC4566.abnf │ ├── RFC5322.abnf │ ├── RFC5322-no-obs.abnf │ └── RFC3261.abnf ├── test_helper.exs └── ex_abnf_test.exs ├── .travis.yml ├── mix.lock ├── lib ├── ex_abnf │ ├── util.ex │ ├── capture_result.ex │ ├── core.ex │ ├── interpreter.ex │ └── grammar.ex └── ex_abnf.ex ├── config └── config.exs ├── mix.exs ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | doc 2 | /_build 3 | /cover 4 | /deps 5 | erl_crash.dump 6 | *.ez 7 | -------------------------------------------------------------------------------- /test/resources/RFC7405.abnf: -------------------------------------------------------------------------------- 1 | case-sensitive = %s"aBc" 2 | case-insensitive-1 = %i"abc" 3 | case-insensitive-2 = "abc" 4 | -------------------------------------------------------------------------------- /test/resources/path.abnf: -------------------------------------------------------------------------------- 1 | ALPHA = %x61-7A ; A-Z / a-z 2 | path = *("/" segment) 3 | segment = ALPHA *ALPHA !!! 4 | {:ok, state ++ [rule]} 5 | !!! 
6 | -------------------------------------------------------------------------------- /test/resources/bad_line_endings.abnf: -------------------------------------------------------------------------------- 1 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 2 | DIGIT = %x30-39 3 | 4 | string1 = *ALPHA 5 | string2 = *3ALPHA 6 | string3 = 1*2ALPHA 7 | string4 = 3*ALPHA 8 | string5 = [DIGIT] string1 9 | -------------------------------------------------------------------------------- /test/resources/basic.abnf: -------------------------------------------------------------------------------- 1 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 2 | DIGIT = %x30-39 3 | 4 | string1 = *ALPHA 5 | string2 = *3ALPHA 6 | string3 = 1*2ALPHA 7 | string4 = 3*ALPHA 8 | string5 = [DIGIT] string1 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: elixir 2 | elixir: 3 | - 1.0.0 4 | - 1.0.1 5 | - 1.0.2 6 | - 1.0.3 7 | - 1.0.4 8 | - 1.0.5 9 | - 1.1.0 10 | - 1.1.1 11 | - 1.2.0 12 | - 1.3.0 13 | - 1.4.0 14 | otp_release: 15 | - 18.0 16 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{"earmark": {:hex, :earmark, "1.0.3", "89bdbaf2aca8bbb5c97d8b3b55c5dd0cff517ecc78d417e87f1d0982e514557b", [:mix], []}, 2 | "ex_doc": {:hex, :ex_doc, "0.14.5", "c0433c8117e948404d93ca69411dd575ec6be39b47802e81ca8d91017a0cf83c", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]}} 3 | -------------------------------------------------------------------------------- /test/resources/sdp1.txt: -------------------------------------------------------------------------------- 1 | v=0 2 | o=alice 2890844526 2890844526 IN IP4 host.atlanta.example.com 3 | s=description 4 | c=IN IP4 host.atlanta.example.com 5 | t=0 0 6 | m=audio 49170 RTP/AVP 0 8 97 7 | 
a=rtpmap:0 PCMU/8000 8 | a=rtpmap:8 PCMA/8000 9 | a=rtpmap:97 iLBC/8000 10 | m=video 51372 RTP/AVP 31 32 11 | a=rtpmap:31 H261/90000 12 | a=rtpmap:32 MPV/90000 13 | -------------------------------------------------------------------------------- /test/resources/sip1.txt: -------------------------------------------------------------------------------- 1 | REGISTER sip:registrar.biloxi.com:1234 SIP/2.0 2 | Via: SIP/2.0/UDP bobspc.biloxi.com:5060;branch=z9hG4bKnashds7 3 | Max-Forwards: 70 4 | To: Bob 5 | From: Bob ;tag=456248 6 | Call-ID: 843817637684230@998sdasdh09 7 | CSeq: 1826 REGISTER 8 | Contact: 9 | Expires: 7200 10 | Content-Length: 0 11 | 12 | -------------------------------------------------------------------------------- /test/resources/ipv4.abnf: -------------------------------------------------------------------------------- 1 | IPv4address = 2 | dec-octet "." 3 | dec-octet "." 4 | dec-octet "." 5 | dec-octet !!! 6 | state = Map.put state, :ipv4address, rule 7 | {:ok, state, "Your ip address is: #{rule}"} 8 | !!! 9 | 10 | dec-octet = DIGIT ; 0-9 11 | / %x31-39 DIGIT ; 10-99 12 | / "1" 2DIGIT ; 100-199 13 | / "2" %x30-34 DIGIT ; 200-249 14 | / "25" %x30-35 ; 250-255 15 | 16 | DIGIT = %x30-39 17 | -------------------------------------------------------------------------------- /test/resources/reduce.abnf: -------------------------------------------------------------------------------- 1 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 2 | DIGIT = %x30-39 3 | 4 | anint = 1*DIGIT !!! 5 | {int, ""} = Integer.parse to_string(rule) 6 | {:ok, state, int} 7 | !!! 8 | 9 | astring = 1*ALPHA !!! 10 | {:ok, state, to_string(rule)} 11 | !!! 12 | 13 | composed = anint astring !!! 14 | [int, string] = values 15 | {:ok, %{state | field: true}, %{ 16 | int: hd(:lists.flatten(int)), 17 | string: hd(:lists.flatten(string)) 18 | }} 19 | !!! 
20 | -------------------------------------------------------------------------------- /test/resources/module_code.abnf: -------------------------------------------------------------------------------- 1 | !!! 2 | require Logger 3 | def return_value(ip) do 4 | "Your ip address is: #{ip}" 5 | end 6 | !!! 7 | 8 | IPv4address = 9 | dec-octet "." 10 | dec-octet "." 11 | dec-octet "." 12 | dec-octet !!! 13 | state = Map.put state, :ipv4address, rule 14 | {:ok, state, return_value(rule)} 15 | !!! 16 | 17 | dec-octet = DIGIT ; 0-9 18 | / %x31-39 DIGIT ; 10-99 19 | / "1" 2DIGIT ; 100-199 20 | / "2" %x30-34 DIGIT ; 200-249 21 | / "25" %x30-35 ; 250-255 22 | 23 | DIGIT = %x30-39 24 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright 2015 Marcelo Gornstein 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | ################################################################################ 16 | ExUnit.start() 17 | -------------------------------------------------------------------------------- /test/resources/ipv6.abnf: -------------------------------------------------------------------------------- 1 | IPv6address = 6( h16 ":" ) ls32 2 | / "::" 5( h16 ":" ) ls32 3 | / [ h16 ] "::" 4( h16 ":" ) ls32 4 | / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 5 | / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 6 | / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 7 | / [ *4( h16 ":" ) h16 ] "::" ls32 8 | / [ *5( h16 ":" ) h16 ] "::" h16 9 | / [ *6( h16 ":" ) h16 ] "::" 10 | 11 | h16 = 1*4HEXDIG 12 | ls32 = ( h16 ":" h16 ) / IPv4address 13 | IPv4address = 14 | dec-octet "." 15 | dec-octet "." 16 | dec-octet "." 17 | dec-octet 18 | 19 | dec-octet = DIGIT ; 0-9 20 | / %x31-39 DIGIT ; 10-99 21 | / "1" 2DIGIT ; 100-199 22 | / "2" %x30-34 DIGIT ; 200-249 23 | / "25" %x30-35 ; 250-255 24 | 25 | DIGIT = %x30-39 26 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 27 | -------------------------------------------------------------------------------- /lib/ex_abnf/util.ex: -------------------------------------------------------------------------------- 1 | defmodule ABNF.Util do 2 | @moduledoc """ 3 | Some utilities used internally. 4 | 5 | 6 | Copyright 2015 Marcelo Gornstein 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | @doc """ 22 | Normalizes a rule name. It converts the name to a String.t and downcases it. 23 | """ 24 | @spec rulename(String.t|char_list) :: String.t 25 | def rulename(name) when is_list(name) do 26 | rulename to_string(name) 27 | end 28 | 29 | def rulename(name) when is_binary(name) do 30 | String.downcase name 31 | end 32 | end -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for third- 9 | # party users, it should be done in your mix.exs file. 10 | 11 | # Sample configuration: 12 | # 13 | # config :logger, 14 | # level: :info 15 | # 16 | # config :logger, :console, 17 | # format: "$date $time [$level] $metadata$message\n", 18 | # metadata: [:user_id] 19 | 20 | # It is also possible to import configuration files, relative to this 21 | # directory. For example, you can emulate configuration per environment 22 | # by uncommenting the line below and defining dev.exs, test.exs and such. 23 | # Configuration from the imported file will override the ones defined 24 | # here (which is why it is important to import them last). 
25 | # 26 | # import_config "#{Mix.env}.exs" 27 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule ABNF.Mixfile do 2 | use Mix.Project 3 | 4 | def project do 5 | [app: :ex_abnf, 6 | name: "ex_abnf", 7 | source_url: "https://github.com/marcelog/ex_abnf", 8 | version: "0.3.0", 9 | elixir: ">= 1.0.0", 10 | description: description(), 11 | package: package(), 12 | deps: deps()] 13 | end 14 | 15 | def application do 16 | [applications: [:logger]] 17 | end 18 | 19 | defp deps do 20 | [ 21 | {:earmark, "~> 1.0.3", only: :dev}, 22 | {:ex_doc, "~> 0.14.5", only: :dev} 23 | ] 24 | end 25 | 26 | defp description do 27 | """ 28 | A parser and interpreter for ABNF grammars. This is not a parser generator, but an interpreter. 29 | It will load up an ABNF grammar, and generate an AST for it. Then one can apply any of the rules to an input and the interpreter will parse the input according to the rule. 30 | """ 31 | end 32 | 33 | defp package do 34 | [ 35 | files: ["lib", "mix.exs", "README*", "LICENSE*"], 36 | maintainers: ["Marcelo Gornstein"], 37 | licenses: ["Apache 2.0"], 38 | links: %{ 39 | "GitHub" => "https://github.com/marcelog/ex_abnf" 40 | } 41 | ] 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/ex_abnf/capture_result.ex: -------------------------------------------------------------------------------- 1 | defmodule ABNF.CaptureResult do 2 | @moduledoc """ 3 | Capture result, used when returning a result after applying a grammar to an 4 | input. 5 | 6 | Copyright 2015 Marcelo Gornstein 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | defstruct input: '', # original input before match 22 | rest: '', # text that didn't match 23 | string_text: '', # full text that matched 24 | string_tokens: [], # contains string parts that matched (usually 1) 25 | values: nil, # real rule value 26 | state: nil # state after match 27 | 28 | @type t :: %ABNF.CaptureResult{} 29 | end -------------------------------------------------------------------------------- /lib/ex_abnf.ex: -------------------------------------------------------------------------------- 1 | defmodule ABNF do 2 | @moduledoc """ 3 | Main module. ABNF parser as described in [RFC4234](https://tools.ietf.org/html/rfc4234) 4 | and [RFC5234](https://tools.ietf.org/html/rfc5234) 5 | 6 | Copyright 2015 Marcelo Gornstein 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | alias ABNF.Grammar, as: Grammar 22 | alias ABNF.Interpreter, as: Interpreter 23 | alias ABNF.CaptureResult, as: CaptureResult 24 | require Logger 25 | @doc """ 26 | Loads a set of abnf rules from a file. 
27 | """ 28 | @spec load_file(String.t) :: Grammar.t | no_return 29 | def load_file(file) do 30 | data = File.read! file 31 | load to_char_list(data) 32 | end 33 | 34 | @doc """ 35 | Returns the abnf rules found in the given char list. 36 | """ 37 | @spec load([byte]) :: Grammar.t | no_return 38 | def load(input) do 39 | case Grammar.rulelist input do 40 | {rules, ''} -> rules 41 | {_rlist, rest} -> throw {:incomplete_parsing, rest} 42 | _ -> throw {:invalid_grammar, input} 43 | end 44 | end 45 | 46 | @doc """ 47 | Parses an input given a grammar, looking for the given rule. 48 | """ 49 | @spec apply(Grammar.t, String.t, [byte], term) :: CaptureResult.t 50 | def apply(grammar, rule, input, state \\ nil) do 51 | Interpreter.apply grammar, rule, input, state 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/resources/RFC3966.abnf: -------------------------------------------------------------------------------- 1 | telephone-uri = "tel:" telephone-subscriber 2 | telephone-subscriber = global-number / local-number 3 | global-number = global-number-digits *par 4 | local-number = local-number-digits *par context *par 5 | par = parameter / extension / isdn-subaddress 6 | isdn-subaddress = ";isub=" 1*paramchar 7 | extension = ";ext=" 1*phonedigit 8 | context = ";phone-context=" descriptor 9 | descriptor = domainname / global-number-digits 10 | global-number-digits = "+" *phonedigit DIGIT *phonedigit 11 | local-number-digits = *phonedigit-hex (HEXDIG / "*" / "#") *phonedigit-hex 12 | domainname = *( domainlabel "." ) toplabel [ "." ] 13 | domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum 14 | toplabel = ALPHA / ALPHA *( alphanum / "-" ) alphanum 15 | parameter = ";" pname ["=" pvalue ] 16 | pname = 1*( alphanum / "-" ) 17 | pvalue = 1*paramchar 18 | paramchar = param-unreserved / unreserved / pct-encoded 19 | unreserved = alphanum / mark 20 | mark = "-" / "_" / "." / "!" 
/ "~" / "*" / "'" / "(" / ")" 21 | pct-encoded = "%" HEXDIG HEXDIG 22 | param-unreserved = "[" / "]" / "/" / ":" / "&" / "+" / "$" 23 | phonedigit = DIGIT / visual-separator 24 | phonedigit-hex = HEXDIG / "*" / "#" / visual-separator 25 | visual-separator = "-" / "." / "(" / ")" 26 | alphanum = ALPHA / DIGIT 27 | reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 28 | uric = reserved / unreserved / pct-encoded 29 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 30 | BIT = "0" / "1" 31 | CHAR = %x01-7F ; any 7-bit US-ASCII character, excluding NUL 32 | CR = %x0D ; carriage return 33 | CRLF = CR LF ; Internet standard newline 34 | CTL = %x00-1F / %x7F ; controls 35 | DIGIT = %x30-39 ; 0-9 36 | DQUOTE = %x22 ; " (Double Quote) 37 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 38 | HTAB = %x09 ; horizontal tab 39 | LF = %x0A ; linefeed 40 | LWSP = *(WSP / CRLF WSP) ; linear white space (past newline) 41 | OCTET = %x00-FF ; 8 bits of data 42 | SP = %x20 43 | VCHAR = %x21-7E ; visible (printing) characters 44 | WSP = SP / HTAB ; white space 45 | -------------------------------------------------------------------------------- /lib/ex_abnf/core.ex: -------------------------------------------------------------------------------- 1 | defmodule ABNF.Core do 2 | @moduledoc """ 3 | Core rules found in [Appendix B](https://tools.ietf.org/html/rfc4234#appendix-B) of the ABNF RFC. 4 | 5 | Copyright 2015 Marcelo Gornstein 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | """ 19 | 20 | @doc """ 21 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 22 | """ 23 | @spec alpha?(char) :: boolean 24 | def alpha?(char) do 25 | (char >= 0x41 and char <= 0x5A) or 26 | (char >= 0x61 and char <= 0x7A) 27 | end 28 | 29 | @doc """ 30 | BIT = "0" / "1" 31 | """ 32 | @spec bit?(char) :: boolean 33 | def bit?(char) do 34 | char === 0x30 or char === 0x31 35 | end 36 | 37 | @doc """ 38 | CHAR = %x01-7F ; any 7-bit US-ASCII character, excluding NUL 39 | """ 40 | @spec char?(char) :: boolean 41 | def char?(char) do 42 | char >= 0x01 and char <= 0x7F 43 | end 44 | 45 | @doc """ 46 | CR = %x0D ; carriage return 47 | """ 48 | @spec cr?(char) :: boolean 49 | def cr?(char) do 50 | char === 0x0D 51 | end 52 | 53 | @doc """ 54 | CTL = %x00-1F / %x7F ; controls 55 | """ 56 | @spec ctl?(char) :: boolean 57 | def ctl?(char) do 58 | (char >= 0x00 and char <= 0x1F) or char === 0x7F # DEL (%x7F) is the second CTL alternative 59 | end 60 | 61 | @doc """ 62 | DIGIT = %x30-39; 0-9 63 | """ 64 | @spec digit?(char) :: boolean 65 | def digit?(char) do 66 | char >= 0x30 and char <= 0x39 67 | end 68 | 69 | @doc """ 70 | DQUOTE = %x22 ; " (Double Quote) 71 | """ 72 | @spec dquote?(char) :: boolean 73 | def dquote?(char) do 74 | char === 0x22 75 | end 76 | 77 | @doc """ 78 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 79 | """ 80 | @spec hexdig?(char) :: boolean 81 | def hexdig?(char) do 82 | digit?(char) or 83 | (char >= 0x41 and char <= 0x46) or 84 | (char >= 0x61 and char <= 0x66) 85 | end 86 | 87 | @doc """ 88 | HTAB = %x09 ; horizontal tab 89 | """ 90 | @spec htab?(char) :: boolean 91 | def htab?(char) do 92 | char === 0x09 93 | end 94 | 95 | @doc """ 96 | LF = %x0A ; linefeed 97 | """ 98 | @spec lf?(char) :: boolean 99 | def lf?(char) do 100 | char === 0x0A 101 | end 102 | 103 | @doc """ 104 | OCTET = %x00-FF ; 8 bits of data 105 | """ 106 | @spec octet?(char) :: boolean 107 | def octet?(char) do 108 | char >= 0x00 and 
char <= 0xFF 109 | end 110 | 111 | @doc """ 112 | SP = %x20 113 | """ 114 | @spec sp?(char) :: boolean 115 | def sp?(char) do 116 | char === 0x20 117 | end 118 | 119 | @doc """ 120 | VCHAR = %x21-7E ; visible (printing) characters 121 | """ 122 | @spec vchar?(char) :: boolean 123 | def vchar?(char) do 124 | char >= 0x21 and char <= 0x7E 125 | end 126 | 127 | @doc """ 128 | WSP = SP / HTAB ; white space 129 | """ 130 | @spec wsp?(char) :: boolean 131 | def wsp?(char) do 132 | sp?(char) or htab?(char) 133 | end 134 | end 135 | -------------------------------------------------------------------------------- /test/resources/RFC3986.abnf: -------------------------------------------------------------------------------- 1 | URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 2 | 3 | hier-part = "//" authority path-abempty 4 | / path-absolute 5 | / path-rootless 6 | / path-empty 7 | 8 | URI-reference = URI / relative-ref 9 | 10 | absolute-URI = scheme ":" hier-part [ "?" query ] 11 | 12 | relative-ref = relative-part [ "?" query ] [ "#" fragment ] 13 | 14 | relative-part = "//" authority path-abempty 15 | / path-absolute 16 | / path-noscheme 17 | / path-empty 18 | 19 | scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) !!! 20 | state = Map.put state, :scheme, rule 21 | {:ok, state} 22 | !!! 23 | 24 | authority = [ userinfo "@" ] host [ ":" port ] 25 | userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) !!! 26 | state = Map.put state, :userinfo, rule 27 | {:ok, state} 28 | !!! 29 | 30 | host = IP-literal / IPv4address / reg-name !!! 31 | state = Map.put state, :host, rule 32 | {:ok, state} 33 | !!! 34 | port = *DIGIT !!! 35 | state = Map.put state, :port, rule 36 | {:ok, state} 37 | !!! 38 | 39 | IP-literal = "[" ( IPv6address / IPvFuture ) "]" 40 | 41 | IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) !!! 42 | state = Map.put state, :host_type, :ipvfuture 43 | {:ok, state} 44 | !!! 
45 | 46 | IPv6address = 6( h16 ":" ) ls32 47 | / "::" 5( h16 ":" ) ls32 48 | / [ h16 ] "::" 4( h16 ":" ) ls32 49 | / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 50 | / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 51 | / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 52 | / [ *4( h16 ":" ) h16 ] "::" ls32 53 | / [ *5( h16 ":" ) h16 ] "::" h16 54 | / [ *6( h16 ":" ) h16 ] "::" !!! 55 | state = Map.put state, :host_type, :ipv6 56 | {:ok, state} 57 | !!! 58 | 59 | h16 = 1*4HEXDIG 60 | ls32 = ( h16 ":" h16 ) / IPv4address 61 | IPv4address = 62 | dec-octet "." 63 | dec-octet "." 64 | dec-octet "." 65 | dec-octet !!! 66 | state = Map.put state, :host_type, :ipv4 67 | {:ok, state} 68 | !!! 69 | 70 | dec-octet = DIGIT ; 0-9 71 | / %x31-39 DIGIT ; 10-99 72 | / "1" 2DIGIT ; 100-199 73 | / "2" %x30-34 DIGIT ; 200-249 74 | / "25" %x30-35 ; 250-255 75 | 76 | reg-name = *( unreserved / pct-encoded / sub-delims ) !!! 77 | state = Map.put state, :host_type, :reg_name 78 | {:ok, state} 79 | !!! 80 | 81 | path = path-abempty ; begins with "/" or is empty 82 | / path-absolute ; begins with "/" but not "//" 83 | / path-noscheme ; begins with a non-colon segment 84 | / path-rootless ; begins with a segment 85 | / path-empty ; zero characters 86 | 87 | path-abempty = *( "/" segment ) !!! 88 | state = Map.put state, :type, :abempty 89 | {:ok, state} 90 | !!! 91 | 92 | path-absolute = "/" [ segment-nz *( "/" segment ) ] !!! 93 | state = Map.put state, :type, :absolute 94 | {:ok, state} 95 | !!! 96 | 97 | path-noscheme = segment-nz-nc *( "/" segment ) !!! 98 | state = Map.put state, :type, :noscheme 99 | {:ok, state} 100 | !!! 101 | 102 | path-rootless = segment-nz *( "/" segment ) !!! 103 | state = Map.put state, :type, :rootless 104 | {:ok, state} 105 | !!! 106 | 107 | path-empty = 0 !!! 108 | state = Map.put state, :type, :empty 109 | {:ok, state} 110 | !!! 111 | 112 | segment = *pchar !!! 
113 | segments = state.segments 114 | state = Map.put state, :segments, Enum.reverse([rule|segments]) 115 | {:ok, state} 116 | !!! 117 | segment-nz = 1*pchar !!! 118 | segments = state.segments 119 | state = Map.put state, :segments, Enum.reverse([rule|segments]) 120 | {:ok, state} 121 | !!! 122 | segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) !!! 123 | segments = state.segments 124 | state = Map.put state, :segments, Enum.reverse([rule|segments]) 125 | {:ok, state} 126 | !!! 127 | ; non-zero-length segment without any colon ":" 128 | 129 | pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 130 | 131 | query = *( pchar / "/" / "?" ) !!! 132 | state = Map.put state, :query, rule 133 | {:ok, state} 134 | !!! 135 | 136 | fragment = *( pchar / "/" / "?" ) !!! 137 | state = Map.put state, :fragment, rule 138 | {:ok, state} 139 | !!! 140 | 141 | pct-encoded = "%" HEXDIG HEXDIG 142 | 143 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 144 | reserved = gen-delims / sub-delims 145 | gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 146 | sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 147 | / "*" / "+" / "," / ";" / "=" 148 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 149 | 150 | BIT = "0" / "1" 151 | 152 | CHAR = %x01-7F 153 | ; any 7-bit US-ASCII character, 154 | ; excluding NUL 155 | 156 | CR = %x0D 157 | ; carriage return 158 | 159 | CRLF = CR LF 160 | ; Internet standard newline 161 | 162 | CTL = %x00-1F / %x7F 163 | ; controls 164 | 165 | DIGIT = %x30-39 166 | ; 0-9 167 | 168 | DQUOTE = %x22 169 | ; " (Double Quote) 170 | 171 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 172 | 173 | HTAB = %x09 174 | ; horizontal tab 175 | 176 | LF = %x0A 177 | ; linefeed 178 | 179 | LWSP = *(WSP / CRLF WSP) 180 | ; linear white space (past newline) 181 | 182 | OCTET = %x00-FF 183 | ; 8 bits of data 184 | 185 | SP = %x20 186 | 187 | VCHAR = %x21-7E 188 | ; visible (printing) characters 189 | 190 | WSP = SP / HTAB 191 | ; white space 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/marcelog/ex_abnf.svg)](https://travis-ci.org/marcelog/ex_abnf) 2 | 3 | ## About 4 | 5 | A parser and interpreter written in [Elixir](http://elixir-lang.org/) for [ABNF grammars](https://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_Form). 6 | 7 | ABNF is defined in the [RFC2234](https://tools.ietf.org/html/rfc2234), which is 8 | obsoleted by [RFC4234](https://tools.ietf.org/html/rfc4234), which in turn is 9 | obsoleted by the [RFC5234](https://tools.ietf.org/html/rfc5234). There's also an update 10 | in the [RFC7405](https://tools.ietf.org/html/rfc7405). 11 | 12 | This library implements the latest definition (RFC5234) (with erratas #3076, and #2968), and RFC7405. 
13 | 14 | ## Use example 15 | 16 | iex(1)> grammar = ABNF.load_file "test/resources/ipv4.abnf" 17 | iex(2)> initial_state = %{} 18 | iex(2)> ABNF.apply grammar, "ipv4address", '250.246.192.34', initial_state 19 | %ABNF.CaptureResult{ 20 | input: '250.246.192.34', 21 | rest: '', 22 | state: %{ipv4address: '250.246.192.34'}, 23 | string_text: '250.246.192.34', 24 | string_tokens: ['250', '.', '246', '.', '192', '.', '34'], 25 | values: ["Your ip address is: 250.246.192.34"] 26 | } 27 | 28 | The result can be read as an [%ABNF.CaptureResult{}](https://github.com/marcelog/ex_abnf/blob/master/lib/ex_abnf/capture_result.ex) 29 | where: 30 | * **input**: The original input 31 | * **rest**: The part of the input that **didn't** match. 32 | * **state**: The state after running all the rules applied to the input. 33 | * **string_text**: The rule value as a string (this might or might not be the same as the rule value, since you can return custom values when adding a reduce code to the rule). 34 | * **string_tokens**: Each one of the values that compose the string (in this case, [octet, dot, octet, dot, octet, dot, octet]). 35 | * **values**: The rule value. In this case the value comes from the reduce code in the [grammar itself](https://github.com/marcelog/ex_abnf/blob/master/test/resources/ipv4.abnf#L6). 36 | 37 | ## More complex examples 38 | 39 | * There's a small sample application at [https://github.com/marcelog/ex_abnf_example](https://github.com/marcelog/ex_abnf_example). An article 40 | describing this application is located at [http://marcelog.github.io/articles/abnf_grammars_in_elixir.html](http://marcelog.github.io/articles/abnf_grammars_in_elixir.html). 
41 | 42 | * The [unit tests](https://github.com/marcelog/ex_abnf/blob/master/test/ex_abnf_test.exs) 43 | use different [sample RFCs](https://github.com/marcelog/ex_abnf/tree/master/test/resources) to 44 | test the [grammar parser](https://github.com/marcelog/ex_abnf/blob/master/lib/ex_abnf/grammar.ex) 45 | and [the interpreter](https://github.com/marcelog/ex_abnf/blob/master/lib/ex_abnf/interpreter.ex) 46 | 47 | ## How it works 48 | This is not a parser generator, but an interpreter. It will load up an ABNF 49 | grammar, and generate a (kind of) [AST](http://en.wikipedia.org/wiki/Abstract_syntax_tree) 50 | for it. Then you can apply any of the rules to an input and the interpreter 51 | will parse the input according to the rule. 52 | 53 | ## Using it with Mix 54 | 55 | To use it in your Mix projects, first add it as a dependency: 56 | 57 | ```elixir 58 | def deps do 59 | [{:ex_abnf, "~> 0.3.0"}] 60 | end 61 | ``` 62 | Then run `mix deps.get` to install it. 63 | 64 | ## Adding custom code to reduce rules 65 | After a rule, you can add your own code, for example: 66 | ``` 67 | userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) !!! 68 | state = Map.put state, :userinfo, rule 69 | {:ok, state} 70 | !!! 71 | ``` 72 | 73 | The code in question will be packed together into a module that is created at 74 | runtime to speed up execution later on. 75 | 76 | Your code can return: 77 | * **{:ok, state}**: The match continues, and the new state is used for 78 | future calls. 79 | 80 | * **{:ok, state, rule_value}**: Returns a new state but also the **rule_value** 81 | is used as the result of the match. In YACC terms, rule_value would be the 82 | equivalent of $$ = ... 83 | 84 | * **{:error, error}**: The whole match is aborted and this error is thrown. 
85 | 86 | And your code will be called with the following bindings: 87 | 88 | * **state**: This is the state that you can pass when calling the initial 89 | **ABNF.apply** function, and is a way to keep state through the whole match, 90 | it can be whatever you like and can mutate through calls as long as your code 91 | can handle it. 92 | 93 | * **values**: When a rule is composed of different tokens 94 | (e.g: path = SEGMENT "/" SEGMENT) this contains a list with all the values of 95 | those tokens in order. In YACC terms, this would be the equivalent of using 96 | $1, $2, $3, etc. Note that a value here can be a reduced value returned by 97 | your own code in a previous rule. 98 | 99 | * **string_values**: Just like `values` but each value is a nested list of 100 | lists with all the characters that matched (you will usually want to flatten 101 | the list to get each one of the full strings). 102 | 103 | ## Adding helper code 104 | You can also start your grammar with code to write your own helper functions and 105 | module additions. For example: 106 | ``` 107 | !!! 108 | require Logger 109 | def return_value(ip) do 110 | Logger.debug "Hello world" 111 | "Your ip address is: #{ip}" 112 | end 113 | !!! 114 | 115 | IPv4address = 116 | dec-octet "." 117 | dec-octet "." 118 | dec-octet "." 119 | dec-octet !!! 120 | state = Map.put state, :ipv4address, rule 121 | {:ok, state, return_value(rule)} 122 | !!! 123 | 124 | dec-octet = DIGIT ; 0-9 125 | / %x31-39 DIGIT ; 10-99 126 | / "1" 2DIGIT ; 100-199 127 | / "2" %x30-34 DIGIT ; 200-249 128 | / "25" %x30-35 ; 250-255 129 | 130 | DIGIT = %x30-39 131 | ``` 132 | 133 | Note how the result of the `IPv4address` rule is the result of a call to the 134 | function `return_value`. 135 | 136 | ## Changes from 0.1.x to 0.2.x 137 | * In the reduce code the rule value is no longer the rule name, but the 138 | variable `rule`. 139 | * The grammar text no longer supports `cr` as the newline, one should always 140 | use `crlf`. 
141 | * In the reduce code the following variables are now available: 142 | * `rule`: The rule value 143 | * `string_values`: Like the old `tokens` variable, but contains a nested list 144 | of lists with the parsed strings. 145 | * `values`: Like the old `tokens` variable, but with the reduced values 146 | (could be a mixed nested list of lists containing char_lists and/or other 147 | kinds of values). 148 | * Original rule names are now preserved and only downcased, no replacements 149 | are done to chars (e.g. `-` to `_`). 150 | 151 | ## License 152 | The source code is released under the Apache 2 License. 153 | 154 | Check [LICENSE](https://github.com/marcelog/ex_abnf/blob/master/LICENSE) file 155 | for more information. 156 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | -------------------------------------------------------------------------------- /test/resources/RFC4566.abnf: -------------------------------------------------------------------------------- 1 | session-description = proto-version 2 | origin-field 3 | session-name-field 4 | information-field 5 | uri-field 6 | email-fields 7 | phone-fields 8 | connection-field 9 | bandwidth-fields 10 | time-fields 11 | key-field 12 | attribute-fields 13 | media-descriptions 14 | 15 | proto-version = %x76 "=" 1*DIGIT CRLF !!! 16 | [_, _, v, _] = values 17 | {:ok, Map.put(state, :version, :lists.flatten(v))} 18 | !!! 19 | ;this memo describes version 0 20 | 21 | origin-field = %x6f "=" username SP sess-id SP sess-version SP 22 | nettype SP addrtype SP unicast-address CRLF !!! 23 | [ 24 | _, 25 | _, 26 | username, 27 | _, 28 | sess_id, 29 | _, 30 | sess_version, 31 | _, 32 | net_type, 33 | _, 34 | addr_type, 35 | _, 36 | unicast, 37 | _ 38 | ] = values 39 | {:ok, Map.put(state, :origin, %{ 40 | username: :lists.flatten(username), 41 | session_id: :lists.flatten(sess_id), 42 | session_version: :lists.flatten(sess_version), 43 | net_type: :lists.flatten(net_type), 44 | address_type: :lists.flatten(addr_type), 45 | unicast_address: :lists.flatten(unicast) 46 | })} 47 | !!! 48 | 49 | session-name-field = %x73 "=" text CRLF !!! 50 | [_, _, s, _] = values 51 | {:ok, Map.put(state, :session_name, :lists.flatten(s))} 52 | !!! 53 | 54 | information-field = [%x69 "=" text CRLF] 55 | 56 | uri-field = [%x75 "=" uri CRLF] 57 | 58 | email-fields = *(%x65 "=" email-address CRLF) 59 | 60 | phone-fields = *(%x70 "=" phone-number CRLF) 61 | 62 | connection-field = [%x63 "=" nettype SP addrtype SP 63 | connection-address CRLF] !!! 64 | {:ok, state} 65 | !!! 
66 | ;a connection field must be present 67 | ;in every media description or at the 68 | ;session-level 69 | 70 | bandwidth-fields = *(%x62 "=" bwtype ":" bandwidth CRLF) 71 | 72 | time-fields = 1*( %x74 "=" start-time SP stop-time 73 | *(CRLF repeat-fields) CRLF) 74 | [zone-adjustments CRLF] 75 | 76 | repeat-fields = %x72 "=" repeat-interval SP typed-time 77 | 1*(SP typed-time) 78 | 79 | zone-adjustments = %x7a "=" time SP ["-"] typed-time 80 | *(SP time SP ["-"] typed-time) 81 | 82 | key-field = [%x6b "=" key-type CRLF] 83 | 84 | attribute-fields = *(%x61 "=" attribute CRLF) 85 | 86 | media-descriptions = *( media-field 87 | information-field 88 | *connection-field 89 | bandwidth-fields 90 | key-field 91 | attribute-fields ) 92 | 93 | media-field = %x6d "=" media SP port ["/" integer] 94 | SP proto 1*(SP fmt) CRLF 95 | 96 | ; sub-rules of 'o=' 97 | username = non-ws-string 98 | ;pretty wide definition, but doesn't 99 | ;include space 100 | 101 | sess-id = 1*DIGIT 102 | ;should be unique for this username/host 103 | 104 | sess-version = 1*DIGIT 105 | 106 | nettype = token 107 | ;typically "IN" 108 | 109 | addrtype = token 110 | ;typically "IP4" or "IP6" 111 | 112 | ; sub-rules of 'u=' 113 | uri = URI-reference 114 | ; see RFC 3986 115 | 116 | 117 | ; sub-rules of 'e=', see RFC 2822 for definitions 118 | email-address = address-and-comment / dispname-and-address 119 | / addr-spec 120 | address-and-comment = addr-spec 1*SP "(" 1*email-safe ")" 121 | dispname-and-address = 1*email-safe 1*SP "<" addr-spec ">" 122 | 123 | ; sub-rules of 'p=' 124 | phone-number = phone *SP "(" 1*email-safe ")" / 125 | 1*email-safe "<" phone ">" / 126 | phone 127 | 128 | phone = ["+"] DIGIT 1*(SP / "-" / DIGIT) 129 | 130 | ; sub-rules of 'c=' 131 | connection-address = multicast-address / unicast-address 132 | 133 | ; sub-rules of 'b=' 134 | bwtype = token 135 | 136 | bandwidth = 1*DIGIT 137 | 138 | ; sub-rules of 't=' 139 | start-time = time / "0" 140 | 141 | stop-time = time / "0" 142 
| 143 | time = POS-DIGIT 9*DIGIT 144 | ; Decimal representation of NTP time in 145 | ; seconds since 1900. The representation 146 | ; of NTP time is an unbounded length field 147 | ; containing at least 10 digits. Unlike the 148 | ; 64-bit representation used elsewhere, time 149 | ; in SDP does not wrap in the year 2036. 150 | 151 | ; sub-rules of 'r=' and 'z=' 152 | repeat-interval = POS-DIGIT *DIGIT [fixed-len-time-unit] 153 | 154 | typed-time = 1*DIGIT [fixed-len-time-unit] 155 | 156 | fixed-len-time-unit = %x64 / %x68 / %x6d / %x73 157 | 158 | ; sub-rules of 'k=' 159 | key-type = %x70 %x72 %x6f %x6d %x70 %x74 / ; "prompt" 160 | %x63 %x6c %x65 %x61 %x72 ":" text / ; "clear:" 161 | %x62 %x61 %x73 %x65 "64:" base64 / ; "base64:" 162 | %x75 %x72 %x69 ":" uri ; "uri:" 163 | 164 | base64 = *base64-unit [base64-pad] 165 | 166 | base64-unit = 4base64-char 167 | base64-pad = 2base64-char "==" / 3base64-char "=" 168 | base64-char = ALPHA / DIGIT / "+" / "/" 169 | 170 | ; sub-rules of 'a=' 171 | attribute = (att-field ":" att-value) / att-field 172 | 173 | att-field = token 174 | 175 | att-value = byte-string 176 | 177 | ; sub-rules of 'm=' 178 | media = token 179 | ;typically "audio", "video", "text", or 180 | ;"application" 181 | 182 | fmt = token 183 | ;typically an RTP payload type for audio 184 | ;and video media 185 | 186 | proto = token *("/" token) 187 | ;typically "RTP/AVP" or "udp" 188 | 189 | port = 1*DIGIT 190 | 191 | ; generic sub-rules: addressing 192 | unicast-address = IP4-address / IP6-address / FQDN / extn-addr 193 | 194 | multicast-address = IP4-multicast / IP6-multicast / FQDN 195 | / extn-addr 196 | 197 | IP4-multicast = m1 3( "." 
decimal-uchar ) 198 | "/" ttl [ "/" integer ] 199 | ; IPv4 multicast addresses may be in the 200 | ; range 224.0.0.0 to 239.255.255.255 201 | 202 | m1 = ("22" ("4"/"5"/"6"/"7"/"8"/"9")) / 203 | ("23" DIGIT ) 204 | 205 | IP6-multicast = hexpart [ "/" integer ] 206 | ; IPv6 address starting with FF 207 | 208 | ttl = (POS-DIGIT *2DIGIT) / "0" 209 | 210 | FQDN = 4*(alpha-numeric / "-" / ".") 211 | ; fully qualified domain name as specified 212 | ; in RFC 1035 (and updates) 213 | 214 | IP4-address = b1 3("." decimal-uchar) 215 | 216 | b1 = decimal-uchar 217 | ; less than "224" 218 | 219 | ; The following is consistent with RFC 2373 [30], Appendix B. 220 | IP6-address = hexpart [ ":" IP4-address ] 221 | 222 | hexpart = hexseq / hexseq "::" [ hexseq ] / 223 | "::" [ hexseq ] 224 | 225 | hexseq = hex4 *( ":" hex4) 226 | 227 | hex4 = 1*4HEXDIG 228 | 229 | ; Generic for other address families 230 | extn-addr = non-ws-string 231 | 232 | ; generic sub-rules: datatypes 233 | text = byte-string 234 | ;default is to interpret this as UTF8 text. 
235 | ;ISO 8859-1 requires "a=charset:ISO-8859-1" 236 | ;session-level attribute to be used 237 | 238 | byte-string = 1*(%x01-09/%x0B-0C/%x0E-FF) 239 | ;any byte except NUL, CR, or LF 240 | 241 | non-ws-string = 1*(VCHAR/%x80-FF) 242 | ;string of visible characters 243 | 244 | token-char = %x21 / %x23-27 / %x2A-2B / %x2D-2E / %x30-39 245 | / %x41-5A / %x5E-7E 246 | 247 | token = 1*(token-char) 248 | 249 | email-safe = %x01-09/%x0B-0C/%x0E-27/%x2A-3B/%x3D/%x3F-FF 250 | ;any byte except NUL, CR, LF, or the quoting 251 | ;characters ()<> 252 | 253 | integer = POS-DIGIT *DIGIT 254 | 255 | ; generic sub-rules: primitives 256 | alpha-numeric = ALPHA / DIGIT 257 | 258 | POS-DIGIT = %x31-39 ; 1 - 9 259 | decimal-uchar = DIGIT 260 | / POS-DIGIT DIGIT 261 | / ("1" 2*(DIGIT)) 262 | / ("2" ("0"/"1"/"2"/"3"/"4") DIGIT) 263 | / ("2" "5" ("0"/"1"/"2"/"3"/"4"/"5")) 264 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 265 | 266 | BIT = "0" / "1" 267 | 268 | CHAR = %x01-7F 269 | ; any 7-bit US-ASCII character, 270 | ; excluding NUL 271 | 272 | CR = %x0D 273 | ; carriage return 274 | 275 | CRLF = [CR] LF 276 | ; Internet standard newline 277 | 278 | CTL = %x00-1F / %x7F 279 | ; controls 280 | 281 | DIGIT = %x30-39 282 | ; 0-9 283 | 284 | DQUOTE = %x22 285 | ; " (Double Quote) 286 | 287 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 288 | 289 | HTAB = %x09 290 | ; horizontal tab 291 | 292 | LF = %x0A 293 | ; linefeed 294 | 295 | LWSP = *(WSP / CRLF WSP) 296 | ; linear white space (past newline) 297 | 298 | OCTET = %x00-FF 299 | ; 8 bits of data 300 | 301 | SP = %x20 302 | 303 | VCHAR = %x21-7E 304 | ; visible (printing) characters 305 | 306 | WSP = SP / HTAB 307 | ; white space 308 | -------------------------------------------------------------------------------- /lib/ex_abnf/interpreter.ex: -------------------------------------------------------------------------------- 
defmodule ABNF.Interpreter do
  @moduledoc """
  This module implements the Grammar.t interpreter. Applying a Grammar.t to the
  given input will result in a CaptureResult.t, `nil` when the input does not
  match the requested rule, or an exception.

  Internally every successful match is represented as a 5-tuple:

      {string_text, string_tokens, values, state, rest}

  where `rest` is the part of the input that was not consumed and `state` is
  the (possibly updated) user state.

      Copyright 2015 Marcelo Gornstein

      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

          http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
  """

  alias ABNF.Util, as: Util
  alias ABNF.Grammar, as: Grammar
  alias ABNF.CaptureResult, as: Res
  require Logger

  @doc """
  Parses the given input using the given grammar.

  * `grammar`: the loaded grammar (a map from rule name to rule definition).
  * `rule_str`: name of the rule to apply; it is normalized with
    `ABNF.Util.rulename/1` before the lookup.
  * `input`: the char list to parse.
  * `state`: user state handed to the reduce code of the rules
    (defaults to `nil`).

  Returns a `CaptureResult` struct when the rule matches a prefix of the input
  and `nil` when it does not match. Raises `ArgumentError` when a referenced
  rule does not exist in the grammar or when the reduce code of a rule returns
  something other than `{:ok, state}` or `{:ok, state, value}`.
  """
  # NOTE(review): assumes ABNF.CaptureResult exports a `t` type — confirm in
  # capture_result.ex.
  @spec apply(
    Grammar.t, String.t, char_list, term
  ) :: Res.t | nil | no_return
  def apply(grammar, rule_str, input, state \\ nil) do
    rule_str = Util.rulename rule_str
    case parse_real grammar, %{element: :rulename, value: rule_str}, input, state do
      nil -> nil
      {
        r_string_text,
        r_string_tokens,
        r_values,
        r_state,
        r_rest
      } -> %Res{
        string_text: r_string_text,
        string_tokens: r_string_tokens,
        values: r_values,
        state: r_state,
        input: input,
        rest: r_rest
      }
    end
  end

  # Matches a full rule: first its definition and then, if the rule carries
  # reduce code (`code: {module, function, _}`), that code is invoked with the
  # state, the matched text, the string tokens and the values. The code can
  # replace the state and, optionally, the values of the match.
  defp parse_real(
    grammar, e = %{element: :rule, value: a, code: c}, input, state
  ) do
    case parse_real grammar, a, input, state do
      nil -> nil
      r = {
        r_string_text,
        r_string_tokens,
        r_values,
        r_state,
        r_rest
      } ->
        if is_nil c do
          r
        else
          try do
            {m, f, _} = c
            case :erlang.apply m, f, [
              r_state, r_string_text, r_string_tokens, r_values
            ] do
              {:ok, state} -> {
                r_string_text,
                r_string_tokens,
                r_values,
                state,
                r_rest
              }
              {:ok, state, val} -> {
                r_string_text,
                r_string_tokens,
                [val],
                state,
                r_rest
              }
              r -> raise ArgumentError,
                "Unexpected result for rule #{inspect e} #{inspect r}"
            end
          rescue
            ex ->
              # Log which reduce code failed and re-raise the original
              # exception with its original stacktrace.
              Logger.error "Unexpected result for rule " <>
                " when running code #{inspect e.code}"
              stacktrace = System.stacktrace
              reraise ex, stacktrace
          end
        end
    end
  end

  # A prose value is resolved as a reference to the rule with that name.
  defp parse_real(
    grammar, %{element: :prose_val, value: v}, input, state
  ) do
    parse_real grammar, %{element: :rulename, value: v}, input, state
  end

  # An alternation tries all of its concatenations (see run_concs/5).
  defp parse_real(
    grammar, %{element: :alternation, value: alternation}, input, state
  ) do
    run_concs grammar, alternation, input, state, nil
  end

  # Matches a single character inside the inclusive range from..to.
  defp parse_real(
    _grammar, %{element: :num_range, value: %{from: from, to: to}}, input, state
  ) do
    case input do
      [char|rest] -> if(char >= from and char <= to) do
        result = [char]
        {
          result,
          [result],
          [result],
          state,
          rest
        }
      else
        nil
      end
      _ -> nil
    end
  end

  # Matches a literal string using its precompiled regex; on success the first
  # `l` characters of the input are consumed.
  # NOTE(review): presumably the regex is anchored at the start of the input
  # and matches exactly `l` characters — confirm where the grammar builds it.
  defp parse_real(
    _grammar, %{element: :char_val, value: %{regex: r, length: l}}, input, state
  ) do
    case :re.run input, r do
      {:match, _} ->
        {s1, rest} = :lists.split l, input
        {
          s1,
          [s1],
          [s1],
          state,
          rest
        }
      _ -> nil
    end
  end

  # Matches an exact sequence of character codes (see num_concat/3).
  defp parse_real(_grammar, %{element: :num_concat, value: list}, input, state) do
    case num_concat list, input do
      nil -> nil
      {match, rest} -> {
        match,
        [match],
        [match],
        state,
        rest
      }
    end
  end

  # Looks the rule up in the grammar and applies it. Raises ArgumentError when
  # the rule does not exist.
  defp parse_real(grammar, %{element: :rulename, value: e}, input, state) do
    value = case :maps.find e, grammar do
      {:ok, value} -> value
      # inspect/1 keeps the list of known rules readable; interpolating the
      # list directly would concatenate all the keys without separators.
      _ -> raise ArgumentError,
        "Rule #{e} not found in #{inspect Map.keys(grammar)}"
    end
    parse_real grammar, value, input, state
  end

  # A group just delegates to the element it wraps.
  defp parse_real(grammar, %{element: :group, value: e}, input, state) do
    parse_real grammar, e, input, state
  end

  # An option never fails: when the wrapped element does not match, an empty
  # match that consumes no input is returned.
  defp parse_real(grammar, %{element: :option, value: e}, input, state) do
    case parse_real grammar, e, input, state do
      nil -> {
        '',
        [''],
        [],
        state,
        input
      }
      r -> r
    end
  end

  # Matches the characters in `list`, one by one, against the head of `input`.
  # Returns {matched_chars, rest_of_input} or nil when there is no match.
  defp num_concat(list, input, acc \\ [])

  defp num_concat([], input, acc) do
    match = :lists.reverse acc
    {match, input}
  end

  defp num_concat([char1|rest_list], [char2|rest_input], acc) do
    if char1 === char2 do
      num_concat rest_list, rest_input, [char1|acc]
    else
      nil
    end
  end

  # The input ran out before all the expected characters were seen: this is a
  # failed match, not an error (without this clause a FunctionClauseError
  # would be raised).
  defp num_concat(_list, [], _acc) do
    nil
  end

  # Greedily applies the repeated element `v` until it stops matching, `to`
  # matches were collected, or an empty match is found. Results are
  # accumulated in reverse order (most recent match first). Returns nil when
  # fewer than `from` matches were found.
  defp repetition(
    grammar, e = %{element: :repetition, value: %{from: from, to: to, value: v}},
    input, state, acc = {
      acc_string_text,
      acc_string_tokens,
      acc_values,
      _acc_state,
      _acc_rest
    }
  ) do
    case parse_real grammar, v, input, state do
      nil -> if length(acc_values) >= from do
        acc
      else
        nil
      end
      {
        r_string_text,
        _r_string_tokens,
        r_values,
        r_state,
        r_rest
      } ->
        {
          _acc_string_text,
          _acc_string_tokens,
          acc_values,
          _acc_state,
          _acc_rest
        } = acc = {
          [r_string_text|acc_string_text],
          [r_string_text|acc_string_tokens],
          [r_values|acc_values],
          r_state,
          r_rest
        }
        if length(acc_values) === to do
          acc
        else
          # Check for from:0 to: :infinity and empty match
          if r_string_text === '' do
            acc
          else
            repetition grammar, e, r_rest, r_state, acc
          end
        end
    end
  end

  # Matches a list of repetitions one after the other. `next_match`, when
  # given, is the already computed match for the head of the list, so it is
  # not parsed again. When the element that follows does not match, the
  # current repetition is retried with one less iteration (backtracking).
  defp concatenation(
    grammar, [c = %{value: value = %{from: from}}|cs],
    input, state, acc, next_match \\ nil
  ) do
    r = if is_nil next_match do
      repetition grammar, c, input, state, {
        [],
        [],
        [],
        state,
        input
      }
    else
      next_match
    end

    if is_nil r do
      nil
    else
      # This one matches, but we need to check if the next one also matches
      # and try with one less repetition if not (backtracking)
      {
        _r_string_text,
        r_string_tokens,
        r_values,
        r_state,
        r_rest
      } = r
      case cs do
        [next_c|_next_cs] -> case repetition grammar, next_c, r_rest, r_state, {
          [],
          [],
          [],
          r_state,
          r_rest
        } do
          nil ->
            match_length = length r_string_tokens
            to = match_length - 1
            if to > 0 and to >= from do
              # Give back the last iteration: cap the repetition at `to`,
              # drop the most recent match and return its text to the input.
              c_val = :maps.put :to, to, value
              c = :maps.put :value, c_val, c

              [h_string_tokens|t_string_tokens] = r_string_tokens
              [_h_values|t_values] = r_values

              rest = :lists.append h_string_tokens, r_rest
              r = {
                t_string_tokens,
                t_string_tokens,
                t_values,
                r_state,
                rest
              }
              concatenation grammar, [c|cs], input, state, acc, r
            else
              if from === 0 do
                # The element is optional: record an empty match and let the
                # next element start from the original input.
                r = {
                  '',
                  [],
                  [],
                  state,
                  input
                }
                acc = {
                  _acc_string_text,
                  _acc_string_tokens,
                  _acc_values,
                  acc_state,
                  acc_rest
                } = conc_result r, acc
                concatenation grammar, cs, acc_rest, acc_state, acc
              else
                nil
              end
            end
          next_r ->
            # Next one matches, we're cool. Go on, and pass on the next match
            # so it's not parsed again.
            acc = {
              _acc_string_text,
              _acc_string_tokens,
              _acc_values,
              acc_state,
              acc_rest
            } = conc_result r, acc
            concatenation grammar, cs, acc_rest, acc_state, acc, next_r
        end
        [] ->
          acc = conc_result r, acc
          prep_result acc
      end
    end
  end

  # Tries every concatenation of an alternation against the same input and
  # keeps the one whose matched text is the longest (the first one wins on
  # ties). `acc` is nil or {matched_length, result}.
  defp run_concs(_grammar, [], _input, _state, acc) do
    case acc do
      nil -> nil
      {_, r} -> r
    end
  end

  defp run_concs(grammar, [%{value: value}|concs], input, state, acc) do
    case concatenation grammar, value, input, state, {
      [],
      [],
      [],
      state,
      input
    } do
      nil -> run_concs grammar, concs, input, state, acc
      r = {
        r_string_text,
        _r_string_tokens,
        _r_values,
        _r_state,
        _r_rest
      } -> case acc do
        nil ->
          l = :erlang.iolist_size r_string_text
          run_concs grammar, concs, input, state, {l, r}
        {last_l, _last_r} ->
          l = :erlang.iolist_size r_string_text
          if last_l >= l do
            run_concs grammar, concs, input, state, acc
          else
            run_concs grammar, concs, input, state, {l, r}
          end
      end
    end
  end

  # Turns the reversed accumulators of a concatenation into their final form:
  # everything is put back in input order and the strings are flattened.
  defp prep_result({
    r_string_text,
    r_string_tokens,
    r_values,
    r_state,
    r_rest
  }) do
    {
      :lists.flatten(:lists.reverse(r_string_text)),
      :lists.map(&:lists.flatten/1, :lists.reverse(r_string_tokens)),
      :lists.reverse(:lists.map(&:lists.reverse/1, r_values)),
      r_state,
      r_rest
    }
  end

  # Pushes the result of one repetition on top of the accumulator of the
  # concatenation, taking the state and the remaining input from the result.
  defp conc_result({
    r_string_text,
    _r_string_tokens,
    r_values,
    r_state,
    r_rest
  }, {
    acc_string_text,
    acc_string_tokens,
    acc_values,
    _acc_state,
    _acc_rest
  }) do
    m = :lists.reverse r_string_text
    {
      [m|acc_string_text],
      [m|acc_string_tokens],
      [r_values|acc_values],
      r_state,
      r_rest
    }
  end
end
-------------------------------------------------------------------------------- /test/resources/RFC5322.abnf: -------------------------------------------------------------------------------- 1 | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS ; Folding white space 2 | ctext = %d33-39 / ; Printable US-ASCII 3 | %d42-91 / ; characters not including 4 | %d93-126 / ; "(", ")", or "\" 5 | obs-ctext 6 | ccontent = ctext / quoted-pair / comment 7 | comment = "(" *([FWS] ccontent) [FWS] ")" 8 | CFWS = (1*([FWS] comment) [FWS]) / FWS 9 | atext = ALPHA / DIGIT / ; Printable US-ASCII 10 | "!" / "#" / ; characters not including 11 | "$" / "%" / ; specials. Used for atoms. 12 | "&" / "'" / 13 | "*" / "+" / 14 | "-" / "/" / 15 | "=" / "?" / 16 | "^" / "_" / 17 | "`" / "{" / 18 | "|" / "}" / 19 | "~" 20 | atom = [CFWS] 1*atext [CFWS] 21 | dot-atom-text = 1*atext *("." 1*atext) 22 | dot-atom = [CFWS] dot-atom-text [CFWS] 23 | specials = "(" / ")" / ; Special characters that do 24 | "<" / ">" / ; not appear in atext 25 | "[" / "]" / 26 | ":" / ";" / 27 | "@" / "\" / 28 | "," / "." / 29 | DQUOTE 30 | qtext = %d33 / ; Printable US-ASCII 31 | %d35-91 / ; characters not including 32 | %d93-126 / ; "\" or the quote character 33 | obs-qtext 34 | qcontent = qtext / quoted-pair 35 | quoted-string = [CFWS] 36 | DQUOTE *([FWS] qcontent) [FWS] DQUOTE 37 | [CFWS] 38 | word = atom / quoted-string 39 | phrase = 1*word / obs-phrase 40 | unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct 41 | date-time = [ day-of-week "," ] date time [CFWS] 42 | day-of-week = ([FWS] day-name) / obs-day-of-week 43 | day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 44 | date = day month year 45 | day = ([FWS] 1*2DIGIT FWS) / obs-day !!! 46 | {:ok, Map.put(state, :day, rule)} 47 | !!! 48 | month = "Jan" / "Feb" / "Mar" / "Apr" / 49 | "May" / "Jun" / "Jul" / "Aug" / 50 | "Sep" / "Oct" / "Nov" / "Dec" !!! 51 | {:ok, Map.put(state, :month, rule)} 52 | !!! 53 | year = (FWS 4*DIGIT FWS) / obs-year !!! 
54 | {:ok, Map.put(state, :year, rule)} 55 | !!! 56 | time = time-of-day zone 57 | time-of-day = hour ":" minute [ ":" second ] 58 | hour = 2DIGIT / obs-hour !!! 59 | {:ok, Map.put(state, :hour, rule)} 60 | !!! 61 | minute = 2DIGIT / obs-minute !!! 62 | {:ok, Map.put(state, :minute, rule)} 63 | !!! 64 | second = 2DIGIT / obs-second !!! 65 | {:ok, Map.put(state, :second, rule)} 66 | !!! 67 | zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone !!! 68 | {:ok, Map.put(state, :tz, rule)} 69 | !!! 70 | address = mailbox / group 71 | mailbox = name-addr / addr-spec 72 | name-addr = [display-name] angle-addr 73 | angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr 74 | group = display-name ":" [group-list] ";" [CFWS] 75 | display-name = phrase !!! 76 | {:ok, Map.put(state, :display_name, rule)} 77 | !!! 78 | mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list 79 | address-list = (address *("," address)) / obs-addr-list 80 | group-list = mailbox-list / CFWS / obs-group-list 81 | addr-spec = local-part "@" domain 82 | local-part = dot-atom / quoted-string / obs-local-part !!! 83 | {:ok, Map.put(state, :local_part, rule)} 84 | !!! 85 | domain = dot-atom / domain-literal / obs-domain !!! 86 | {:ok, Map.put(state, :domain, rule)} 87 | !!! 
88 | domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] 89 | dtext = %d33-90 / ; Printable US-ASCII 90 | %d94-126 / ; characters not including 91 | obs-dtext ; "[", "]", or "\" 92 | message = (fields / obs-fields) [CRLF body] 93 | body = (*(*998text CRLF) *998text) / obs-body 94 | text = %d1-9 / ; Characters excluding CR 95 | %d11 / ; and LF 96 | %d12 / 97 | %d14-127 98 | fields = *(trace 99 | *optional-field / 100 | *(resent-date / 101 | resent-from / 102 | resent-sender / 103 | resent-to / 104 | resent-cc / 105 | resent-bcc / 106 | resent-msg-id)) 107 | *(orig-date / 108 | from / 109 | sender / 110 | reply-to / 111 | to / 112 | cc / 113 | bcc / 114 | message-id / 115 | in-reply-to / 116 | references / 117 | subject / 118 | comments / 119 | keywords / 120 | optional-field) 121 | orig-date = "Date:" date-time CRLF 122 | from = "From:" mailbox-list CRLF 123 | sender = "Sender:" mailbox CRLF 124 | reply-to = "Reply-To:" address-list CRLF 125 | to = "To:" address-list CRLF 126 | cc = "Cc:" address-list CRLF 127 | bcc = "Bcc:" [address-list / CFWS] CRLF 128 | message-id = "Message-ID:" msg-id CRLF 129 | in-reply-to = "In-Reply-To:" 1*msg-id CRLF 130 | references = "References:" 1*msg-id CRLF 131 | msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] 132 | id-left = dot-atom-text / obs-id-left 133 | id-right = dot-atom-text / no-fold-literal / obs-id-right 134 | no-fold-literal = "[" *dtext "]" 135 | subject = "Subject:" unstructured CRLF 136 | comments = "Comments:" unstructured CRLF 137 | keywords = "Keywords:" phrase *("," phrase) CRLF 138 | resent-date = "Resent-Date:" date-time CRLF 139 | resent-from = "Resent-From:" mailbox-list CRLF 140 | resent-sender = "Resent-Sender:" mailbox CRLF 141 | resent-to = "Resent-To:" address-list CRLF 142 | resent-cc = "Resent-Cc:" address-list CRLF 143 | resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF 144 | resent-msg-id = "Resent-Message-ID:" msg-id CRLF 145 | trace = [return] 1*received 146 | return = "Return-Path:" 
path CRLF 147 | path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) 148 | received = "Received:" *received-token ";" date-time CRLF 149 | received-token = word / angle-addr / addr-spec / domain 150 | optional-field = field-name ":" unstructured CRLF 151 | field-name = 1*ftext 152 | ftext = %d33-57 / ; Printable US-ASCII 153 | %d59-126 ; characters not including 154 | ; ":". 155 | obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 156 | %d11 / ; characters that do not 157 | %d12 / ; include the carriage 158 | %d14-31 / ; return, line feed, and 159 | %d127 ; white space characters 160 | obs-ctext = obs-NO-WS-CTL 161 | obs-qtext = obs-NO-WS-CTL 162 | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR 163 | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) 164 | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) 165 | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) 166 | obs-phrase = word *(word / "." / CFWS) 167 | obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) 168 | obs-FWS = 1*WSP *(CRLF 1*WSP) 169 | obs-day-of-week = [CFWS] day-name [CFWS] 170 | obs-day = [CFWS] 1*2DIGIT [CFWS] 171 | obs-year = [CFWS] 2*DIGIT [CFWS] 172 | obs-hour = [CFWS] 2DIGIT [CFWS] 173 | obs-minute = [CFWS] 2DIGIT [CFWS] 174 | obs-second = [CFWS] 2DIGIT [CFWS] 175 | obs-zone = "UT" / "GMT" / ; Universal Time 176 | ; North American UT 177 | ; offsets 178 | "EST" / "EDT" / ; Eastern: - 5/ - 4 179 | "CST" / "CDT" / ; Central: - 6/ - 5 180 | "MST" / "MDT" / ; Mountain: - 7/ - 6 181 | "PST" / "PDT" / ; Pacific: - 8/ - 7 182 | ; 183 | %d65-73 / ; Military zones - "A" 184 | %d75-90 / ; through "I" and "K" 185 | %d97-105 / ; through "Z", both 186 | %d107-122 ; upper and lower case 187 | obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] 188 | obs-route = obs-domain-list ":" 189 | obs-domain-list = *(CFWS / ",") "@" domain 190 | *("," [CFWS] ["@" domain]) 191 | obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) 192 | obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) 193 | 
obs-group-list = 1*([CFWS] ",") [CFWS] 194 | obs-local-part = word *("." word) 195 | obs-domain = atom *("." atom) 196 | obs-dtext = obs-NO-WS-CTL / quoted-pair 197 | obs-fields = *(obs-return / 198 | obs-received / 199 | obs-orig-date / 200 | obs-from / 201 | obs-sender / 202 | obs-reply-to / 203 | obs-to / 204 | obs-cc / 205 | obs-bcc / 206 | obs-message-id / 207 | obs-in-reply-to / 208 | obs-references / 209 | obs-subject / 210 | obs-comments / 211 | obs-keywords / 212 | obs-resent-date / 213 | obs-resent-from / 214 | obs-resent-send / 215 | obs-resent-rply / 216 | obs-resent-to / 217 | obs-resent-cc / 218 | obs-resent-bcc / 219 | obs-resent-mid / 220 | obs-optional) 221 | obs-orig-date = "Date" *WSP ":" date-time CRLF 222 | obs-from = "From" *WSP ":" mailbox-list CRLF 223 | obs-sender = "Sender" *WSP ":" mailbox CRLF 224 | obs-reply-to = "Reply-To" *WSP ":" address-list CRLF 225 | obs-to = "To" *WSP ":" address-list CRLF 226 | obs-cc = "Cc" *WSP ":" address-list CRLF 227 | obs-bcc = "Bcc" *WSP ":" 228 | (address-list / (*([CFWS] ",") [CFWS])) CRLF 229 | obs-message-id = "Message-ID" *WSP ":" msg-id CRLF 230 | obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF 231 | obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF 232 | obs-id-left = local-part 233 | obs-id-right = domain 234 | obs-subject = "Subject" *WSP ":" unstructured CRLF 235 | obs-comments = "Comments" *WSP ":" unstructured CRLF 236 | obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF 237 | obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF 238 | obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF 239 | obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF 240 | obs-resent-to = "Resent-To" *WSP ":" address-list CRLF 241 | obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF 242 | obs-resent-bcc = "Resent-Bcc" *WSP ":" 243 | (address-list / (*([CFWS] ",") [CFWS])) CRLF 244 | obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF 245 | obs-resent-rply = 
"Resent-Reply-To" *WSP ":" address-list CRLF 246 | obs-return = "Return-Path" *WSP ":" path CRLF 247 | obs-received = "Received" *WSP ":" *received-token CRLF 248 | obs-optional = field-name *WSP ":" unstructured CRLF 249 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 250 | BIT = "0" / "1" 251 | CHAR = %x01-7F ; any 7-bit US-ASCII character, excluding NUL 252 | CR = %x0D ; carriage return 253 | CRLF = CR LF ; Internet standard newline 254 | CTL = %x00-1F / %x7F ; controls 255 | DIGIT = %x30-39 ; 0-9 256 | DQUOTE = %x22 ; " (Double Quote) 257 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 258 | HTAB = %x09 ; horizontal tab 259 | LF = %x0A ; linefeed 260 | LWSP = *(WSP / CRLF WSP) ; linear white space (past newline) 261 | OCTET = %x00-FF ; 8 bits of data 262 | SP = %x20 263 | VCHAR = %x21-7E ; visible (printing) characters 264 | WSP = SP / HTAB ; white space 265 | -------------------------------------------------------------------------------- /test/resources/RFC5322-no-obs.abnf: -------------------------------------------------------------------------------- 1 | FWS = ([*WSP CRLF] 1*WSP); Folding white space 2 | ctext = %d33-39 / ; Printable US-ASCII 3 | %d42-91 / ; characters not including 4 | %d93-126 ; "(", ")", or "\" 5 | ccontent = ctext / quoted-pair / comment 6 | comment = "(" *([FWS] ccontent) [FWS] ")" 7 | CFWS = (1*([FWS] comment) [FWS]) / FWS 8 | atext = ALPHA / DIGIT / ; Printable US-ASCII 9 | "!" / "#" / ; characters not including 10 | "$" / "%" / ; specials. Used for atoms. 11 | "&" / "'" / 12 | "*" / "+" / 13 | "-" / "/" / 14 | "=" / "?" / 15 | "^" / "_" / 16 | "`" / "{" / 17 | "|" / "}" / 18 | "~" 19 | atom = [CFWS] 1*atext [CFWS] 20 | dot-atom-text = 1*atext *("." 1*atext) 21 | dot-atom = [CFWS] dot-atom-text [CFWS] 22 | specials = "(" / ")" / ; Special characters that do 23 | "<" / ">" / ; not appear in atext 24 | "[" / "]" / 25 | ":" / ";" / 26 | "@" / "\" / 27 | "," / "." 
/ 28 | DQUOTE 29 | qtext = %d33 / ; Printable US-ASCII 30 | %d35-91 / ; characters not including 31 | %d93-126 ; "\" or the quote character 32 | quoted-pair = ("\" (VCHAR / WSP)) 33 | qcontent = qtext / quoted-pair 34 | quoted-string = [CFWS] 35 | DQUOTE *([FWS] qcontent) [FWS] DQUOTE 36 | [CFWS] 37 | word = atom / quoted-string 38 | phrase = 1*word 39 | unstructured = (*([FWS] VCHAR) *WSP) 40 | date-time = [ day-of-week "," ] date time [CFWS] 41 | day-of-week = ([FWS] day-name) 42 | day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 43 | date = day month year !!! 44 | [d, m, y] = values 45 | state = Map.put(state, :day, :lists.flatten(d)) 46 | state = Map.put(state, :month, :lists.flatten(m)) 47 | {:ok, Map.put(state, :year, :lists.flatten(y))} 48 | !!! 49 | day = ([FWS] 1*2DIGIT FWS) !!! 50 | {:ok, state, to_char_list String.strip(to_string(rule))} 51 | !!! 52 | month = "Jan" / "Feb" / "Mar" / "Apr" / 53 | "May" / "Jun" / "Jul" / "Aug" / 54 | "Sep" / "Oct" / "Nov" / "Dec" 55 | year = (FWS 4*DIGIT FWS) !!! 56 | {:ok, state, to_char_list String.strip(to_string(rule))} 57 | !!! 58 | time = time-of-day zone !!! 59 | [_, tz] = values 60 | {:ok, Map.put(state, :tz, :lists.flatten(tz))} 61 | !!! 62 | time-of-day = hour ":" minute [ ":" second ] 63 | hour = 2DIGIT !!! 64 | {:ok, Map.put(state, :hour, rule)} 65 | !!! 66 | minute = 2DIGIT !!! 67 | {:ok, Map.put(state, :minute, rule)} 68 | !!! 69 | second = 2DIGIT !!! 70 | {:ok, Map.put(state, :second, rule)} 71 | !!! 72 | zone = (FWS ( "+" / "-" ) 4DIGIT) !!! 73 | {:ok, state, to_char_list String.strip(to_string(rule))} 74 | !!! 75 | address = mailbox / group 76 | mailbox = name-addr / addr-spec 77 | name-addr = [display-name] angle-addr 78 | angle-addr = [CFWS] "<" addr-spec ">" [CFWS] 79 | group = display-name ":" [group-list] ";" [CFWS] 80 | display-name = phrase !!! 81 | {:ok, Map.put(state, :display_name, rule)} 82 | !!! 
83 | mailbox-list = (mailbox *("," mailbox)) 84 | address-list = (address *("," address)) 85 | group-list = mailbox-list / CFWS 86 | addr-spec = local-part "@" domain 87 | local-part = dot-atom / quoted-string !!! 88 | {:ok, Map.put(state, :local_part, rule)} 89 | !!! 90 | domain = dot-atom / domain-literal !!! 91 | {:ok, Map.put(state, :domain, rule)} 92 | !!! 93 | domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] 94 | dtext = %d33-90 / ; Printable US-ASCII 95 | %d94-126 ; characters not including 96 | ; "[", "]", or "\" 97 | message = (fields) [CRLF body] 98 | body = (*(*998text CRLF) *998text) 99 | text = %d1-9 / ; Characters excluding CR 100 | %d11 / ; and LF 101 | %d12 / 102 | %d14-127 103 | fields = *(trace 104 | *optional-field / 105 | *(resent-date / 106 | resent-from / 107 | resent-sender / 108 | resent-to / 109 | resent-cc / 110 | resent-bcc / 111 | resent-msg-id)) 112 | *(orig-date / 113 | from / 114 | sender / 115 | reply-to / 116 | to / 117 | cc / 118 | bcc / 119 | message-id / 120 | in-reply-to / 121 | references / 122 | subject / 123 | comments / 124 | keywords / 125 | optional-field) 126 | orig-date = "Date:" date-time CRLF 127 | from = "From:" mailbox-list CRLF 128 | sender = "Sender:" mailbox CRLF 129 | reply-to = "Reply-To:" address-list CRLF 130 | to = "To:" address-list CRLF 131 | cc = "Cc:" address-list CRLF 132 | bcc = "Bcc:" [address-list / CFWS] CRLF 133 | message-id = "Message-ID:" msg-id CRLF 134 | in-reply-to = "In-Reply-To:" 1*msg-id CRLF 135 | references = "References:" 1*msg-id CRLF 136 | msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] 137 | id-left = dot-atom-text 138 | id-right = dot-atom-text / no-fold-literal 139 | no-fold-literal = "[" *dtext "]" 140 | subject = "Subject:" unstructured CRLF 141 | comments = "Comments:" unstructured CRLF 142 | keywords = "Keywords:" phrase *("," phrase) CRLF 143 | resent-date = "Resent-Date:" date-time CRLF 144 | resent-from = "Resent-From:" mailbox-list CRLF 145 | resent-sender = 
"Resent-Sender:" mailbox CRLF 146 | resent-to = "Resent-To:" address-list CRLF 147 | resent-cc = "Resent-Cc:" address-list CRLF 148 | resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF 149 | resent-msg-id = "Resent-Message-ID:" msg-id CRLF 150 | trace = [return] 1*received 151 | return = "Return-Path:" path CRLF 152 | path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) 153 | received = "Received:" *received-token ";" date-time CRLF 154 | received-token = word / angle-addr / addr-spec / domain 155 | optional-field = field-name ":" unstructured CRLF 156 | field-name = 1*ftext 157 | ftext = %d33-57 / ; Printable US-ASCII 158 | %d59-126 ; characters not including 159 | ; ":". 160 | obs-NO-WS-CTL = %d1-8 / ; US-ASCII control 161 | %d11 / ; characters that do not 162 | %d12 / ; include the carriage 163 | %d14-31 / ; return, line feed, and 164 | %d127 ; white space characters 165 | obs-ctext = obs-NO-WS-CTL 166 | obs-qtext = obs-NO-WS-CTL 167 | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR 168 | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) 169 | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) 170 | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) 171 | obs-phrase = word *(word / "." 
/ CFWS) 172 | obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) 173 | obs-FWS = 1*WSP *(CRLF 1*WSP) 174 | obs-day-of-week = [CFWS] day-name [CFWS] 175 | obs-day = [CFWS] 1*2DIGIT [CFWS] 176 | obs-year = [CFWS] 2*DIGIT [CFWS] 177 | obs-hour = [CFWS] 2DIGIT [CFWS] 178 | obs-minute = [CFWS] 2DIGIT [CFWS] 179 | obs-second = [CFWS] 2DIGIT [CFWS] 180 | obs-zone = "UT" / "GMT" / ; Universal Time 181 | ; North American UT 182 | ; offsets 183 | "EST" / "EDT" / ; Eastern: - 5/ - 4 184 | "CST" / "CDT" / ; Central: - 6/ - 5 185 | "MST" / "MDT" / ; Mountain: - 7/ - 6 186 | "PST" / "PDT" / ; Pacific: - 8/ - 7 187 | ; 188 | %d65-73 / ; Military zones - "A" 189 | %d75-90 / ; through "I" and "K" 190 | %d97-105 / ; through "Z", both 191 | %d107-122 ; upper and lower case 192 | obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] 193 | obs-route = obs-domain-list ":" 194 | obs-domain-list = *(CFWS / ",") "@" domain 195 | *("," [CFWS] ["@" domain]) 196 | obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) 197 | obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) 198 | obs-group-list = 1*([CFWS] ",") [CFWS] 199 | obs-local-part = word *("." word) 200 | obs-domain = atom *("." 
atom) 201 | obs-dtext = obs-NO-WS-CTL / quoted-pair 202 | obs-fields = *(obs-return / 203 | obs-received / 204 | obs-orig-date / 205 | obs-from / 206 | obs-sender / 207 | obs-reply-to / 208 | obs-to / 209 | obs-cc / 210 | obs-bcc / 211 | obs-message-id / 212 | obs-in-reply-to / 213 | obs-references / 214 | obs-subject / 215 | obs-comments / 216 | obs-keywords / 217 | obs-resent-date / 218 | obs-resent-from / 219 | obs-resent-send / 220 | obs-resent-rply / 221 | obs-resent-to / 222 | obs-resent-cc / 223 | obs-resent-bcc / 224 | obs-resent-mid / 225 | obs-optional) 226 | obs-orig-date = "Date" *WSP ":" date-time CRLF 227 | obs-from = "From" *WSP ":" mailbox-list CRLF 228 | obs-sender = "Sender" *WSP ":" mailbox CRLF 229 | obs-reply-to = "Reply-To" *WSP ":" address-list CRLF 230 | obs-to = "To" *WSP ":" address-list CRLF 231 | obs-cc = "Cc" *WSP ":" address-list CRLF 232 | obs-bcc = "Bcc" *WSP ":" 233 | (address-list / (*([CFWS] ",") [CFWS])) CRLF 234 | obs-message-id = "Message-ID" *WSP ":" msg-id CRLF 235 | obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF 236 | obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF 237 | obs-id-left = local-part 238 | obs-id-right = domain 239 | obs-subject = "Subject" *WSP ":" unstructured CRLF 240 | obs-comments = "Comments" *WSP ":" unstructured CRLF 241 | obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF 242 | obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF 243 | obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF 244 | obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF 245 | obs-resent-to = "Resent-To" *WSP ":" address-list CRLF 246 | obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF 247 | obs-resent-bcc = "Resent-Bcc" *WSP ":" 248 | (address-list / (*([CFWS] ",") [CFWS])) CRLF 249 | obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF 250 | obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF 251 | obs-return = "Return-Path" *WSP ":" path CRLF 252 | 
obs-received = "Received" *WSP ":" *received-token CRLF 253 | obs-optional = field-name *WSP ":" unstructured CRLF 254 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 255 | BIT = "0" / "1" 256 | CHAR = %x01-7F ; any 7-bit US-ASCII character, excluding NUL 257 | CR = %x0D ; carriage return 258 | CRLF = CR LF ; Internet standard newline 259 | CTL = %x00-1F / %x7F ; controls 260 | DIGIT = %x30-39 ; 0-9 261 | DQUOTE = %x22 ; " (Double Quote) 262 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 263 | HTAB = %x09 ; horizontal tab 264 | LF = %x0A ; linefeed 265 | LWSP = *(WSP / CRLF WSP) ; linear white space (past newline) 266 | OCTET = %x00-FF ; 8 bits of data 267 | SP = %x20 268 | VCHAR = %x21-7E ; visible (printing) characters 269 | WSP = SP / HTAB ; white space 270 | -------------------------------------------------------------------------------- /test/resources/RFC3261.abnf: -------------------------------------------------------------------------------- 1 | alphanum = ALPHA / DIGIT 2 | reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 3 | unreserved = alphanum / mark 4 | mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")" 5 | escaped = "%" HEXDIG HEXDIG 6 | LWS = [*WSP CRLF] 1*WSP 7 | SWS = [LWS] 8 | HCOLON = *( SP / HTAB ) ":" SWS 9 | TEXT-UTF8-TRIM = 1*TEXT-UTF8char *(*LWS TEXT-UTF8char) 10 | TEXT-UTF8char = %x21-7E / UTF8-NONASCII 11 | UTF8-NONASCII = %xC0-DF 1UTF8-CONT / %xE0-EF 2UTF8-CONT / %xF0-F7 3UTF8-CONT / %xF8-Fb 4UTF8-CONT / %xFC-FD 5UTF8-CONT 12 | UTF8-CONT = %x80-BF 13 | LHEX = DIGIT / %x61-66 ; lowercase hex digit: 0-9 or a-f (RFC 3261 section 25.1) 14 | token = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" ) 15 | separators = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" / "=" / "{" / "}" / SP / HTAB 16 | word = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" / "(" / ")" / "<" / ">" / ":" / "\" / DQUOTE / "/" / "[" / "]" / "?" 
/ "{" / "}" ) 17 | STAR = SWS "*" SWS 18 | SLASH = SWS "/" SWS 19 | EQUAL = SWS "=" SWS 20 | LPAREN = SWS "(" SWS 21 | RPAREN = SWS ")" SWS 22 | RAQUOT = ">" SWS 23 | LAQUOT = SWS "<" 24 | COMMA = SWS "," SWS 25 | SEMI = SWS ";" SWS 26 | COLON = SWS ":" SWS 27 | LDQUOT = SWS DQUOTE 28 | RDQUOT = DQUOTE SWS 29 | comment = LPAREN *(ctext / quoted-pair / comment) RPAREN 30 | ctext = %x21-27 / %x2A-5B / %x5D-7E / UTF8-NONASCII / LWS 31 | quoted-string = SWS DQUOTE *(qdtext / quoted-pair ) DQUOTE 32 | qdtext = LWS / %x21 / %x23-5B / %x5D-7E / UTF8-NONASCII 33 | quoted-pair = "\" (%x00-09 / %x0B-0C / %x0E-7F) 34 | SIP-URI = "sip:" [ userinfo ] hostport uri-parameters [ headers ] !!! 35 | [_, uinfo, [[hostport]], _, _] = values 36 | {:ok, state, %{ 37 | scheme: "sip", 38 | userinfo: (case uinfo do 39 | [] -> nil 40 | [uinfo] -> to_string(uinfo) 41 | end), 42 | hostport: hostport 43 | }} 44 | !!! 45 | SIPS-URI = "sips:" [ userinfo ] hostport uri-parameters [ headers ] 46 | userinfo = ( user / telephone-subscriber ) [ ":" password ] "@" !!! 47 | [u, _, _] = values 48 | {:ok, state, :lists.flatten(u)} 49 | !!! 50 | user = 1*( unreserved / escaped / user-unreserved ) 51 | user-unreserved = "&" / "=" / "+" / "$" / "," / ";" / "?" / "/" 52 | password = *( unreserved / escaped / "&" / "=" / "+" / "$" / "," ) 53 | hostport = host [ ":" port ] !!! 54 | [[[host]], [port]] = values 55 | port = case port do 56 | [] -> 5060 57 | port -> 58 | [_, port] = :lists.flatten(port) 59 | port 60 | end 61 | {:ok, state, %{host: host, port: port}} 62 | !!! 63 | host = hostname / IPv4address / IPv6reference !!! 64 | {:ok, state, to_string(rule)} 65 | !!! 66 | hostname = *( domainlabel "." ) toplabel [ "." ] 67 | domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum 68 | toplabel = ALPHA / ALPHA *( alphanum / "-" ) alphanum 69 | IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 
1*3DIGIT 70 | IPv6reference = "[" IPv6address "]" 71 | IPv6address = hexpart [ ":" IPv4address ] 72 | hexpart = hexseq / hexseq "::" [ hexseq ] / "::" [ hexseq ] 73 | hexseq = hex4 *( ":" hex4) 74 | hex4 = 1*4HEXDIG 75 | port = 1*DIGIT !!! 76 | {port, ""} = Integer.parse to_string(rule) 77 | {:ok, state, port} 78 | !!! 79 | uri-parameters = *( ";" uri-parameter) 80 | uri-parameter = transport-param / user-param / method-param / ttl-param / maddr-param / lr-param / other-param 81 | transport-param = "transport=" ( "udp" / "tcp" / "sctp" / "tls" / other-transport) 82 | other-transport = token 83 | user-param = "user=" ( "phone" / "ip" / other-user) 84 | other-user = token 85 | method-param = "method=" Method 86 | ttl-param = "ttl=" ttl 87 | maddr-param = "maddr=" host 88 | lr-param = "lr" 89 | other-param = pname [ "=" pvalue ] 90 | pname = 1*paramchar 91 | pvalue = 1*paramchar 92 | paramchar = param-unreserved / unreserved / escaped 93 | param-unreserved = "[" / "]" / "/" / ":" / "&" / "+" / "$" 94 | headers = "?" header *( "&" header ) 95 | header = hname "=" hvalue 96 | hname = 1*( hnv-unreserved / unreserved / escaped ) 97 | hvalue = *( hnv-unreserved / unreserved / escaped ) 98 | hnv-unreserved = "[" / "]" / "/" / "?" / ":" / "+" / "$" 99 | SIP-message = Request / Response !!! 100 | {:ok, Map.put(state, :request, true)} 101 | !!! 102 | 103 | Request = Request-Line *( message-header ) CRLF [ message-body ] 104 | Request-Line = Method SP Request-URI SP SIP-Version CRLF !!! 105 | [[[method]], _, [[[uri]]], _, _, _] = values 106 | state = Map.put state, :method, method 107 | state = Map.put state, :uri, uri 108 | {:ok, state} 109 | !!! 110 | 111 | Request-URI = SIP-URI / SIPS-URI / absoluteURI !!! 112 | [[uri]] = values 113 | {:ok, state, uri} 114 | !!! 115 | 116 | absoluteURI = scheme ":" ( hier-part / opaque-part ) 117 | hier-part = ( net-path / abs-path ) [ "?" 
query ] 118 | net-path = "//" authority [ abs-path ] 119 | abs-path = "/" path-segments 120 | opaque-part = uric-no-slash *uric 121 | uric = reserved / unreserved / escaped 122 | uric-no-slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 123 | path-segments = segment *( "/" segment ) 124 | segment = *pchar *( ";" param ) 125 | param = *pchar 126 | pchar = unreserved / escaped / ":" / "@" / "&" / "=" / "+" / "$" / "," 127 | scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 128 | authority = srvr / reg-name 129 | srvr = [ [ userinfo "@" ] hostport ] 130 | reg-name = 1*( unreserved / escaped / "$" / "," / ";" / ":" / "@" / "&" / "=" / "+" ) 131 | query = *uric 132 | SIP-Version = "SIP" "/" 1*DIGIT "." 1*DIGIT 133 | message-header = (Accept / Accept-Encoding / Accept-Language / Alert-Info / Allow / Authentication-Info / Authorization / Call-ID / Call-Info / Contact / Content-Disposition / Content-Encoding / Content-Language / Content-Length / Content-Type / CSeq / Date / Error-Info / Expires / From / In-Reply-To / Max-Forwards / MIME-Version / Min-Expires / Organization / Priority / Proxy-Authenticate / Proxy-Authorization / Proxy-Require / Record-Route / Reply-To / Require / Retry-After / Route / Server / Subject / Supported / Timestamp / To / Unsupported / User-Agent / Via / Warning / WWW-Authenticate / extension-header) CRLF 134 | INVITEm = %x49.4E.56.49.54.45 135 | ACKm = %x41.43.4B 136 | OPTIONSm = %x4F.50.54.49.4F.4E.53 137 | BYEm = %x42.59.45 138 | CANCELm = %x43.41.4E.43.45.4C 139 | REGISTERm = %x52.45.47.49.53.54.45.52 140 | Method = INVITEm / ACKm / OPTIONSm / BYEm / CANCELm / REGISTERm / extension-method !!! 141 | {:ok, state, String.to_atom(String.downcase(to_string(rule)))} 142 | !!! 
143 | extension-method = token 144 | Response = Status-Line *( message-header ) CRLF [ message-body ] 145 | Status-Line = SIP-Version SP Status-Code SP Reason-Phrase CRLF 146 | Status-Code = Informational / Redirection / Success / Client-Error / Server-Error / Global-Failure / extension-code 147 | extension-code = 3DIGIT 148 | Reason-Phrase = *(reserved / unreserved / escaped / UTF8-NONASCII / UTF8-CONT / SP / HTAB) 149 | Informational = "100" / "180" / "181" / "182" / "183" 150 | Success = "200" 151 | Redirection = "300" / "301" / "302" / "305" / "380" 152 | Client-Error = "400" /"401" /"402" /"403" /"404" /"405" /"406" /"407" /"408" /"410" /"413" /"414" /"415" /"416" /"420" /"421" /"423" /"480" /"481" /"482" /"483" /"484" /"485" /"486" /"487" /"488" /"491" /"493" 153 | Server-Error = "500" / "501" / "502" / "503" / "504" / "505" / "513" 154 | Global-Failure = "600" / "603" / "604" / "606" 155 | Accept = "Accept" HCOLON [ accept-range *(COMMA accept-range) ] 156 | accept-range = media-range *(SEMI accept-param) 157 | media-range = ( "*/*" / ( m-type SLASH "*" ) / ( m-type SLASH m-subtype )) *( SEMI m-parameter ) 158 | accept-param = ("q" EQUAL qvalue) / generic-param 159 | qvalue = ( "0" [ "." 0*3DIGIT ] ) / ( "1" [ "." 
0*3("0") ] ) 160 | generic-param = token [ EQUAL gen-value ] 161 | gen-value = token / host / quoted-string 162 | Accept-Encoding = "Accept-Encoding" HCOLON [ encoding *(COMMA encoding) ] 163 | encoding = codings *(SEMI accept-param) 164 | codings = content-coding / "*" 165 | content-coding = token 166 | Accept-Language = "Accept-Language" HCOLON [ language *(COMMA language) ] 167 | language = language-range *(SEMI accept-param) 168 | language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) / "*" ) 169 | Alert-Info = "Alert-Info" HCOLON alert-param *(COMMA alert-param) 170 | alert-param = LAQUOT absoluteURI RAQUOT *( SEMI generic-param ) 171 | Allow = "Allow" HCOLON [Method *(COMMA Method)] 172 | Authorization = "Authorization" HCOLON credentials 173 | credentials = ("Digest" LWS digest-response) / other-response 174 | digest-response = dig-resp *(COMMA dig-resp) 175 | dig-resp = username / realm / nonce / digest-uri / dresponse / algorithm / cnonce / opaque / message-qop / nonce-count / auth-param 176 | username = "username" EQUAL username-value 177 | username-value = quoted-string 178 | digest-uri = "uri" EQUAL LDQUOT digest-uri-value RDQUOT 179 | digest-uri-value = Request-URI ; RFC 3261 spells this rquest-uri, which is defined nowhere; use the Request-URI rule so the reference resolves 180 | message-qop = "qop" EQUAL qop-value 181 | cnonce = "cnonce" EQUAL cnonce-value 182 | cnonce-value = nonce-value 183 | nonce-count = "nc" EQUAL nc-value 184 | nc-value = 8LHEX 185 | dresponse = "response" EQUAL request-digest 186 | request-digest = LDQUOT 32LHEX RDQUOT 187 | auth-param = auth-param-name EQUAL ( token / quoted-string ) 188 | auth-param-name = token 189 | other-response = auth-scheme LWS auth-param *(COMMA auth-param) 190 | auth-scheme = token 191 | Authentication-Info = "Authentication-Info" HCOLON ainfo *(COMMA ainfo) 192 | ainfo = nextnonce / message-qop / response-auth / cnonce / nonce-count 193 | nextnonce = "nextnonce" EQUAL nonce-value 194 | response-auth = "rspauth" EQUAL response-digest 195 | response-digest = LDQUOT *LHEX RDQUOT 196 | Call-ID = ( "Call-ID" / "i" ) 
HCOLON callid 197 | callid = word [ "@" word ] 198 | Call-Info = "Call-Info" HCOLON info *(COMMA info) 199 | info = LAQUOT absoluteURI RAQUOT *( SEMI info-param) 200 | info-param = ( "purpose" EQUAL ( "icon" / "info" / "card" / token ) ) / generic-param 201 | Contact = ("Contact" / "m" ) HCOLON ( STAR / (contact-param *(COMMA contact-param))) 202 | contact-param = (name-addr / addr-spec) *(SEMI contact-params) 203 | name-addr = [ display-name ] LAQUOT addr-spec RAQUOT !!! 204 | [name, _, [[[addr]]], _] = values 205 | name = case name do 206 | [] -> nil 207 | name -> to_string(:lists.flatten(name)) 208 | end 209 | {:ok, state, %{ 210 | display_name: name, 211 | addr: addr 212 | }} 213 | !!! 214 | addr-spec = SIP-URI / SIPS-URI / absoluteURI !!! 215 | [[uri]] = values 216 | {:ok, state, uri} 217 | !!! 218 | display-name = *(token LWS)/ quoted-string 219 | contact-params = c-p-q / c-p-expires / contact-extension 220 | c-p-q = "q" EQUAL qvalue 221 | c-p-expires = "expires" EQUAL delta-seconds 222 | contact-extension = generic-param 223 | delta-seconds = 1*DIGIT 224 | Content-Disposition = "Content-Disposition" HCOLON disp-type *( SEMI disp-param ) 225 | disp-type = "render" / "session" / "icon" / "alert" / disp-extension-token 226 | disp-param = handling-param / generic-param 227 | handling-param = "handling" EQUAL ( "optional" / "required" / other-handling ) 228 | other-handling = token 229 | disp-extension-token = token 230 | Content-Encoding = ( "Content-Encoding" / "e" ) HCOLON content-coding *(COMMA content-coding) 231 | Content-Language = "Content-Language" HCOLON language-tag *(COMMA language-tag) 232 | language-tag = primary-tag *( "-" subtag ) 233 | primary-tag = 1*8ALPHA 234 | subtag = 1*8ALPHA 235 | Content-Length = ( "Content-Length" / "l" ) HCOLON 1*DIGIT 236 | Content-Type = ( "Content-Type" / "c" ) HCOLON media-type 237 | media-type = m-type SLASH m-subtype *(SEMI m-parameter) 238 | m-type = discrete-type / composite-type 239 | discrete-type = "text" / 
"image" / "audio" / "video" / "application" / extension-token 240 | composite-type = "message" / "multipart" / extension-token 241 | extension-token = ietf-token / x-token 242 | ietf-token = token 243 | x-token = "x-" token 244 | m-subtype = extension-token / iana-token 245 | iana-token = token 246 | m-parameter = m-attribute EQUAL m-value 247 | m-attribute = token 248 | m-value = token / quoted-string 249 | CSeq = "CSeq" HCOLON 1*DIGIT LWS Method 250 | Date = "Date" HCOLON SIP-date 251 | SIP-date = rfc1123-date 252 | rfc1123-date = wkday "," SP date1 SP time SP "GMT" 253 | date1 = 2DIGIT SP month SP 4DIGIT 254 | time = 2DIGIT ":" 2DIGIT ":" 2DIGIT 255 | wkday = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 256 | month = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" 257 | Error-Info = "Error-Info" HCOLON error-uri *(COMMA error-uri) 258 | error-uri = LAQUOT absoluteURI RAQUOT *( SEMI generic-param ) 259 | Expires = "Expires" HCOLON delta-seconds 260 | From = ( "From" / "f" ) HCOLON from-spec !!! 261 | [_, _, [[[from]]]] = values 262 | headers = Map.put(state.headers, "from", from) 263 | {:ok, Map.put(state, :headers, headers)} 264 | !!! 265 | from-spec = ( name-addr / addr-spec ) *( SEMI from-param ) !!! 266 | [[from], _] = values 267 | {:ok, state, :lists.flatten(from)} 268 | !!! 269 | from-param = tag-param / generic-param 270 | tag-param = "tag" EQUAL token 271 | In-Reply-To = "In-Reply-To" HCOLON callid *(COMMA callid) 272 | Max-Forwards = "Max-Forwards" HCOLON 1*DIGIT 273 | MIME-Version = "MIME-Version" HCOLON 1*DIGIT "." 
1*DIGIT 274 | Min-Expires = "Min-Expires" HCOLON delta-seconds 275 | Organization = "Organization" HCOLON [TEXT-UTF8-TRIM] 276 | Priority = "Priority" HCOLON priority-value 277 | priority-value = "emergency" / "urgent" / "normal" / "non-urgent" / other-priority 278 | other-priority = token 279 | Proxy-Authenticate = "Proxy-Authenticate" HCOLON challenge 280 | challenge = ("Digest" LWS digest-cln *(COMMA digest-cln)) / other-challenge 281 | other-challenge = auth-scheme LWS auth-param *(COMMA auth-param) 282 | digest-cln = realm / domain / nonce / opaque / stale / algorithm / qop-options / auth-param 283 | realm = "realm" EQUAL realm-value 284 | realm-value = quoted-string 285 | domain = "domain" EQUAL LDQUOT URI *( 1*SP URI ) RDQUOT 286 | URI = absoluteURI / abs-path 287 | nonce = "nonce" EQUAL nonce-value 288 | nonce-value = quoted-string 289 | opaque = "opaque" EQUAL quoted-string 290 | stale = "stale" EQUAL ( "true" / "false" ) 291 | algorithm = "algorithm" EQUAL ( "MD5" / "MD5-sess" / token ) 292 | qop-options = "qop" EQUAL LDQUOT qop-value *("," qop-value) RDQUOT 293 | qop-value = "auth" / "auth-int" / token 294 | Proxy-Authorization = "Proxy-Authorization" HCOLON credentials 295 | Proxy-Require = "Proxy-Require" HCOLON option-tag *(COMMA option-tag) 296 | option-tag = token 297 | Record-Route = "Record-Route" HCOLON rec-route *(COMMA rec-route) 298 | rec-route = name-addr *( SEMI rr-param ) 299 | rr-param = generic-param 300 | Reply-To = "Reply-To" HCOLON rplyto-spec 301 | rplyto-spec = ( name-addr / addr-spec ) *( SEMI rplyto-param ) 302 | rplyto-param = generic-param 303 | Require = "Require" HCOLON option-tag *(COMMA option-tag) 304 | Retry-After = "Retry-After" HCOLON delta-seconds [ comment ] *( SEMI retry-param ) 305 | retry-param = ("duration" EQUAL delta-seconds) / generic-param 306 | Route = "Route" HCOLON route-param *(COMMA route-param) 307 | route-param = name-addr *( SEMI rr-param ) 308 | Server = "Server" HCOLON server-val *(LWS server-val) 309 
| server-val = product / comment 310 | product = token [SLASH product-version] 311 | product-version = token 312 | Subject = ( "Subject" / "s" ) HCOLON [TEXT-UTF8-TRIM] 313 | Supported = ( "Supported" / "k" ) HCOLON [option-tag *(COMMA option-tag)] 314 | Timestamp = "Timestamp" HCOLON 1*(DIGIT) [ "." *(DIGIT) ] [ LWS delay ] 315 | delay = *(DIGIT) [ "." *(DIGIT) ] 316 | To = ( "To" / "t" ) HCOLON ( name-addr / addr-spec ) *( SEMI to-param ) 317 | to-param = tag-param / generic-param 318 | Unsupported = "Unsupported" HCOLON option-tag *(COMMA option-tag) 319 | User-Agent = "User-Agent" HCOLON server-val *(LWS server-val) 320 | Via = ( "Via" / "v" ) HCOLON via-parm *(COMMA via-parm) 321 | via-parm = sent-protocol LWS sent-by *( SEMI via-params ) 322 | via-params = via-ttl / via-maddr / via-received / via-branch / via-extension 323 | via-ttl = "ttl" EQUAL ttl 324 | via-maddr = "maddr" EQUAL host 325 | via-received = "received" EQUAL (IPv4address / IPv6address) 326 | via-branch = "branch" EQUAL token 327 | via-extension = generic-param 328 | sent-protocol = protocol-name SLASH protocol-version SLASH transport 329 | protocol-name = "SIP" / token 330 | protocol-version = token 331 | transport = "UDP" / "TCP" / "TLS" / "SCTP" / other-transport 332 | sent-by = host [ COLON port ] 333 | ttl = 1*3DIGIT 334 | Warning = "Warning" HCOLON warning-value *(COMMA warning-value) 335 | warning-value = warn-code SP warn-agent SP warn-text 336 | warn-code = 3DIGIT 337 | warn-agent = hostport / pseudonym 338 | warn-text = quoted-string 339 | pseudonym = token 340 | WWW-Authenticate = "WWW-Authenticate" HCOLON challenge 341 | extension-header = header-name HCOLON header-value 342 | header-name = token 343 | header-value = *(TEXT-UTF8char / UTF8-CONT / LWS) 344 | message-body = *OCTET 345 | ALPHA = %x41-5A / %x61-7A 346 | BIT = "0" / "1" 347 | CHAR = %x01-7F 348 | CR = %x0D 349 | CRLF = CR LF 350 | CTL = %x00-1F / %x7F 351 | DIGIT = %x30-39 352 | DQUOTE = %x22 353 | HEXDIG = DIGIT / "A" / 
"B" / "C" / "D" / "E" / "F" 354 | HTAB = %x09 355 | LF = %x0A 356 | LWSP = *(WSP / CRLF WSP) 357 | OCTET = %x00-FF 358 | SP = %x20 359 | VCHAR = %x21-7E 360 | WSP = SP / HTAB 361 | telephone-uri = "tel:" telephone-subscriber 362 | telephone-subscriber = global-number / local-number 363 | global-number = global-number-digits *par 364 | local-number = local-number-digits *par context *par 365 | par = parameter / extension / isdn-subaddress 366 | isdn-subaddress = ";isub=" 1*paramchar 367 | extension = ";ext=" 1*phonedigit 368 | context = ";phone-context=" descriptor 369 | descriptor = domainname / global-number-digits 370 | global-number-digits = "+" *phonedigit DIGIT *phonedigit 371 | local-number-digits = *phonedigit-hex (HEXDIG / "*" / "#") *phonedigit-hex 372 | domainname = *( domainlabel "." ) toplabel [ "." ] 373 | domainlabel = alphanum / alphanum *( alphanum / "-" ) alphanum 374 | toplabel = ALPHA / ALPHA *( alphanum / "-" ) alphanum 375 | parameter = ";" pname ["=" pvalue ] 376 | pname = 1*( alphanum / "-" ) 377 | pvalue = 1*paramchar 378 | paramchar = param-unreserved / unreserved / pct-encoded 379 | unreserved = alphanum / mark 380 | mark = "-" / "_" / "." / "!" / "~" / "*" / "'" / "(" / ")" 381 | pct-encoded = "%" HEXDIG HEXDIG 382 | param-unreserved = "[" / "]" / "/" / ":" / "&" / "+" / "$" 383 | phonedigit = DIGIT / visual-separator 384 | phonedigit-hex = HEXDIG / "*" / "#" / visual-separator 385 | visual-separator = "-" / "." / "(" / ")" 386 | alphanum = ALPHA / DIGIT 387 | reserved = ";" / "/" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 388 | uric = reserved / unreserved / pct-encoded 389 | -------------------------------------------------------------------------------- /lib/ex_abnf/grammar.ex: -------------------------------------------------------------------------------- 1 | defmodule ABNF.Grammar do 2 | @moduledoc """ 3 | Parses an ABNF Grammar. 
4 | 5 | Copyright 2015 Marcelo Gornstein 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | """ 19 | 20 | alias ABNF.Core, as: Core 21 | alias ABNF.Util, as: Util 22 | @type t :: Map 23 | 24 | # rulelist = 1*( rule / (*WSP c-nl) ) 25 | # As described in the Errata #3076 26 | @doc """ 27 | Builds a Grammar.t from the given input (an ABNF text grammar). You should 28 | never use this one directly but use the ones in the ABNF module instead. 29 | """ 30 | @spec rulelist(char_list) :: t 31 | def rulelist(input) do 32 | {module_code, rest} = case code input do 33 | nil -> {"", input} 34 | {c, rest} -> {to_string(c), rest} 35 | end 36 | rulelist_tail module_code, rest 37 | end 38 | 39 | defp rulelist_tail(module_code, input, acc \\ %{}, last \\ nil) do 40 | case rule input do 41 | nil -> 42 | rest = zero_or_more_wsp input 43 | case c_nl rest do 44 | nil -> 45 | module_name = String.to_atom( 46 | "A#{Base.encode16 :crypto.hash( 47 | :md5, :erlang.term_to_binary(make_ref()) 48 | )}" 49 | ) 50 | 51 | acc = Enum.reduce acc, %{}, fn({k, v}, rules) -> 52 | c = if is_nil v[:code] do 53 | nil 54 | else 55 | fun_name = String.to_atom( 56 | String.downcase("A#{Base.encode16 :crypto.hash( 57 | :md5, :erlang.term_to_binary(make_ref()) 58 | )}" 59 | )) 60 | {module_name, fun_name, v[:code]} 61 | end 62 | v = %{v | code: c} 63 | Map.put rules, k, v 64 | end 65 | 66 | funs = Enum.reduce acc, module_code, fn({_k, v}, str) -> 67 | if is_nil v[:code] do 68 | str 69 | 
else 70 | {_, f, c} = v[:code] 71 | str = str <> "def #{f}(state, rule, string_values, values) do\r\n" 72 | str = str <> "\t#{c}\r\n" 73 | str = str <> "end\r\n" 74 | str 75 | end 76 | end 77 | funs = Code.string_to_quoted(funs) 78 | Module.create module_name, funs, Macro.Env.location(__ENV__) 79 | {acc, input} 80 | {comments, rest} -> case last do 81 | nil -> rulelist_tail module_code, rest, acc 82 | last -> 83 | last = add_comments last, comments 84 | rulelist_tail module_code, rest, Map.put(acc, last.name, last) 85 | end 86 | end 87 | {r, rest} -> rulelist_tail module_code, rest, Map.put(acc, r.name, r) 88 | end 89 | end 90 | 91 | # rule = rulename defined-as elements c-nl 92 | # ; continues if next line starts with white space 93 | defp rule(input) do 94 | case rulename input do 95 | nil -> nil 96 | {name, rest} -> case defined_as rest do 97 | nil -> nil 98 | {das, rest} -> case elements rest do 99 | nil -> nil 100 | {es, rest} -> 101 | {c, rest} = case code rest do 102 | nil -> {nil, rest} 103 | r -> r 104 | end 105 | case c_nl rest do 106 | nil -> nil 107 | {comments, rest} -> 108 | r = %{ 109 | name: Util.rulename(name.value), 110 | defined_as: das, 111 | element: :rule, 112 | value: es, 113 | code: c, 114 | comments: comments 115 | } 116 | {r, rest} 117 | end 118 | end 119 | end 120 | end 121 | end 122 | 123 | # defined-as = *c-wsp ("=" / "=/") *c-wsp 124 | # ; basic rules definition and incremental alternatives 125 | defp defined_as(input) do 126 | case zero_or_more_cwsp input do 127 | {_, [?=, ?/|_rest]} -> 128 | raise RuntimeError, "Incremental alternatives are not yet supported" 129 | #{_, rest} = zero_or_more_cwsp rest 130 | #{:alternative, rest} 131 | {_, [?=|rest]} -> 132 | {_, rest} = zero_or_more_cwsp rest 133 | {:equal, rest} 134 | _ -> nil 135 | end 136 | end 137 | 138 | # code = !!! octet !!! 
# Parses an embedded code section delimited by "!!!" on both sides.
# Returns {code_chars, rest} or nil when input does not start with "!!!"
# or the closing "!!!" is never found.
defp code([?!, ?!, ?! | rest]), do: code_tail(rest)
defp code(_input), do: nil

# Collects every character up to (but excluding) the closing "!!!".
# `acc` holds the characters seen so far in reverse order.
defp code_tail(input, acc \\ [])
defp code_tail([?!, ?!, ?! | rest], acc), do: {Enum.reverse(acc), rest}
defp code_tail([char | rest], acc), do: code_tail(rest, [char | acc])
defp code_tail(_input, _acc), do: nil

# alternation = concatenation *(*c-wsp "/" *c-wsp concatenation)
defp alternation(input) do
  case concatenation(input) do
    nil ->
      nil

    {first, rest} ->
      {alternatives, rest} = alternation_tail(rest, [first])
      {token(:alternation, alternatives), rest}
  end
end

# Consumes the `*(*c-wsp "/" *c-wsp concatenation)` part of an alternation.
# `acc` is the list of concatenations parsed so far, most recent first.
# Whatever c-wsp matched before the "/" is appended to the comments of the
# previous concatenation, whatever matched after it to the new one.
# When no further alternative can be parsed, the untouched `input` (i.e.
# including any whitespace that was tentatively consumed) is handed back.
defp alternation_tail(input, [previous | older] = acc) do
  case zero_or_more_cwsp(input) do
    {before_slash, [?/ | rest]} ->
      {after_slash, rest} = zero_or_more_cwsp(rest)

      case concatenation(rest) do
        nil ->
          {Enum.reverse(acc), input}

        {next, rest} ->
          next = add_comments(next, after_slash)
          previous = add_comments(previous, before_slash)
          alternation_tail(rest, [next, previous | older])
      end

    _ ->
      {Enum.reverse(acc), input}
  end
end

# repetition = [repeat] element
# Without an explicit repeat prefix the element must occur exactly once.
defp repetition(input) do
  {from, to, rest} = repeat(input) || {1, 1, input}

  case element(rest) do
    nil -> nil
    {e, rest} -> {token(:repetition, %{from: from, to: to, value: e}), rest}
  end
end

# concatenation = repetition *(1*c-wsp repetition)
defp concatenation(input) do
  case repetition(input) do
    nil ->
      nil

    {first, rest} ->
      {repetitions, rest} = concatenation_tail(rest, [first])
      {token(:concatenation, repetitions), rest}
  end
end

# Consumes the `*(1*c-wsp repetition)` part of a concatenation.
# `acc` is the list of repetitions parsed so far, most recent first.
# At least one c-wsp is required between repetitions; the matched c-wsp is
# appended to the comments of the previous repetition. When nothing more
# can be parsed the untouched `input` is handed back.
defp concatenation_tail(input, [previous | older] = acc) do
  case zero_or_more_cwsp(input) do
    # Emptiness is checked by pattern instead of length/1, which would
    # traverse the whole list just to compare the result with 0.
    {[], _rest} ->
      {Enum.reverse(acc), input}

    {separator, rest} ->
      case repetition(rest) do
        nil ->
          {Enum.reverse(acc), input}

        {next, rest} ->
          previous = add_comments(previous, separator)
          concatenation_tail(rest, [next, previous | older])
      end
  end
end

# elements = alternation *WSP
# As described in the Errata #2968
defp elements(input) do
  case alternation(input) do
    nil -> nil
    {a, rest} -> {a, zero_or_more_wsp(rest)}
  end
end

# element = rulename / group / option / char-val / num-val / prose-val
# Each parser yields nil on failure or a {token, rest} tuple on success, so
# the first non-nil result (tried in the order of the production) wins.
defp element(input) do
  rulename(input) || group(input) || option(input) ||
    char_val(input) || num_val(input) || prose_val(input)
end

# group = "(" *c-wsp alternation *c-wsp ")"
defp group(input), do: delimited_alternation(input, ?(, ?), :group)

# option = "[" *c-wsp alternation *c-wsp "]"
defp option(input), do: delimited_alternation(input, ?[, ?], :option)

# Shared implementation of group and option:
#   open *c-wsp alternation *c-wsp close
# Returns {token(type, alternation), rest} or nil. The c-wsp matched on
# both sides of the alternation is appended to the alternation's comments.
defp delimited_alternation([open | rest], open, close, type) do
  {leading, rest} = zero_or_more_cwsp(rest)

  case alternation(rest) do
    nil ->
      nil

    {a, rest} ->
      case zero_or_more_cwsp(rest) do
        {trailing, [^close | rest]} ->
          a = add_comments(a, leading ++ trailing)
          {token(type, a), rest}

        _ ->
          nil
      end
  end
end

defp delimited_alternation(_input, _open, _close, _type), do: nil

# rulename = ALPHA *(ALPHA / DIGIT / "-")
# Rule names are case-insensitive
# The names <rulename>, <Rulename>, <RULENAME>, and <rUlENamE> all
# refer to the same rule.
defp rulename([first | rest]) do
  # A rule name has to start with a letter.
  if Core.alpha?(first), do: rulename_tail(rest, [first]), else: nil
end

defp rulename(_input), do: nil

# Keeps consuming ALPHA / DIGIT / "-" characters; `acc` holds the name
# read so far in reverse order. Stops at the first other character or at
# the end of the input.
defp rulename_tail([char | rest] = input, acc) do
  if Core.alpha?(char) or Core.digit?(char) or char === ?- do
    rulename_tail(rest, [char | acc])
  else
    finish_rulename(acc, input)
  end
end

defp rulename_tail(input, acc), do: finish_rulename(acc, input)

# Builds the :rulename token out of the reversed accumulator.
defp finish_rulename(reversed_name, rest) do
  name = Util.rulename(Enum.reverse(reversed_name))
  {token(:rulename, name), rest}
end

# repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
# Returns {from, to, rest} or nil when there is no repeat prefix.
defp repeat(input) do
  case num(input, 10) do
    # "<n>*..." : lower bound given
    {from, [?* | after_star]} ->
      repeat_bounds(from, after_star)

    # "<n>" : exact count
    {count, rest} ->
      {count, count, rest}

    nil ->
      case input do
        # "*..." : lower bound defaults to 0
        [?* | after_star] -> repeat_bounds(0, after_star)
        _ -> nil
      end
  end
end

# Reads the optional upper bound that may follow the "*"; when there is
# none the repetition is unbounded.
defp repeat_bounds(from, after_star) do
  case num(after_star, 10) do
    {to, rest} -> {from, to, rest}
    _ -> {from, :infinity, after_star}
  end
end

# *WSP
# Drops leading whitespace and returns the remaining input.
defp zero_or_more_wsp([char | rest] = input) do
  if Core.wsp?(char), do: zero_or_more_wsp(rest), else: input
end

defp zero_or_more_wsp(input), do: input

# *c-wsp
# Returns {matched_chars, rest}; matched_chars is [] when nothing matched.
defp zero_or_more_cwsp(input, acc \\ []) do
  case c_wsp(input) do
    {chunk, rest} ->
      zero_or_more_cwsp(rest, [chunk | acc])

    nil ->
      matched = :lists.flatten(Enum.reverse(acc))
      {matched, input}
  end
end

# c-wsp = WSP / (c-nl WSP)
defp c_wsp([char | rest] = input) do
  if Core.wsp?(char), do: {[char], rest}, else: c_nl_wsp(input)
end

defp c_wsp(_input), do: nil

# c-nl WSP
# A comment or newline only counts as c-wsp when the following line starts
# with whitespace.
defp c_nl_wsp(input) do
  case c_nl(input) do
    {line_end, [char | rest]} ->
      if Core.wsp?(char), do: {line_end ++ [char], rest}, else: nil

    _ ->
      nil
  end
end

# c-nl = comment / CRLF ; comment or newline
defp c_nl(input) do
  comment(input) || crlf(input)
end

# comment = ";" *(WSP / VCHAR) CRLF
defp comment([?; | rest]), do: comment_tail(rest)
defp comment(_input), do: nil

# Reads the comment text up to and including the terminating CRLF; `acc`
# holds the text read so far in reverse order. Returns nil when a
# character that is neither WSP nor VCHAR shows up before the CRLF, or when
# the input ends first.
defp comment_tail(input, acc \\ []) do
  case crlf(input) do
    {line_end, rest} ->
      {Enum.reverse(acc) ++ line_end, rest}

    nil ->
      case input do
        [char | rest] ->
          if Core.wsp?(char) or Core.vchar?(char) do
            comment_tail(rest, [char | acc])
          else
            nil
          end

        _ ->
          nil
      end
  end
end

# From RFC7405:
# char-val = case-insensitive-string / case-sensitive-string
defp char_val(input) do
  case_insensitive_string(input) || case_sensitive_string(input)
end

# From RFC7405
# case-insensitive-string = [ "%i" ] quoted-string
#
# ABNF permits the specification of literal text strings directly,
# enclosed in quotation-marks. Hence:
#
# command = "command string"
#
# Literal text strings are interpreted as a concatenated set of
# printable characters.
#
# NOTE: ABNF strings are case-insensitive and the character set for these
# strings is us-ascii.
#
# Hence: rulename = "abc" and: rulename = "aBc" will match "abc", "Abc",
# "aBc", "abC", "ABc", "aBC", "AbC", and "ABC".
# To specify a rule that IS case SENSITIVE, specify the characters
# individually.
435 | defp case_insensitive_string(input) do 436 | case input do 437 | [?%, char|rest] -> if char === ?i or char === ?I do 438 | case quoted_string rest do 439 | nil -> nil 440 | {{l, str}, rest} -> char_val_token rest, l, str, [:caseless] 441 | end 442 | else 443 | nil 444 | end 445 | _ -> case quoted_string input do 446 | nil -> nil 447 | {{l, str}, rest} -> char_val_token rest, l, str, [:caseless] 448 | end 449 | end 450 | end 451 | 452 | # From RFC7405 453 | # case-sensitive-string = "%s" quoted-string 454 | defp case_sensitive_string(input) do 455 | case input do 456 | [?%, char|rest] -> if char === ?s or char === ?S do 457 | case quoted_string rest do 458 | nil -> nil 459 | {{l, str}, rest} -> char_val_token rest, l, str 460 | end 461 | else 462 | nil 463 | end 464 | _ -> nil 465 | end 466 | end 467 | 468 | defp char_val_token(rest, length, str, options \\ []) do 469 | {:ok, r} = :re.compile [?^, str], options 470 | { 471 | token(:char_val, %{ 472 | regex: r, 473 | length: length 474 | }), 475 | rest 476 | } 477 | end 478 | 479 | # From RFC7405 480 | # quoted-string DQUOTE *(%x20-21 / %x23-7E) DQUOTE 481 | # ; quoted string of SP and VCHAR without DQUOTE 482 | defp quoted_string(input) do 483 | case input do 484 | [char|rest] -> if Core.dquote? char do 485 | quoted_string_tail rest 486 | else 487 | nil 488 | end 489 | [] -> nil 490 | end 491 | end 492 | 493 | defp quoted_string_tail(input, acc \\ {0, []}) do 494 | {l, acc_char} = acc 495 | case input do 496 | [char|rest] -> 497 | if((char >= 0x20 and char <= 0x21) or (char >= 0x23 and char <= 0x7E)) do 498 | escape = not (Core.alpha?(char) or Core.digit?(char)) 499 | if escape do 500 | quoted_string_tail rest, {l + 1, [char, 92|acc_char]} 501 | else 502 | quoted_string_tail rest, {l + 1, [char|acc_char]} 503 | end 504 | else 505 | if Core.dquote? 
char do 506 | {{l, Enum.reverse(acc_char)}, rest} 507 | else 508 | nil 509 | end 510 | end 511 | _ -> nil 512 | end 513 | end 514 | 515 | # num-val = "%" (bin-val / dec-val / hex-val) 516 | # 517 | # bin-val = "b" 1*BIT [ 1*("." 1*BIT) / ("-" 1*BIT) ] 518 | # ; series of concatenated bit values or single ONEOF range 519 | # 520 | # dec-val = "d" 1*DIGIT [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] 521 | # 522 | # hex-val = "x" 1*HEXDIG [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] 523 | defp num_val(input) do 524 | case input do 525 | [?%, type|rest] -> cond do 526 | (type === ?b or type === ?B) -> num_val_tail rest, 2 527 | (type === ?d or type === ?D) -> num_val_tail rest, 10 528 | (type === ?x or type === ?X) -> num_val_tail rest, 16 529 | true -> nil 530 | end 531 | _ -> nil 532 | end 533 | end 534 | 535 | defp num_val_tail(input, base) do 536 | case num input, base do 537 | nil -> nil 538 | {n, [?.|_] = rest} -> num_concat_tail rest, base, [n] 539 | {from, [?-|rest]} -> case num rest, base do 540 | nil -> nil 541 | {to, rest} -> {token(:num_range, %{from: from, to: to}), rest} 542 | end 543 | {n, rest} -> {token(:num_range, %{from: n, to: n}), rest} 544 | end 545 | end 546 | 547 | defp num_concat_tail(input, base, acc) do 548 | case input do 549 | [?.|rest] -> case num rest, base do 550 | nil -> {token(:num_concat, Enum.reverse(acc)), input} 551 | {n, rest} -> num_concat_tail rest, base, [n|acc] 552 | end 553 | _ -> {token(:num_concat, Enum.reverse(acc)), input} 554 | end 555 | end 556 | 557 | defp num(input, base) do 558 | case input do 559 | [char|rest] -> if is_num? char, base do 560 | num_tail rest, base, [char] 561 | else 562 | nil 563 | end 564 | _ -> nil 565 | end 566 | end 567 | 568 | defp num_tail(input, base, acc) do 569 | case input do 570 | [char|rest] -> if is_num? 
char, base do 571 | num_tail rest, base, [char|acc] 572 | else 573 | {to_i(Enum.reverse(acc), base), input} 574 | end 575 | _ -> {to_i(Enum.reverse(acc), base), input} 576 | end 577 | end 578 | 579 | # prose-val = "<" *(%x20-3D / %x3F-7E) ">" 580 | # bracketed string of SP and VCHAR without ">" prose description, 581 | # to be used as last resort 582 | defp prose_val(input) do 583 | case input do 584 | [?<|rest] -> case prose_val_tail rest do 585 | nil -> nil 586 | {value, rest} -> {token(:rulename, Util.rulename(value)), rest} 587 | end 588 | _ -> nil 589 | end 590 | end 591 | 592 | defp prose_val_tail(input, acc \\ []) do 593 | case input do 594 | [?>|rest] -> {Enum.reverse(acc), rest} 595 | [char|rest] -> 596 | if((char >= 0x20 and char <= 0x3D) or (char >= 0x3F and char <= 0x7E)) do 597 | prose_val_tail rest, [char|acc] 598 | else 599 | nil 600 | end 601 | _ -> nil 602 | end 603 | end 604 | 605 | defp add_comments(t, comments) do 606 | Map.put t, :comments, (t.comments ++ comments) 607 | end 608 | 609 | defp token(type, value, comments \\ []) do 610 | %{ 611 | element: type, 612 | value: value, 613 | code: nil, 614 | comments: comments 615 | } 616 | end 617 | 618 | defp crlf(input) do 619 | case input do 620 | [char1, char2|rest] -> 621 | cond do 622 | Core.cr?(char1) and Core.lf?(char2) -> 623 | {[char1, char2], rest} 624 | Core.cr?(char1) == false and Core.lf?(char2) -> 625 | raise ArgumentError, message: "Lines should end with CRLF [13,10], found [#{char1},#{char2}]" 626 | true -> 627 | nil 628 | end 629 | _ -> nil 630 | end 631 | end 632 | 633 | defp to_i(input, base) do 634 | String.to_integer to_string(input), base 635 | end 636 | 637 | defp is_num?(char, base) do 638 | case base do 639 | 2 -> Core.bit? char 640 | 10 -> Core.digit? char 641 | 16 -> Core.hexdig? 
char 642 | end 643 | end 644 | end 645 | 646 | -------------------------------------------------------------------------------- /test/ex_abnf_test.exs: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright 2015 Marcelo Gornstein 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | ################################################################################ 16 | defmodule ABNF_Test do 17 | alias ABNF.CaptureResult, as: Res 18 | use ExUnit.Case, async: true 19 | require Logger 20 | @on_load :init 21 | 22 | test "raises for incorrect line endings" do 23 | assert_raise ArgumentError, "Lines should end with CRLF [13,10], found [122,10]", fn -> 24 | ABNF.load_file("test/resources/bad_line_endings.abnf") 25 | end 26 | end 27 | 28 | test "can do case (in)sensitive matches - RFC7405" do 29 | grammar = load "RFC7405" 30 | 31 | nil = ABNF.apply grammar, "case-sensitive", 'abc', nil 32 | %Res{ 33 | input: 'aBc', 34 | rest: '', 35 | string_text: 'aBc', 36 | string_tokens: ['aBc'], 37 | state: nil, 38 | values: _ 39 | } = ABNF.apply grammar, "case-sensitive", 'aBc', nil 40 | 41 | %Res{ 42 | input: 'aBc', 43 | rest: '', 44 | string_text: 'aBc', 45 | string_tokens: ['aBc'], 46 | state: nil, 47 | values: _ 48 | } = ABNF.apply grammar, "case-insensitive-1", 'aBc', nil 49 | 50 | %Res{ 51 | input: 'aBc', 52 | rest: '', 
53 | string_text: 'aBc', 54 | string_tokens: ['aBc'], 55 | state: nil, 56 | values: _ 57 | } = ABNF.apply grammar, "case-insensitive-2", 'aBc', nil 58 | end 59 | 60 | test "can write module code" do 61 | grammar = load "module_code" 62 | %Res{ 63 | input: '1.2.3.4rest', 64 | rest: 'rest', 65 | string_text: '1.2.3.4', 66 | string_tokens: ['1', '.', '2', '.', '3', '.', '4'], 67 | state: %{ipv4address: '1.2.3.4'}, 68 | values: ["Your ip address is: 1.2.3.4"] 69 | } = ABNF.apply grammar, "ipv4address", '1.2.3.4rest', %{} 70 | end 71 | 72 | test "ipv4" do 73 | grammar = load "ipv4" 74 | 75 | %Res{ 76 | input: '1.2.3.4rest', 77 | rest: 'rest', 78 | string_text: '1.2.3.4', 79 | string_tokens: ['1', '.', '2', '.', '3', '.', '4'], 80 | state: %{ipv4address: '1.2.3.4'}, 81 | values: ["Your ip address is: 1.2.3.4"] 82 | } = ABNF.apply grammar, "ipv4address", '1.2.3.4rest', %{} 83 | 84 | %Res{ 85 | input: '192.168.0.1rest', 86 | rest: 'rest', 87 | string_text: '192.168.0.1', 88 | string_tokens: ['192', '.', '168', '.', '0', '.', '1'], 89 | state: %{ipv4address: '192.168.0.1'}, 90 | values: ["Your ip address is: 192.168.0.1"] 91 | } = ABNF.apply grammar, "ipv4address", '192.168.0.1rest', %{} 92 | 93 | %Res{ 94 | input: '255.255.255.255rest', 95 | rest: 'rest', 96 | string_text: '255.255.255.255', 97 | string_tokens: ['255', '.', '255', '.', '255', '.', '255'], 98 | state: %{ipv4address: '255.255.255.255'}, 99 | values: ["Your ip address is: 255.255.255.255"] 100 | } = ABNF.apply grammar, "ipv4address", '255.255.255.255rest', %{} 101 | 102 | nil = ABNF.apply grammar, "ipv4address", '255.255.256.255rest', %{} 103 | end 104 | 105 | test "medium complexity" do 106 | grammar = load "path" 107 | %Res{ 108 | input: 'segment', 109 | rest: '', 110 | string_text: 'segment', 111 | string_tokens: ['s', 'egment'], 112 | state: ['segment'], 113 | values: _ 114 | } = ABNF.apply grammar, "segment", 'segment', [] 115 | 116 | %Res{ 117 | input: '/a', 118 | rest: '', 119 | string_text: '/a', 120 
| string_tokens: ['/a'], 121 | state: ['a'], 122 | values: _ 123 | } = ABNF.apply grammar, "path", '/a', [] 124 | 125 | %Res{ 126 | input: '/aa/bb', 127 | rest: '', 128 | string_text: '/aa/bb', 129 | string_tokens: ['/aa/bb'], 130 | state: ['aa', 'bb'], 131 | values: _ 132 | } = ABNF.apply grammar, "path", '/aa/bb', [] 133 | end 134 | 135 | test "basic repetition and optional" do 136 | grammar = load "basic" 137 | %Res{ 138 | input: 'helloworld rest', 139 | rest: ' rest', 140 | string_text: 'helloworld', 141 | string_tokens: ['helloworld'], 142 | state: nil, 143 | values: _ 144 | } = ABNF.apply grammar, "string1", 'helloworld rest', nil 145 | 146 | %Res{ 147 | input: 'helloworld rest', 148 | rest: 'loworld rest', 149 | string_text: 'hel', 150 | string_tokens: ['hel'], 151 | state: nil, 152 | values: _ 153 | } = ABNF.apply grammar, "string2", 'helloworld rest', nil 154 | 155 | %Res{ 156 | input: 'helloworld rest', 157 | rest: 'lloworld rest', 158 | string_text: 'he', 159 | string_tokens: ['he'], 160 | state: nil, 161 | values: _ 162 | } = ABNF.apply grammar, "string3", 'helloworld rest', nil 163 | 164 | %Res{ 165 | input: 'helloworld rest', 166 | rest: ' rest', 167 | string_text: 'helloworld', 168 | string_tokens: ['helloworld'], 169 | state: nil, 170 | values: _ 171 | } = ABNF.apply grammar, "string4", 'helloworld rest', nil 172 | 173 | %Res{ 174 | input: '3helloworld rest', 175 | rest: ' rest', 176 | string_text: '3helloworld', 177 | string_tokens: ['3', 'helloworld'], 178 | state: nil, 179 | values: _ 180 | } = ABNF.apply grammar, "string5", '3helloworld rest', nil 181 | 182 | %Res{ 183 | input: 'helloworld rest', 184 | rest: ' rest', 185 | string_text: 'helloworld', 186 | string_tokens: ['', 'helloworld'], 187 | state: nil, 188 | values: _ 189 | } = ABNF.apply grammar, "string5", 'helloworld rest', nil 190 | end 191 | 192 | test "ipv6" do 193 | grammar = load "ipv6" 194 | 195 | addresses = [ 196 | '::', 197 | '1:2:3:4:5:6:7:8', 198 | '1:2:3:4:5:6:192.168.0.1', 
199 | 'FE80:0000:0000:0000:0202:B3FF:FE1E:8329', 200 | '::1', 201 | '1::1:2:3:4:5:6', 202 | '1:2::3:4:5:6:7', 203 | '::1:2:3:4:5', 204 | 'fe80::200:f8ff:fe21:67cf', 205 | '2001:db8::1', 206 | '2001:db8:a0b:12f0::1', 207 | 'fdf8:f53b:82e4::53', 208 | '2001:db8:85a3::8a2e:370:7334', 209 | '::ffff:c000:0280', 210 | '2001:db8::2:1', 211 | '2001:db8::1:0:0:1', 212 | 'FE80:0:0:0:903A::11E4', 213 | 'FE80::903A:0:0:11E4', 214 | '2001:db8:122:344::192.0.2.33', 215 | '2001:db8:122:344:c0:2:2100::', 216 | '2001:db8:122:3c0:0:221::', 217 | '2001:db8:122:c000:2:2100::', 218 | '2001:db8:1c0:2:21::', 219 | '2001:db8:c000:221::', 220 | '::1', 221 | '::', 222 | '0:0:0:0:0:0:0:1', 223 | '0:0:0:0:0:0:0:0', 224 | '2001:DB8:0:0:8:800:200C:417A', 225 | 'FF01:0:0:0:0:0:0:101', 226 | '2001:DB8::8:800:200C:417A', 227 | 'FF01::101', 228 | 'fe80::217:f2ff:fe07:ed62', 229 | '2001:0000:1234:0000:0000:C1C0:ABCD:0876', 230 | '3ffe:0b00:0000:0000:0001:0000:0000:000a', 231 | 'FF02:0000:0000:0000:0000:0000:0000:0001', 232 | '0000:0000:0000:0000:0000:0000:0000:0001', 233 | '0000:0000:0000:0000:0000:0000:0000:0000', 234 | '2::10', 235 | 'ff02::1', 236 | 'fe80::', 237 | '2002::', 238 | '2001:db8::', 239 | '2001:0db8:1234::', 240 | '::ffff:0:0', 241 | '::1', 242 | '1:2:3:4:5:6:7:8', 243 | '1:2:3:4:5:6::8', 244 | '1:2:3:4:5::8', 245 | '1:2:3:4::8', 246 | '1:2:3::8', 247 | '1:2::8', 248 | '1::8', 249 | '1::2:3:4:5:6:7', 250 | '1::2:3:4:5:6', 251 | '1::2:3:4:5', 252 | '1::2:3:4', 253 | '1::2:3', 254 | '1::8', 255 | '::2:3:4:5:6:7:8', 256 | '::2:3:4:5:6:7', 257 | '::2:3:4:5:6', 258 | '::2:3:4:5', 259 | '::2:3:4', 260 | '::2:3', 261 | '::8', 262 | '1:2:3:4:5:6::', 263 | '1:2:3:4:5::', 264 | '1:2:3:4::', 265 | '1:2:3::', 266 | '1:2::', 267 | '1::', 268 | '1:2:3:4:5::7:8', 269 | '1:2:3:4::7:8', 270 | '1:2:3::7:8', 271 | '1:2::7:8', 272 | '1::7:8', 273 | '1:2:3:4:5:6:1.2.3.4', 274 | '1:2:3:4:5::1.2.3.4', 275 | '1:2:3:4::1.2.3.4', 276 | '1:2:3::1.2.3.4', 277 | '1:2::1.2.3.4', 278 | '1::1.2.3.4', 279 | 
'1:2:3:4::5:1.2.3.4', 280 | '1:2:3::5:1.2.3.4', 281 | '1:2::5:1.2.3.4', 282 | '1::5:1.2.3.4', 283 | '1::5:11.22.33.44', 284 | 'fe80::217:f2ff:254.7.237.98', 285 | '::ffff:192.168.1.26', 286 | '::ffff:192.168.1.1', 287 | '0:0:0:0:0:0:13.1.68.3', 288 | '0:0:0:0:0:FFFF:129.144.52.38', 289 | '::13.1.68.3', 290 | '::FFFF:129.144.52.38', 291 | 'fe80:0:0:0:204:61ff:254.157.241.86', 292 | 'fe80::204:61ff:254.157.241.86', 293 | '::ffff:12.34.56.78', 294 | '::ffff:192.0.2.128', 295 | 'fe80:0000:0000:0000:0204:61ff:fe9d:f156', 296 | 'fe80:0:0:0:204:61ff:fe9d:f156', 297 | 'fe80::204:61ff:fe9d:f156', 298 | '::1', 299 | 'fe80::', 300 | 'fe80::1', 301 | '::ffff:c000:280', 302 | '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 303 | '2001:db8:85a3:0:0:8a2e:370:7334', 304 | '2001:db8:85a3::8a2e:370:7334', 305 | '2001:0db8:0000:0000:0000:0000:1428:57ab', 306 | '2001:0db8:0000:0000:0000::1428:57ab', 307 | '2001:0db8:0:0:0:0:1428:57ab', 308 | '2001:0db8:0:0::1428:57ab', 309 | '2001:0db8::1428:57ab', 310 | '2001:db8::1428:57ab', 311 | '0000:0000:0000:0000:0000:0000:0000:0001', 312 | '::1', 313 | '::ffff:0c22:384e', 314 | '2001:0db8:1234:0000:0000:0000:0000:0000', 315 | '2001:0db8:1234:ffff:ffff:ffff:ffff:ffff', 316 | '2001:db8:a::123', 317 | 'fe80::', 318 | '1111:2222:3333:4444:5555:6666:7777:8888', 319 | '1111:2222:3333:4444:5555:6666:7777::', 320 | '1111:2222:3333:4444:5555:6666::', 321 | '1111:2222:3333:4444:5555::', 322 | '1111:2222:3333:4444::', 323 | '1111:2222:3333::', 324 | '1111:2222::', 325 | '1111::', 326 | '1111:2222:3333:4444:5555:6666::8888', 327 | '1111:2222:3333:4444:5555::8888', 328 | '1111:2222:3333:4444::8888', 329 | '1111:2222:3333::8888', 330 | '1111:2222::8888', 331 | '1111::8888', 332 | '::8888', 333 | '1111:2222:3333:4444:5555::7777:8888', 334 | '1111:2222:3333:4444::7777:8888', 335 | '1111:2222:3333::7777:8888', 336 | '1111:2222::7777:8888', 337 | '1111::7777:8888', 338 | '::7777:8888', 339 | '1111:2222:3333:4444::6666:7777:8888', 340 | 
'1111:2222:3333::6666:7777:8888', 341 | '1111:2222::6666:7777:8888', 342 | '1111::6666:7777:8888', 343 | '::6666:7777:8888', 344 | '1111:2222:3333::5555:6666:7777:8888', 345 | '1111:2222::5555:6666:7777:8888', 346 | '1111::5555:6666:7777:8888', 347 | '::5555:6666:7777:8888', 348 | '1111:2222::4444:5555:6666:7777:8888', 349 | '1111::4444:5555:6666:7777:8888', 350 | '::4444:5555:6666:7777:8888', 351 | '1111::3333:4444:5555:6666:7777:8888', 352 | '::3333:4444:5555:6666:7777:8888', 353 | '::2222:3333:4444:5555:6666:7777:8888', 354 | '1111:2222:3333:4444:5555:6666:123.123.123.123', 355 | '1111:2222:3333:4444:5555::123.123.123.123', 356 | '1111:2222:3333:4444::123.123.123.123', 357 | '1111:2222:3333::123.123.123.123', 358 | '1111:2222::123.123.123.123', 359 | '1111::123.123.123.123', 360 | '::123.123.123.123', 361 | '1111:2222:3333:4444::6666:123.123.123.123', 362 | '1111:2222:3333::6666:123.123.123.123', 363 | '1111:2222::6666:123.123.123.123', 364 | '1111::6666:123.123.123.123', 365 | '::6666:123.123.123.123', 366 | '1111:2222:3333::5555:6666:123.123.123.123', 367 | '1111:2222::5555:6666:123.123.123.123', 368 | '1111::5555:6666:123.123.123.123', 369 | '::5555:6666:123.123.123.123', 370 | '1111:2222::4444:5555:6666:123.123.123.123', 371 | '1111::4444:5555:6666:123.123.123.123', 372 | '::4444:5555:6666:123.123.123.123', 373 | '1111::3333:4444:5555:6666:123.123.123.123', 374 | '::2222:3333:4444:5555:6666:123.123.123.123', 375 | '::0:0:0:0:0:0:0', 376 | '::0:0:0:0:0:0', 377 | '::0:0:0:0:0', 378 | '::0:0:0:0', 379 | '::0:0:0', 380 | '::0:0', 381 | '::0', 382 | '0:0:0:0:0:0:0::', 383 | '0:0:0:0:0:0::', 384 | '0:0:0:0:0::', 385 | '0:0:0:0::', 386 | '0:0:0::', 387 | '0:0::', 388 | '0::', 389 | '0:a:b:c:d:e:f::', 390 | '::0:a:b:c:d:e:f', 391 | 'a:b:c:d:e:f:0::' 392 | ] 393 | 394 | Enum.each addresses, fn(a) -> 395 | Logger.debug "Testing IPv6: #{inspect a}" 396 | string = a ++ 'rest' 397 | %Res{ 398 | input: ^string, 399 | rest: 'rest', 400 | string_text: ^a, 401 | state: %{} 
# Closing lines of the IPv6 address test, which opens above this span.
      } = ABNF.apply grammar, "ipv6address", string, %{}
    end
  end

  # Parses several URI shapes with the RFC3986 grammar and checks both the
  # top-level tokens and the state accumulated by the grammar's reductions.
  test "uri" do
    grammar = load "RFC3986"

    # Full URI: userinfo, host, port, path, query and fragment.
    url = 'http://user:pass@host.com:421/some/path?k1=v1&k2=v2#one_fragment'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        fragment: 'one_fragment',
        host: 'host.com',
        host_type: :reg_name,
        port: '421',
        query: 'k1=v1&k2=v2',
        scheme: 'http',
        userinfo: 'user:pass',
        segments: ['some', 'path'],
        type: :abempty
      },
      string_text: ^url,
      string_tokens: [
        'http',
        ':',
        '//user:pass@host.com:421/some/path',
        '?k1=v1&k2=v2',
        '#one_fragment'
      ],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # No authority, absolute path.
    url = 'http:/path'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        segments: ['path'],
        type: :absolute
      },
      string_text: ^url,
      string_tokens: ['http', ':', '/path', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # Authority only, empty path.
    url = 'http://a.com'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        host: 'a.com',
        host_type: :reg_name,
        type: :abempty
      },
      string_text: ^url,
      string_tokens: ['http', ':', '//a.com', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # Authority with a port, empty path.
    url = 'http://a.com:789'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        host: 'a.com',
        port: '789',
        host_type: :reg_name,
        type: :abempty
      },
      string_text: ^url,
      string_tokens: ['http', ':', '//a.com:789', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # IPv4 literal host.
    url = 'http://192.168.0.1/path'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        segments: ['path'],
        host: '192.168.0.1',
        host_type: :ipv4,
        type: :abempty
      },
      string_text: ^url,
      string_tokens: ['http', ':', '//192.168.0.1/path', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # Scheme only.
    url = 'http:'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        type: :empty
      },
      string_text: ^url,
      string_tokens: ['http', ':', '', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # Rootless path.
    url = 'http:path1/path2'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        segments: ['path1', 'path2'],
        type: :rootless
      },
      string_text: ^url,
      string_tokens: ['http', ':', 'path1/path2', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}

    # IPvFuture literal host.
    url = 'http://[v1.fe80::a+en1]/path'
    %Res{
      input: ^url,
      rest: '',
      state: %{
        scheme: 'http',
        segments: ['path'],
        host: '[v1.fe80::a+en1]',
        host_type: :ipvfuture,
        type: :abempty
      },
      string_text: ^url,
      string_tokens: ['http', ':', '//[v1.fe80::a+en1]/path', '', ''],
      values: _
    } = ABNF.apply grammar, "uri", url, %{segments: []}
  end

  # Checks that applying the "composed" rule of the "reduce" grammar yields
  # the expected reduced values and updates the state.
  test "can reduce rule" do
    grammar = load "reduce"
    %Res{
      input: '123asd',
      rest: '',
      state: %{field: true},
      string_text: '123asd',
      string_tokens: ['123', 'asd'],
      values: [%{int: 123, string: "asd"}]
    } = ABNF.apply grammar, "composed", '123asd', %{field: false}
  end

  # Parses telephone URIs with the RFC3966 grammar.
  test "teluri" do
    grammar = load "RFC3966"

    # Global number.
    tel = 'tel:+1-201-555-0123'
    %Res{
      input: ^tel,
      rest: '',
      state: %{},
      string_text: ^tel,
      string_tokens: ['tel:', '+1-201-555-0123'],
      values: _
    } = ABNF.apply grammar, "telephone-uri", tel, %{}

    # Local number with a phone-context parameter.
    tel = 'tel:863-1234;phone-context=+1-914-555'
    %Res{
      input: ^tel,
      rest: '',
      state: %{},
      string_text: ^tel,
      string_tokens: ['tel:', '863-1234;phone-context=+1-914-555'],
      values: _
    } = ABNF.apply grammar, "telephone-uri", tel, %{}
  end

  # Parses the sample SDP document with the RFC4566 grammar.
  test "sdp" do
    grammar = load "RFC4566"
    data = to_char_list(File.read! "test/resources/sdp1.txt")
    %Res{
      input: ^data,
      rest: '',
      state: %{
        version: '0',
        session_name: 'description',
        origin: %{
          username: 'alice',
          session_id: '2890844526',
          session_version: '2890844526',
          net_type: 'IN',
          address_type: 'IP4',
          unicast_address: 'host.atlanta.example.com'
        }
      },
      string_text: ^data,
      string_tokens: [
        'v=0\r\n',
        'o=alice 2890844526 2890844526 IN IP4 host.atlanta.example.com\r\n',
        's=description\r\n',
        '',
        '',
        '',
        '',
        'c=IN IP4 host.atlanta.example.com\r\n',
        '',
        't=0 0\r\n',
        '',
        '',
        'm=audio 49170 RTP/AVP 0 8 97\r\na=rtpmap:0 PCMU/8000\r\na=rtpmap:8 PCMA/8000\r\na=rtpmap:97 iLBC/8000\r\nm=video 51372 RTP/AVP 31 32\r\na=rtpmap:31 H261/90000\r\na=rtpmap:32 MPV/90000\r\n'
      ],
      values: _
    } = ABNF.apply grammar, "session-description", data, %{}
  end

  # Parses the sample SIP message with the RFC3261 grammar.
  test "sip" do
    grammar = load "RFC3261"
    data = to_char_list(File.read! "test/resources/sip1.txt")
    %Res{
      input: ^data,
      rest: '',
      state: %{
        headers: %{
          "from" => %{
            addr: %{
              hostport: %{host: "biloxi.com", port: 5060},
              scheme: "sip",
              userinfo: "bob"
            },
            display_name: "Bob "
          }
        },
        method: :register,
        request: true,
        uri: %{
          hostport: %{
            host: "registrar.biloxi.com",
            port: 1234
          },
          userinfo: "",
          scheme: "sip"
        }
      },
      string_text: ^data,
      string_tokens: [^data]
    } = ABNF.apply grammar, "SIP-message", data, %{
      headers: %{}
    }
  end

  # Parses mailboxes, a date-time and a Received header with the
  # RFC5322 (no obsolete syntax) grammar.
  test "email" do
    grammar = load "RFC5322-no-obs"

    # Bare addr-spec.
    email = 'user@domain.com'
    %Res{
      input: ^email,
      rest: '',
      state: %{
        domain: 'domain.com',
        local_part: 'user'
      },
      string_text: ^email,
      string_tokens: ['user@domain.com'],
      values: _
    } = ABNF.apply grammar, "mailbox", email, %{}

    # Angle-bracketed address without a display name. The brackets are part
    # of the input; an empty input could never produce the state below.
    email = '<user@domain.com>'
    %Res{
      input: ^email,
      rest: '',
      state: %{
        domain: 'domain.com',
        local_part: 'user'
      },
      string_text: ^email,
      string_tokens: ['<user@domain.com>'],
      values: _
    } = ABNF.apply grammar, "mailbox", email, %{}

    # Display name followed by an angle-bracketed address.
    email = 'Peter Cantropus <user@domain.com>'
    %Res{
      input: ^email,
      rest: '',
      state: %{
        domain: 'domain.com',
        local_part: 'user',
        display_name: 'Peter Cantropus '
      },
      string_text: ^email,
      string_tokens: ['Peter Cantropus <user@domain.com>'],
      values: _
    } = ABNF.apply grammar, "mailbox", email, %{}

    input = '21 Nov 1997 10:01:22 -0600'
    %Res{
      input: ^input,
      rest: '',
      state: %{
        month: 'Nov',
        year: '1997',
        day: '21',
        tz: '-0600',
        hour: '10',
        minute: '01',
        second: '22'
      },
      string_text: ^input,
      string_tokens: [[], '21 Nov 1997 ', '10:01:22 -0600', []],
      values: _
    } = ABNF.apply grammar, "date-time", input, %{}

    input = 'Received: from node.example by x.y.test; 21 Nov 1997 10:01:22 -0600\r\n'
    %Res{
      input: ^input,
      rest: '',
      state: %{
        day: '21',
        domain: 'x.y.test',
        hour: '10',
        minute: '01',
        month: 'Nov',
        second: '22',
        tz: '-0600',
        year: '1997'
      },
      string_text: ^input,
      string_tokens: ['Received:', ' from node.example by x.y.test', ';', ' 21 Nov 1997 10:01:22 -0600', '\r\n'],
      values: _
    } = ABNF.apply grammar, "Received", input, %{}
  end

  # Load grammars before tests are run.
  #
  # Spawns a process that creates the public named ETS table `:ets`, stores
  # every grammar in it keyed by its base file name, reports `:done` to the
  # caller and then blocks so that it keeps owning the table.
  #
  # Returns `:ok` once all grammars are stored. Raises if the loader process
  # exits before reporting `:done` (for example when a grammar fails to
  # load), instead of leaving the caller waiting forever.
  def init() do
    me = self()
    {loader, monitor} = spawn_monitor fn ->
      :ets = :ets.new :ets, [:named_table, :public, {:read_concurrency, true}]
      for t <- [
        "ipv4",
        "ipv6",
        "path",
        "reduce",
        "basic",
        "RFC7405",
        "RFC3261",
        "RFC3966",
        "RFC3986",
        "RFC4566",
        "module_code",
        "RFC5322-no-obs"
      ] do
        :ets.insert_new(
          :ets, {t, ABNF.load_file("test/resources/#{t}.abnf")}
        )
        # NOTE(review): purpose of this 1 ms pause is not evident from the
        # code; kept as-is - confirm before removing.
        :timer.sleep 1
      end
      send me, :done
      # Park here: the table lives only as long as this process does.
      receive do
        _ -> :ok
      end
    end
    receive do
      :done ->
        # The loader stays alive on purpose; stop watching it and drop any
        # pending :DOWN message.
        Process.demonitor monitor, [:flush]
        :ok
      {:DOWN, ^monitor, :process, ^loader, reason} ->
        raise "grammar loader exited before finishing: #{inspect reason}"
    end
  end

  # Fetches a grammar previously stored by init/0 from the `:ets` table.
  # Fails with a MatchError when no entry exists for `file`.
  defp load(file) do
    [{^file, grammar}] = :ets.lookup :ets, file
    grammar
  end
end

--------------------------------------------------------------------------------