├── .travis.yml ├── test ├── org │ ├── table_test.exs │ ├── content_test.exs │ ├── document_test.exs │ ├── section_test.exs │ ├── code_block_test.exs │ ├── paragraph_test.exs │ ├── parser_test.exs │ └── lexer_test.exs ├── org_test.exs └── test_helper.exs ├── README.md ├── mix.lock ├── .gitignore ├── lib ├── org │ ├── content.ex │ ├── paragraph.ex │ ├── code_block.ex │ ├── parser.ex │ ├── section.ex │ ├── lexer.ex │ ├── table.ex │ └── document.ex └── org.ex ├── mix.exs └── config └── config.exs /.travis.yml: -------------------------------------------------------------------------------- 1 | language: elixir 2 | elixir: '1.5.2' 3 | script: 4 | - "mix test --trace" 5 | - "mix dialyzer" -------------------------------------------------------------------------------- /test/org/table_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Org.TableTest do 2 | use ExUnit.Case 3 | doctest Org.Table 4 | end 5 | -------------------------------------------------------------------------------- /test/org/content_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Org.ContentTest do 2 | use ExUnit.Case 3 | doctest Org.Content 4 | end 5 | -------------------------------------------------------------------------------- /test/org/document_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Org.DocumentTest do 2 | use ExUnit.Case 3 | doctest Org.Document 4 | end 5 | -------------------------------------------------------------------------------- /test/org/section_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Org.SectionTest do 2 | use ExUnit.Case 3 | doctest Org.Section 4 | end 5 | -------------------------------------------------------------------------------- /test/org/code_block_test.exs: -------------------------------------------------------------------------------- 
1 | defmodule Org.CodeBlockTest do 2 | use ExUnit.Case 3 | doctest Org.CodeBlock 4 | end 5 | -------------------------------------------------------------------------------- /test/org/paragraph_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Org.ParagraphTest do 2 | use ExUnit.Case 3 | doctest Org.Paragraph 4 | end 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Org 2 | 3 | Implements an org-mode parser. 4 | 5 | Further documentation is available [on hexdocs.pm](https://hexdocs.pm/org/Org.html) 6 | 7 | ## Installation 8 | 9 | The package can be installed by adding `org` to your list of dependencies in `mix.exs`: 10 | 11 | ```elixir 12 | def deps do 13 | [ 14 | {:org, "~> 0.1.0"} 15 | ] 16 | end 17 | ``` 18 | 19 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{"dialyxir": {:hex, :dialyxir, "0.5.1", "b331b091720fd93e878137add264bac4f644e1ddae07a70bf7062c7862c4b952", [], [], "hexpm"}, 2 | "earmark": {:hex, :earmark, "1.2.4", "99b637c62a4d65a20a9fb674b8cffb8baa771c04605a80c911c4418c69b75439", [], [], "hexpm"}, 3 | "ex_doc": {:hex, :ex_doc, "0.18.1", "37c69d2ef62f24928c1f4fdc7c724ea04aecfdf500c4329185f8e3649c915baf", [], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}], "hexpm"}} 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 
defprotocol Org.Content do
  @moduledoc ~S"""
  Protocol implemented by every piece of document content, such as:
  * `Org.Table`
  * `Org.Paragraph`
  * `Org.CodeBlock`
  """

  @doc "Returns the content with its elements in reversed order. The parser builds content back to front and calls this once at the end."
  def reverse_recursive(content)
end

# Catch-all implementations for the built-in types. They exist only to keep
# dialyzer quiet; calling one of them always raises.
defimpl Org.Content,
  for: [Atom, BitString, Float, Function, Integer, List, Map, PID, Port, Reference, Tuple] do
  def reverse_recursive(unsupported) do
    message = "#{__MODULE__} Not implemented for #{inspect(unsupported)}"
    raise RuntimeError, message: message
  end
end
defmodule OrgTestHelper do
  @moduledoc false

  # Expands to one ExUnit test per expected token (compared by position) plus a
  # final test asserting the total token count. The generated tests read the
  # token list from the `:tokens` key of the test context.
  defmacro test_tokens(expected) do
    token_tests =
      expected
      |> Enum.with_index()
      |> Enum.map(fn {token, index} ->
        quote do
          test "Token #{unquote(index)} equals #{inspect(unquote(token))}", %{tokens: tokens} do
            assert Enum.at(tokens, unquote(index)) == unquote(token)
          end
        end
      end)

    count_test =
      quote do
        test "There are #{unquote(length(expected))} tokens", %{tokens: tokens} do
          assert length(tokens) == unquote(length(expected))
        end
      end

    token_tests ++ [count_test]
  end

  # Expands to one ExUnit test per `{path, lines}` pair. Each test asserts that
  # the section found at `path` holds exactly one paragraph with those lines.
  # The generated tests read the document from the `:doc` key of the context.
  defmacro test_section_text_contents(expected) do
    for {path, text_content} <- expected do
      quote do
        test "Section at #{inspect(unquote(path))} has content #{inspect(unquote(text_content))}", %{doc: doc} do
          section = Org.section(doc, unquote(path))
          assert Org.Section.contents(section) == [%Org.Paragraph{lines: unquote(text_content)}]
        end
      end
    end
  end
end
defmodule Org.Paragraph do
  @moduledoc ~S"""
  Represents an uninterrupted list of lines. Paragraphs are separated by one or more newlines.

  Example:
      iex> doc = Org.Parser.parse("Foo\nBar\n\nBaz")
      iex> doc.contents
      [%Org.Paragraph{lines: ["Foo", "Bar"]}, %Org.Paragraph{lines: ["Baz"]}]
  """

  defstruct lines: []

  @type t :: %__MODULE__{
    lines: list(String.t),
  }

  @doc "Builds a paragraph holding exactly the given lines."
  @spec new(list(String.t)) :: t
  def new(lines), do: %__MODULE__{lines: lines}

  @doc "Puts `line` in front of the paragraph's existing lines. Used by the parser."
  @spec prepend_line(t, String.t) :: t
  def prepend_line(paragraph, line) do
    existing = paragraph.lines
    %__MODULE__{paragraph | lines: [line | existing]}
  end
end

defimpl Org.Content, for: Org.Paragraph do
  # Restores source order of the lines that were accumulated by prepending.
  def reverse_recursive(paragraph) do
    ordered = Enum.reverse(paragraph.lines)
    %Org.Paragraph{paragraph | lines: ordered}
  end
end
defmodule Org.CodeBlock do
  @moduledoc ~S"""
  Represents a block of code.

  Example:
      iex> doc = Org.Parser.parse("#+BEGIN_SRC emacs-lisp -n 20\n(message \"Hello World\")\n#+END_SRC")
      iex> doc.contents
      [%Org.CodeBlock{lang: "emacs-lisp", details: "-n 20", lines: ["(message \"Hello World\")"]}]
  """

  defstruct lang: "", details: "", lines: []

  @type t :: %__MODULE__{
    lang: String.t,
    details: String.t,
    lines: list(String.t),
  }

  @doc "Builds a code block for the given language and details; `lines` defaults to an empty list."
  @spec new(String.t, String.t, list(String.t)) :: t
  def new(lang, details, lines \\ []) do
    %__MODULE__{lang: lang, details: details, lines: lines}
  end

  @doc "Puts `line` in front of the block's existing lines. Used by the parser."
  @spec prepend_line(t, String.t) :: t
  def prepend_line(code_block, line) do
    existing = code_block.lines
    %__MODULE__{code_block | lines: [line | existing]}
  end
end

defimpl Org.Content, for: Org.CodeBlock do
  # Restores source order of the lines that were accumulated by prepending.
  def reverse_recursive(code_block) do
    ordered = Enum.reverse(code_block.lines)
    %Org.CodeBlock{code_block | lines: ordered}
  end
end
defmodule Org.Parser do
  @moduledoc ~S"""
  Parses a text or list of tokens into an `Org.Document`.

  By calling `parse/1`, the lexer is invoked first.
  To parse a file that has already been lexed, pass the tokens to `parse_tokens/2` directly.
  """

  defstruct doc: %Org.Document{}, mode: nil

  @type t :: %Org.Parser{
    doc: Org.Document.t,
    mode: :paragraph | :table | :code_block | nil,
  }

  @doc "Lexes `text` with `Org.Lexer.lex/1` and parses the resulting tokens."
  @spec parse(String.t) :: Org.Document.t
  def parse(text) do
    tokens = Org.Lexer.lex(text)
    parse_tokens(tokens)
  end

  @doc """
  Feeds the tokens, in order, into the parser state and returns the finished document.

  The document is accumulated back to front, so it is passed through
  `Org.Document.reverse_recursive/1` once all tokens have been consumed.
  """
  @spec parse_tokens(Org.Parser.t, list(Org.Lexer.token)) :: Org.Document.t
  def parse_tokens(parser \\ %Org.Parser{}, tokens)

  def parse_tokens(parser, tokens) when is_list(tokens) do
    finished = Enum.reduce(tokens, parser, &parse_token/2)

    finished
    |> Map.get(:doc)
    |> Org.Document.reverse_recursive
  end

  # Comments and section titles produce a fresh parser state, so the mode falls
  # back to nil and following text/table rows start a new content element.
  defp parse_token({:comment, comment}, parser) do
    updated = Org.Document.add_comment(parser.doc, comment)
    %Org.Parser{parser | doc: updated, mode: nil}
  end

  defp parse_token({:section_title, level, title}, parser) do
    updated = Org.Document.add_subsection(parser.doc, level, title)
    %Org.Parser{parser | doc: updated, mode: nil}
  end

  # An empty line ends whatever content element is currently being built.
  defp parse_token({:empty_line}, parser) do
    %Org.Parser{parser | mode: nil}
  end

  # Text either extends the paragraph in progress or opens a new one.
  defp parse_token({:text, line}, parser) do
    updated =
      case parser.mode do
        :paragraph ->
          Org.Document.update_content(parser.doc, &Org.Paragraph.prepend_line(&1, line))

        _other ->
          Org.Document.prepend_content(parser.doc, Org.Paragraph.new([line]))
      end

    %Org.Parser{parser | doc: updated, mode: :paragraph}
  end

  # Table rows either extend the table in progress or open a new one.
  defp parse_token({:table_row, cells}, parser) do
    updated =
      case parser.mode do
        :table ->
          Org.Document.update_content(parser.doc, &Org.Table.prepend_row(&1, cells))

        _other ->
          Org.Document.prepend_content(parser.doc, Org.Table.new([cells]))
      end

    %Org.Parser{parser | doc: updated, mode: :table}
  end

  defp parse_token({:begin_src, lang, details}, parser) do
    block = Org.CodeBlock.new(lang, details)
    updated = Org.Document.prepend_content(parser.doc, block)

    %Org.Parser{parser | doc: updated, mode: :code_block}
  end

  # Raw lines and the end marker are only accepted while inside a code block.
  defp parse_token({:raw_line, line}, %Org.Parser{mode: :code_block} = parser) do
    updated = Org.Document.update_content(parser.doc, &Org.CodeBlock.prepend_line(&1, line))

    %Org.Parser{parser | doc: updated}
  end

  defp parse_token({:end_src}, %Org.Parser{mode: :code_block} = parser) do
    %Org.Parser{parser | mode: nil}
  end
end
defmodule Org.Lexer do
  @moduledoc ~S"""
  Splits an org-document into tokens.

  For many simple tasks, using the lexer is enough, and a full-fledged `Org.Document` is not needed.

  Usage example:
      iex> source = "#+TITLE: Greetings\n\n* Hello\n** World\n** Universe\n* Goodbye\n"
      iex> Org.Lexer.lex(source)
      [{:comment, "+TITLE: Greetings"},
       {:empty_line},
       {:section_title, 1, "Hello"},
       {:section_title, 2, "World"},
       {:section_title, 2, "Universe"},
       {:section_title, 1, "Goodbye"},
       {:empty_line}]
  """

  defstruct tokens: [], mode: :normal

  @type token :: (
    {:comment, String.t} |
    {:section_title, integer, String.t} |
    {:table_row, list(String.t)} |
    {:empty_line} |
    {:text, String.t} |
    {:begin_src, String.t, String.t} |
    {:raw_line, String.t} |
    {:end_src}
  )

  @type t :: %Org.Lexer{
    tokens: list(token),
    mode: :normal | :raw
  }

  @doc """
  Splits `text` on newlines and returns one token per line, in source order.

  Lines between `#+BEGIN_SRC` and `#+END_SRC` are emitted verbatim as
  `{:raw_line, line}` tokens and are not interpreted further.
  """
  @spec lex(String.t) :: list(token)
  def lex(text) do
    text
    |> String.split("\n")
    |> lex_lines
    |> Map.get(:tokens)
    |> Enum.reverse
  end

  defp lex_lines(lexer \\ %Org.Lexer{}, lines)

  defp lex_lines(lexer, []) do
    lexer
  end

  defp lex_lines(lexer, [line | rest]) do
    line
    |> lex_line(lexer)
    |> lex_lines(rest)
  end

  @begin_src_re ~r/^#\+BEGIN_SRC(?:\s+([^\s]*)\s?(.*)|)$/
  @end_src_re ~r/^#\+END_SRC$/
  @comment_re ~r/^#(.+)$/
  @section_title_re ~r/^(\*+) (.+)$/
  @empty_line_re ~r/^\s*$/
  @table_row_re ~r/^\s*(?:\|[^|]*)+\|\s*$/
  # The group must be named: cells are extracted with `capture: :all_names`.
  @table_cell_re ~r/\|(?<cell>[^|]+)/

  # Normal mode: the first matching pattern wins, so the order of the clauses
  # matters (e.g. `#+BEGIN_SRC` must be tried before the generic comment).
  defp lex_line(line, %Org.Lexer{mode: :normal} = lexer) do
    cond do
      match = Regex.run(@begin_src_re, line) ->
        {lang, details} = begin_src_params(match)
        lexer |> append_token({:begin_src, lang, details}) |> set_mode(:raw)
      match = Regex.run(@comment_re, line) ->
        [_, text] = match
        append_token(lexer, {:comment, text})
      match = Regex.run(@section_title_re, line) ->
        [_, nesting, title] = match
        append_token(lexer, {:section_title, String.length(nesting), title})
      Regex.match?(@empty_line_re, line) ->
        append_token(lexer, {:empty_line})
      Regex.match?(@table_row_re, line) ->
        append_token(lexer, {:table_row, table_cells(line)})
      true ->
        append_token(lexer, {:text, line})
    end
  end

  # Raw mode: everything up to the end marker is passed through untouched.
  defp lex_line(line, %Org.Lexer{mode: :raw} = lexer) do
    if Regex.match?(@end_src_re, line) do
      lexer |> append_token({:end_src}) |> set_mode(:normal)
    else
      append_token(lexer, {:raw_line, line})
    end
  end

  # A bare "#+BEGIN_SRC" takes the empty alternative of the pattern, in which
  # case the language/details groups may be absent from the match result.
  defp begin_src_params([_, lang, details]), do: {lang, details}
  defp begin_src_params([_ | _]), do: {"", ""}

  # The line is trimmed first so that whitespace following the closing "|"
  # is not picked up as an additional, empty cell.
  defp table_cells(line) do
    @table_cell_re
    |> Regex.scan(String.trim(line), capture: :all_names)
    |> List.flatten
    |> Enum.map(&String.trim/1)
  end

  # Tokens are accumulated in reverse; `lex/1` reverses them once at the end.
  defp append_token(%Org.Lexer{} = lexer, token) do
    %Org.Lexer{lexer | tokens: [token | lexer.tokens]}
  end

  defp set_mode(%Org.Lexer{} = lexer, mode) do
    %Org.Lexer{lexer | mode: mode}
  end
end
38 | 39 | Creating a table from plain cell contents: 40 | iex> table = Org.Table.new([["foo", "bar"]]) 41 | iex> table.rows 42 | [%Org.Table.Row{cells: ["foo", "bar"]}] 43 | 44 | Creating a table from row structures: 45 | iex> table = Org.Table.new([%Org.Table.Row{cells: ["foo", "bar"]}]) 46 | iex> table.rows 47 | [%Org.Table.Row{cells: ["foo", "bar"]}] 48 | 49 | Creating a table from a mixture of structures and cell contents: 50 | iex> table = Org.Table.new([["foo"], %Org.Table.Separator{}, ["bar"]]) 51 | iex> table.rows 52 | [%Org.Table.Row{cells: ["foo"]}, %Org.Table.Separator{}, %Org.Table.Row{cells: ["bar"]}] 53 | """ 54 | @spec new(list(row | list(String.t))) :: t 55 | def new(rows) do 56 | %Org.Table{rows: Enum.map(rows, &cast_row/1)} 57 | end 58 | 59 | @doc """ 60 | Prepends a row to the table. The row will be cast the same way as when passed to `new/1`. 61 | 62 | This function is used by the parser, which builds up documents in reverse and then finally 63 | calls Org.Content.reverse_recursive/1 to yield the original order. 64 | """ 65 | @spec prepend_row(t, row) :: t 66 | def prepend_row(table, row) do 67 | %Org.Table{table | rows: [cast_row(row) | table.rows]} 68 | end 69 | 70 | @doc """ 71 | Returns a table with the given number of leading rows omitted. 72 | 73 | Example: 74 | iex> table = Org.Table.new([["X", "Y"], ["7", "4"], ["3", "8"], ["15", "24"]]) 75 | iex> Org.Table.skip_rows(table, 1) 76 | %Org.Table{rows: [%Org.Table.Row{cells: ["7", "4"]}, %Org.Table.Row{cells: ["3", "8"]}, %Org.Table.Row{cells: ["15", "24"]}]} 77 | """ 78 | @spec skip_rows(t, integer) :: t 79 | def skip_rows(table, 0) do 80 | table 81 | end 82 | 83 | def skip_rows(table, n) do 84 | skip_rows(%Org.Table{table | rows: tl(table.rows)}, n - 1) 85 | end 86 | 87 | @doc """ 88 | Returns a list of rows with cells named according to given keys. 
89 | 90 | Example: 91 | iex> table = Org.Table.new([["Width", "20"], ["Height", "40"]]) 92 | iex> Org.Table.extract_rows(table, [:parameter_name, :value]) 93 | [%{parameter_name: "Width", value: "20"}, %{parameter_name: "Height", value: "40"}] 94 | """ 95 | @spec extract_rows(t, list(any)) :: list(map) 96 | def extract_rows(table, keys) do 97 | for %Org.Table.Row{cells: cells} <- table.rows do 98 | keys 99 | |> Enum.zip(cells) 100 | |> Enum.into(%{}) 101 | end 102 | end 103 | 104 | defp cast_row(%{__struct__: type} = row) when type in [Org.Table.Row, Org.Table.Separator] do 105 | row 106 | end 107 | 108 | defp cast_row(cells) do 109 | if String.match?(hd(cells), ~r/^\-+/) do 110 | %Org.Table.Separator{} 111 | else 112 | %Org.Table.Row{cells: cells} 113 | end 114 | end 115 | end 116 | 117 | defimpl Org.Content, for: Org.Table do 118 | def reverse_recursive(table) do 119 | %Org.Table{table | rows: Enum.reverse(table.rows)} 120 | end 121 | end 122 | -------------------------------------------------------------------------------- /lib/org.ex: -------------------------------------------------------------------------------- 1 | defmodule Org do 2 | @moduledoc """ 3 | This package implements an org-mode lexer and parser. 4 | 5 | org-mode is the markup language used by the powerful [org mode package for emacs](http://orgmode.org/). 6 | 7 | This implementation supports only a small subset of the syntax at this point, but can already be useful for extracting information from well-formed documents. 
8 | 9 | Features supported are: 10 | - Comments 11 | - (nested) Sections 12 | - Paragraphs 13 | - Tables 14 | - Code blocks 15 | """ 16 | 17 | @type load_mode :: :document | :tokens 18 | 19 | @doc "Loads a document from a file at given path" 20 | @spec load_file(String.t, load_mode) :: Org.Document.t 21 | def load_file(path, load_mode \\ :document) do 22 | {:ok, data} = File.read(path) 23 | load_string(data, load_mode) 24 | end 25 | 26 | @doc "Loads a document from the given source string" 27 | @spec load_string(String.t, load_mode) :: Org.Document.t 28 | def load_string(data, load_mode \\ :document) 29 | 30 | def load_string(data, :document) do 31 | Org.Parser.parse(data) 32 | end 33 | 34 | def load_string(data, :tokens) do 35 | Org.Lexer.lex(data) 36 | end 37 | 38 | @doc ~S""" 39 | Extracts a section at the given path of titles 40 | 41 | Example: 42 | iex> doc = Org.load_string(~S{ 43 | ...>* First 44 | ...>** Second 45 | ...>*** Third 46 | ...>* Fourth 47 | iex>}) 48 | iex> Org.section(doc, ["First"]).title 49 | "First" 50 | iex> Org.section(doc, ["First", "Second", "Third"]).title 51 | "Third" 52 | iex> Org.section(doc, ["Fourth"]).title 53 | "Fourth" 54 | """ 55 | @spec section(Org.Document.t, list(String.t)) :: Org.Section.t 56 | def section(doc, path) do 57 | Org.Section.find_by_path(doc.sections, path) 58 | end 59 | 60 | @doc ~S""" 61 | Extracts all tables from the given section or document 62 | 63 | Example: 64 | iex> doc = Org.load_string(~S{ 65 | ...>First paragraph 66 | ...>| x | y | 67 | ...>| 1 | 7 | 68 | ...>Second paragraph 69 | ...>}) 70 | iex> Org.tables(doc) 71 | [%Org.Table{rows: [%Org.Table.Row{cells: ["x", "y"]}, %Org.Table.Row{cells: ["1", "7"]}]}] 72 | """ 73 | @spec tables(Org.Section.t | Org.Document.t) :: list(Org.Table.t) 74 | def tables(section_or_document) do 75 | for %Org.Table{} = table <- Org.contents(section_or_document), do: table 76 | end 77 | 78 | @doc ~S""" 79 | Extracts all code blocks from the given section or document 80 | 
81 | Example: 82 | iex> doc = Org.load_string(~S{ 83 | ...>First example: 84 | ...> 85 | ...>#+BEGIN_SRC emacs-lisp -n 10 86 | ...>(message "Hello World!") 87 | ...>(message "...") 88 | ...>#+END_SRC 89 | ...> 90 | ...>Second example: 91 | ...> 92 | ...>#+BEGIN_SRC org-mode 93 | ...>* Nested document 94 | ...>This is a nested document. 95 | ...> 96 | ...>| With | a | 97 | ...>| nested | table. | 98 | ...> 99 | ...>It will not be parsed. 100 | ...>#+END_SRC 101 | ...> 102 | ...>}) 103 | iex> Org.code_blocks(doc) 104 | [%Org.CodeBlock{lang: "emacs-lisp", details: "-n 10", lines: ["(message \"Hello World!\")", "(message \"...\")"]}, 105 | %Org.CodeBlock{lang: "org-mode", details: "", lines: ["* Nested document", "This is a nested document.", "", "| With | a |", "| nested | table. |", "", "It will not be parsed."]}] 106 | """ 107 | def code_blocks(section_or_document) do 108 | for %Org.CodeBlock{} = code_block <- Org.contents(section_or_document), do: code_block 109 | end 110 | 111 | @doc ~S""" 112 | Extracts all paragraphs from the given section or document 113 | 114 | Example: 115 | iex> doc = Org.load_string(~S{ 116 | ...>First paragraph 117 | ...>| x | y | 118 | ...>| 1 | 7 | 119 | ...>Second paragraph 120 | ...>}) 121 | iex> Org.paragraphs(doc) 122 | [%Org.Paragraph{lines: ["First paragraph"]}, %Org.Paragraph{lines: ["Second paragraph"]}] 123 | """ 124 | @spec paragraphs(Org.Section.t | Org.Document.t) :: list(Org.Paragraph.t) 125 | def paragraphs(section_or_document) do 126 | for %Org.Paragraph{} = paragraph <- Org.contents(section_or_document), do: paragraph 127 | end 128 | 129 | @doc "Extracts all contents from given section or document" 130 | @spec contents(Org.Document.t | Org.Section.t) :: list(Org.Content.t) 131 | def contents(section_or_document) 132 | def contents(%Org.Document{} = doc) do 133 | Org.Document.contents(doc) 134 | end 135 | 136 | def contents(%Org.Section{} = section) do 137 | Org.Section.contents(section) 138 | end 139 | end 140 | 
# /lib/org/document.ex
defmodule Org.Document do
  @moduledoc ~S"""
  Represents an interpreted document.

  Documents are organized as a tree of sections, each of which has a title and optional contents.
  The document can also have contents at the top level.
  """

  defstruct comments: [], sections: [], contents: []

  @type t :: %__MODULE__{
    comments: list(String.t),
    sections: list(Org.Section.t),
    contents: list(Org.Content.t)
  }

  @doc "Retrieve current contents of document"
  @spec contents(t) :: list(Org.Content.t)
  def contents(%__MODULE__{contents: contents}), do: contents

  @doc "Prepend a comment to the list of comments. Used by the parser"
  @spec add_comment(t, String.t) :: t
  def add_comment(doc, comment) do
    %__MODULE__{doc | comments: [comment | doc.comments]}
  end

  @doc """
  Prepend a subsection at the given level.

  At level `1` a new `%Org.Section{}` with the given title is prepended to the
  document's top-level sections.

  At any other level the new section is passed to `Org.Section.add_nested/3`
  (with `level - 1`) on the first section in `sections`. If the document has
  no sections yet, an empty `%Org.Section{}` is used as the parent.
  """
  @spec add_subsection(t, pos_integer, String.t) :: t
  def add_subsection(doc, level, title)

  def add_subsection(doc, 1, title) do
    %__MODULE__{doc | sections: [%Org.Section{title: title} | doc.sections]}
  end

  def add_subsection(doc, level, title) do
    # Sections are kept newest-first, so the head is the section currently
    # being built; fall back to a blank parent when nothing exists yet.
    {parent, siblings} =
      case doc.sections do
        [head | tail] -> {head, tail}
        [] -> {%Org.Section{}, []}
      end

    nested = Org.Section.add_nested(parent, level - 1, %Org.Section{title: title})
    %__MODULE__{doc | sections: [nested | siblings]}
  end

  @doc """
  Reverses the document's entire content recursively.

  Uses `Org.Section.reverse_recursive/1` and `Org.Content.reverse_recursive/1` to reverse sections and contents.

  Example (comments):

      iex> doc = %Org.Document{}
      iex> doc = Org.Document.add_comment(doc, "first")
      iex> doc = Org.Document.add_comment(doc, "second")
      iex> doc = Org.Document.add_comment(doc, "third")
      iex> doc.comments
      ["third", "second", "first"]
      iex> doc = Org.Document.reverse_recursive(doc)
      iex> doc.comments
      ["first", "second", "third"]

  Example (sections):

      iex> doc = %Org.Document{}
      iex> doc = Org.Document.add_subsection(doc, 1, "First")
      iex> doc = Org.Document.add_subsection(doc, 1, "Second")
      iex> doc = Org.Document.add_subsection(doc, 1, "Third")
      iex> for %Org.Section{title: title} <- doc.sections, do: title
      ["Third", "Second", "First"]
      iex> doc = Org.Document.reverse_recursive(doc)
      iex> for %Org.Section{title: title} <- doc.sections, do: title
      ["First", "Second", "Third"]

  Example (contents):

      iex> doc = %Org.Document{}
      iex> doc = Org.Document.prepend_content(doc, %Org.Paragraph{lines: ["first paragraph, first line"]})
      iex> doc = Org.Document.update_content(doc, fn p -> Org.Paragraph.prepend_line(p, "first paragraph, second line") end)
      iex> doc = Org.Document.prepend_content(doc, %Org.Paragraph{lines: ["second paragraph, first line"]})
      iex> doc = Org.Document.update_content(doc, fn p -> Org.Paragraph.prepend_line(p, "second paragraph, second line") end)
      iex> Org.Document.contents(doc)
      [%Org.Paragraph{lines: ["second paragraph, second line", "second paragraph, first line"]},
       %Org.Paragraph{lines: ["first paragraph, second line", "first paragraph, first line"]}]
      iex> doc = Org.Document.reverse_recursive(doc)
      iex> Org.Document.contents(doc)
      [%Org.Paragraph{lines: ["first paragraph, first line", "first paragraph, second line"]},
       %Org.Paragraph{lines: ["second paragraph, first line", "second paragraph, second line"]}]
  """
  @spec reverse_recursive(t) :: t
  def reverse_recursive(doc) do
    sections =
      doc.sections
      |> Enum.map(&Org.Section.reverse_recursive/1)
      |> Enum.reverse()

    contents =
      doc.contents
      |> Enum.map(&Org.Content.reverse_recursive/1)
      |> Enum.reverse()

    %__MODULE__{
      doc
      | comments: Enum.reverse(doc.comments),
        sections: sections,
        contents: contents
    }
  end

  @doc ~S"""
  Prepend content to the currently deepest section, or toplevel if no sections exist.

  When the document has sections, the work is delegated to
  `Org.Section.prepend_content/2` on the first section in `sections`.

  See documentation of `reverse_recursive/1` for a usage example.
  """
  @spec prepend_content(t, Org.Content.t) :: t
  def prepend_content(%__MODULE__{sections: []} = doc, content) do
    %__MODULE__{doc | contents: [content | doc.contents]}
  end

  def prepend_content(%__MODULE__{sections: [current | others]} = doc, content) do
    %__MODULE__{doc | sections: [Org.Section.prepend_content(current, content) | others]}
  end

  @doc ~S"""
  Update the last prepended content. Yields the content to the given updater.

  Without sections, the first entry of the document's own `contents` is
  replaced by the updater's result. With sections, the update is delegated to
  `Org.Section.update_content/2` on the first section in `sections`.

  No clause matches a document that has neither sections nor contents, so
  such a call raises a `FunctionClauseError`.

  See documentation of `reverse_recursive/1` for a usage example.
  """
  @spec update_content(t, (Org.Content.t -> Org.Content.t)) :: t
  def update_content(%__MODULE__{sections: [], contents: [latest | older]} = doc, updater) do
    %__MODULE__{doc | contents: [updater.(latest) | older]}
  end

  def update_content(%__MODULE__{sections: [current | others]} = doc, updater) do
    %__MODULE__{doc | sections: [Org.Section.update_content(current, updater) | others]}
  end
end