├── test
│   ├── test_helper.exs
│   └── friendly_test.exs
├── .gitignore
├── mix.lock
├── LICENSE
├── config
│   └── config.exs
├── mix.exs
├── lib
│   └── friendly.ex
├── README.md
└── friendly.iml
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /_build
2 | /cover
3 | /deps
4 | erl_crash.dump
5 | *.ez
6 | /.idea
7 |
--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
1 | %{"floki": {:hex, :floki, "0.18.1", "6f903e3074357fe9756079d0f607e430589912f698b5c5e5970af08daba1537c", [], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
2 | "mochiweb": {:hex, :mochiweb, "2.15.0", "e1daac474df07651e5d17cc1e642c4069c7850dc4508d3db7263a0651330aacc", [], [], "hexpm"}}
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2016 Piotr 'Qertoip' Włodarek
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 | of the Software, and to permit persons to whom the Software is furnished to do
8 | so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/config/config.exs:
--------------------------------------------------------------------------------
1 | # This file is responsible for configuring your application
2 | # and its dependencies with the aid of the Mix.Config module.
3 | use Mix.Config
4 |
5 | # This configuration is loaded before any dependency and is restricted
6 | # to this project. If another project depends on this project, this
7 | # file won't be loaded nor affect the parent project. For this reason,
8 | # if you want to provide default values for your application for
9 | # 3rd-party users, it should be done in your "mix.exs" file.
10 |
11 | # You can configure for your application as:
12 | #
13 | # config :friendly, key: :value
14 | #
15 | # And access this configuration in your application as:
16 | #
17 | # Application.get_env(:friendly, :key)
18 | #
19 | # Or configure a 3rd-party app:
20 | #
21 | # config :logger, level: :info
22 | #
23 |
24 | # It is also possible to import configuration files, relative to this
25 | # directory. For example, you can emulate configuration per environment
26 | # by uncommenting the line below and defining dev.exs, test.exs and such.
27 | # Configuration from the imported file will override the ones defined
28 | # here (which is why it is important to import them last).
29 | #
30 | # import_config "#{Mix.env}.exs"
31 |
--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
1 | defmodule Friendly.Mixfile do
2 |   use Mix.Project
3 |
4 |   # Mix project definition: package identity, Elixir requirement and dependencies.
5 |   def project do
6 |     [
7 |       app: :friendly,
8 |       version: "1.1.0",
9 |       description: "HTML and XML parser with the most friendly API in Elixir land. CSS selector in, list of elements out.",
10 |       elixir: "~> 1.5",
11 |       package: package(),
12 |       build_embedded: Mix.env() == :prod,
13 |       start_permanent: Mix.env() == :prod,
14 |       deps: deps()
15 |     ]
16 |   end
17 |
18 |   # Hex package metadata (maintainers, license and project links).
19 |   def package do
20 |     [
21 |       maintainers: ["Piotr Włodarek"],
22 |       licenses: ["MIT"],
23 |       links: %{
24 |         "GitHub" => "https://github.com/qertoip/friendly/",
25 |         "Docs" => "https://github.com/qertoip/friendly/"
26 |       }
27 |     ]
28 |   end
29 |
30 |   # Configuration for the OTP application
31 |   #
32 |   # Runtime dependencies such as Floki are inferred from deps/0 (Elixir 1.4+),
33 |   # so only applications that are not dependencies are listed here. Listing
34 |   # them under :applications instead would switch that inference off.
35 |   #
36 |   # Type "mix help compile.app" for more information
37 |   def application do
38 |     [extra_applications: [:logger]]
39 |   end
40 |
41 |   # Dependencies can be Hex packages:
42 |   #
43 |   # {:mydep, "~> 0.3.0"}
44 |   #
45 |   # Or git/path repositories:
46 |   #
47 |   # {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"}
48 |   #
49 |   # Type "mix help deps" for more examples and options
50 |   defp deps do
51 |     [
52 |       {:floki, "~> 0.18"}
53 |     ]
54 |   end
55 | end
56 |
--------------------------------------------------------------------------------
/lib/friendly.ex:
--------------------------------------------------------------------------------
1 | defmodule Friendly do
2 |   @moduledoc """
3 |   Thin, map-based API on top of Floki: CSS selector in, list of elements out.
4 |   """
5 |
6 |   @doc """
7 |   Finds every element of `xml_string` that matches `css_selector`.
8 |
9 |   Returns a list of element maps (empty when nothing matches). Each map has:
10 |
11 |     * `:name` - the element name as reported by Floki
12 |     * `:attributes` - map of attribute name => value, both binaries
13 |     * `:elements` - child elements, converted recursively to the same shape
14 |     * `:texts` - the direct text children, in order
15 |     * `:text` - `:texts` joined with a single space
16 |   """
17 |   def find(xml_string, css_selector) do
18 |     xml_string
19 |     |> Floki.find(css_selector)
20 |     |> Enum.map(&to_element/1)
21 |   end
22 |
23 |   # Converts one Floki `{name, attributes, children}` node into an element map.
24 |   defp to_element({name, attributes, children}) do
25 |     # `Enum.split_with/2` replaces the deprecated `Enum.partition/2`.
26 |     {elements, texts} =
27 |       children
28 |       |> convert_children()
29 |       |> Enum.split_with(&is_map/1)
30 |
31 |     %{
32 |       name: name,
33 |       # Attribute names stay binaries: atoms are never garbage-collected, so
34 |       # untrusted markup must not be able to create them.
35 |       attributes: Map.new(attributes),
36 |       elements: elements,
37 |       texts: texts,
38 |       text: Enum.join(texts, " ")
39 |     }
40 |   end
41 |
42 |   # Converts the children of a node: element tuples become element maps and
43 |   # text is passed through untouched. Any other tuple (for example a
44 |   # `{:comment, text}` node) is neither text nor an element, so it is skipped
45 |   # instead of crashing the conversion.
46 |   defp convert_children(children) do
47 |     Enum.flat_map(children, fn
48 |       {name, attributes, kids} = node
49 |       when is_binary(name) and is_list(attributes) and is_list(kids) ->
50 |         [to_element(node)]
51 |
52 |       node when is_tuple(node) ->
53 |         []
54 |
55 |       text ->
56 |         [text]
57 |     end)
58 |   end
59 | end
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Friendly
2 |
3 | Friendly is an HTML and XML parser for Elixir aiming at a friendly API.
4 |
5 | Friendly was born out of frustration with Elixir APIs for HTML/XML parsing as of January 2016.
6 |
7 | The package is a thin API layer on top of the otherwise excellent [Floki](https://github.com/philss/floki).
8 |
9 | Query the XML/HTML with a CSS selector, get the list of elements out.
10 |
11 | ## Usage
12 |
13 | ```elixir
14 | Friendly.find(xml_string, css_selector)
15 | ```
16 |
17 | Returns a List of elements:
18 | ```elixir
19 | [element1, element2, element3, ...]
20 | ```
21 |
22 | Each element is a Map:
23 | ```elixir
24 | %{
25 | name: "ElementName",
26 | attributes: %{ "attr1" => "value1", "attr2" => "value2" }, # Map of attributes
27 | elements: [element1, element2, element3], # List of children elements
28 | text: "Concatenated direct text content",
29 | texts: ["Text1", "Text2", "Text3"] # List of children texts
30 | }
31 | ```
32 | The children elements are again Maps.
33 |
34 | This makes it very natural to traverse.
35 |
36 | Caveats:
37 |
38 | * Attributes' names are BitStrings, not Atoms. This is because Atoms in Elixir are not GC-ed. We cannot allow them to be injected into the VM by untrusted XML.
39 |
40 | * Attributes' names are __forced lowercase__, so iAmAttributeName becomes iamattributename. Unfortunately this is how underlying Floki works. Hopefully this will get fixed eventually.
41 |
42 | ## Example
43 |
44 | ```elixir
45 | xml = """
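46 | <?xml version="1.0"?>
47 | <catalog>
48 |   <book id="bk101">
49 |     <author>Gambardella, Matthew</author>
50 |     <title>XML Developer's Guide</title>
51 |     <genre>Computer</genre>
52 |     <price>44.95</price>
53 |     <publish_date>2000-10-01</publish_date>
54 |     <description>An in-depth look at creating applications
55 |     with XML.</description>
56 |   </book>
57 |   <book id="bk102">
58 |     <author>Ralls, Kim</author>
59 |     <title>Midnight Rain</title>
60 |     <genre>Fantasy</genre>
61 |     <price>5.95</price>
62 |     <publish_date>2000-12-16</publish_date>
63 |     <description>A former architect battles corporate zombies,
64 |     an evil sorceress, and her own childhood to become queen
65 |     of the world.</description>
66 |   </book>
67 | </catalog>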
68 | """
69 |
70 | books = Friendly.find(xml, "book")
71 |
72 | Enum.each(books, fn book ->
73 | id = book.attributes["id"]
74 | IO.puts("Book [#{id}]")
75 |
76 | title = Enum.find(book.elements, fn elm -> elm.name == "title" end)
77 | IO.puts("\ttitle: #{title.text}")
78 |
79 | author = Enum.find(book.elements, fn elm -> elm.name == "author" end)
80 | IO.puts("\tauthor: #{author.text}")
81 | end)
82 | ```
83 |
84 | ## Installation
85 |
86 | The package is [available in Hex](https://hex.pm/packages/friendly). To install:
87 |
88 | 1. Add friendly to your list of dependencies in `mix.exs`:
89 |
90 | def deps do
91 | [{:friendly, "~> 1.1.0"}]
92 | end
93 |
94 | 2. Ensure friendly is started before your application:
95 |
96 | def application do
97 | [applications: [:friendly]]
98 | end
99 |
--------------------------------------------------------------------------------
/test/friendly_test.exs:
--------------------------------------------------------------------------------
1 | defmodule FriendlyTest do # ExUnit tests for Friendly.find/2, driven by inline XML fixtures.
2 | use ExUnit.Case # NOTE(review): the heredoc fixtures below appear empty in this listing; the XML markup was presumably stripped - confirm against the repository.
3 |
4 | test "Non-matching selector" do # A selector that matches nothing must yield an empty list.
5 | xml = String.trim("""
6 |
7 |
8 | """)
9 | elements = Friendly.find(xml, "non-matching selector")
10 | assert(is_list(elements))
11 | assert(Enum.empty?(elements))
12 | end
13 |
14 | test "Single empty element" do # One match whose attributes map and text are both empty.
15 | xml = String.trim("""
16 |
17 |
18 | """)
19 | elements = Friendly.find(xml, "root")
20 | assert(Enum.count(elements) == 1)
21 | root = hd(elements)
22 | assert(root.name == "root")
23 | assert(root.attributes == %{})
24 | assert(root.text == "")
25 | end
26 |
27 | test "Single empty element with attributes" do # Attribute names are expected lowercased ("iamcamelcased"); values keep their case and non-ASCII characters.
28 | xml = String.trim("""
29 |
30 |
31 | """)
32 | elements = Friendly.find(xml, "root")
33 | assert(Enum.count(elements) == 1)
34 | root = hd(elements)
35 | assert(root.name == "root")
36 | assert(root.attributes == %{
37 | "id" => "2",
38 | "iamcamelcased" => "iAmCamelCased",
39 | "some_attr" => "some attr",
40 | "data-attr" => "ĄĆĘŁŃÓŚŻŹ",
41 | "i-am-strange" => "strange"
42 | })
43 | assert(root.text == "")
44 | end
45 |
46 | test "Single element with text" do # Text must come back verbatim, including surrounding whitespace and newlines.
47 | xml = String.trim("""
48 |
49 | \t I am some text in first line.\nI should be in the second line.\nAnd me in the third. \t
50 | """)
51 | elements = Friendly.find(xml, "root")
52 | assert(Enum.count(elements) == 1)
53 | root = hd(elements)
54 | assert(root.name == "root")
55 | assert(root.attributes == %{})
56 | assert(root.text == " \t I am some text in first line.\nI should be in the second line.\nAnd me in the third. \t ")
57 | end
58 |
59 | test "Nested single element with atributes and text" do # The target's text must be just "yyyyy2" once trimmed; the other fixture text is excluded.
60 | xml = String.trim("""
61 |
62 |
63 | XXXXX1
64 |
65 | yyyyy2
66 |
67 | wwwww3
68 |
69 | """)
70 | elements = Friendly.find(xml, "target")
71 | assert(Enum.count(elements) == 1)
72 | target = hd(elements)
73 | assert(target.name == "target")
74 | assert(target.attributes["name"] == "Sporitelna_cz")
75 | assert(String.trim(target.text) == "yyyyy2")
76 | end
77 |
78 | test "Multiple nested elements" do # Two "command" matches, returned as [c1, c2] with ids c1 and c2; only c2 carries text ("UU").
79 | xml = String.trim("""
80 |
81 |
82 | XXXXX1
83 |
84 | yyyyy2.1
85 |
86 | yyyyy2.2
87 | UU
88 | yyyyy2.3
89 |
90 | wwwww3
91 |
92 | """)
93 | elements = Friendly.find(xml, "command")
94 | assert(Enum.count(elements) == 2)
95 | [c1, c2] = elements
96 | assert(c1.name == "command")
97 | assert(c2.name == "command")
98 | assert(c1.attributes["id"] == "c1")
99 | assert(c2.attributes["id"] == "c2")
100 | assert(String.trim(c1.text) == "")
101 | assert(String.trim(c2.text) == "UU")
102 | end
103 |
104 | test "Multiple text nodes" do # Each of the three text fragments must appear in .text, and .texts must hold three entries.
105 | xml = String.trim("""
106 |
107 |
108 | XXXXX1
109 |
110 | yyyyy2.1
111 |
112 | yyyyy2.2
113 |
114 | yyyyy2.3
115 |
116 | wwwww3
117 |
118 | """)
119 | elements = Friendly.find(xml, "target")
120 | assert(Enum.count(elements) == 1)
121 | target = hd(elements)
122 | assert(String.trim(target.text) =~ "yyyyy2.1")
123 | assert(String.trim(target.text) =~ "yyyyy2.2")
124 | assert(String.trim(target.text) =~ "yyyyy2.3")
125 | assert(Enum.count(target.texts) == 3)
126 | end
127 |
128 | @tag :skip # NOTE(review): the reason for skipping is not recorded here - confirm whether this test is expected to pass.
129 | test "Readme Example" do # Runs the same snippet as the README example; it prints to stdout and makes no assertions.
130 | xml = """
131 |
132 |
133 |
134 | Gambardella, Matthew
135 | XML Developer's Guide
136 | Computer
137 | 44.95
138 | 2000-10-01
139 | An in-depth look at creating applications
140 | with XML.
141 |
142 |
143 | Ralls, Kim
144 | Midnight Rain
145 | Fantasy
146 | 5.95
147 | 2000-12-16
148 | A former architect battles corporate zombies,
149 | an evil sorceress, and her own childhood to become queen
150 | of the world.
151 |
152 |
153 | """
154 |
155 | books = Friendly.find(xml, "book")
156 |
157 | Enum.each(books, fn book ->
158 | id = book.attributes["id"]
159 | IO.puts("Book [#{id}]")
160 |
161 | title = Enum.find(book.elements, fn elm -> elm.name == "title" end)
162 | IO.puts("\ttitle: #{title.text}")
163 |
164 | author = Enum.find(book.elements, fn elm -> elm.name == "author" end)
165 | IO.puts("\tauthor: #{author.text}")
166 | end)
167 | end
168 |
169 | end
170 |
--------------------------------------------------------------------------------
/friendly.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
31 |
202 |
215 |
216 |
--------------------------------------------------------------------------------