├── test ├── test_helper.exs └── friendly_test.exs ├── .gitignore ├── mix.lock ├── LICENSE ├── config └── config.exs ├── mix.exs ├── lib └── friendly.ex ├── README.md └── friendly.iml /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /cover 3 | /deps 4 | erl_crash.dump 5 | *.ez 6 | /.idea 7 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{"floki": {:hex, :floki, "0.18.1", "6f903e3074357fe9756079d0f607e430589912f698b5c5e5970af08daba1537c", [], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"}, 2 | "mochiweb": {:hex, :mochiweb, "2.15.0", "e1daac474df07651e5d17cc1e642c4069c7850dc4508d3db7263a0651330aacc", [], [], "hexpm"}} 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2016 Piotr 'Qertoip' Włodarek 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for 9 | # 3rd-party users, it should be done in your "mix.exs" file. 10 | 11 | # You can configure for your application as: 12 | # 13 | # config :friendly, key: :value 14 | # 15 | # And access this configuration in your application as: 16 | # 17 | # Application.get_env(:friendly, :key) 18 | # 19 | # Or configure a 3rd-party app: 20 | # 21 | # config :logger, level: :info 22 | # 23 | 24 | # It is also possible to import configuration files, relative to this 25 | # directory. For example, you can emulate configuration per environment 26 | # by uncommenting the line below and defining dev.exs, test.exs and such. 27 | # Configuration from the imported file will override the ones defined 28 | # here (which is why it is important to import them last). 29 | # 30 | # import_config "#{Mix.env}.exs" 31 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Friendly.Mixfile do 2 | use Mix.Project 3 | 4 | def project do 5 | [app: :friendly, 6 | version: "1.1.0", 7 | description: "HTML and XML parser with the most friendly API in Elixir land. CSS selector in, list of elements out.", 8 | elixir: "~> 1.5", 9 | package: package(), 10 | build_embedded: Mix.env == :prod, 11 | start_permanent: Mix.env == :prod, 12 | deps: deps()] 13 | end 14 | 15 | def package do 16 | [ 17 | maintainers: ["Piotr Włodarek"], 18 | licenses: ["MIT"], 19 | links: %{"GitHub" => "https://github.com/qertoip/friendly/", 20 | "Docs" => "https://github.com/qertoip/friendly/"} 21 | ] 22 | end 23 | 24 | # Configuration for the OTP application 25 | # 26 | # Type "mix help compile.app" for more information 27 | def application do 28 | [applications: [:logger, :floki]] 29 | end 30 | 31 | # Dependencies can be Hex packages: 32 | # 33 | # {:mydep, "~> 0.3.0"} 34 | # 35 | # Or git/path repositories: 36 | # 37 | # {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"} 38 | # 39 | # Type "mix help deps" for more examples and options 40 | defp deps do 41 | [ 42 | {:floki, "~> 0.18"} 43 | ] 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/friendly.ex: -------------------------------------------------------------------------------- 1 | defmodule Friendly do 2 | 3 | def find(xml_string, css_selector) do 4 | Floki.find(xml_string, css_selector) 5 | |> to_elements 6 | end 7 | 8 | defp to_elements(flok_elms) do 9 | Enum.map(flok_elms, &to_element/1) 10 | end 11 | 12 | defp to_element(flok_elm) do 13 | children = fetch_children(flok_elm) 14 | { elements, texts } = Enum.partition(children, &(is_map(&1))) 15 | text = Enum.join(texts, " ") 16 | %{ 17 | name: fetch_name(flok_elm), 18 | attributes: fetch_attributes(flok_elm), 19 | elements: elements, 20 | texts: texts, 21 | text: text 22 | } 23 | end 24 | 25 | defp fetch_name(elm_tuple) do 26 | elem(elm_tuple, 0) 27 | end 28 | 29 | defp fetch_attributes(elm_tuple) do 30 | Enum.into(Enum.map(elem(elm_tuple, 1), fn key_value_tuple -> 31 | { attr_name, attr_value } = key_value_tuple 32 | #attr_name = String.to_atom(attr_name) 33 | { attr_name, attr_value } 34 | end), %{}) 35 | end 36 | 37 | defp fetch_children(elm_tuple) do 38 | children = elem(elm_tuple, 2) 39 | Enum.map(children, fn child -> 40 | if is_tuple(child) do 41 | to_element(child) 42 | else 43 | child # text 44 | end 45 | end) 46 | end 47 | 48 | end 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Friendly 2 | 3 | Friendly is HTML and XML parser for Elixir aiming at friendly API. 4 | 5 | Friendly was born out of frustration with Elixir APIs for HTML/XML parsing as of January 2016. 6 | 7 | The package is a thin API layer on top of otherwise excellent [Floki](https://github.com/philss/floki). 8 | 9 | Query the XML/HTML with a CSS selector, get the list of elements out. 10 | 11 | ## Usage 12 | 13 | ```elixir 14 | Friendly.find(xml_string, css_selector) 15 | ``` 16 | 17 | Returns a List of elements: 18 | ```elixir 19 | [element1, element2, element3, ...] 20 | ``` 21 | 22 | Each element is a Map: 23 | ```elixir 24 | %{ 25 | name: "ElementName", 26 | attributes: %{ "attr1" => "value1", "attr2" => "value2" }, # Map of attributes 27 | elements: [element1, element2, element3], # List of children elements 28 | text: "Concatenated direct text content", 29 | texts: ["Text1", "Text2", "Text3"] # List of children texts 30 | } 31 | ``` 32 | The children elements are again Maps. 33 | 34 | This makes it very natural to traverse. 35 | 36 | Caveats: 37 | 38 | * Attributes' names are BitStrings, not Atoms. This is because Atoms in Elixir and not GC-ed. We cannot allow them to be injected into VM by the untrusted XML. 39 | 40 | * Attributes' names are __forced lowercase__, so iAmAttributeName becomes iamattributename. Unfortunately this is how underlying Floki works. Hopefully this will get fixed eventually. 41 | 42 | ## Example 43 | 44 | ```elixir 45 | xml = """ 46 | 47 | 48 | 49 | Gambardella, Matthew 50 | XML Developer's Guide 51 | Computer 52 | 44.95 53 | 2000-10-01 54 | An in-depth look at creating applications 55 | with XML. 56 | 57 | 58 | Ralls, Kim 59 | Midnight Rain 60 | Fantasy 61 | 5.95 62 | 2000-12-16 63 | A former architect battles corporate zombies, 64 | an evil sorceress, and her own childhood to become queen 65 | of the world. 66 | 67 | 68 | """ 69 | 70 | books = Friendly.find(xml, "book") 71 | 72 | Enum.each(books, fn book -> 73 | id = book.attributes["id"] 74 | IO.puts("Book [#{id}]") 75 | 76 | title = Enum.find(book.elements, fn elm -> elm.name == "title" end) 77 | IO.puts("\ttitle: #{title.text}") 78 | 79 | author = Enum.find(book.elements, fn elm -> elm.name == "author" end) 80 | IO.puts("\tauthor: #{author.text}") 81 | end) 82 | ``` 83 | 84 | ## Installation 85 | 86 | The package is [available in Hex](https://hex.pm/packages/friendly). To install: 87 | 88 | 1. Add friendly to your list of dependencies in `mix.exs`: 89 | 90 | def deps do 91 | [{:friendly, "~> 1.0.0"}] 92 | end 93 | 94 | 2. Ensure friendly is started before your application: 95 | 96 | def application do 97 | [applications: [:friendly]] 98 | end 99 | -------------------------------------------------------------------------------- /test/friendly_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FriendlyTest do 2 | use ExUnit.Case 3 | 4 | test "Non-matching selector" do 5 | xml = String.trim(""" 6 | 7 | 8 | """) 9 | elements = Friendly.find(xml, "non-matching selector") 10 | assert(is_list(elements)) 11 | assert(Enum.empty?(elements)) 12 | end 13 | 14 | test "Single empty element" do 15 | xml = String.trim(""" 16 | 17 | 18 | """) 19 | elements = Friendly.find(xml, "root") 20 | assert(Enum.count(elements) == 1) 21 | root = hd(elements) 22 | assert(root.name == "root") 23 | assert(root.attributes == %{}) 24 | assert(root.text == "") 25 | end 26 | 27 | test "Single empty element with attributes" do 28 | xml = String.trim(""" 29 | 30 | 31 | """) 32 | elements = Friendly.find(xml, "root") 33 | assert(Enum.count(elements) == 1) 34 | root = hd(elements) 35 | assert(root.name == "root") 36 | assert(root.attributes == %{ 37 | "id" => "2", 38 | "iamcamelcased" => "iAmCamelCased", 39 | "some_attr" => "some attr", 40 | "data-attr" => "ĄĆĘŁŃÓŚŻŹ", 41 | "i-am-strange" => "strange" 42 | }) 43 | assert(root.text == "") 44 | end 45 | 46 | test "Single element with text" do 47 | xml = String.trim(""" 48 | 49 | \t I am some text in first line.\nI should be in the second line.\nAnd me in the third. \t 50 | """) 51 | elements = Friendly.find(xml, "root") 52 | assert(Enum.count(elements) == 1) 53 | root = hd(elements) 54 | assert(root.name == "root") 55 | assert(root.attributes == %{}) 56 | assert(root.text == " \t I am some text in first line.\nI should be in the second line.\nAnd me in the third. \t ") 57 | end 58 | 59 | test "Nested single element with atributes and text" do 60 | xml = String.trim(""" 61 | 62 | 63 | XXXXX1 64 | 65 | yyyyy2 66 | 67 | wwwww3 68 | 69 | """) 70 | elements = Friendly.find(xml, "target") 71 | assert(Enum.count(elements) == 1) 72 | target = hd(elements) 73 | assert(target.name == "target") 74 | assert(target.attributes["name"] == "Sporitelna_cz") 75 | assert(String.trim(target.text) == "yyyyy2") 76 | end 77 | 78 | test "Multiple nested elements" do 79 | xml = String.trim(""" 80 | 81 | 82 | XXXXX1 83 | 84 | yyyyy2.1 85 | 86 | yyyyy2.2 87 | UU 88 | yyyyy2.3 89 | 90 | wwwww3 91 | 92 | """) 93 | elements = Friendly.find(xml, "command") 94 | assert(Enum.count(elements) == 2) 95 | [c1, c2] = elements 96 | assert(c1.name == "command") 97 | assert(c2.name == "command") 98 | assert(c1.attributes["id"] == "c1") 99 | assert(c2.attributes["id"] == "c2") 100 | assert(String.trim(c1.text) == "") 101 | assert(String.trim(c2.text) == "UU") 102 | end 103 | 104 | test "Multiple text nodes" do 105 | xml = String.trim(""" 106 | 107 | 108 | XXXXX1 109 | 110 | yyyyy2.1 111 | 112 | yyyyy2.2 113 | 114 | yyyyy2.3 115 | 116 | wwwww3 117 | 118 | """) 119 | elements = Friendly.find(xml, "target") 120 | assert(Enum.count(elements) == 1) 121 | target = hd(elements) 122 | assert(String.trim(target.text) =~ "yyyyy2.1") 123 | assert(String.trim(target.text) =~ "yyyyy2.2") 124 | assert(String.trim(target.text) =~ "yyyyy2.3") 125 | assert(Enum.count(target.texts) == 3) 126 | end 127 | 128 | @tag :skip 129 | test "Readme Example" do 130 | xml = """ 131 | 132 | 133 | 134 | Gambardella, Matthew 135 | XML Developer's Guide 136 | Computer 137 | 44.95 138 | 2000-10-01 139 | An in-depth look at creating applications 140 | with XML. 141 | 142 | 143 | Ralls, Kim 144 | Midnight Rain 145 | Fantasy 146 | 5.95 147 | 2000-12-16 148 | A former architect battles corporate zombies, 149 | an evil sorceress, and her own childhood to become queen 150 | of the world. 151 | 152 | 153 | """ 154 | 155 | books = Friendly.find(xml, "book") 156 | 157 | Enum.each(books, fn book -> 158 | id = book.attributes["id"] 159 | IO.puts("Book [#{id}]") 160 | 161 | title = Enum.find(book.elements, fn elm -> elm.name == "title" end) 162 | IO.puts("\ttitle: #{title.text}") 163 | 164 | author = Enum.find(book.elements, fn elm -> elm.name == "author" end) 165 | IO.puts("\tauthor: #{author.text}") 166 | end) 167 | end 168 | 169 | end 170 | -------------------------------------------------------------------------------- /friendly.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 31 | 202 | 215 | 216 | --------------------------------------------------------------------------------