├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── config └── config.exs ├── lib └── social_parser.ex ├── mix.exs ├── mix.lock └── test ├── social_parser_test.exs └── test_helper.exs /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 11 | /doc 12 | 13 | # If the VM crashes, it generates a dump, let's ignore it too. 14 | erl_crash.dump 15 | 16 | # Also ignore archive artifacts (built via "mix archive.build"). 17 | *.ez 18 | 19 | # Elixir language server files 20 | .elixir_ls -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: elixir 2 | elixir: 3 | - 1.3.0 4 | env: MIX_ENV=test 5 | otp_release: 6 | - 19.0 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Stuart Welham 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SocialParser 2 | 3 | [![Join the chat at https://gitter.im/swelham/social_parser](https://badges.gitter.im/swelham/social_parser.svg)](https://gitter.im/swelham/social_parser?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 4 | [![Build Status](https://travis-ci.org/swelham/social_parser.svg?branch=master)](https://travis-ci.org/swelham/social_parser) [![Hex Version](https://img.shields.io/hexpm/v/social_parser.svg)](https://hex.pm/packages/social_parser) 5 | 6 | A small library for parsing out common social elements such as hashtags, mentions and urls. 
7 | 8 | ## Usage 9 | 10 | Add `social_parser` to your list of dependencies in `mix.exs`: 11 | 12 | ```elixir 13 | def deps do 14 | [{:social_parser, "~> 2.0.0"}] 15 | end 16 | ``` 17 | 18 | You can then parse out the social components like so: 19 | 20 | ```elixir 21 | defmodule SocialParserTest do 22 | def do_social_stuff() do 23 | message = "hi @you checkout http://example.com/ that +someone hosted #example" 24 | 25 | # parse out all components into a list 26 | components = SocialParser.parse(message) 27 | 28 | IO.inspect(components) 29 | # [ 30 | # {:text, "hi ", {0, 3}}, 31 | # {:mention, "@you", {4, 8}}, 32 | # {:text, " checkout ", {9, 19}}, 33 | # {:link, "http://example.com/", {20, 39}}, 34 | # {:text, " that ", {40, 46}}, 35 | # {:mention, "+someone", {47, 55}}, 36 | # {:text, " hosted ", {56, 64}}, 37 | # {:hashtag, "#example", {65, 73}} 38 | # ] 39 | 40 | # extract targeted components 41 | some_components = SocialParser.extract(message, [:hashtags, :mentions]) 42 | 43 | IO.inspect(some_components) 44 | #%{ 45 | # hashtags: ["#example"], 46 | # mentions: ["@you", "+someone"] 47 | #} 48 | end 49 | end 50 | ``` 51 | 52 | # TODO 53 | 54 | * Merge the private `parse` and `parse_component` functions as there is some duplication of code 55 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for 9 | # 3rd-party users, it should be done in your "mix.exs" file. 
10 | 11 | # You can configure for your application as: 12 | # 13 | # config :social_parser, key: :value 14 | # 15 | # And access this configuration in your application as: 16 | # 17 | # Application.get_env(:social_parser, :key) 18 | # 19 | # Or configure a 3rd-party app: 20 | # 21 | # config :logger, level: :info 22 | # 23 | 24 | # It is also possible to import configuration files, relative to this 25 | # directory. For example, you can emulate configuration per environment 26 | # by uncommenting the line below and defining dev.exs, test.exs and such. 27 | # Configuration from the imported file will override the ones defined 28 | # here (which is why it is important to import them last). 29 | # 30 | # import_config "#{Mix.env}.exs" 31 | -------------------------------------------------------------------------------- /lib/social_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule SocialParser do 2 | @moduledoc """ 3 | SocialParser is used to parse out common social message components 4 | such as hashtags, mentions and urls. 
5 | """ 6 | 7 | @whitespace_chars [?\s, ?\t, ?\n] 8 | @breaking_chars [?#, ?@, ?+ | @whitespace_chars] 9 | 10 | @doc """ 11 | Returns a list of three element tuples (`{:type, "content", {start_pos, end_pos}}`) containing 12 | all components found for the given `message` 13 | 14 | Prefixes used 15 | 16 | * `#` for hashtags 17 | * `@` or `+` for mentions 18 | * `http://` or `https://` for links 19 | 20 | Usage 21 | 22 | iex> SocialParser.parse("hi @you checkout http://example.com/ that +someone hosted #example") 23 | [ 24 | {:text, "hi ", {0, 3}}, 25 | {:mention, "@you", {4, 8}}, 26 | {:text, " checkout ", {9, 19}}, 27 | {:link, "http://example.com/", {20, 39}}, 28 | {:text, " that ", {40, 46}}, 29 | {:mention, "+someone", {47, 55}}, 30 | {:text, " hosted ", {56, 64}}, 31 | {:hashtag, "#example", {65, 73}} 32 | ] 33 | """ 34 | @spec parse(binary) :: list 35 | def parse(message) do 36 | message 37 | |> parse([]) 38 | |> Enum.reverse 39 | end 40 | 41 | @doc """ 42 | Returns a map of all components for a given `message` 43 | 44 | Usage 45 | 46 | iex> SocialParser.extract("hi @you checkout http://example.com/ that +someone hosted #example") 47 | %{ 48 | hashtags: ["#example"], 49 | mentions: ["@you", "+someone"], 50 | links: ["http://example.com/"], 51 | text: ["hi ", " checkout ", " that ", " hosted "] 52 | } 53 | """ 54 | @spec extract(binary) :: map 55 | def extract(message) do 56 | message 57 | |> parse 58 | |> Enum.group_by(&map_key(elem(&1, 0)), &elem(&1, 1)) 59 | end 60 | 61 | @doc """ 62 | Returns a map of all components for a given `message` filtered by a list of 63 | atoms specified in the `components` 64 | 65 | The available atoms are, `:hashtags`, `:mentions`, `:links` and `:text` 66 | 67 | Usage 68 | 69 | iex> SocialParser.extract("hi @you checkout http://example.com/", [:mentions, :links]) 70 | %{ 71 | mentions: ["@you"], 72 | links: ["http://example.com/"], 73 | } 74 | """ 75 | @spec extract(binary, list) :: map 76 | def extract(message, components) do 77 
| message 78 | |> extract 79 | |> Map.take(components) 80 | end 81 | 82 | defp map_key(:hashtag), do: :hashtags 83 | defp map_key(:mention), do: :mentions 84 | defp map_key(:link), do: :links 85 | defp map_key(key), do: key 86 | 87 | defp parse(<<>>, acc), 88 | do: acc 89 | 90 | defp parse("http://" <> <>, acc), 91 | do: parse_component(rest, acc, "//:ptth", :link) 92 | 93 | defp parse("https://" <> <>, acc), 94 | do: parse_component(rest, acc, "//:sptth", :link) 95 | 96 | defp parse(<>, acc), 97 | do: parse_component(rest, acc, "#", :hashtag) 98 | 99 | defp parse(<>, acc), 100 | do: parse_component(rest, acc, "@", :mention) 101 | 102 | defp parse(<>, acc), 103 | do: parse_component(rest, acc, "+", :mention) 104 | 105 | defp parse(<>, acc), 106 | do: parse_component(rest, acc, <>, :text) 107 | 108 | defp parse_component("http://" <> <>, acc, value, type) do 109 | acc = add_to_acc(acc, type, value) 110 | parse_component(rest, acc, "//:ptth", :link) 111 | end 112 | defp parse_component("https://" <> <>, acc, value, type) do 113 | acc = add_to_acc(acc, type, value) 114 | parse_component(rest, acc, "//:sptth", :link) 115 | end 116 | defp parse_component(<>, acc, value, :link) 117 | when c in @whitespace_chars do 118 | acc = add_to_acc(acc, :link, value) 119 | parse(<> <> rest, acc) 120 | end 121 | defp parse_component(<>, acc, value, :text) 122 | when c in @whitespace_chars do 123 | parse_component(rest, acc, <> <> value, :text) 124 | end 125 | defp parse_component(<>, acc, value, type) 126 | when type != :link and c in @breaking_chars do 127 | acc = add_to_acc(acc, type, value) 128 | parse(<> <> rest, acc) 129 | end 130 | defp parse_component(<>, acc, value, type) do 131 | parse_component(rest, acc, <> <> value, type) 132 | end 133 | defp parse_component(<<>>, acc, value, type) do 134 | add_to_acc(acc, type, value) 135 | end 136 | 137 | defp add_to_acc(acc, key, value) do 138 | count = get_next_count(acc) 139 | 140 | value = String.reverse(value) 141 | value_len = 
String.length(value) 142 | value_pos = {count, count + value_len} 143 | 144 | [{key, value, value_pos}] ++ acc 145 | end 146 | 147 | defp get_next_count([]), do: 0 148 | defp get_next_count([{_, _, {_, count}} | _]), do: count + 1 149 | end 150 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule SocialParser.Mixfile do 2 | use Mix.Project 3 | 4 | def project do 5 | [app: :social_parser, 6 | version: "2.0.0", 7 | elixir: "~> 1.3", 8 | description: description(), 9 | package: package(), 10 | build_embedded: Mix.env == :prod, 11 | start_permanent: Mix.env == :prod, 12 | deps: deps()] 13 | end 14 | 15 | # Configuration for the OTP application 16 | # 17 | # Type "mix help compile.app" for more information 18 | def application do 19 | [applications: [:logger]] 20 | end 21 | 22 | # Dependencies can be Hex packages: 23 | # 24 | # {:mydep, "~> 0.3.0"} 25 | # 26 | # Or git/path repositories: 27 | # 28 | # {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"} 29 | # 30 | # Type "mix help deps" for more examples and options 31 | defp deps do 32 | [{:ex_doc, ">= 0.14.0", only: :dev}] 33 | end 34 | 35 | defp description do 36 | "A small library for parsing out common social elements such as hashtags, mentions and urls." 
37 | end 38 | 39 | defp package do 40 | [name: :social_parser, 41 | maintainers: ["swelham"], 42 | licenses: ["MIT"], 43 | links: %{"GitHub" => "https://github.com/swelham/social_parser"}] 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{"earmark": {:hex, :earmark, "1.0.3", "89bdbaf2aca8bbb5c97d8b3b55c5dd0cff517ecc78d417e87f1d0982e514557b", [:mix], []}, 2 | "ex_doc": {:hex, :ex_doc, "0.14.3", "e61cec6cf9731d7d23d254266ab06ac1decbb7651c3d1568402ec535d387b6f7", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]}} 3 | -------------------------------------------------------------------------------- /test/social_parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule SocialParserTest do 2 | use ExUnit.Case 3 | doctest SocialParser 4 | 5 | @test_message ~S""" 6 | @you http://example.com/test?a=1&b=abc+123#abc 7 | this is a #test #message with #a few #test tags from +me 8 | """ 9 | 10 | test "parse should return an empty for blank input" do 11 | components = SocialParser.parse("") 12 | 13 | assert components == [] 14 | end 15 | 16 | test "parse/1 should return an array of all components" do 17 | components = SocialParser.parse(@test_message) 18 | 19 | assert components == [ 20 | {:mention, "@you", {0, 4}}, 21 | {:text, " ", {5, 6}}, 22 | {:link, "http://example.com/test?a=1&b=abc+123#abc", {7, 48}}, 23 | {:text, "\nthis is a ", {49, 60}}, 24 | {:hashtag, "#test", {61, 66}}, 25 | {:text, " ", {67, 68}}, 26 | {:hashtag, "#message", {69, 77}}, 27 | {:text, " with ", {78, 84}}, 28 | {:hashtag, "#a", {85, 87}}, 29 | {:text, " few ", {88, 93}}, 30 | {:hashtag, "#test", {94, 99}}, 31 | {:text, " tags from ", {100, 111}}, 32 | {:mention, "+me", {112, 115}}, 33 | {:text, "\n", {116, 117}} 34 | ] 35 | end 36 | 37 | test "parse/1 should split joined hashtags" do 
38 | components = SocialParser.parse("#one#two") 39 | 40 | assert components == [ 41 | {:hashtag, "#one", {0, 4}}, 42 | {:hashtag, "#two", {5, 9}} 43 | ] 44 | end 45 | 46 | test "parse/1 should split joined mentions" do 47 | components = SocialParser.parse("@one@two+three+four") 48 | 49 | assert components == [ 50 | {:mention, "@one", {0, 4}}, 51 | {:mention, "@two", {5, 9}}, 52 | {:mention, "+three", {10, 16}}, 53 | {:mention, "+four", {17, 22}} 54 | ] 55 | end 56 | 57 | test "extract/2 should return a map containing hashtags" do 58 | map = SocialParser.extract(@test_message, [:hashtags]) 59 | 60 | assert map == %{ 61 | hashtags: ["#test", "#message", "#a", "#test"] 62 | } 63 | end 64 | 65 | test "extract/2 should return a map containing mentions" do 66 | map = SocialParser.extract(@test_message, [:mentions]) 67 | 68 | assert map == %{ 69 | mentions: ["@you", "+me"] 70 | } 71 | end 72 | 73 | test "extract/2 should return a map containing links" do 74 | map = SocialParser.extract(@test_message, [:links]) 75 | 76 | assert map == %{ 77 | links: ["http://example.com/test?a=1&b=abc+123#abc"] 78 | } 79 | end 80 | 81 | test "extract/2 should return a map containing text components" do 82 | map = SocialParser.extract(@test_message, [:text]) 83 | 84 | assert map == %{ 85 | text: [" ", "\nthis is a ", " ", " with ", " few ", " tags from ", "\n"] 86 | } 87 | end 88 | 89 | test "extract/1 should return a map containing all social components" do 90 | map = SocialParser.extract(@test_message) 91 | 92 | assert map == %{ 93 | hashtags: ["#test", "#message", "#a", "#test"], 94 | mentions: ["@you", "+me"], 95 | links: ["http://example.com/test?a=1&b=abc+123#abc"], 96 | text: [" ", "\nthis is a ", " ", " with ", " few ", " tags from ", "\n"] 97 | } 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | 
--------------------------------------------------------------------------------