├── test ├── fixtures │ ├── empty.yml │ ├── referers_social.yml │ └── referers_search.yml ├── test_helper.exs ├── downloads │ └── .gitignore ├── ref_inspector │ ├── downloader │ │ ├── adapter │ │ │ └── hackney_test.exs │ │ └── readme_test.exs │ ├── database │ │ ├── loader_test.exs │ │ ├── empty_test.exs │ │ ├── init_test.exs │ │ ├── initializer_test.exs │ │ └── reload_test.exs │ ├── config_test.exs │ └── parser_test.exs └── mix │ └── tasks │ └── ref_inspector │ ├── download_renaming_test.exs │ └── download_test.exs ├── verify ├── database │ └── .gitignore ├── mix.exs ├── config │ └── config.exs └── lib │ └── mix │ ├── ref_inspector │ └── verify │ │ ├── fixture.ex │ │ └── cleanup.ex │ └── tasks │ └── ref_inspector │ └── verify.ex ├── .gitignore ├── .dialyzer_ignore.exs ├── .formatter.exs ├── .github ├── .yamllint └── workflows │ ├── verification.yml │ └── ci.yml ├── .credo.exs ├── lib ├── ref_inspector │ ├── application.ex │ ├── downloader │ │ ├── adapter.ex │ │ ├── adapter │ │ │ └── hackney.ex │ │ └── readme.ex │ ├── database │ │ ├── state.ex │ │ ├── loader.ex │ │ ├── location.ex │ │ └── parser.ex │ ├── result.ex │ ├── downloader.ex │ ├── supervisor.ex │ ├── parser.ex │ ├── database.ex │ └── config.ex ├── mix │ └── tasks │ │ └── ref_inspector │ │ └── download.ex └── ref_inspector.ex ├── priv └── ref_inspector.readme.md ├── config └── config.exs ├── bench ├── database.exs ├── parse.exs └── data │ └── referers.yml ├── README.md ├── mix.exs ├── mix.lock ├── LICENSE └── CHANGELOG.md /test/fixtures/empty.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /test/downloads/.gitignore: 
-------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /verify/database/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.yml 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | deps/ 3 | doc/ 4 | plts/ 5 | -------------------------------------------------------------------------------- /.dialyzer_ignore.exs: -------------------------------------------------------------------------------- 1 | [ 2 | {"lib/ref_inspector/config.ex", :contract_supertype} 3 | ] 4 | -------------------------------------------------------------------------------- /test/fixtures/referers_social.yml: -------------------------------------------------------------------------------- 1 | social: 2 | Twitter: 3 | domains: 4 | - twitter.com 5 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | [ 2 | inputs: [ 3 | "{bench,config,lib,test,verify}/**/*.{ex,exs}", 4 | "{.credo,.dialyzer_ignore,.formatter,mix}.exs" 5 | ] 6 | ] 7 | -------------------------------------------------------------------------------- /.github/.yamllint: -------------------------------------------------------------------------------- 1 | extends: default 2 | 3 | rules: 4 | document-start: disable 5 | line-length: 6 | max: 120 7 | truthy: 8 | check-keys: false 9 | -------------------------------------------------------------------------------- /.credo.exs: -------------------------------------------------------------------------------- 1 | %{ 2 | configs: [ 3 | %{ 4 | name: "default", 5 | files: %{ 6 | included: ["bench/", "config/", "lib/", "test/", "verify/"] 7 | } 
defmodule RefInspector.Application do
  @moduledoc false

  use Application

  # Application callback: start type and arguments are ignored, the
  # top-level supervisor is started with its defaults.
  @impl Application
  def start(_start_type, _start_args) do
    RefInspector.Supervisor.start_link()
  end
end
defmodule RefInspector.Downloader.Adapter.HackneyTest do
  use ExUnit.Case, async: true

  alias RefInspector.Downloader.Adapter.Hackney, as: Adapter

  # The adapter is expected to hand back the error tuple of a failed request.
  test "errors returned from adapter" do
    result = Adapter.read_remote("invalid")

    assert result == {:error, :nxdomain}
  end
end
defmodule RefInspector.Database.State do
  @moduledoc false

  # `:database` and `:yaml_reader` carry no default (nil); startup is
  # synchronous and not silent unless explicitly overridden.
  defstruct database: nil,
            yaml_reader: nil,
            startup_silent: false,
            startup_sync: true

  @type t :: %__MODULE__{
          database: atom,
          yaml_reader: {module, atom, [term]},
          startup_silent: boolean,
          startup_sync: boolean
        }
end
defmodule RefInspector.Downloader.Adapter.Hackney do
  @moduledoc false

  alias RefInspector.Config

  @behaviour RefInspector.Downloader.Adapter

  # Fetches `location` using hackney with the configured `:http_opts`.
  #
  # Returns:
  #   * `{:ok, body}` for a 2xx response
  #   * `{:error, {:http_status, status}}` for any other HTTP status
  #   * `{:error, reason}` when the request itself (or reading the body) fails
  @impl RefInspector.Downloader.Adapter
  def read_remote(location) do
    _ = Application.ensure_all_started(:hackney)

    http_opts = Config.get(:http_opts, [])

    case :hackney.get(location, [], [], http_opts) do
      {:ok, status, _headers, client} when status in 200..299 ->
        :hackney.body(client)

      {:ok, status, _headers, client} ->
        # Without this check the body of an error page (404, 500, unfollowed
        # redirect, ...) would be returned as if it were the database contents.
        # The unread body is skipped so the connection is not left dangling.
        _ = :hackney.skip_body(client)
        {:error, {:http_status, status}}

      {:error, _} = error ->
        error
    end
  end
end
defmodule RefInspector.Database.Loader do
  @moduledoc false

  @doc """
  Reads a database file through the given reader.

  `File.stat/1` is used to check that `file` is accessible; if it fails the
  `{:error, reason}` tuple is returned unchanged. Otherwise the reader is
  invoked as `apply(mod, fun, [file | extra_args])` and the first element of
  the list it returns is wrapped in an `:ok` tuple (`{:ok, []}` if the
  list is empty).
  """
  @spec load(Path.t(), {module, atom, [term]}) :: {:ok, list} | {:error, File.posix()}
  def load(file, {reader_mod, reader_fun, reader_extra_args}) do
    with {:ok, _stat} <- File.stat(file) do
      reader_args = [file | reader_extra_args]
      documents = apply(reader_mod, reader_fun, reader_args)

      first_document(documents)
    end
  end

  # Only the first element of the reader result is used.
  defp first_document([document | _rest]), do: {:ok, document}
  defp first_document([]), do: {:ok, []}
end
defmodule RefInspector.Database.Location do
  @moduledoc false

  alias RefInspector.Config

  @doc """
  Returns the local path to store a configured database at.

  For a `{local, remote}` tuple the `local` name is used, for a plain remote
  location its basename. Both are joined onto the configured database path.
  """
  @spec local({binary, binary} | binary) :: binary
  def local({local, _remote}), do: Path.join(Config.database_path(), local)

  def local(remote) do
    database_path = Config.database_path()
    filename = Path.basename(remote)

    Path.join(database_path, filename)
  end

  @doc """
  Returns the remote path to download a configured database from.

  This is the second element of a `{local, remote}` tuple or the value itself.
  """
  @spec remote({binary, binary} | binary) :: binary
  def remote(config) do
    case config do
      {_local, remote} -> remote
      remote -> remote
    end
  end
end
defmodule RefInspector.Database.InitTest do
  use ExUnit.Case, async: false

  import ExUnit.CaptureLog

  alias RefInspector.Database

  @missing_file "something_that_is_no_file"

  # Snapshot the configured database files and put them back after each test.
  setup do
    previous_files = Application.get_env(:ref_inspector, :database_files)

    on_exit(fn ->
      Application.put_env(:ref_inspector, :database_files, previous_files)
    end)
  end

  test "log info when initial load failed" do
    Application.put_env(:ref_inspector, :database_files, [@missing_file])

    child_spec = {Database, database: :init_test, startup_sync: false}

    log =
      capture_log(fn ->
        pid = start_supervised!(child_spec)

        # NOTE(review): presumably used as a synchronisation point so the
        # non-sync startup has been processed before the log capture ends.
        :sys.get_state(pid)
      end)

    assert log =~ ~r/Failed to load #{@missing_file}: :enoent/
  end
end
defmodule Mix.RefInspector.Verify.Cleanup do
  @moduledoc """
  Cleans up testcases.
  """

  @doc """
  Cleans up a test case.

  The following values are rewritten, everything else is left untouched:

    * `medium: :null` becomes `:unknown`
    * `medium: "internal"` becomes `:internal`
    * `source: :null` becomes `:unknown`
    * `term: :null` becomes `:none`
  """
  @spec cleanup(testcase :: map) :: map
  def cleanup(testcase) do
    testcase
    |> swap(:medium, :null, :unknown)
    |> swap(:medium, "internal", :internal)
    |> swap(:source, :null, :unknown)
    |> swap(:term, :null, :none)
  end

  # Replaces the value stored under `key` only if it currently is `from`.
  # Anything that does not match (missing key, other value) passes through.
  defp swap(testcase, key, from, to) do
    case testcase do
      %{^key => ^from} -> %{testcase | key => to}
      _ -> testcase
    end
  end
end
defmodule RefInspector.Downloader do
  @moduledoc """
  Fetches copies of the configured database file(s).

  All files will be stored in the configured database path with the default
  setting being the result of `Application.app_dir(:ref_inspector, "priv")`.

  Please consult `RefInspector.Config` for details on database configuration.

  ## Mix Task

  Please see `Mix.Tasks.RefInspector.Download` if you are interested in
  using a mix task to obtain your database file(s).
  """

  alias RefInspector.Config
  alias RefInspector.Database.Location

  @doc """
  Performs the download of the configured database files.

  The database path is created if necessary. Each configured remote is read
  using the configured downloader adapter and written to its local location.

  Returns `:ok` after all files have been written. Raises if the database
  path cannot be created, if the adapter does not return `{:ok, content}`,
  or if a file cannot be written.
  """
  @spec download() :: :ok
  def download do
    File.mkdir_p!(Config.database_path())

    Enum.each(Config.yaml_urls(), fn config ->
      local = Location.local(config)
      remote = Location.remote(config)

      {:ok, content} = Config.downloader_adapter().read_remote(remote)

      # Raise on write errors: a discarded `{:error, reason}` would make a
      # failed download look successful to the caller.
      File.write!(local, content)
    end)
  end
end
defmodule RefInspector.Database.InitializerTest do
  use ExUnit.Case, async: false

  # Agent recording the value the configured init function was called with.
  defmodule Initializer do
    use Agent

    def start_link(_opts) do
      Agent.start_link(fn -> nil end, name: __MODULE__)
    end

    def call_init(result \\ :ok_empty) do
      Agent.update(__MODULE__, fn _previous -> result end)
    end

    def get_init do
      Agent.get(__MODULE__, fn recorded -> recorded end)
    end
  end

  # Put the original `:init` configuration back once all tests have run.
  setup_all do
    previous_init = Application.get_env(:ref_inspector, :init)

    on_exit(fn ->
      :ok = Application.put_env(:ref_inspector, :init, previous_init)
    end)
  end

  test "init {mod, fun} called upon database (re-) start" do
    initializer = {Initializer, :call_init}
    :ok = Application.put_env(:ref_inspector, :init, initializer)

    {:ok, _} = start_supervised(Initializer)
    {:ok, _} = start_supervised({RefInspector.Database, database: :initializer_test})

    assert :ok_empty == Initializer.get_init()
  end

  test "init {mod, fun, args} called upon database (re-) start" do
    initializer = {Initializer, :call_init, [:ok_passed]}
    :ok = Application.put_env(:ref_inspector, :init, initializer)

    {:ok, _} = start_supervised(Initializer)
    {:ok, _} = start_supervised({RefInspector.Database, database: :initializer_test})

    assert :ok_passed == Initializer.get_init()
  end
end
defmodule Mix.Tasks.RefInspector.DownloadRenamingTest do
  use ExUnit.Case, async: false

  import ExUnit.CaptureIO

  alias Mix.Tasks.RefInspector.Download

  @fixture_file "empty.yml"
  @fixture_path Path.expand("../../../fixtures", __DIR__)
  @test_file "empty_renamed.yml"
  @test_path Path.expand("../../../downloads", __DIR__)

  # Serves the fixture directory from a local httpd instance and points the
  # `:remote_urls` configuration at it using a custom local filename.
  setup_all do
    httpd_opts = [
      port: 0,
      server_name: ~c"ref_inspector_test",
      server_root: String.to_charlist(@fixture_path),
      document_root: String.to_charlist(@fixture_path)
    ]

    {:ok, httpd_pid} = :inets.start(:httpd, httpd_opts)

    # configure app to use testing webserver
    remote_base = "http://localhost:#{:httpd.info(httpd_pid)[:port]}"
    yaml_urls = Application.get_env(:ref_inspector, :remote_urls)

    :ok =
      Application.put_env(:ref_inspector, :remote_urls, [
        {@test_file, "#{remote_base}/#{@fixture_file}"}
      ])

    on_exit(fn ->
      Application.put_env(:ref_inspector, :remote_urls, yaml_urls)
    end)
  end

  test "download with custom filename" do
    Mix.shell(Mix.Shell.IO)

    orig_path = Application.get_env(:ref_inspector, :database_path)
    test_file = Path.join([@test_path, @test_file])

    # Registered before the path is changed so the original value is restored
    # even if the download task raises; otherwise the test path would leak
    # into every test running afterwards.
    on_exit(fn ->
      Application.put_env(:ref_inspector, :database_path, orig_path)
    end)

    if File.exists?(test_file) do
      File.rm!(test_file)
    end

    console =
      capture_io(fn ->
        Application.put_env(:ref_inspector, :database_path, @test_path)
        Download.run(["--force"])
        Application.put_env(:ref_inspector, :database_path, orig_path)
      end)

    fixture_file = Path.join([@fixture_path, @fixture_file])

    assert File.exists?(test_file)
    assert String.contains?(console, test_file)
    assert File.stat!(test_file).size == File.stat!(fixture_file).size
  end
end
assert RefInspector.ready?() 31 | assert RefInspector.parse("http://www.google.com/test").source == :unknown 32 | assert RefInspector.parse("http://twitter.com/test").source == "Twitter" 33 | end 34 | 35 | test "warns about missing files configuration (sync reload)" do 36 | Application.put_env(:ref_inspector, :database_files, []) 37 | 38 | log = 39 | capture_log(fn -> 40 | RefInspector.reload(async: false) 41 | end) 42 | 43 | assert log =~ ~r/no database files/i 44 | refute RefInspector.ready?() 45 | end 46 | 47 | test "warns about missing files configuration (async reload)" do 48 | Application.put_env(:ref_inspector, :database_files, []) 49 | 50 | log = 51 | capture_log(fn -> 52 | RefInspector.reload() 53 | 54 | :ref_inspector_default 55 | |> Process.whereis() 56 | |> :sys.get_state() 57 | end) 58 | 59 | assert log =~ ~r/no database files/i 60 | refute RefInspector.ready?() 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RefInspector 2 | 3 | Referer parser library. 4 | 5 | ## Package Setup 6 | 7 | To use RefInspector with your projects, edit your `mix.exs` file and add the required dependencies: 8 | 9 | ```elixir 10 | defp deps do 11 | [ 12 | # ... 13 | {:ref_inspector, "~> 2.0"}, 14 | # ... 15 | ] 16 | end 17 | ``` 18 | 19 | If you want to use a manual supervision approach (without starting the application) please look at the inline documentation of `RefInspector.Supervisor`. 20 | 21 | ## Application Configuration 22 | 23 | Out of the box the default database will be stored in the `:priv_dir` of `:ref_inspector`. Both the database(s) and path used can be changed. 24 | 25 | For a detailed list of available configuration options please consult `RefInspector.Config`. 
26 | 27 | ## Referer Database 28 | 29 | The default database is taken from the [snowplow-referer-parser](https://github.com/snowplow-referer-parser/referer-parser) project. 30 | 31 | ## Basic Usage 32 | 33 | ### Database Download 34 | 35 | You need to obtain a copy of the configured database by calling either `mix ref_inspector.download` from the command line or `RefInspector.Downloader.download/0` from within your application. 36 | 37 | Refer to `RefInspector.Downloader` for more details. 38 | 39 | ### Referer Parsing 40 | 41 | ```elixir 42 | iex> RefInspector.parse("http://www.google.com/search?q=ref_inspector") 43 | %RefInspector.Result{ 44 | medium: "search", 45 | referer: "http://www.google.com/search?q=ref_inspector", 46 | source: "Google", 47 | term: "ref_inspector" 48 | } 49 | ``` 50 | 51 | Full documentation is available inline in the `RefInspector` module and at [https://hexdocs.pm/ref_inspector](https://hexdocs.pm/ref_inspector). 52 | 53 | ## Benchmark 54 | 55 | Several (minimal) benchmark scripts are included. Please refer to the Mixfile or `mix help` output for their names. 56 | 57 | ## Resources 58 | 59 | - [snowplow-referer-parser](https://github.com/snowplow-referer-parser/referer-parser) 60 | 61 | ## License 62 | 63 | [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) 64 | 65 | _Referers.yml_ taken from the [snowplow-referer-parser](https://github.com/snowplow-referer-parser/referer-parser) project. See there for detailed license information about the data contained. 66 | -------------------------------------------------------------------------------- /verify/lib/mix/tasks/ref_inspector/verify.ex: -------------------------------------------------------------------------------- 1 | defmodule Mix.Tasks.RefInspector.Verify do 2 | @moduledoc """ 3 | Verifies RefInspector results. 
4 | """ 5 | 6 | @shortdoc "Verifies parser results" 7 | 8 | use Mix.Task 9 | 10 | alias Mix.RefInspector.Verify 11 | alias RefInspector.Downloader 12 | 13 | def run(args) do 14 | {opts, _argv, _errors} = OptionParser.parse(args, strict: [quick: :boolean]) 15 | 16 | :ok = maybe_download(opts) 17 | {:ok, _} = Application.ensure_all_started(:ref_inspector) 18 | 19 | verify_all() 20 | Mix.shell().info("Verification complete!") 21 | :ok 22 | end 23 | 24 | defp compare(testcase, result) do 25 | testcase.uri == result.referer && testcase.medium == result.medium && 26 | testcase.source == result.source && testcase.term == result.term 27 | end 28 | 29 | defp maybe_download(quick: true), do: :ok 30 | 31 | defp maybe_download(_) do 32 | {:ok, _} = Application.ensure_all_started(:hackney) 33 | :ok = Downloader.download() 34 | :ok = Verify.Fixture.download() 35 | 36 | Mix.shell().info("=== Skip download using '--quick' ===") 37 | 38 | :ok 39 | end 40 | 41 | defp verify([]), do: nil 42 | 43 | defp verify([%{uri: testuri} = testcase | testcases]) do 44 | result = RefInspector.parse(testuri) 45 | 46 | if compare(testcase, result) do 47 | verify(testcases) 48 | else 49 | IO.puts("-- verification failed --") 50 | IO.puts("referer: #{testuri}") 51 | IO.puts("testcase: #{inspect(testcase)}") 52 | IO.puts("result: #{inspect(result)}") 53 | 54 | throw("verification failed") 55 | end 56 | end 57 | 58 | defp verify_all do 59 | fixture = Verify.Fixture.local_file() 60 | 61 | if File.exists?(fixture) do 62 | verify_fixture(fixture) 63 | else 64 | Mix.shell().error("Fixture file #{fixture} is missing.") 65 | Mix.shell().error("Please run without '--quick' param to download it!") 66 | end 67 | end 68 | 69 | defp verify_fixture(fixture) do 70 | [testcases] = :yamerl_constr.file(fixture, [:str_node_as_binary]) 71 | 72 | testcases 73 | |> Enum.map(fn testcase -> 74 | testcase 75 | |> Enum.into(%{}, fn {k, v} -> {String.to_atom(k), v} end) 76 | |> Verify.Cleanup.cleanup() 77 | end) 78 | |> 
defmodule RefInspector.Database.Parser do
  @moduledoc false

  @doc """
  Parses a list of database entries and modifies them to be usable.

  `entries` is a list of `{medium, sources}` tuples where every source is a
  `{name, details}` tuple and `details` is an enumerable of key/value pairs
  (the keys `"domains"` and `"parameters"` are read, both default to `[]`).

  The result is a map keyed by the last part of each configured host (the
  first element of the reversed, dot-separated host). Every value is a list of

      {host, subdomain_suffix, path, subpath_prefix, parameters, medium, name}

  tuples, ordered by the combined byte size of `subdomain_suffix` and
  `subpath_prefix` (largest first).
  """
  @spec parse([{binary, list}]) :: map
  def parse(entries) do
    entries
    |> parse([])
    |> Enum.reduce(%{}, &reduce_database/2)
    |> Map.new(fn {last_part, sources} -> {last_part, sort_database(sources)} end)
  end

  defp parse([], acc), do: acc

  defp parse([{medium, sources} | entries], acc) do
    # Prepending the freshly parsed chunk only walks the (small) new list and
    # never the accumulator.
    parse(entries, parse_sources(sources, medium) ++ acc)
  end

  # Expands one source into one map per configured domain, keeping the
  # domains in their configured order.
  defp parse_domains(source, domains) do
    Enum.map(domains, fn domain ->
      uri = URI.parse("http://#{domain}")

      source
      |> Map.put(:host, uri.host)
      |> Map.put(:host_parts, uri.host |> String.split(".") |> Enum.reverse())
      |> Map.put(:path, uri.path || "/")
    end)
  end

  # Flattens all sources of a medium into a single list of per-domain maps.
  # `Enum.flat_map/2` keeps source and domain order without re-copying an
  # ever growing accumulator for every source.
  defp parse_sources(sources, medium) do
    Enum.flat_map(sources, fn {name, details} ->
      details = Map.new(details)
      domains = Map.get(details, "domains", [])
      parameters = Map.get(details, "parameters", [])

      parse_domains(%{medium: medium, name: name, parameters: parameters}, domains)
    end)
  end

  # Files a parsed source under the last part of its host so a lookup only
  # has to scan the entries sharing that part.
  defp reduce_database(source, red_acc) do
    %{
      host: host,
      host_parts: [last_part | _],
      medium: medium,
      name: name,
      parameters: parameters,
      path: path
    } = source

    entry = {
      host,
      "." <> String.trim_leading(host, "."),
      path,
      String.trim_trailing(path, "/") <> "/",
      parameters,
      medium,
      name
    }

    Map.update(red_acc, last_part, [entry], &[entry | &1])
  end

  # Largest host suffix + path prefix first, so the first entry matched during
  # a lookup is the one with the longest configured host/path combination.
  defp sort_database(entries) do
    Enum.sort(entries, fn {_, host_a, _, path_a, _, _, _}, {_, host_b, _, path_b, _, _, _} ->
      byte_size(host_a <> path_a) > byte_size(host_b <> path_b)
    end)
  end
end
defmodule RefInspector.Parser do
  @moduledoc false

  alias RefInspector.Database
  alias RefInspector.Result

  @doc """
  Checks if a given URI struct is a known referer.

  A URI without a host yields an empty result. Hosts listed in the `:internal`
  application environment of `:ref_inspector` (or subdomains of them) yield a
  result with the medium `:internal` without consulting the database.

  ## Options

  - `:database` - name of the database instance to query, falls back to
    `:default` if the option is missing or `nil`
  """
  @spec parse(URI.t(), Keyword.t()) :: Result.t()
  def parse(%URI{host: nil}, _), do: %Result{}

  def parse(%URI{host: host, path: path, query: query}, opts) do
    if internal?(host, Application.get_env(:ref_inspector, :internal, [])) do
      %Result{medium: :internal}
    else
      ref = %{
        host: host,
        host_parts: host |> String.split(".") |> Enum.reverse(),
        path: path || "/",
        query: query
      }

      # An options list without `:database` must not end up querying a
      # database named `nil`, which would report every referer as unknown.
      database = opts[:database] || :default

      parse_ref(ref, Database.list(database))
    end
  end

  # A host is internal if it equals a configured host or is a subdomain of it.
  defp internal?(host, internal_hosts) do
    Enum.any?(internal_hosts, fn internal_host ->
      host == internal_host || String.ends_with?(host, "." <> internal_host)
    end)
  end

  # The term is only looked up if there is a query string and the matched
  # source defines at least one term parameter.
  defp maybe_parse_query(nil, _, result), do: result
  defp maybe_parse_query(_, [], result), do: result

  defp maybe_parse_query(query, params, result) do
    term =
      query
      |> URI.decode_query()
      |> parse_query(params)

    %{result | term: term}
  end

  # Returns the result for the first source matching host and path of the
  # referer, or `nil` if none of the sources matches.
  defp match_sources(_, []), do: nil

  defp match_sources(
         %{host: ref_host, path: ref_path, query: query} = ref,
         [
           {src_host, src_subdomains, src_path, src_subpaths, src_parameters, src_medium,
            src_name}
           | sources
         ]
       ) do
    if (ref_host == src_host || String.ends_with?(ref_host, src_subdomains)) &&
         (ref_path == src_path || String.starts_with?(ref_path, src_subpaths)) do
      result = %Result{
        medium: src_medium,
        source: src_name
      }

      maybe_parse_query(query, src_parameters, result)
    else
      match_sources(ref, sources)
    end
  end

  # Returns the value of the first configured parameter present in the
  # decoded query, or `:none` if none of them is present.
  defp parse_query(_, []), do: :none

  defp parse_query(query, [param | params]) do
    case query do
      %{^param => value} -> value
      _ -> parse_query(query, params)
    end
  end

  # Walks the loaded database files in order and returns the first match.
  # Only the entries filed under the last part of the host are inspected.
  defp parse_ref(_, []), do: %Result{}

  defp parse_ref(%{host_parts: [first | _]} = ref, [{_database, entries} | referers]) do
    sources = Map.get(entries, first, [])

    case match_sources(ref, sources) do
      nil -> parse_ref(ref, referers)
      match -> match
    end
  end
end
defmodule RefInspector.MixProject do
  use Mix.Project

  @url_changelog "https://hexdocs.pm/ref_inspector/changelog.html"
  @url_github "https://github.com/elixir-inspector/ref_inspector"
  @version "3.0.0-dev"

  def project do
    [
      app: :ref_inspector,
      name: "RefInspector",
      version: @version,
      elixir: "~> 1.17",
      aliases: aliases(),
      deps: deps(),
      description: "Referer parser library",
      dialyzer: dialyzer(),
      docs: docs(),
      package: package(),
      test_coverage: [tool: ExCoveralls]
    ]
  end

  def application do
    [
      extra_applications: extra_applications(Mix.env()) ++ [:logger],
      mod: {RefInspector.Application, []}
    ]
  end

  # Environments the custom aliases and the coverage tasks run in by default.
  def cli do
    [
      preferred_envs: [
        "bench.database": :bench,
        "bench.parse": :bench,
        coveralls: :test,
        "coveralls.detail": :test
      ]
    ]
  end

  # Shortcuts for running the benchmark scripts located in `bench/`.
  defp aliases do
    [
      "bench.database": "run bench/database.exs",
      "bench.parse": "run bench/parse.exs"
    ]
  end

  # `:inets` is only added to the extra applications in the test environment.
  defp extra_applications(:test), do: [:inets]
  defp extra_applications(_), do: []

  defp deps do
    [
      {:benchee, "~> 1.3", only: :bench, runtime: false},
      {:credo, "~> 1.7", only: :dev, runtime: false},
      {:dialyxir, "~> 1.4", only: :dev, runtime: false},
      {:ex_doc, ">= 0.0.0", only: :dev, runtime: false},
      {:excoveralls, "~> 0.16.0", only: :test, runtime: false},
      {:hackney, "~> 1.0"},
      {:yamerl, "~> 0.7"}
    ]
  end

  defp dialyzer do
    [
      flags: [
        :error_handling,
        :underspecs,
        :unmatched_returns
      ],
      ignore_warnings: ".dialyzer_ignore.exs",
      list_unused_filters: true,
      plt_add_apps: [:mix],
      plt_core_path: "plts",
      plt_local_path: "plts"
    ]
  end

  defp docs do
    [
      extras: [
        "CHANGELOG.md",
        LICENSE: [title: "License"],
        "README.md": [title: "Overview"]
      ],
      formatters: ["html"],
      groups_for_modules: [
        "Database Downloader": [
          RefInspector.Downloader,
          RefInspector.Downloader.Adapter
        ]
      ],
      main: "RefInspector",
      skip_undefined_reference_warnings_on: ["CHANGELOG.md"],
      source_ref: "v#{@version}",
      source_url: @url_github
    ]
  end

  defp package do
    [
      files: ["CHANGELOG.md", "LICENSE", "mix.exs", "README.md", "lib", "priv"],
      licenses: ["Apache-2.0"],
      links: %{
        "Changelog" => @url_changelog,
        "GitHub" => @url_github
      }
    ]
  end
end
defmodule RefInspector do
  @moduledoc """
  Referer parser library.

  ## Preparation

  1. Verify your supervision setup according to `RefInspector.Supervisor`
  2. Revise the default configuration values of `RefInspector.Config` and
     adjust to your project/environment where necessary
  3. Download a copy of the parser database file(s) as outlined in
     `RefInspector.Downloader`

  ## Usage

      iex> RefInspector.parse("http://www.google.com/search?q=ref_inspector")
      %RefInspector.Result{
        medium: "search",
        referer: "http://www.google.com/search?q=ref_inspector",
        source: "Google",
        term: "ref_inspector"
      }

  Passing a referer string will result in a `%RefInspector.Result{}` returned
  with the following information (if available):

  - `:referer` will contain the unmodified referer passed to the parser. If a
    `%URI{}` struct was passed it will contain the result of calling
    `URI.to_string/1` with that struct.

  - `:medium` will be `:internal` (if configured), `:unknown` if no matching
    database entry could be found, or a string matching the entry in the
    database. Detecting a referer as `:internal` requires additional
    configuration (see `RefInspector.Config`).

  - `:source` will be `:unknown` if no known source could be detected.
    Otherwise it will contain a string with the provider's name.

  - `:term` will be `:none` if no query parameters were given or the provider
    has no configured term parameters in the database (mostly relevant for
    social or email referers). If a configured term parameter was found it will
    be an unencoded string (possibly empty).

  #### Note about Result Medium Atoms/Binaries

  The medium atoms `:unknown` and `:internal` are specially treated to reflect
  two special cases. One being reserved for completely unknown referers and
  one being for configured domains to not be parsed.

  Your database can still include `"unknown"` and `"internal"` sections. These
  will be parsed fully and returned using a binary as the medium instead of
  the aforementioned atoms.
  """

  alias RefInspector.Database
  alias RefInspector.Parser
  alias RefInspector.Result

  @doc """
  Checks if RefInspector is ready to perform lookups.

  The `true == ready?` definition is made on the assumption that if there is
  at least one referer in the database then lookups can be performed.

  Checking the state is done using the currently active database.
  Any potentially concurrent reload requests are not considered.
  """
  @spec ready?(atom) :: boolean
  def ready?(database \\ :default), do: [] != Database.list(database)

  @doc """
  Parses a referer.

  Passing an empty referer (`""` or `nil`) will directly return an empty result
  without accessing the database.

  A binary referer is stored in the result exactly as it was passed, it is
  not rebuilt from the parsed URI.
  """
  @spec parse(URI.t() | String.t() | nil, Keyword.t()) :: Result.t()
  def parse(ref, opts \\ [database: :default])

  def parse(nil, _), do: %Result{referer: nil}
  def parse("", _), do: %Result{referer: ""}

  def parse(ref, opts) when is_binary(ref) do
    result =
      ref
      |> URI.parse()
      |> Parser.parse(opts)

    # Keep the caller's binary: a round-trip through `URI.parse/1` and
    # `URI.to_string/1` is not guaranteed to reproduce it byte for byte.
    %{result | referer: ref}
  end

  def parse(%URI{} = uri, opts) do
    result = Parser.parse(uri, opts)

    %{result | referer: URI.to_string(uri)}
  end

  @doc """
  Reloads all databases.

  You can pass `[async: true|false]` to define if the reload should happen
  in the background or block your calling process until completed.

  Options not passed default to `[async: true, database: :default]`.
  """
  @spec reload(Keyword.t()) :: :ok
  def reload(opts \\ []) do
    [async: true, database: :default]
    |> Keyword.merge(opts)
    |> Database.reload()
  end
end
| end 57 | 58 | test "google search" do 59 | referer = "http://www.google.com/search?q=snowplow+referer+parser&hl=en&client=chrome" 60 | 61 | parsed = %Result{ 62 | referer: referer, 63 | medium: "search", 64 | source: "Google", 65 | term: "snowplow referer parser" 66 | } 67 | 68 | assert parsed == RefInspector.parse(referer) 69 | end 70 | 71 | test "google empty search" do 72 | referer = "http://www.google.com/search?q=&hl=en&client=chrome" 73 | 74 | parsed = %Result{ 75 | referer: referer, 76 | medium: "search", 77 | source: "Google", 78 | term: "" 79 | } 80 | 81 | assert parsed == RefInspector.parse(referer) 82 | end 83 | 84 | test "referer without parameters" do 85 | referer = "https://twitter.com/elixirlang" 86 | 87 | parsed = %Result{ 88 | referer: referer, 89 | medium: "social", 90 | source: "Twitter" 91 | } 92 | 93 | assert parsed == RefInspector.parse(referer) 94 | end 95 | 96 | test "referer without defined parameters" do 97 | referer = "https://twitter.com/elixirlang?nothing=defined" 98 | 99 | parsed = %Result{ 100 | referer: referer, 101 | medium: "social", 102 | source: "Twitter" 103 | } 104 | 105 | assert parsed == RefInspector.parse(referer) 106 | end 107 | 108 | test "referer without exact domain match" do 109 | referer = "https://not-twitter.com/elixirlang" 110 | 111 | assert %Result{referer: referer} == RefInspector.parse(referer) 112 | end 113 | 114 | test "referer without exact path match" do 115 | referer = "http://www.google.fr/imgres-mismatch" 116 | 117 | assert %Result{referer: referer} == RefInspector.parse(referer) 118 | end 119 | 120 | test "referer with subdomain match" do 121 | referer = "https://also.twitter.com/elixirlang" 122 | 123 | parsed = %Result{ 124 | referer: referer, 125 | medium: "social", 126 | source: "Twitter" 127 | } 128 | 129 | assert parsed == RefInspector.parse(referer) 130 | end 131 | 132 | test "referer with subpath match" do 133 | referer = "http://www.google.fr/imgres/also-matched" 134 | 135 | parsed = %Result{ 
136 | referer: referer, 137 | medium: "search", 138 | source: "Google Images" 139 | } 140 | 141 | assert parsed == RefInspector.parse(referer) 142 | end 143 | end 144 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | - test 9 | tags: 10 | - '*' 11 | workflow_dispatch: 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | dialyzer: 18 | runs-on: ubuntu-24.04 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | with: 23 | persist-credentials: false 24 | 25 | - name: setup 26 | id: setup 27 | uses: erlef/setup-beam@v1 28 | with: 29 | elixir-version: '1.19.3' 30 | otp-version: '28.1' 31 | 32 | - name: mix-cache 33 | uses: actions/cache@v4 34 | id: mix-cache 35 | with: 36 | path: deps 37 | # yamllint disable-line rule:line-length 38 | key: ${{ runner.os }}-${{ steps.setup.outputs.otp-version }}-${{ steps.setup.outputs.elixir-version }}-mix-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} 39 | 40 | - name: mix-deps 41 | if: steps.mix-cache.outputs.cache-hit != 'true' 42 | run: | 43 | mix local.rebar --force 44 | mix local.hex --force 45 | mix deps.get 46 | 47 | - name: plt-cache 48 | uses: actions/cache@v4 49 | id: plt-cache 50 | with: 51 | path: plts 52 | key: ${{ runner.os }}-${{ steps.setup.outputs.otp-version }}-${{ steps.setup.outputs.elixir-version }}-plts 53 | 54 | - name: plt-create 55 | if: steps.plt-cache.outputs.cache-hit != 'true' 56 | run: | 57 | mkdir -p plts 58 | mix dialyzer --plt 59 | 60 | - name: dialyzer 61 | run: mix dialyzer 62 | 63 | quality_actions: 64 | runs-on: ubuntu-24.04 65 | 66 | steps: 67 | - uses: actions/checkout@v4 68 | with: 69 | persist-credentials: false 70 | 71 | - run: yamllint . 
72 | working-directory: .github 73 | 74 | quality_elixir: 75 | runs-on: ubuntu-24.04 76 | 77 | steps: 78 | - uses: actions/checkout@v4 79 | with: 80 | persist-credentials: false 81 | 82 | - name: setup 83 | id: setup 84 | uses: erlef/setup-beam@v1 85 | with: 86 | elixir-version: '1.19.3' 87 | otp-version: '28.1' 88 | 89 | - name: mix-cache 90 | uses: actions/cache@v4 91 | id: mix-cache 92 | with: 93 | path: deps 94 | # yamllint disable-line rule:line-length 95 | key: ${{ runner.os }}-${{ steps.setup.outputs.otp-version }}-${{ steps.setup.outputs.elixir-version }}-mix-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} 96 | 97 | - name: mix-deps 98 | if: steps.mix-cache.outputs.cache-hit != 'true' 99 | run: | 100 | mix local.rebar --force 101 | mix local.hex --force 102 | mix deps.get 103 | 104 | - name: check unused deps 105 | run: mix deps.unlock --check-unused 106 | 107 | - name: check for compiler warnings 108 | run: | 109 | mix deps.compile 110 | mix compile --warnings-as-errors 111 | 112 | - name: format 113 | run: mix format --check-formatted 114 | 115 | - name: credo 116 | run: mix credo --strict 117 | 118 | test: 119 | runs-on: ubuntu-24.04 120 | 121 | strategy: 122 | fail-fast: false 123 | matrix: 124 | include: 125 | - elixir: '1.17.3' 126 | otp: '26.2' 127 | - elixir: '1.18.4' 128 | otp: '27.3' 129 | - elixir: '1.19.3' 130 | otp: '28.1' 131 | 132 | steps: 133 | - uses: actions/checkout@v4 134 | with: 135 | persist-credentials: false 136 | 137 | - name: setup 138 | uses: erlef/setup-beam@v1 139 | with: 140 | elixir-version: ${{ matrix.elixir }} 141 | otp-version: ${{ matrix.otp }} 142 | 143 | - name: mix-cache 144 | uses: actions/cache@v4 145 | id: mix-cache 146 | with: 147 | path: deps 148 | # yamllint disable-line rule:line-length 149 | key: ${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-mix-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} 150 | 151 | - name: mix-deps 152 | if: steps.mix-cache.outputs.cache-hit != 
defmodule RefInspector.Database do
  @moduledoc false

  use GenServer

  require Logger

  alias RefInspector.Config
  alias RefInspector.Database.Loader
  alias RefInspector.Database.Parser
  alias RefInspector.Database.State

  @ets_table_opts [:named_table, :protected, :set, read_concurrency: true]

  def start_link(opts) do
    state = init_state(opts)
    name = identifier(state.database)

    GenServer.start_link(__MODULE__, state, name: name)
  end

  @impl GenServer
  def init(%State{database: nil}), do: {:stop, "missing database name"}

  def init(%State{} = state) do
    if state.startup_sync do
      :ok = reload_databases(state)
    else
      # Queue the initial load as a message to this server instead of
      # performing it inside `init/1`.
      :ok =
        state.database
        |> identifier()
        |> GenServer.cast(:reload)
    end

    {:ok, state}
  end

  @impl GenServer
  def handle_call(:reload, _from, state) do
    state = reinit_state(state)

    {:reply, reload_databases(state), state}
  end

  @impl GenServer
  def handle_cast(:reload, state) do
    state = reinit_state(state)
    :ok = reload_databases(state)

    {:noreply, state}
  end

  @doc """
  Returns all referer definitions.

  The result is the list of `{file, entries}` tuples stored by the last
  reload. An empty list is returned if nothing has been stored yet or the
  table of the requested database does not exist.
  """
  @spec list(atom) :: [tuple]
  def list(database) do
    table_name = identifier(database)

    case :ets.lookup(table_name, :data) do
      [{:data, entries}] -> entries
      _ -> []
    end
  rescue
    # `:ets.lookup/2` raises an `ArgumentError` for a table that does not
    # exist (database not started). Everything else is left to surface.
    ArgumentError -> []
  end

  @doc """
  Reloads the database.

  Depending on the boolean option `:async` the reload will be performed
  using `GenServer.cast/2` or `GenServer.call/2`.
  """
  @spec reload(Keyword.t()) :: :ok
  def reload(opts) do
    identifier = identifier(opts[:database])

    if opts[:async] do
      GenServer.cast(identifier, :reload)
    else
      GenServer.call(identifier, :reload)
    end
  end

  # Creates the named table on first use, later calls are no-ops.
  defp create_ets_table(table_name) do
    case :ets.info(table_name) do
      :undefined ->
        _ = :ets.new(table_name, @ets_table_opts)
        :ok

      _ ->
        :ok
    end
  end

  # Name shared by the server process and its ETS table.
  defp identifier(database), do: :"ref_inspector_#{database}"

  # Builds the server state. Explicitly passed options win over the
  # application configuration which in turn wins over the struct defaults.
  defp init_state(opts) do
    :ok = Config.init_env()
    state = %State{}

    opts =
      opts
      |> init_state_option(:startup_silent, state)
      |> init_state_option(:startup_sync, state)
      |> Keyword.put_new(:database, :default)
      |> Keyword.put_new(:yaml_reader, Config.yaml_file_reader())

    struct!(State, opts)
  end

  defp init_state_option(opts, key, state) do
    default = Map.fetch!(state, key)
    config = Config.get(key, default)

    Keyword.put_new(opts, key, config)
  end

  # A file that could not be loaded contributes an empty database.
  defp parse_database({:ok, entries}, _, _) do
    Parser.parse(entries)
  end

  defp parse_database({:error, reason}, file, silent) do
    _ =
      if !silent do
        Logger.info("Failed to load #{file}: #{inspect(reason)}")
      end

    %{}
  end

  defp read_databases([], silent, _) do
    _ =
      if !silent do
        Logger.warning("Reload error: no database files configured!")
      end

    []
  end

  defp read_databases(files, silent, yaml_reader) do
    Enum.map(files, fn file ->
      entries =
        Config.database_path()
        |> Path.join(file)
        |> Loader.load(yaml_reader)
        |> parse_database(file, silent)

      {file, entries}
    end)
  end

  # Re-runs the state initialization with the current state as the options.
  defp reinit_state(state), do: state |> Map.to_list() |> init_state()

  defp reload_databases(%{database: database, startup_silent: silent, yaml_reader: yaml_reader}) do
    table_name = identifier(database)
    :ok = create_ets_table(table_name)

    Config.database_files()
    |> read_databases(silent, yaml_reader)
    |> update_ets_table(table_name)
  end

  # All datasets are stored under a single key, replacing the previous value
  # with one insert.
  defp update_ets_table(datasets, table_name) do
    true = :ets.insert(table_name, {:data, datasets})
    :ok
  end
end
"8493d45c656c5427d9c729235b99d498bd133421f3e0a683e5c1b561471291e5"}, 6 | "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, 7 | "dialyxir": {:hex, :dialyxir, "1.4.6", "7cca478334bf8307e968664343cbdb432ee95b4b68a9cba95bdabb0ad5bdfd9a", [:mix], [{:erlex, ">= 0.2.7", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "8cf5615c5cd4c2da6c501faae642839c8405b49f8aa057ad4ae401cb808ef64d"}, 8 | "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, 9 | "erlex": {:hex, :erlex, "0.2.7", "810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, 10 | "ex_doc": {:hex, :ex_doc, "0.38.3", "ddafe36b8e9fe101c093620879f6604f6254861a95133022101c08e75e6c759a", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "ecaa785456a67f63b4e7d7f200e8832fa108279e7eb73fd9928e7e66215a01f9"}, 11 | "excoveralls": {:hex, :excoveralls, "0.16.1", "0bd42ed05c7d2f4d180331a20113ec537be509da31fed5c8f7047ce59ee5a7c5", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "dae763468e2008cf7075a64cb1249c97cb4bc71e236c5c2b5e5cdf1cfa2bf138"}, 12 | "file_system": {:hex, :file_system, "1.1.0", 
"08d232062284546c6c34426997dd7ef6ec9f8bbd090eb91780283c9016840e8f", [:mix], [], "hexpm", "bfcf81244f416871f2a2e15c1b515287faa5db9c6bcf290222206d120b3d43f6"}, 13 | "hackney": {:hex, :hackney, "1.25.0", "390e9b83f31e5b325b9f43b76e1a785cbdb69b5b6cd4e079aa67835ded046867", [:rebar3], [{:certifi, "~> 2.15.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.4", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "7209bfd75fd1f42467211ff8f59ea74d6f2a9e81cbcee95a56711ee79fd6b1d4"}, 14 | "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, 15 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 16 | "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, 17 | "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: 
:nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, 18 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"}, 19 | "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, 20 | "mimerl": {:hex, :mimerl, "1.4.0", "3882a5ca67fbbe7117ba8947f27643557adec38fa2307490c4c4207624cb213b", [:rebar3], [], "hexpm", "13af15f9f68c65884ecca3a3891d50a7b57d82152792f3e19d88650aa126b144"}, 21 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, 22 | "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, 23 | "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, 24 | "statistex": {:hex, :statistex, "1.1.0", "7fec1eb2f580a0d2c1a05ed27396a084ab064a40cfc84246dbfb0c72a5c761e5", [:mix], [], "hexpm", "f5950ea26ad43246ba2cce54324ac394a4e7408fdcf98b8e230f503a0cba9cf5"}, 25 | "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.1", "a48703a25c170eedadca83b11e88985af08d35f37c6f664d6dcfb106a97782fc", [:rebar3], [], "hexpm", "b3a917854ce3ae233619744ad1e0102e05673136776fb2fa76234f3e03b23642"}, 26 | "yamerl": {:hex, :yamerl, "0.10.0", 
"4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"}, 27 | } 28 | -------------------------------------------------------------------------------- /lib/ref_inspector/config.ex: -------------------------------------------------------------------------------- 1 | defmodule RefInspector.Config do 2 | @moduledoc """ 3 | Module to simplify access to configuration values with default values. 4 | 5 | There should be no configuration required to start using `:ref_inspector` if 6 | you rely on the default values: 7 | 8 | config :ref_inspector, 9 | database_files: ["referers.yml"], 10 | database_path: Application.app_dir(:ref_inspector, "priv"), 11 | http_opts: [], 12 | remote_urls: [{"referers.yml", "https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-latest.yaml"}], 13 | startup_silent: false, 14 | startup_sync: true, 15 | yaml_file_reader: {:yamerl_constr, :file, [[:str_node_as_binary]]} 16 | 17 | The default `:database_path` is evaluated at runtime and not compiled into 18 | a release! 19 | 20 | ## How to Configure 21 | 22 | There are two ways to change the configuration values with the preferred way 23 | depending on your environment and personal taste. 24 | 25 | ### Static Configuration 26 | 27 | If you can ensure the configuration are static and not dependent on i.e. the 28 | server your application is running on, you can use a static approach by 29 | modifying your `config.exs` file: 30 | 31 | config :ref_inspector, 32 | database_files: ["referers_search.yml", "referers_social.yml"], 33 | database_path: "/path/to/ref_inspector/database_files" 34 | 35 | ### Dynamic Configuration 36 | 37 | If a compile time configuration is not possible or does not match the usual 38 | approach taken in your application you can use a runtime approach. 
39 | 40 | This is done by defining an initializer module that will automatically be 41 | called by `RefInspector.Supervisor` upon startup/restart. The configuration 42 | is expected to consist of a `{mod, fun}` or `{mod, fun, args}` tuple: 43 | 44 | # {mod, fun} 45 | config :ref_inspector, 46 | init: {MyInitModule, :my_init_mf} 47 | 48 | # {mod, fun, args} 49 | config :ref_inspector, 50 | init: {MyInitModule, :my_init_mfargs, [:foo, :bar]} 51 | 52 | defmodule MyInitModule do 53 | @spec my_init_mf() :: :ok 54 | def my_init_mf(), do: my_init_mfargs(:foo, :bar) 55 | 56 | @spec my_init_mfargs(atom, atom) :: :ok 57 | def my_init_mfargs(:foo, :bar) do 58 | priv_dir = Application.app_dir(:my_app, "priv") 59 | 60 | Application.put_env(:ref_inspector, :database_path, priv_dir) 61 | end 62 | end 63 | 64 | The function is required to always return `:ok`. 65 | 66 | ## Startup Behaviour 67 | 68 | Databases are loaded synchronously when starting the application. 69 | 70 | You can change this behaviour to have the application force an asynchronous 71 | database loading during the initial startup: 72 | 73 | config :ref_inspector, 74 | startup_sync: false 75 | 76 | This can lead to the first parsing calls to work with an empty database 77 | and therefore not return the results you expect. 78 | 79 | ### Starting Silently 80 | 81 | When starting the application you will receive warnings if the database is 82 | not available. If you want to hide these messages you can configure the 83 | startup the be completely silent: 84 | 85 | config :ref_inspector, 86 | startup_silent: true 87 | 88 | ## Database Configuration 89 | 90 | Configuring the database to use can be done using three related values: 91 | 92 | - `:database_files` 93 | - `:database_path` 94 | - `:remote_urls` 95 | 96 | The `:database_path` is the directory to look for when loading the databases. 97 | It is also the place where `RefInspector.Downloader` stores a copy of the 98 | configured files. 
99 | 100 | For the actual files loaded there is `:database_files`, a list of filenames 101 | to load in the order specified. All files are expected to be inside the 102 | configured database path. 103 | 104 | When downloading the databases through `RefInspector.Downloader` the value 105 | `:remote_urls` is of utmost importance. It defines where each file is located. 106 | 107 | config :ref_inspector, 108 | remote_urls: [ 109 | "http://example.com/database.yml", 110 | {"database_local.yml", "http://example.com/database_remote.yml"} 111 | ] 112 | 113 | To configure a remote database name you can either define a plain URL. It will 114 | be stored locally under the filename that is extracted from the url. In above 115 | example that would be `"database.yml"`. 116 | 117 | If the remote and local names match you can configure a `{local, remote}` 118 | tuple to deactivate the automatic name extraction. 119 | 120 | ### Internal Domains 121 | 122 | To exclude some domains from parsing you can mark 123 | them as `:internal` using your configuration: 124 | 125 | config :ref_inspector, 126 | internal: ["www.example.com", "www.example.org"] 127 | 128 | If a referer matches one of the configured 129 | domains (== ends with, paths ignored!), it will return a result with 130 | the medium `:internal`. Both `:source` and `:term` will be left at the 131 | initial/unknown state not intended for further processing. 132 | 133 | ## Download Configuration 134 | 135 | Using the default configuration all download requests for your database files 136 | are done using [`:hackney`](https://hex.pm/packages/hackney). To pass custom 137 | configuration values to hackney you can use the key `:http_opts`: 138 | 139 | config :ref_inspector, 140 | http_opts: [proxy: "http://mycompanyproxy.com"] 141 | 142 | Please see 143 | [`:hackney.request/5`](https://hexdocs.pm/hackney/hackney.html#request-5) 144 | for a complete list of available options. 
145 | 146 | If you want to change the library used to download the databases you can 147 | configure a module implementing the `RefInspector.Downloader.Adapter` 148 | behaviour: 149 | 150 | config :ref_inspector, 151 | downloader_adapter: MyDownloaderAdapter 152 | 153 | ## YAML File Reader Configuration 154 | 155 | By default the library [`:yamerl`](https://hex.pm/packages/yamerl) will 156 | be used to read and decode the yaml database files. You can configure this 157 | reader to be a custom module: 158 | 159 | config :ref_inspector, 160 | yaml_file_reader: {module, function} 161 | 162 | config :ref_inspector, 163 | yaml_file_reader: {module, function, extra_args} 164 | 165 | The configured module will receive the file to read as the first argument with 166 | any optionally configured extra arguments after that. 167 | """ 168 | 169 | @upstream_remote "https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-latest.yaml" 170 | 171 | @default_files ["referers.yml"] 172 | @default_urls [{"referers.yml", @upstream_remote}] 173 | 174 | @default_downloader_adapter RefInspector.Downloader.Adapter.Hackney 175 | @default_yaml_reader {:yamerl_constr, :file, [[:str_node_as_binary]]} 176 | 177 | @doc """ 178 | Provides access to configuration values with optional environment lookup. 179 | """ 180 | @spec get(atom, term) :: term 181 | def get(key, default \\ nil) do 182 | Application.get_env(:ref_inspector, key, default) 183 | end 184 | 185 | @doc """ 186 | Returns the list of configured database files. 187 | """ 188 | @spec database_files() :: [binary] 189 | def database_files do 190 | case get(:database_files) do 191 | nil -> default_files() 192 | files when is_list(files) -> files 193 | end 194 | end 195 | 196 | @doc """ 197 | Returns the configured database path. 198 | 199 | If the path is not defined the `priv` dir of `:ref_inspector` 200 | as returned by `Application.app_dir(:ref_inspector, "priv")` will be used. 
201 | """ 202 | @spec database_path() :: String.t() 203 | def database_path do 204 | case get(:database_path) do 205 | nil -> Application.app_dir(:ref_inspector, "priv") 206 | path -> path 207 | end 208 | end 209 | 210 | @doc """ 211 | Returns the default list of database files. 212 | """ 213 | @spec default_files() :: [binary] 214 | def default_files, do: @default_files 215 | 216 | @doc """ 217 | Returns the default list of database urls. 218 | """ 219 | @spec default_urls() :: [{binary, binary}] 220 | def default_urls, do: @default_urls 221 | 222 | @doc """ 223 | Returns whether the remote database matches the default. 224 | """ 225 | @spec default_remote_database?() :: boolean 226 | def default_remote_database?, do: yaml_urls() == default_urls() 227 | 228 | @doc """ 229 | Returns the configured downloader adapter module. 230 | 231 | The modules is expected to adhere to the behaviour defined in 232 | `RefInspector.Downloader.Adapter`. 233 | """ 234 | @spec downloader_adapter() :: module 235 | def downloader_adapter, do: get(:downloader_adapter, @default_downloader_adapter) 236 | 237 | @doc """ 238 | Calls the optionally configured init method. 239 | """ 240 | @spec init_env() :: :ok 241 | def init_env do 242 | case get(:init) do 243 | nil -> :ok 244 | {mod, fun} -> apply(mod, fun, []) 245 | {mod, fun, args} -> apply(mod, fun, args) 246 | end 247 | end 248 | 249 | @doc """ 250 | Returns the `{mod, fun, extra_args}` to be used when reading a yaml file. 251 | """ 252 | @spec yaml_file_reader() :: {module, atom, [term]} 253 | def yaml_file_reader do 254 | case get(:yaml_file_reader) do 255 | {_, _, _} = mfargs -> mfargs 256 | {mod, fun} -> {mod, fun, []} 257 | _ -> @default_yaml_reader 258 | end 259 | end 260 | 261 | @doc """ 262 | Returns the remote urls of the database file. 
263 | """ 264 | @spec yaml_urls() :: [String.t() | {String.t(), String.t()}] 265 | def yaml_urls do 266 | case get(:remote_urls) do 267 | files when is_list(files) and 0 < length(files) -> files 268 | _ -> default_urls() 269 | end 270 | end 271 | end 272 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v3.0.0-dev 4 | 5 | - Backwards incompatible changes 6 | - Minimum required Elixir version is now `~> 1.17` 7 | 8 | ## v2.1.0 (2025-08-29) 9 | 10 | - Enhancements 11 | - Match referrers based on "longest host/path combination first" 12 | 13 | ## v2.0.0 (2022-07-26) 14 | 15 | - Enhancements 16 | - Calling `RefInspector.parse/1` with an empty referer (`""` or `nil`) will now return the input as is in the result struct instead of previously returning `nil` for both inputs 17 | 18 | - Bug fixes 19 | - The mix download task should no longer start unnecessary applications 20 | 21 | - Backwards incompatible changes 22 | - Minimum required Elixir version is now `~> 1.9` 23 | - Several deprecated functions have been removed completely: 24 | - `RefInspector.Downloader.path_local/1` 25 | - `RefInspector.Downloader.path_remote/1` 26 | - `RefInspector.Downloader.read_remote/1` 27 | - `RefInspector.Downloader.README.path_local/0` 28 | - `RefInspector.Downloader.README.path_priv/0` 29 | - Startup is now done with a blocking database load by default 30 | 31 | ## v1.3.1 (2020-05-06) 32 | 33 | - Bug fixes 34 | - Default remote URL for the referer parser database has been changed after upstream changes ([#3](https://github.com/elixir-inspector/ref_inspector/issues/3)) 35 | 36 | ## v1.3.0 (2019-11-27) 37 | 38 | - Bug fixes 39 | - Hosts are now matched more strict to match expectations ([#2](https://github.com/elixir-inspector/ref_inspector/issues/2)) 40 | - `*.host` will still match `host` 41 | - `*host` (without a dot) will no longer match `host` 42 | - Paths are now matched more strict to match expectations ([#2](https://github.com/elixir-inspector/ref_inspector/issues/2)) 43 | - `/some/folder` will still match `/some` 44 | - `/somefolder` 
(without a slash) will no longer match `/some` 45 | 46 | ## v1.2.0 (2019-08-03) 47 | 48 | - Enhancements 49 | - Warnings when starting without a database available can be silenced 50 | 51 | - Bug fixes 52 | - The mix download task now works properly with initializer modules 53 | 54 | ## v1.1.0 (2019-07-03) 55 | 56 | - Enhancements 57 | - Configuring `startup_sync: true` allows you to ensure a synchronous database load is attempted before allowing to parse referers 58 | - Database entries are now stored in a single named table instead of using an intermediate reference table 59 | - Default configuration entries for files and urls are available through `RefInspector.Config.default_files/0` and `RefInspector.Config.default_urls/0` 60 | - Output of mix task `ref_inspector.download` can be prevented by passing `--quiet` upon invocation. This does NOT imply `--force` and will still ask for confirmation 61 | - Passing `async: false` to `RefInspector.reload/1` allows you to block your calling process until the reload has finished 62 | - The library used to download the database files can be changed by configuring a module implementing the `RefInspector.Downloader.Adapter` behaviour 63 | - The library used to read YAML files can be changed by using the `:yaml_file_reader` configuration 64 | 65 | - Deprecations 66 | - Several functions are now declared internal and will result in a `Logger.info/1` message when called until they will be eventually removed: 67 | - `RefInspector.Downloader.path_local/1` 68 | - `RefInspector.Downloader.path_remote/1` 69 | - `RefInspector.Downloader.read_remote/1` 70 | - `RefInspector.Downloader.README.path_local/0` 71 | - `RefInspector.Downloader.README.path_priv/0` 72 | 73 | ## v1.0.0 (2018-11-24) 74 | 75 | - Ownership has been transferred to the [`elixir-inspector`](https://github.com/elixir-inspector) organisation 76 | 77 | - Enhancements 78 | - Documentation is now available inline (`@moduledoc`, ...) 
with the `README.md` file targeting the repository (development) instead of releases 79 | - Downloading the databases ensures hackney is started to allow calling `mix run --no-start -e "RefInspector.Downloader.download()"` 80 | - Initializer modules can be defined with additional arguments by using `{mod, fun, args}` 81 | - Parsing can now be performed on `URI.t()` referers 82 | - The default database path has been set to `Application.app_dir(:ref_inspector, "priv")` 83 | - The download mix task will now exit with code `1` if it aborts due to missing configuration 84 | 85 | - Backwards incompatible changes 86 | - Internal parser process pooling has been removed. If you require pooling you need to manually wrap `RefInspector.parse/1` 87 | - Medium information in the result struct is now returned as a `String.t()` instead of an `atom`. The only exceptions are `:unknown` and `:internal` referers 88 | - Minimum required Elixir version is now `~> 1.5` 89 | - Support for `{:system, var}` configuration has been removed 90 | 91 | ## v0.20.0 (2018-07-22) 92 | 93 | - Enhancements 94 | - Parsing speed has been improved and made more independent of database size 95 | - The configurable `:init` method will now be automatically executed when running the mix download task without manually ensuring the application is started 96 | 97 | - Deprecations 98 | - Accessing the system environment by configuring `{:system, var}` or `{:system, var, default}` will now result in a `Logger.info/1` message and will stop working in a future release 99 | 100 | - Backwards incompatible changes 101 | - The mix task alias `ref_inspector.yaml.download` has been removed 102 | - The reload alias `RefInspector.reload_databases/0` has been removed 103 | 104 | ## v0.19.0 (2018-02-13) 105 | 106 | - Enhancements 107 | - Finding the data table is now done via a named lookup table instead of calling the database state server 108 | - Old data tables are deleted with a configurable delay after reloading to 
avoid race conditions (and the resulting empty lookup responses) 109 | - If you need to check if the database is loaded (i.e. "no longer empty") you can use `RefInspector.ready?/0` 110 | 111 | ## v0.18.0 (2017-12-31) 112 | 113 | - Enhancements 114 | - Download task name has been shortened to `ref_inspector.download` 115 | - Reloading the database if part of the configuration is missing or broken (database path / database files) will issue a warning while resuming operation with an empty database 116 | 117 | - Deprecations 118 | - The reload method `RefInspector.reload_databases/0` has been renamed to `RefInspector.reload/0` 119 | - The mix task `ref_inspector.yaml.download` has been renamed. The alias in place will be removed in a future version 120 | 121 | - Bug fixes 122 | - ETS tables are now properly cleaned after reload 123 | 124 | ## v0.17.0 (2017-11-15) 125 | 126 | - Enhancements 127 | - All databases can be reloaded (asynchronously) using `RefInspector.reload_databases/0` 128 | - Configuration can be done on supervisor (re-) start by setting a `{mod, fun}` tuple for the config key `:init`. This method will be called without arguments 129 | - When using the mix download task with a default remote configuration an information README file is placed next to the downloaded file(s) 130 | 131 | - Soft deprecations (no warnings) 132 | - Support for `{:system, "ENV_VARIABLE"}` configuration has been removed from the documentation. 
It will eventually be removed completely after a proper deprecation phase 133 | 134 | ## v0.16.0 (2017-09-24) 135 | 136 | - Backwards incompatible changes 137 | - Minimum required Elixir version is now `~> 1.3` 138 | 139 | ## v0.15.0 (2017-09-12) 140 | 141 | - Enhancements 142 | - Supervision can now be done without starting the application 143 | - The database downloader has been promoted to a directly usable module 144 | 145 | ## v0.14.0 (2017-05-31) 146 | 147 | - Enhancements 148 | - Empty referers (`""` or `nil`) now return a result without performing an actual lookup 149 | - System environment configuration can set an optional default value to be used if the environment variable is unset 150 | 151 | - Bug fixes 152 | - Properly handles `nil` values passed to the lookup 153 | 154 | - Backwards incompatible changes 155 | - Support for single `:remote_url` download configuration has been removed 156 | 157 | ## v0.13.0 (2016-11-19) 158 | 159 | - Enhancements 160 | - Downloaded files can be automatically stored under a custom filename differing from the URL basename 161 | - Multiple files can be configured for download 162 | 163 | - Deprecations 164 | - Configuring a single `:remote_url` for download has been deprecated 165 | 166 | ## v0.12.0 (2016-09-22) 167 | 168 | - Enhancements 169 | - Multiple databases can be configured to load during startup. 
Lookups are done in order until a match is found 170 | 171 | - Backwards incompatible changes 172 | - Downloaded databases are stored under the basename of the remote file instead of the filename of the "first configured database" 173 | - Support for `:yaml` as database configuration has been removed 174 | 175 | ## v0.11.0 (2016-09-07) 176 | 177 | - Enhancements 178 | - Remote URL of database file is now configurable 179 | 180 | - Deprecations 181 | - Configuring a single `:yaml` as the database has been deprecated 182 | 183 | - Backwards incompatible changes 184 | - Support for loading a database file at runtime using `RefInspector.load/1` has been removed 185 | 186 | ## v0.10.0 (2016-08-19) 187 | 188 | - Enhancements 189 | - Database download is done using hackney in order to prepare an upcoming auto-update feature 190 | - If the initial load of the database (during process initialisation) fails a message will be sent through `Logger.info/1` 191 | 192 | - Backwards incompatible changes 193 | - Downloads are now done using `:hackney` instead of `mix`. 
This may force you to manually reconfigure the client 194 | - Minimum required Elixir version is now `~> 1.2` 195 | - Minimum required Erlang version is now `~> 18.0` 196 | 197 | ## v0.9.0 (2016-03-30) 198 | 199 | - Enhancements 200 | - Database is reloaded if the storage process gets restarted 201 | - Path can be configured by accessing the system environment 202 | - Referer database can be reloaded using `RefInspector.load/1` 203 | 204 | - Backwards incompatible changes 205 | - Reloading the database drops previously loaded (unconfigured) entries 206 | 207 | ## v0.8.0 (2015-07-18) 208 | 209 | - Enhancements 210 | - Domains to be detected as `:internal` can be configured 211 | 212 | ## v0.7.0 (2015-06-01) 213 | 214 | - Enhancements 215 | - Dependencies not used in production builds are marked as optional 216 | - Displays expanded download path for `mix ref_inspector.yaml.download` 217 | - Verification script now automatically downloads database file 218 | - Worker pool options are no longer defined at compile time 219 | 220 | - Backwards incompatible changes 221 | - Pool configuration is now expected to be a `Keyword.t()` 222 | 223 | ## v0.6.0 (2015-04-03) 224 | 225 | - Initial Release 226 | -------------------------------------------------------------------------------- /bench/data/referers.yml: -------------------------------------------------------------------------------- 1 | unknown: 2 | Google: 3 | domains: 4 | - developers.google.com 5 | 6 | Yahoo!: 7 | domains: 8 | - news.yahoo.com 9 | 10 | email: 11 | AOL Mail: 12 | domains: 13 | - mail.aol.com 14 | Freenet: 15 | domains: 16 | - webmail.freenet.de 17 | Gmail: 18 | domains: 19 | - mail.google.com 20 | - inbox.google.com 21 | Outlook.com: 22 | domains: 23 | - mail.live.com 24 | - outlook.live.com 25 | Yahoo! 
Mail: 26 | domains: 27 | - mail.yahoo.net 28 | - mail.yahoo.com 29 | - mail.yahoo.co.uk 30 | - mail.yahoo.co.jp 31 | 32 | social: 33 | Facebook: 34 | domains: 35 | - facebook.com 36 | - fb.me 37 | - m.facebook.com 38 | - l.facebook.com 39 | - lm.facebook.com 40 | Twitter: 41 | domains: 42 | - twitter.com 43 | - t.co 44 | Instagram: 45 | domains: 46 | - instagram.com 47 | Youtube: 48 | domains: 49 | - youtube.com 50 | - youtu.be 51 | XING: 52 | domains: 53 | - xing.com 54 | 55 | search: 56 | Alexa: 57 | parameters: 58 | - q 59 | domains: 60 | - alexa.com 61 | - search.toolbars.alexa.com 62 | Amazon: 63 | parameters: 64 | - keywords 65 | domains: 66 | - amazon.com 67 | - www.amazon.com 68 | Bing: 69 | parameters: 70 | - q 71 | - Q 72 | domains: 73 | - bing.com 74 | - www.bing.com 75 | - msnbc.msn.com 76 | - dizionario.it.msn.com 77 | - cc.bingj.com 78 | - m.bing.com 79 | Digg: 80 | parameters: 81 | - s 82 | domains: 83 | - digg.com 84 | GMX: 85 | parameters: 86 | - su 87 | domains: 88 | - suche.gmx.net 89 | Google: 90 | parameters: 91 | - q 92 | domains: 93 | - www.google.com 94 | - www.google.ac 95 | - www.google.ad 96 | - www.google.com.af 97 | - www.google.com.ag 98 | - www.google.com.ai 99 | - www.google.am 100 | - www.google.it.ao 101 | - www.google.com.ar 102 | - www.google.as 103 | - www.google.at 104 | - www.google.com.au 105 | - www.google.az 106 | - www.google.ba 107 | - www.google.com.bd 108 | - www.google.be 109 | - www.google.bf 110 | - www.google.bg 111 | - www.google.com.bh 112 | - www.google.bi 113 | - www.google.bj 114 | - www.google.com.bn 115 | - www.google.com.bo 116 | - www.google.com.br 117 | - www.google.bs 118 | - www.google.co.bw 119 | - www.google.com.by 120 | - www.google.by 121 | - www.google.com.bz 122 | - www.google.ca 123 | - www.google.com.kh 124 | - www.google.cc 125 | - www.google.cd 126 | - www.google.cf 127 | - www.google.cat 128 | - www.google.cg 129 | - www.google.ch 130 | - www.google.ci 131 | - www.google.co.ck 132 | - 
www.google.cl 133 | - www.google.cm 134 | - www.google.cn 135 | - www.google.com.co 136 | - www.google.co.cr 137 | - www.google.com.cu 138 | - www.google.cv 139 | - www.google.com.cy 140 | - www.google.cz 141 | - www.google.de 142 | - www.google.dj 143 | - www.google.dk 144 | - www.google.dm 145 | - www.google.com.do 146 | - www.google.dz 147 | - www.google.com.ec 148 | - www.google.ee 149 | - www.google.com.eg 150 | - www.google.es 151 | - www.google.com.et 152 | - www.google.fi 153 | - www.google.com.fj 154 | - www.google.fm 155 | - www.google.fr 156 | - www.google.ga 157 | - www.google.gd 158 | - www.google.ge 159 | - www.google.gf 160 | - www.google.gg 161 | - www.google.com.gh 162 | - www.google.com.gi 163 | - www.google.gl 164 | - www.google.gm 165 | - www.google.gp 166 | - www.google.gr 167 | - www.google.com.gt 168 | - www.google.gy 169 | - www.google.com.hk 170 | - www.google.hn 171 | - www.google.hr 172 | - www.google.ht 173 | - www.google.hu 174 | - www.google.co.id 175 | - www.google.iq 176 | - www.google.ie 177 | - www.google.co.il 178 | - www.google.im 179 | - www.google.co.in 180 | - www.google.io 181 | - www.google.is 182 | - www.google.it 183 | - www.google.je 184 | - www.google.com.jm 185 | - www.google.jo 186 | - www.google.co.jp 187 | - www.google.co.ke 188 | - www.google.ki 189 | - www.google.kg 190 | - www.google.co.kr 191 | - www.google.com.kw 192 | - www.google.kz 193 | - www.google.la 194 | - www.google.com.lb 195 | - www.google.com.lc 196 | - www.google.li 197 | - www.google.lk 198 | - www.google.co.ls 199 | - www.google.lt 200 | - www.google.lu 201 | - www.google.lv 202 | - www.google.com.ly 203 | - www.google.co.ma 204 | - www.google.md 205 | - www.google.me 206 | - www.google.mg 207 | - www.google.mk 208 | - www.google.ml 209 | - www.google.mn 210 | - www.google.ms 211 | - www.google.com.mt 212 | - www.google.mu 213 | - www.google.mv 214 | - www.google.mw 215 | - www.google.com.mx 216 | - www.google.com.my 217 | - www.google.co.mz 218 | 
- www.google.com.na 219 | - www.google.ne 220 | - www.google.com.nf 221 | - www.google.com.ng 222 | - www.google.com.ni 223 | - www.google.nl 224 | - www.google.no 225 | - www.google.com.np 226 | - www.google.nr 227 | - www.google.nu 228 | - www.google.co.nz 229 | - www.google.com.om 230 | - www.google.com.pa 231 | - www.google.com.pe 232 | - www.google.com.ph 233 | - www.google.com.pk 234 | - www.google.pl 235 | - www.google.pn 236 | - www.google.com.pr 237 | - www.google.ps 238 | - www.google.pt 239 | - www.google.com.py 240 | - www.google.com.qa 241 | - www.google.ro 242 | - www.google.rs 243 | - www.google.ru 244 | - www.google.rw 245 | - www.google.com.sa 246 | - www.google.com.sb 247 | - www.google.sc 248 | - www.google.se 249 | - www.google.com.sg 250 | - www.google.sh 251 | - www.google.si 252 | - www.google.sk 253 | - www.google.com.sl 254 | - www.google.sn 255 | - www.google.sm 256 | - www.google.so 257 | - www.google.st 258 | - www.google.com.sv 259 | - www.google.td 260 | - www.google.tg 261 | - www.google.co.th 262 | - www.google.com.tj 263 | - www.google.tk 264 | - www.google.tl 265 | - www.google.tm 266 | - www.google.to 267 | - www.google.com.tn 268 | - www.google.tn 269 | - www.google.com.tr 270 | - www.google.tt 271 | - www.google.com.tw 272 | - www.google.co.tz 273 | - www.google.com.ua 274 | - www.google.co.ug 275 | - www.google.ae 276 | - www.google.co.uk 277 | - www.google.us 278 | - www.google.com.uy 279 | - www.google.co.uz 280 | - www.google.com.vc 281 | - www.google.co.ve 282 | - www.google.vg 283 | - www.google.co.vi 284 | - www.google.com.vn 285 | - www.google.vu 286 | - www.google.ws 287 | - www.google.co.za 288 | - www.google.co.zm 289 | - www.google.co.zw 290 | - google.com 291 | - google.ac 292 | - google.ad 293 | - google.com.af 294 | - google.com.ag 295 | - google.com.ai 296 | - google.am 297 | - google.it.ao 298 | - google.com.ar 299 | - google.as 300 | - google.at 301 | - google.com.au 302 | - google.az 303 | - google.ba 304 | - 
google.com.bd 305 | - google.be 306 | - google.bf 307 | - google.bg 308 | - google.com.bh 309 | - google.bi 310 | - google.bj 311 | - google.com.bn 312 | - google.com.bo 313 | - google.com.br 314 | - google.bs 315 | - google.co.bw 316 | - google.com.by 317 | - google.by 318 | - google.com.bz 319 | - google.ca 320 | - google.com.kh 321 | - google.cc 322 | - google.cd 323 | - google.cf 324 | - google.cat 325 | - google.cg 326 | - google.ch 327 | - google.ci 328 | - google.co.ck 329 | - google.cl 330 | - google.cm 331 | - google.cn 332 | - google.com.co 333 | - google.co.cr 334 | - google.com.cu 335 | - google.cv 336 | - google.com.cy 337 | - google.cz 338 | - google.de 339 | - google.dj 340 | - google.dk 341 | - google.dm 342 | - google.com.do 343 | - google.dz 344 | - google.com.ec 345 | - google.ee 346 | - google.com.eg 347 | - google.es 348 | - google.com.et 349 | - google.fi 350 | - google.com.fj 351 | - google.fm 352 | - google.fr 353 | - google.ga 354 | - google.gd 355 | - google.ge 356 | - google.gf 357 | - google.gg 358 | - google.com.gh 359 | - google.com.gi 360 | - google.gl 361 | - google.gm 362 | - google.gp 363 | - google.gr 364 | - google.com.gt 365 | - google.gy 366 | - google.com.hk 367 | - google.hn 368 | - google.hr 369 | - google.ht 370 | - google.hu 371 | - google.co.id 372 | - google.iq 373 | - google.ie 374 | - google.co.il 375 | - google.im 376 | - google.co.in 377 | - google.io 378 | - google.is 379 | - google.it 380 | - google.je 381 | - google.com.jm 382 | - google.jo 383 | - google.co.jp 384 | - google.co.ke 385 | - google.ki 386 | - google.kg 387 | - google.co.kr 388 | - google.com.kw 389 | - google.kz 390 | - google.la 391 | - google.com.lb 392 | - google.com.lc 393 | - google.li 394 | - google.lk 395 | - google.co.ls 396 | - google.lt 397 | - google.lu 398 | - google.lv 399 | - google.com.ly 400 | - google.co.ma 401 | - google.md 402 | - google.me 403 | - google.mg 404 | - google.mk 405 | - google.ml 406 | - google.mn 407 | - google.ms 
408 | - google.com.mt 409 | - google.mu 410 | - google.mv 411 | - google.mw 412 | - google.com.mx 413 | - google.com.my 414 | - google.co.mz 415 | - google.com.na 416 | - google.ne 417 | - google.com.nf 418 | - google.com.ng 419 | - google.com.ni 420 | - google.nl 421 | - google.no 422 | - google.com.np 423 | - google.nr 424 | - google.nu 425 | - google.co.nz 426 | - google.com.om 427 | - google.com.pa 428 | - google.com.pe 429 | - google.com.ph 430 | - google.com.pk 431 | - google.pl 432 | - google.pn 433 | - google.com.pr 434 | - google.ps 435 | - google.pt 436 | - google.com.py 437 | - google.com.qa 438 | - google.ro 439 | - google.rs 440 | - google.ru 441 | - google.rw 442 | - google.com.sa 443 | - google.com.sb 444 | - google.sc 445 | - google.se 446 | - google.com.sg 447 | - google.sh 448 | - google.si 449 | - google.sk 450 | - google.com.sl 451 | - google.sn 452 | - google.sm 453 | - google.so 454 | - google.st 455 | - google.com.sv 456 | - google.td 457 | - google.tg 458 | - google.co.th 459 | - google.com.tj 460 | - google.tk 461 | - google.tl 462 | - google.tm 463 | - google.to 464 | - google.com.tn 465 | - google.com.tr 466 | - google.tt 467 | - google.com.tw 468 | - google.co.tz 469 | - google.com.ua 470 | - google.co.ug 471 | - google.ae 472 | - google.co.uk 473 | - google.us 474 | - google.com.uy 475 | - google.co.uz 476 | - google.com.vc 477 | - google.co.ve 478 | - google.vg 479 | - google.co.vi 480 | - google.com.vn 481 | - google.vu 482 | - google.ws 483 | - google.co.za 484 | - google.co.zm 485 | - google.co.zw 486 | - google.tn 487 | Search.com: 488 | parameters: 489 | - q 490 | domains: 491 | - www.search.com 492 | Winamp: 493 | parameters: 494 | - q 495 | domains: 496 | - search.winamp.com 497 | Yahoo!: 498 | parameters: 499 | - p 500 | - q 501 | domains: 502 | - search.yahoo.com 503 | - yahoo.com 504 | - ar.search.yahoo.com 505 | - ar.yahoo.com 506 | - au.search.yahoo.com 507 | - au.yahoo.com 508 | - br.search.yahoo.com 509 | - br.yahoo.com 
510 | - cade.searchde.yahoo.com 511 | - cade.yahoo.com 512 | - chinese.searchinese.yahoo.com 513 | - chinese.yahoo.com 514 | - cn.search.yahoo.com 515 | - cn.yahoo.com 516 | - de.search.yahoo.com 517 | - de.yahoo.com 518 | - dk.search.yahoo.com 519 | - dk.yahoo.com 520 | - es.search.yahoo.com 521 | - es.yahoo.com 522 | - espanol.searchpanol.yahoo.com 523 | - espanol.yahoo.com 524 | - fr.search.yahoo.com 525 | - fr.yahoo.com 526 | - ie.search.yahoo.com 527 | - ie.yahoo.com 528 | - it.search.yahoo.com 529 | - it.yahoo.com 530 | - kr.search.yahoo.com 531 | - kr.yahoo.com 532 | - mx.search.yahoo.com 533 | - mx.yahoo.com 534 | - no.search.yahoo.com 535 | - no.yahoo.com 536 | - nz.search.yahoo.com 537 | - nz.yahoo.com 538 | - one.cn.yahoo.com 539 | - one.searchn.yahoo.com 540 | - qc.search.yahoo.com 541 | - qc.yahoo.com 542 | - se.search.yahoo.com 543 | - se.yahoo.com 544 | - search.searcharch.yahoo.com 545 | - uk.search.yahoo.com 546 | - uk.yahoo.com 547 | - www.yahoo.co.jp 548 | - search.yahoo.co.jp 549 | 550 | paid: 551 | AdRoll: 552 | domains: 553 | - adroll.com 554 | Criteo: 555 | domains: 556 | - cas.jp.as.criteo.com 557 | - cas.criteo.com 558 | Doubleclick: 559 | domains: 560 | - ad.doubleclick.net 561 | - ad-apac.doubleclick.net 562 | - s0.2mdn.net 563 | - s1.2mdn.net 564 | - dp.g.doubleclick.net 565 | - pubads.g.doubleclick.net 566 | --------------------------------------------------------------------------------