├── .mise.toml ├── config ├── config.exs └── test.exs ├── .formatter.exs ├── test ├── test_helper.exs ├── support │ ├── paradex_app │ │ ├── repo.ex │ │ ├── postgrex_types.ex │ │ ├── talk_group.ex │ │ └── call.ex │ ├── sql_case.ex │ ├── helpers.ex │ └── data_case.ex ├── readme_test.exs └── paradex_test.exs ├── .iex.exs ├── testing.md ├── priv └── repo │ └── migrations │ ├── 20241018005237_add_hnsw_index.exs │ ├── 20241017230643_add_embeddings.exs │ └── 20241013014316_setup.exs ├── docker-compose.yml ├── .gitignore ├── lib ├── paradex │ └── field_name.ex └── paradex.ex ├── CHANGELOG.md ├── README.md ├── mix.exs ├── notebooks ├── generate_embeddings.livemd └── hybrid_search.livemd ├── mix.lock └── LICENSE /.mise.toml: -------------------------------------------------------------------------------- 1 | [tools] 2 | erlang = '26.2.5.3' 3 | elixir = '1.17.3-otp-26' -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | if config_env() == :test do 4 | import_config "test.exs" 5 | end 6 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | ParadexApp.Repo.start_link() 3 | Ecto.Adapters.SQL.Sandbox.mode(ParadexApp.Repo, :manual) 4 | -------------------------------------------------------------------------------- /.iex.exs: -------------------------------------------------------------------------------- 1 | import Ecto.Query 2 | import Paradex 3 | 4 | alias ParadexApp.Repo 5 | alias ParadexApp.Call 6 | 7 | ParadexApp.Repo.start_link() 8 | -------------------------------------------------------------------------------- /test/support/paradex_app/repo.ex: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.Repo do 2 | use Ecto.Repo, 3 | otp_app: :paradex, 4 | adapter: Ecto.Adapters.Postgres 5 | end 6 | -------------------------------------------------------------------------------- /test/support/sql_case.ex: -------------------------------------------------------------------------------- 1 | defmodule Paradex.SQLCase do 2 | use ExUnit.CaseTemplate 3 | 4 | using do 5 | quote do 6 | import Paradex.Test.Helpers 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /test/support/paradex_app/postgrex_types.ex: -------------------------------------------------------------------------------- 1 | Postgrex.Types.define( 2 | ParadexApp.PostgrexTypes, 3 | Pgvector.extensions() ++ Paradex.extensions() ++ Ecto.Adapters.Postgres.extensions(), 4 | [] 5 | ) 6 | -------------------------------------------------------------------------------- /testing.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | ```sh 4 | mise trust && mise install 5 | docker compose up -d 6 | MIX_ENV=test mix setup 7 | psql -h 127.0.0.1 -p 5433 -U postgres paradex_test -f "paradex_test_data.sql" 8 | MIX_ENV=test mix test 9 | ``` 10 | 
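
After the steps above, a quick sanity check is to open an IEx shell against the test database and query the seeded data. This is only a sketch: it assumes the ParadeDB container from `docker-compose.yml` is still running on port `5433` and that the project's `.iex.exs` (which starts `ParadexApp.Repo`) is picked up from the project root.

```elixir
# MIX_ENV=test iex -S mix
import Ecto.Query
import Paradex

# Rows loaded from paradex_test_data.sql:
ParadexApp.Repo.aggregate(ParadexApp.Call, :count)

# A BM25 search against the index created by the migrations:
from(c in ParadexApp.Call, select: count(), where: c.transcript ~> "bus")
|> ParadexApp.Repo.all()
```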
-------------------------------------------------------------------------------- /priv/repo/migrations/20241018005237_add_hnsw_index.exs: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.Repo.Migrations.AddHNSWIndex do 2 | use Ecto.Migration 3 | 4 | def change() do 5 | create index("calls", ["embedding vector_l2_ops"], using: :hnsw) 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /priv/repo/migrations/20241017230643_add_embeddings.exs: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.Repo.Migrations.AddEmbeddings do 2 | use Ecto.Migration 3 | 4 | def change() do 5 | alter table(:calls) do 6 | add :embedding, :vector, size: 384 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /config/test.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :paradex, :ecto_repos, [ParadexApp.Repo] 4 | 5 | config :paradex, ParadexApp.Repo, 6 | pool: Ecto.Adapters.SQL.Sandbox, 7 | database: "paradex_test", 8 | username: "postgres", 9 | password: "postgres", 10 | hostname: "localhost", 11 | port: 5433, 12 | types: ParadexApp.PostgrexTypes 13 | -------------------------------------------------------------------------------- /test/support/paradex_app/talk_group.ex: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.TalkGroup do 2 | use Ecto.Schema 3 | 4 | schema "talk_groups" do 5 | field(:decimal, :integer) 6 | field(:description, :string) 7 | field(:alpha_tag, :string) 8 | field(:category, :string) 9 | field(:tag, :string) 10 | field(:active, :boolean, default: true) 11 | 12 | timestamps(type: :utc_datetime) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /test/support/helpers.ex: -------------------------------------------------------------------------------- 1 | defmodule Paradex.Test.Helpers do 2 | defmacro assert_sql(left, right) do 3 | quote bind_quoted: [left: left, right: right] do 4 | assert Paradex.Test.Helpers.to_sql(left) == Paradex.Test.Helpers.to_sql(right) 5 | end 6 | end 7 | 8 | def to_sql(query) when is_binary(query), do: query 9 | 10 | def to_sql(query) do 11 | {sql, _} = Ecto.Adapters.SQL.to_sql(:all, ParadexApp.Repo, query) 12 | sql 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /test/support/paradex_app/call.ex: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.Call do 2 | use Ecto.Schema 3 | 4 | alias ParadexApp.TalkGroup 5 | 6 | schema "calls" do 7 | field(:call_length, :integer) 8 | field(:start_time, :naive_datetime) 9 | field(:stop_time, :naive_datetime) 10 | field(:talkgroup_num, :integer) 11 | field(:transcript, :string) 12 | field(:embedding, Pgvector.Ecto.Vector) 13 | 14 | belongs_to(:talk_group, TalkGroup) 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | paradedb: 3 | image: paradedb/paradedb:latest 4 | environment: 5 | - POSTGRES_USER=postgres 6 | - POSTGRES_PASSWORD=postgres 7 | - POSTGRES_DB=paradex_test 8 | - PARADEDB_TELEMETRY=false 9 | healthcheck: 10 | test: ["CMD", "pg_isready", "-U", 
"postgres"] 11 | interval: 5s 12 | timeout: 5s 13 | retries: 5 14 | ports: 15 | - "5433:5432" 16 | volumes: 17 | - paradedb_data:/var/lib/postgresql/data/ 18 | 19 | volumes: 20 | paradedb_data: 21 | -------------------------------------------------------------------------------- /test/support/data_case.ex: -------------------------------------------------------------------------------- 1 | defmodule Paradex.DataCase do 2 | use ExUnit.CaseTemplate 3 | 4 | using do 5 | quote do 6 | alias ParadexApp.Repo 7 | 8 | import Ecto 9 | import Ecto.Changeset 10 | import Ecto.Query 11 | import Paradex.DataCase 12 | end 13 | end 14 | 15 | setup tags do 16 | Paradex.DataCase.setup_sandbox(tags) 17 | :ok 18 | end 19 | 20 | def setup_sandbox(tags) do 21 | repo_pid = Ecto.Adapters.SQL.Sandbox.start_owner!(ParadexApp.Repo, shared: not tags[:async]) 22 | on_exit(fn -> Ecto.Adapters.SQL.Sandbox.stop_owner(repo_pid) end) 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | paradex-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /lib/paradex/field_name.ex: -------------------------------------------------------------------------------- 1 | defmodule Paradex.FieldName do 2 | @behaviour Postgrex.Extension 3 | 4 | import Postgrex.BinaryUtils, warn: false 5 | 6 | # Postgres column names are limited to 59 characters ~r/[a-zA-Z0-9_]/ so 7 | # they always fall below Erlang's 64 byte threshold for reference counting. 
8 | 9 | @impl true 10 | def init(_opts), do: nil 11 | 12 | @impl true 13 | def matching(_state), do: [type: "fieldname"] 14 | 15 | @impl true 16 | def format(_state), do: :text 17 | 18 | @impl true 19 | def encode(_state) do 20 | quote do 21 | bin when is_binary(bin) -> [<<byte_size(bin)::int32>> | bin] 22 | end 23 | end 24 | 25 | @impl true 26 | def decode(_state) do 27 | quote do 28 | <<len::int32, bin::binary-size(len)>> -> bin 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /test/readme_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ReadmeTest do 2 | use ExUnit.Case, async: true 3 | 4 | test "readme version matches mix" do 5 | [_, readme_version_text, _] = 6 | Path.join(__DIR__, "../README.md") 7 | |> File.read!() 8 | |> String.split(["", ""]) 9 | 10 | [readme_version] = 11 | Regex.run(~r/{:paradex, \"(?<version>.*)\"}/, readme_version_text, capture: :all_names) 12 | 13 | {:ok, readme_version} = Version.parse_requirement(readme_version) 14 | [:~>, {readme_major, readme_minor, _, _, _}] = readme_version.lexed 15 | 16 | %Version{major: mix_major, minor: mix_minor} = 17 | Mix.Project.config()[:version] 18 | |> Version.parse!() 19 | 20 | assert mix_major === readme_major 21 | assert mix_minor === readme_minor 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /priv/repo/migrations/20241013014316_setup.exs: -------------------------------------------------------------------------------- 1 | defmodule ParadexApp.Repo.Migrations.Setup do 2 | use Ecto.Migration 3 | 4 | def up() do 5 | create table(:talk_groups) do 6 | add :decimal, :integer, null: false 7 | add :description, :text, null: false 8 | add :alpha_tag, :text, null: false 9 | add :category, :text, null: false 10 | add :tag, :text, null: false 11 | add :active, :boolean, null: false, default: true 12 | 13 | timestamps(type: :utc_datetime) 14 | end 15 | 16 | create unique_index(:talk_groups, [:decimal, :active]) 17 | 18 | create table(:calls) do 19 | add :call_length, :integer, null: false 20 | add :start_time, :naive_datetime, null: false 21 | add :stop_time, :naive_datetime, null: false 22 | add :talk_group_id, references(:talk_groups) 23 | add :talkgroup_num, :integer, null: false 24 | add :transcript, :text 25 | end 26 | 27 | create index(:calls, :start_time) 28 | 29 | calls_text_fields = 30 | %{ 31 | transcript: %{ 32 | tokenizer: %{ 33 | type: "default", 34 | stemmer: "English" 35 | } 36 | } 37 | } 38 | |> Jason.encode!() 39 | 40 | create index( 41 | :calls, 42 | [:id, :transcript, :call_length, :talkgroup_num, :talk_group_id, :start_time, :stop_time], 43 | using: "bm25", 44 | name: "calls_search_idx", 45 | options: "key_field = 'id', text_fields = '#{calls_text_fields}'" 46 | ) 47 | 48 | create index( 49 | :talk_groups, 50 | [:id, :description, :alpha_tag, :category, :tag, :active], 51 | using: "bm25", 52 | name: "talk_groups_search_idx", 53 | options: "key_field = 'id'" 54 | ) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.4.0 (2025-02-06) 4 | * ParadeDB `0.15.0` removes `paradedb.fuzzy_phrase` in favor of the new `paradedb.match`. Accordingly, `fuzzy_phrase/6` has been removed from Paradex and replaced with `match/6`. 5 | * `match/6` currently does not support the custom tokenizer option via the `tokenizer` parameter.
Consequently the query will always be tokenized the same way as the field was at index time. 6 | * A version compatibility table has been added to `README.md`. 7 | 8 | ## v0.3.3 (2025-01-14) 9 | * There are no code changes in this release. `README.md` has been updated to include compatibility with ParadeDB `0.14.x`. 10 | 11 | ## v0.3.2 (2024-11-22) 12 | * `::fieldname` casts have been updated to `::paradedb.fieldname`. This change is backwards compatible down to `0.11.0`. 13 | * Test/example migrations have been updated to demo ParadeDB `0.13.0`'s new index creation syntax. 14 | * Be mindful that upgrading to ParadeDB `0.13.0` will require you to revise your existing migrations. 15 | 16 | ## v0.3.1 (2024-11-10) 17 | * There are no code changes in this release. `README.md` has been updated to include compatibility with ParadeDB `0.12.x`. 18 | * Included `CHANGELOG.md` and fixed macro groupings in hexdocs. 19 | 20 | ## v0.3.0 (2024-11-05) 21 | * Added `Paradex.extensions/0` which presently consists of `[ Paradex.FieldName ]`. 22 | * This allows Postgrex to parameterize field names in ParadeDB query objects, solving issue [#4](https://github.com/Moosieus/paradex/issues/4). 23 | * Updated docs to include instructions for the above. 24 | 25 | *If you've configured everything correctly and are receiving an error that `fieldname` doesn't exist, try updating ParadeDB.* 26 | 27 | ## v0.2.0 (2024-10-27) 28 | * Added `lenient` and `conjunction_mode` options to `parse` (non-breaking). 29 | * Added `lenient` option to `parse_with_field` as the 2nd argument, making `conjunction_mode` the 3rd (minor breaking change). 30 | 31 | ## v0.1.0 (2024-10-19) 32 | Initial Release. 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Paradex 2 | 3 | Ecto fragments for [ParadeDB](https://www.paradedb.com/) versions `0.11.x - 0.15.1`. Refer to the table below for compatibility: 4 | 5 | | ParadeDB version | Paradex version | 6 | |---------------------|-----------------| 7 | | `0.11.0` - `0.14.1` | `0.3.3` | 8 | | `0.15.x` | `0.4.x` | 9 | 10 | ## Installation 11 | 12 | Add `:paradex` to your list of dependencies in `mix.exs`: 13 | 14 | 15 | ```elixir 16 | def deps do 17 | [ 18 | {:paradex, "~> 0.4.0"} 19 | ] 20 | end 21 | ``` 22 | 23 | 24 | Documentation is available on [HexDocs](https://hexdocs.pm/paradex/readme.html) and may also be generated with [ExDoc](https://github.com/elixir-lang/ex_doc). 25 | 26 | ## Usage 27 | 28 | You may install ParadeDB via [Docker](https://docs.paradedb.com/documentation/getting-started/install), or by [loading the extensions](https://docs.paradedb.com/deploy/self-hosted/extensions) into an existing Postgres database. 29 | 30 | Create `lib/postgrex_types.ex` with the contents below.
You may omit `Pgvector.extensions()` if you aren't working with vectors: 31 | 32 | ```elixir 33 | Postgrex.Types.define( 34 | MyApp.PostgrexTypes, 35 | Pgvector.extensions() ++ Paradex.extensions() ++ Ecto.Adapters.Postgres.extensions(), 36 | [] 37 | ) 38 | ``` 39 | 40 | Add the following to `config/config.exs`: 41 | ```elixir 42 | config :my_app, MyApp.Repo, types: MyApp.PostgrexTypes 43 | ``` 44 | 45 | 46 | Create your [Ecto schema](https://github.com/Moosieus/paradex/blob/main/test/support/paradex_app/call.ex) and a [search index](https://docs.paradedb.com/documentation/indexing/create_index) in your [migrations](https://github.com/Moosieus/paradex/blob/main/priv/repo/migrations/20241013014316_setup.exs). 47 | 48 | Once complete, you're ready to run search queries: 49 | 50 | ```elixir 51 | import Ecto.Query 52 | import Paradex 53 | 54 | alias MyApp.Call 55 | alias MyApp.Repo 56 | 57 | from( 58 | c in Call, 59 | where: c.transcript ~> "bus" 60 | ) 61 | |> Repo.all() 62 | ``` 63 | 64 | More examples are available in the [API docs](https://hexdocs.pm/paradex/Paradex.html). 65 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Paradex.MixProject do 2 | use Mix.Project 3 | 4 | @name "Paradex" 5 | @version "0.4.0" 6 | @source_url "https://github.com/Moosieus/paradex" 7 | 8 | def project do 9 | [ 10 | app: :paradex, 11 | name: @name, 12 | description: "Ecto fragments for ParadeDB.", 13 | source_url: @source_url, 14 | homepage_url: @source_url, 15 | version: @version, 16 | elixir: "~> 1.13", 17 | elixirc_paths: elixirc_paths(Mix.env()), 18 | start_permanent: Mix.env() == :prod, 19 | deps: deps(), 20 | docs: docs(), 21 | package: package(), 22 | aliases: aliases() 23 | ] 24 | end 25 | 26 | defp elixirc_paths(:test), do: elixirc_paths(:dev) ++ ["test/support"] 27 | defp elixirc_paths(_), do: ["lib"] 28 | 29 | def package do 30 | [ 31 | licenses: ["Apache-2.0"], 32 | files: ["lib", "mix.exs", "README.md", "CHANGELOG.md", "LICENSE"], 33 | links: %{"GitHub" => @source_url} 34 | ] 35 | end 36 | 37 | # Run "mix help compile.app" to learn about applications. 38 | def application do 39 | [ 40 | extra_applications: [:logger] 41 | ] 42 | end 43 | 44 | # Run "mix help deps" to learn about dependencies.
45 | defp deps do 46 | [ 47 | {:postgrex, ">= 0.0.0"}, 48 | {:pgvector, "~> 0.3.0", optional: true}, 49 | {:ecto, "~> 3.0", optional: true}, 50 | {:ecto_sql, "~> 3.0", only: :test}, 51 | {:jason, "~> 1.4", only: :test}, 52 | {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}, 53 | {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false} 54 | ] 55 | end 56 | 57 | defp docs do 58 | [ 59 | name: @name, 60 | main: "readme", 61 | source_ref: "v#{@version}", 62 | source_url: @source_url, 63 | extras: [ 64 | "README.md", 65 | "CHANGELOG.md", 66 | "notebooks/hybrid_search.livemd" 67 | ], 68 | groups_for_docs: [ 69 | {:Macros, &(&1[:section] == :macros)}, 70 | {:"Full Text queries", &(&1[:section] == :full_text_queries)}, 71 | {:"Term-level queries", &(&1[:section] == :term_level_queries)}, 72 | {:"Range queries", &(&1[:section] == :range_queries)}, 73 | {:"Phrase-level queries", &(&1[:section] == :phrase_level_queries)}, 74 | {:"Compound queries", &(&1[:section] == :compound_queries)}, 75 | {:"Specialized queries", &(&1[:section] == :specialized_queries)}, 76 | {:Functions, &(&1[:section] == nil)} 77 | ] 78 | ] 79 | end 80 | 81 | defp aliases do 82 | [ 83 | setup: ["deps.get", "ecto.setup"], 84 | "ecto.setup": ["ecto.create", "ecto.migrate"], 85 | "ecto.reset": ["ecto.drop", "ecto.setup"], 86 | test: ["ecto.create --quiet", "ecto.migrate --quiet", "test"] 87 | ] 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /notebooks/generate_embeddings.livemd: -------------------------------------------------------------------------------- 1 | # Generate Embeddings 2 | 3 | ```elixir 4 | Mix.install( 5 | [ 6 | # the ecto we know and love 7 | {:postgrex, ">= 0.0.0"}, 8 | {:pgvector, "~> 0.3.0"}, 9 | {:ecto, "~> 3.0"}, 10 | {:ecto_sql, "~> 3.0"}, 11 | # Nx stuff 12 | {:bumblebee, "~> 0.6.0"}, 13 | {:nx, "~> 0.9.0"}, 14 | {:exla, "~> 0.9.0"}, 15 | {:axon, "~> 0.7.0"}, 16 | {:kino, "~> 0.14.0"}, 17 | # paradex 18 | {:paradex, path: Path.join(__DIR__, "../"), env: :test} 19 | ], 20 | # I've configured this for my rig, yours will likely differ. 21 | config: [ 22 | paradex: [ 23 | {:ecto_repos, [ParadexApp.Repo]}, 24 | {ParadexApp.Repo, [ 25 | pool: Ecto.Adapters.SQL.Sandbox, 26 | database: "paradex_test", 27 | username: "postgres", 28 | password: "postgres", 29 | hostname: "localhost", 30 | timeout: 600_000, 31 | ownership_timeout: 600_000, 32 | pool_timeout: 600_000, 33 | port: 5433, 34 | types: ParadexApp.PostgrexTypes 35 | ]} 36 | ], 37 | exla: [ 38 | clients: [ 39 | cuda: [ 40 | platform: :cuda, 41 | memory_fraction: 0.85, 42 | device_id: 0 43 | ] 44 | ], 45 | client: :cuda 46 | ], 47 | nx: [ 48 | default_backend: {EXLA.Backend, client: :cuda, device_id: 0} 49 | ] 50 | ], 51 | config_path: :paradex, 52 | lockfile: :paradex 53 | ) 54 | ``` 55 | 56 | ## IGNORE ME!!! 
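
(In case you don't want to ignore it: the cell below streams `calls` rows that don't yet have an `embedding`, in batches of 100, runs each transcript through the `all-MiniLM-L6-v2` serving, and writes the resulting vector back with `Repo.update_all`, all inside a single long-running transaction.)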
57 | 58 | ```elixir 59 | import Ecto.Query 60 | 61 | alias ParadexApp.Repo 62 | alias ParadexApp.Call 63 | 64 | ParadexApp.Repo.start_link() 65 | 66 | {:ok, model_info} = Bumblebee.load_model({:hf, "sentence-transformers/all-MiniLM-L6-v2"}) 67 | {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "sentence-transformers/all-MiniLM-L6-v2"}) 68 | 69 | serving = Bumblebee.Text.TextEmbedding.text_embedding(model_info, tokenizer) 70 | 71 | defmodule CallStream do 72 | defp start(), do: 0 73 | 74 | defp next(last_id) do 75 | calls = 76 | from( 77 | c in Call, 78 | where: c.id > ^last_id, 79 | where: is_nil(c.embedding), 80 | order_by: [asc: c.id], 81 | limit: 100 82 | ) |> Repo.all() 83 | 84 | case calls do 85 | [] -> {:halt, last_id} 86 | calls -> {calls, List.last(calls).id} 87 | end 88 | end 89 | 90 | defp finally(_last_id), do: :noop 91 | 92 | def resource() do 93 | Stream.resource(&start/0, &next/1, &finally/1) 94 | end 95 | end 96 | 97 | update_embedding = fn call -> 98 | %{embedding: embedding} = Nx.Serving.run(serving, call.transcript) 99 | 100 | Repo.update_all( 101 | from(c in Call, where: c.id == ^call.id), 102 | set: [embedding: Pgvector.new(embedding)] 103 | ) 104 | end 105 | 106 | ( 107 | Repo.transaction(fn -> 108 | CallStream.resource() 109 | |> Stream.each(& update_embedding.(&1)) 110 | |> Stream.run() 111 | end, timeout: :infinity) 112 | ) 113 | 114 | ``` 115 | -------------------------------------------------------------------------------- /notebooks/hybrid_search.livemd: -------------------------------------------------------------------------------- 1 | # Hybrid Search Example 2 | 3 | ```elixir 4 | Mix.install( 5 | [ 6 | # the ecto we know and love 7 | {:postgrex, ">= 0.0.0"}, 8 | {:pgvector, "~> 0.3.0"}, 9 | {:ecto, "~> 3.0"}, 10 | {:ecto_sql, "~> 3.0"}, 11 | # Nx stuff 12 | {:bumblebee, "~> 0.6.0"}, 13 | {:nx, "~> 0.9.0"}, 14 | {:exla, "~> 0.9.0"}, 15 | {:axon, "~> 0.7.0"}, 16 | {:kino, "~> 0.14.0"}, 17 | # paradex 18 | {:paradex, path: Path.join(__DIR__, "../"), env: :test} 19 | ], 20 | # I've configured this for my rig, yours will likely differ. 21 | config: [ 22 | paradex: [ 23 | {:ecto_repos, [ParadexApp.Repo]}, 24 | {ParadexApp.Repo, [ 25 | pool: Ecto.Adapters.SQL.Sandbox, 26 | database: "paradex_test", 27 | username: "postgres", 28 | password: "postgres", 29 | hostname: "localhost", 30 | timeout: 600_000, 31 | ownership_timeout: 600_000, 32 | pool_timeout: 600_000, 33 | port: 5433, 34 | types: ParadexApp.PostgrexTypes 35 | ]} 36 | ], 37 | exla: [ 38 | clients: [ 39 | cuda: [ 40 | platform: :cuda, 41 | memory_fraction: 0.85, 42 | device_id: 0 43 | ] 44 | ], 45 | client: :cuda 46 | ], 47 | nx: [ 48 | default_backend: {EXLA.Backend, client: :cuda, device_id: 0} 49 | ] 50 | ], 51 | config_path: :paradex, 52 | lockfile: :paradex 53 | ) 54 | ``` 55 | 56 | ## Summary 57 | 58 | **Full disclaimer:** *This ain't no peer reviewed study, and I'm using lots of big words here that I don't ~~quite~~ understand. If I got something wrong here, please call me out.* 59 | 60 | 61 | 62 | This livebook demonstrates a means to perform hybrid search using ParadeDB and Ecto. We'll use Paradex's sample dataset, which is mostly radio chatter between public bus drivers. The text embeddings will be generated with Nx. This example's mostly cribbed from [ParadeDB's tutorial](https://docs.paradedb.com/documentation/guides/hybrid). 
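
Concretely, the two searches are fused later in this notebook with reciprocal rank fusion: each call's combined score is

$$
\text{score}(d) = \frac{1}{60 + \text{rank}_{\text{semantic}}(d)} + \frac{1}{60 + \text{rank}_{\text{BM25}}(d)}
$$

where a missing rank contributes 0, so results that either search ranks highly bubble to the top. The constant 60 is just the conventional RRF smoothing factor; it's the value the fusion query below hard-codes.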
63 | 64 | We'll start with our top-level imports: 65 | 66 | ```elixir 67 | import Ecto.Query 68 | import Paradex 69 | import Pgvector.Ecto.Query 70 | 71 | alias ParadexApp.Repo 72 | alias ParadexApp.Call 73 | 74 | Repo.start_link() 75 | ``` 76 | 77 | Next we'll load a transformer for generating text embeddings. I've gone with `sentence-transformers/all-MiniLM-L6-v2`, as I quite frankly don't know any better. 78 | 79 | ```elixir 80 | {:ok, model_info} = Bumblebee.load_model({:hf, "sentence-transformers/all-MiniLM-L6-v2"}) 81 | {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "sentence-transformers/all-MiniLM-L6-v2"}) 82 | serving = Bumblebee.Text.TextEmbedding.text_embedding(model_info, tokenizer) 83 | ``` 84 | 85 | Next up, there parameters for our hybrid search. I'm using a simple Tantivy query on the text search side for demo sake. You could elect to use a more advanced query, though. 86 | 87 | ```elixir 88 | %{embedding: embedding} = Nx.Serving.run(serving, "bus late behind schedule") 89 | search = "bus late behind schedule" 90 | vector = Pgvector.new(embedding) 91 | top_n = 25 92 | ``` 93 | 94 | We'll run our two searches and rank the `top_n` results. I'm using L2 distanceas I quite frankly don't know any better. 95 | 96 | ```elixir 97 | semantic_search = 98 | from( 99 | c in Call, 100 | select: %{ 101 | id: c.id, 102 | rank: fragment("RANK() OVER (ORDER BY ?)", l2_distance(c.embedding, ^vector)) 103 | }, 104 | order_by: [asc: l2_distance(c.embedding, ^vector)], 105 | limit: ^top_n 106 | ) 107 | 108 | bm25_search = 109 | from( 110 | c in Call, 111 | select: %{ 112 | id: c.id, 113 | rank: fragment("RANK() OVER (ORDER BY paradedb.score(?) DESC)", c.id) 114 | }, 115 | where: c.transcript ~> ^search, 116 | limit: ^top_n 117 | ) 118 | 119 | ``` 120 | 121 | We can tie them together with reciprocal rank fusion like so: 122 | 123 | ```elixir 124 | hybrid_search = 125 | from( 126 | sem in subquery(semantic_search), 127 | full_join: bm25 in subquery(bm25_search), on: sem.id == bm25.id, 128 | order_by: [desc: fragment("score"), asc: sem.id], 129 | select: %{ 130 | id: coalesce(sem.id, bm25.id), 131 | score: fragment("COALESCE(1.0 / (60 + ?), 0.0) + COALESCE(1.0 / (60 + ?), 0.0)", sem.rank, bm25.rank) 132 | }, 133 | limit: ^top_n 134 | ) 135 | ``` 136 | 137 | We can either `join/5` our schema in the query above, or use it as a subquery for a bit more flexibility, say for preloading: 138 | 139 | ```elixir 140 | from( 141 | c in Call, 142 | join: r in subquery(hybrid_search), on: c.id == r.id, 143 | preload: [:talk_group], 144 | select: %{score: r.score, call: c} 145 | ) 146 | 147 | :ok 148 | ``` 149 | 150 | That'll print a bit much for demonstration sake, so I'll abridge the select query here: 151 | 152 | ```elixir 153 | from( 154 | c in Call, 155 | join: r in subquery(hybrid_search), on: c.id == r.id, 156 | select: %{score: r.score, id: c.id, transcript: c.transcript}, 157 | limit: 7 158 | ) 159 | |> Repo.all() 160 | ``` 161 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "db_connection": {:hex, :db_connection, "2.7.0", "b99faa9291bb09892c7da373bb82cba59aefa9b36300f6145c5f201c7adf48ec", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "dcf08f31b2701f857dfc787fbad78223d61a32204f217f15e881dd93e4bdd3ff"}, 3 | "decimal": {:hex, :decimal, "2.1.1", 
"5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, 4 | "dialyxir": {:hex, :dialyxir, "1.4.4", "fb3ce8741edeaea59c9ae84d5cec75da00fa89fe401c72d6e047d11a61f65f70", [:mix], [{:erlex, ">= 0.2.7", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "cd6111e8017ccd563e65621a4d9a4a1c5cd333df30cebc7face8029cacb4eff6"}, 5 | "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, 6 | "ecto": {:hex, :ecto, "3.12.4", "267c94d9f2969e6acc4dd5e3e3af5b05cdae89a4d549925f3008b2b7eb0b93c3", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ef04e4101688a67d061e1b10d7bc1fbf00d1d13c17eef08b71d070ff9188f747"}, 7 | "ecto_sql": {:hex, :ecto_sql, "3.12.1", "c0d0d60e85d9ff4631f12bafa454bc392ce8b9ec83531a412c12a0d415a3a4d0", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.12", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aff5b958a899762c5f09028c847569f7dfb9cc9d63bdb8133bff8a5546de6bf5"}, 8 | "erlex": {:hex, :erlex, "0.2.7", "810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, 9 | "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, 10 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 11 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 12 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 13 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", 
"c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, 14 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 15 | "pgvector": {:hex, :pgvector, "0.3.0", "c55c7c0f6224b06105fc3214965c6217e4cfe907d7524cd8c27ba7612b7f8582", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: false]}], "hexpm", "aeb7c36c5851881fd1d8a39e213472fa0b07bd72cdb0acabc693055aa14693ab"}, 16 | "postgrex": {:hex, :postgrex, "0.19.1", "73b498508b69aded53907fe48a1fee811be34cc720e69ef4ccd568c8715495ea", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "8bac7885a18f381e091ec6caf41bda7bb8c77912bb0e9285212829afe5d8a8f8"}, 17 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 18 | } 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /test/paradex_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ParadexTest do 2 | use Paradex.SQLCase 3 | use Paradex.DataCase 4 | 5 | import Ecto.Query 6 | import Paradex 7 | 8 | alias ParadexApp.Repo 9 | alias ParadexApp.Call 10 | 11 | test "~>/2 generates a query" do 12 | query = 13 | from( 14 | c in Call, 15 | select: count(), 16 | where: c.transcript ~> "bus" 17 | ) 18 | 19 | sql = ~s{SELECT count(*) FROM "calls" AS c0 WHERE (c0."transcript" @@@ 'bus')} 20 | 21 | assert_sql(query, sql) 22 | 23 | assert Repo.all(query) == [215] 24 | end 25 | 26 | test "field names are successfully parameterized" do 27 | field = "transcript" 28 | 29 | query = 30 | from( 31 | c in Call, 32 | select: count(), 33 | where: c.id ~> parse_with_field(^field, "bus") 34 | ) 35 | 36 | sql = 37 | ~s[SELECT count(*) FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.parse_with_field($1::paradedb.fieldname, 'bus', lenient => FALSE, conjunction_mode => TRUE))] 38 | 39 | assert_sql(query, sql) 40 | 41 | assert Repo.all(query) == [215] 42 | end 43 | 44 | test "snippet/1 generates a query" do 45 | query = 46 | from( 47 | c in Call, 48 | select: {c.id, snippet(c.transcript)}, 49 | where: c.transcript ~> "mechanic" 50 | ) 51 | 52 | sql = 53 | ~s{SELECT c0."id", paradedb.snippet(c0."transcript", start_tag => '', end_tag => '', max_num_chars => 150) FROM "calls" AS c0 WHERE (c0."transcript" @@@ 'mechanic')} 54 | 55 | assert_sql(query, sql) 56 | 57 | assert Repo.all(query), "expected to execute successfully" 58 | end 59 | 60 | test "all/0 generates a query" do 61 | query = 62 | from( 63 | c in Call, 64 | select: c.id, 65 | where: c.id ~> all() 66 | ) 67 | 68 | sql = ~s[SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.all())] 69 | 70 | assert_sql(query, sql) 71 | 72 | assert Repo.all(query), "expected to execute successfully" 73 | end 74 | 75 | test "boolean/1 generates a query with all keys" do 76 | x = "transcript:station" 77 | 78 | query = 79 | from( 80 | c in Call, 81 | select: count(), 82 | where: 83 | c.id 84 | ~> boolean( 85 | must: [parse("transcript:transfer")], 86 | should: [parse("transcript:bus")], 87 | must_not: [parse(^x)] 88 | ) 89 | ) 90 | 91 | sql = 92 | ~s{SELECT count(*) FROM \"calls\" AS c0 WHERE (c0.\"id\" @@@ paradedb.boolean(must => ARRAY[paradedb.parse('transcript:transfer', lenient => FALSE, conjunction_mode => TRUE)], should => ARRAY[paradedb.parse('transcript:bus', lenient => FALSE, conjunction_mode => TRUE)], must_not => ARRAY[paradedb.parse($1, lenient => FALSE, conjunction_mode => TRUE)]))} 93 | 94 | assert_sql(query, sql) 95 | 96 | assert Repo.all(query) == [3] 97 | end 98 | 99 | test "boolean/1 generates a query with partial keys" do 100 | x = "transcript:station" 101 | 102 | query = 103 | from( 104 | c in Call, 105 | select: count(), 106 | where: 107 | c.id 108 | ~> boolean( 109 | must: [parse("transcript:transfer")], 110 | must_not: [parse(^x)] 111 | ) 112 | ) 113 | 114 | sql = 115 | ~s|SELECT count(*) FROM \"calls\" AS c0 WHERE (c0.\"id\" @@@ paradedb.boolean(must => ARRAY[paradedb.parse('transcript:transfer', lenient => FALSE, conjunction_mode => TRUE)], should => '{}', must_not => ARRAY[paradedb.parse($1, lenient => FALSE, conjunction_mode => TRUE)]))| 116 | 117 | assert_sql(query, sql) 118 | 119 | assert Repo.all(query) == [3] 120 | end 121 | 122 | test "boost/1 generates a query" do 123 | query = 124 | from( 125 | c in Call, 126 | select: 
c.id, 127 | where: c.id ~> boost(2.0, parse("transcript:bus")) 128 | ) 129 | 130 | sql = 131 | ~s{SELECT c0.\"id\" FROM \"calls\" AS c0 WHERE (c0.\"id\" @@@ paradedb.boost(2.0::float::real, paradedb.parse('transcript:bus', lenient => FALSE, conjunction_mode => TRUE)))} 132 | 133 | assert_sql(query, sql) 134 | 135 | assert Repo.all(query), "expected to execute successfully" 136 | end 137 | 138 | test "const_score/1 generates a query" do 139 | query = 140 | from( 141 | c in Call, 142 | select: c.id, 143 | where: c.id ~> const_score(2.0, parse("transcript:bus")) 144 | ) 145 | 146 | sql = 147 | ~s{SELECT c0.\"id\" FROM \"calls\" AS c0 WHERE (c0.\"id\" @@@ paradedb.const_score(2.0::float::real, paradedb.parse('transcript:bus', lenient => FALSE, conjunction_mode => TRUE)))} 148 | 149 | assert_sql(query, sql) 150 | 151 | assert Repo.all(query), "expected to execute successfully" 152 | end 153 | 154 | test "disjunction_max/1 generates a query" do 155 | query = 156 | from( 157 | c in Call, 158 | select: c.id, 159 | where: 160 | c.id 161 | ~> disjunction_max([ 162 | parse("transcript:bus"), 163 | int4range("call_length", 10, nil, "[)") 164 | ]) 165 | ) 166 | 167 | sql = 168 | ~s{SELECT c0.\"id\" FROM \"calls\" AS c0 WHERE (c0.\"id\" @@@ paradedb.disjunction_max(ARRAY[paradedb.parse('transcript:bus', lenient => FALSE, conjunction_mode => TRUE),paradedb.range(field => 'call_length'::paradedb.fieldname, range => int4range(10, NULL, '[)'))], 0.0::float::real))} 169 | 170 | assert_sql(query, sql) 171 | 172 | assert Repo.all(query), "expected to execute successfully" 173 | end 174 | 175 | test "empty/1 generates a query" do 176 | query = 177 | from( 178 | c in Call, 179 | select: c.id, 180 | where: c.id ~> empty() 181 | ) 182 | 183 | sql = 184 | ~s[SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.empty())] 185 | 186 | assert_sql(query, sql) 187 | 188 | assert Repo.all(query), "expected to execute successfully" 189 | end 190 | 191 | test "exists/1 generates a query" do 192 | query = 193 | from( 194 | c in Call, 195 | select: c.id, 196 | where: c.id ~> pdb_exists("call_length") 197 | ) 198 | 199 | sql = 200 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.exists('call_length'::paradedb.fieldname))} 201 | 202 | assert_sql(query, sql) 203 | 204 | assert Repo.all(query), "expected to execute successfully" 205 | end 206 | 207 | test "fuzzy_term/5 generates a query" do 208 | query = 209 | from( 210 | c in Call, 211 | select: c.id, 212 | where: c.id ~> fuzzy_term("transcript", "bus", 2, true, false) 213 | ) 214 | 215 | sql = 216 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.fuzzy_term('transcript'::paradedb.fieldname, 'bus', 2, TRUE, FALSE))} 217 | 218 | assert_sql(query, sql) 219 | 220 | assert Repo.all(query), "expected to execute successfully" 221 | end 222 | 223 | test "match/6 generates a query" do 224 | query = 225 | from( 226 | c in Call, 227 | select: c.id, 228 | where: c.id ~> match("transcript", "bus sotp", 1, false) 229 | ) 230 | 231 | sql = 232 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.match('transcript'::paradedb.fieldname, 'bus sotp', distance => 1, transposition_cost_one => FALSE, prefix => FALSE, conjunction_mode => FALSE))} 233 | 234 | 235 | assert_sql(query, sql) 236 | 237 | assert Repo.all(query), "expected to execute successfully" 238 | end 239 | 240 | # test "more_like_this/?" 
do 241 | # 242 | # end 243 | 244 | test "parse/3 generates a query" do 245 | query = 246 | from( 247 | c in Call, 248 | select: c.id, 249 | where: c.id ~> parse("transcript:bus", false, false) 250 | ) 251 | 252 | sql = 253 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.parse('transcript:bus', lenient => FALSE, conjunction_mode => FALSE))} 254 | 255 | assert_sql(query, sql) 256 | 257 | assert Repo.exists?(query), "expected to execute successfully and return true" 258 | end 259 | 260 | test "parse_with_field/4 generates a query" do 261 | query = 262 | from( 263 | c in Call, 264 | select: c.id, 265 | where: c.id ~> parse_with_field("transcript", "traffic congestion", true, false) 266 | ) 267 | 268 | sql = 269 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.parse_with_field('transcript'::paradedb.fieldname, 'traffic congestion', lenient => TRUE, conjunction_mode => FALSE))} 270 | 271 | assert_sql(query, sql) 272 | 273 | assert Repo.all(query), "expected to execute successfully and return true" 274 | end 275 | 276 | test "phrase/3 generates a query" do 277 | query = 278 | from( 279 | c in Call, 280 | select: c.id, 281 | where: c.id ~> phrase("transcript", ["bus", "stop"], 1) 282 | ) 283 | 284 | sql = 285 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.phrase('transcript'::paradedb.fieldname, ARRAY['bus','stop'], 1))} 286 | 287 | assert_sql(query, sql) 288 | 289 | assert Repo.all(query), "expected to execute successfully" 290 | end 291 | 292 | test "phrase_prefix/2 generates a query" do 293 | query = 294 | from( 295 | c in Call, 296 | select: c.id, 297 | where: c.id ~> phrase_prefix("transcript", ["en"]) 298 | ) 299 | 300 | sql = 301 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.phrase_prefix('transcript'::paradedb.fieldname, ARRAY['en'], 0))} 302 | 303 | assert_sql(query, sql) 304 | 305 | assert Repo.all(query), "expected to execute successfully" 306 | end 307 | 308 | test "int4range/4 generates a query" do 309 | query = 310 | from( 311 | c in Call, 312 | select: c.id, 313 | where: c.id ~> int4range("call_length", 5, nil, "[)") 314 | ) 315 | 316 | sql = 317 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.range(field => 'call_length'::paradedb.fieldname, range => int4range(5, NULL, '[)')))} 318 | 319 | assert_sql(query, sql) 320 | 321 | assert Repo.all(query), "expected to execute successfully" 322 | end 323 | 324 | test "int8range/4 generates a query" do 325 | query = 326 | from( 327 | c in Call, 328 | select: c.id, 329 | where: c.id ~> int8range("call_length", 5, nil, "[)") 330 | ) 331 | 332 | sql = 333 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.range(field => 'call_length'::paradedb.fieldname, range => int8range(5, NULL, '[)')))} 334 | 335 | assert_sql(query, sql) 336 | 337 | assert Repo.all(query), "expected to execute successfully" 338 | end 339 | 340 | test "daterange/4 generates a query" do 341 | start = ~D[2024-10-09] 342 | stop = ~D[2024-10-10] 343 | 344 | query = 345 | from( 346 | c in Call, 347 | select: c.id, 348 | where: c.id ~> daterange("start_time", ^start, ^stop, "[]") 349 | ) 350 | 351 | sql = 352 | ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.range(field => 'start_time'::paradedb.fieldname, range => daterange($1, $2, '[]')))} 353 | 354 | assert_sql(query, sql) 355 | 356 | assert Repo.all(query), "expected to execute successfully" 357 | end 358 | 359 | test "tsrange/4 generates a query" do 360 | from = ~U[2024-10-09 08:00:00.00Z] 361 | 
362 |     query =
363 |       from(
364 |         c in Call,
365 |         select: c.id,
366 |         where: c.id ~> tsrange("start_time", ^from, nil, "[)")
367 |       )
368 | 
369 |     sql =
370 |       ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.range(field => 'start_time'::paradedb.fieldname, range => tsrange($1, NULL, '[)')))}
371 | 
372 |     assert_sql(query, sql)
373 | 
374 |     assert Repo.all(query), "expected to execute successfully"
375 |   end
376 | 
377 |   test "regex/2 generates a query" do
378 |     query =
379 |       from(
380 |         c in Call,
381 |         select: c.id,
382 |         where: c.id ~> regex("transcript", "bus (stop|route)")
383 |       )
384 | 
385 |     sql =
386 |       ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.regex('transcript'::paradedb.fieldname, 'bus (stop|route)'))}
387 | 
388 |     assert_sql(query, sql)
389 | 
390 |     assert Repo.all(query), "expected to execute successfully"
391 |   end
392 | 
393 |   test "term/2 generates a query" do
394 |     query =
395 |       from(
396 |         c in Call,
397 |         select: count(),
398 |         where: c.id ~> term("talkgroup_num", 7695)
399 |       )
400 | 
401 |     sql =
402 |       ~s{SELECT count(*) FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.term('talkgroup_num'::paradedb.fieldname, 7695))}
403 | 
404 |     assert_sql(query, sql)
405 | 
406 |     assert Repo.all(query) == [249]
407 |   end
408 | 
409 |   test "term_set/1 generates a query" do
410 |     query =
411 |       from(
412 |         c in Call,
413 |         select: c.id,
414 |         where:
415 |           c.id
416 |           ~> term_set([
417 |             term("talk_group_id", 144),
418 |             term("talk_group_id", 145)
419 |           ])
420 |       )
421 | 
422 |     sql =
423 |       ~s{SELECT c0."id" FROM "calls" AS c0 WHERE (c0."id" @@@ paradedb.term_set(terms => ARRAY[paradedb.term('talk_group_id'::paradedb.fieldname, 144),paradedb.term('talk_group_id'::paradedb.fieldname, 145)]))}
424 | 
425 |     assert_sql(query, sql)
426 | 
427 |     assert Repo.all(query), "expected to execute successfully"
428 |   end
429 | 
430 |   test "results map to schemas" do
431 |     query =
432 |       from(
433 |         c in Call,
434 |         where: c.id ~> "transcript:mechanical",
435 |         limit: 1
436 |       )
437 | 
438 |     assert match?(%Call{}, Repo.one!(query))
439 |   end
440 | end
441 | 
--------------------------------------------------------------------------------
/lib/paradex.ex:
--------------------------------------------------------------------------------
1 | defmodule Paradex do
2 |   @moduledoc """
3 |   A collection of macros for composing ParadeDB queries.
4 |   """
5 | 
6 |   @doc """
7 |   Type extensions for Postgrex, for usage in `Postgrex.Types.define/3`.
8 |   """
9 |   def extensions do
10 |     [
11 |       Paradex.FieldName
12 |     ]
13 |   end
14 | 
15 |   @doc """
16 |   Macro for the [`@@@`](https://docs.paradedb.com/documentation/full-text/overview#basic-usage) full text search operator.
17 | 
18 |   `~>` is used because it's one of the few infix operators that Elixir can parse but doesn't currently use.
19 | 
20 |   ## Examples
21 |   Search queries can be run on fields directly:
22 |       import Paradex
23 | 
24 |       from(
25 |         c in Call,
26 |         where: c.transcript ~> "bus"
27 |       )
28 | 
29 |   Alternatively, a key field and a query object can be used for advanced queries:
30 | 
31 |       from(
32 |         c in Call,
33 |         where: c.id ~> disjunction_max([
34 |           parse("transcript:bus"),
35 |           int4range("call_length", 10, nil, "[)")
36 |         ])
37 |       )
38 |   """
39 |   @doc section: :macros
40 |   defmacro field ~> query do
41 |     quote do
42 |       fragment("? @@@ ?", unquote(field), unquote(query))
43 |     end
44 |   end
45 | 
46 |   @doc """
47 |   Returns the BM25 search score for each result, aliasing it as `pdb_score` in the query. Can be used with `score/0` to order results:
48 | 
49 |       from(
50 |         c in Call,
51 |         select: {c, score(c.id)},
52 |         where: c.transcript ~> "mechanic",
53 |         order_by: [desc: score()]
54 |       )
55 |   """
56 |   @doc section: :macros
57 |   defmacro score(key_field) do
58 |     quote do
59 |       fragment("paradedb.score(?) AS pdb_score", unquote(key_field))
60 |     end
61 |   end
62 | 
63 |   @doc section: :macros
64 |   defmacro score() do
65 |     quote do
66 |       fragment("pdb_score")
67 |     end
68 |   end
69 | 
70 |   @doc """
71 |   Same as `score/1`, but reads the alias from an interpolated variable instead. Literal aliases are not supported.
72 | 
73 |       score_alias = "my_score"
74 | 
75 |       from(
76 |         c in Call,
77 |         select: {c, score_as(c.id, ^score_alias)},
78 |         where: c.transcript ~> "mechanic",
79 |         order_by: [desc: score_as(^score_alias)]
80 |       )
81 |   """
82 |   @doc section: :macros
83 |   defmacro score_as(key_field, as) do
84 |     quote do
85 |       fragment("paradedb.score(?) AS ?", unquote(key_field), literal(unquote(as)))
86 |     end
87 |   end
88 | 
89 |   @doc section: :macros
90 |   defmacro score_as(as) do
91 |     quote do
92 |       fragment("?", literal(unquote(as)))
93 |     end
94 |   end
95 | 
96 |   @doc """
97 |   Macro for [paradedb.snippet](https://docs.paradedb.com/documentation/full-text/highlighting), used for highlighting.
98 | 
99 |       from(
100 |         c in Call,
101 |         select: {c, snippet(c.transcript)},
102 |         where: c.transcript ~> "mechanic"
103 |       )
104 |   """
105 |   @doc section: :macros
106 |   defmacro snippet(field, start_tag \\ "<b>", end_tag \\ "</b>", max_num_chars \\ 150) do
107 |     quote do
108 |       fragment(
109 |         "paradedb.snippet(?, start_tag => ?, end_tag => ?, max_num_chars => ?)",
110 |         unquote(field),
111 |         unquote(start_tag),
112 |         unquote(end_tag),
113 |         unquote(max_num_chars)
114 |       )
115 |     end
116 |   end
117 | 
118 |   #######################
119 |   ## Full Text Queries ##
120 |   #######################
121 | 
122 |   @doc """
123 |   Macro for [paradedb.match](https://docs.paradedb.com/documentation/advanced/full-text/match).
124 | 
125 |       from(
126 |         c in Call,
127 |         where: c.id ~> match("transcript", "bus stop")
128 |       )
129 |   """
130 |   @doc section: :full_text_queries
131 |   defmacro match(
132 |              field,
133 |              value,
134 |              distance \\ 0,
135 |              transposition_cost_one \\ true,
136 |              prefix \\ false,
137 |              conjunction_mode \\ false
138 |            ) do
139 |     quote do
140 |       fragment(
141 |         "paradedb.match(?::paradedb.fieldname, ?, distance => ?, transposition_cost_one => ?, prefix => ?, conjunction_mode => ?)",
142 |         unquote(field),
143 |         unquote(value),
144 |         unquote(distance),
145 |         unquote(transposition_cost_one),
146 |         unquote(prefix),
147 |         unquote(conjunction_mode)
148 |       )
149 |     end
150 |   end
151 | 
152 |   ########################
153 |   ## Term-Level Queries ##
154 |   ########################
155 | 
156 |   @doc """
157 |   Macro for [paradedb.exists](https://docs.paradedb.com/documentation/advanced/term/exists#exists).
158 | 
159 |   Prefixed with `pdb_` to avoid conflicting with `Ecto.Query.API.exists/1`.
160 | 
161 |       from(
162 |         c in Call,
163 |         where: c.id ~> pdb_exists("call_length")
164 |       )
165 |   """
166 |   @doc section: :term_level_queries
167 |   defmacro pdb_exists(field) do
168 |     quote do
169 |       fragment("paradedb.exists(?::paradedb.fieldname)", unquote(field))
170 |     end
171 |   end
172 | 
173 |   @doc """
174 |   Macro for [paradedb.fuzzy_term](https://docs.paradedb.com/documentation/advanced/term/fuzzy_term).
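  The optional `distance`, `transpose_cost_one`, and `prefix` arguments default to `2`, `true`, and `false` respectively; the example below passes all three explicitly: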
175 | 176 | from( 177 | c in Call, 178 | where: c.id ~> fuzzy_term("transcript", "bus", 2, true, false) 179 | ) 180 | """ 181 | @doc section: :term_level_queries 182 | defmacro fuzzy_term(field, value, distance \\ 2, transpose_cost_one \\ true, prefix \\ false) do 183 | quote do 184 | fragment( 185 | "paradedb.fuzzy_term(?::paradedb.fieldname, ?, ?, ?, ?)", 186 | unquote(field), 187 | unquote(value), 188 | unquote(distance), 189 | unquote(transpose_cost_one), 190 | unquote(prefix) 191 | ) 192 | end 193 | end 194 | 195 | @doc """ 196 | Macro for [paradedb.range_term](https://docs.paradedb.com/documentation/advanced/term/range_term). 197 | """ 198 | @doc section: :term_level_queries 199 | defmacro range_term(field, value) do 200 | quote do 201 | fragment( 202 | "paradedb.range_term(?::paradedb.fieldname, ?)", 203 | unquote(field), 204 | unquote(value) 205 | ) 206 | end 207 | end 208 | 209 | @doc """ 210 | Macro for [paradedb.regex](https://docs.paradedb.com/documentation/advanced/term/regex). Be mindful that these regular expressions follow [Tantivy's syntax](https://docs.rs/tantivy-fst/latest/tantivy_fst/struct.Regex.html), which follows Rust's regex crate with a few variations. 211 | 212 | from( 213 | c in Call, 214 | where: c.id ~> regex("transcript", "(stop|route)") 215 | ) 216 | """ 217 | @doc section: :term_level_queries 218 | defmacro regex(field, pattern) do 219 | quote do 220 | fragment( 221 | "paradedb.regex(?::paradedb.fieldname, ?)", 222 | unquote(field), 223 | unquote(pattern) 224 | ) 225 | end 226 | end 227 | 228 | @doc """ 229 | Macro for [paradedb.term](https://docs.paradedb.com/documentation/advanced/term/term). 230 | 231 | from( 232 | c in Call, 233 | where: c.id ~> term("talkgroup_num", 7695) 234 | ) 235 | """ 236 | @doc section: :term_level_queries 237 | defmacro term(field, value) do 238 | quote do 239 | fragment( 240 | "paradedb.term(?::paradedb.fieldname, ?)", 241 | unquote(field), 242 | unquote(value) 243 | ) 244 | end 245 | end 246 | 247 | @doc """ 248 | Macro for [paradedb.term_set](https://docs.paradedb.com/documentation/advanced/term/term_set). 249 | 250 | from( 251 | c in Call, 252 | where: c.id ~> term_set([ 253 | term("talkgroup_num", 7700), 254 | term("call_length", 20) 255 | ]) 256 | ) 257 | """ 258 | @doc section: :term_level_queries 259 | defmacro term_set(terms) do 260 | quote do 261 | fragment( 262 | "paradedb.term_set(terms => ?)", 263 | unquote(terms) 264 | ) 265 | end 266 | end 267 | 268 | ################### 269 | ## Range Queries ## 270 | ################### 271 | 272 | @doc """ 273 | Macro for [paradedb.range](https://docs.paradedb.com/documentation/advanced/term/range) using the `int4` data type. 274 | 275 | from( 276 | c in Call, 277 | where: c.id ~> int4range("call_length", 5, nil, "[)") 278 | ) 279 | """ 280 | @doc section: :range_queries 281 | defmacro int4range(field, min, max, bounds) do 282 | quote do 283 | fragment( 284 | "paradedb.range(field => ?::paradedb.fieldname, range => int4range(?, ?, ?))", 285 | unquote(field), 286 | unquote(min), 287 | unquote(max), 288 | unquote(bounds) 289 | ) 290 | end 291 | end 292 | 293 | @doc """ 294 | Macro for [paradedb.range](https://docs.paradedb.com/documentation/advanced/term/range) using the `int8` data type. 
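  Behaves like `int4range/4` but builds the range with Postgres's `int8range` type; `min` or `max` may be `nil` to leave that side of the range unbounded, as in the example below: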
295 | 296 | from( 297 | c in Call, 298 | where: c.id ~> int8range("call_length", 5, nil, "[)") 299 | ) 300 | """ 301 | @doc section: :range_queries 302 | defmacro int8range(field, min, max, bounds) do 303 | quote do 304 | fragment( 305 | "paradedb.range(field => ?::paradedb.fieldname, range => int8range(?, ?, ?))", 306 | unquote(field), 307 | unquote(min), 308 | unquote(max), 309 | unquote(bounds) 310 | ) 311 | end 312 | end 313 | 314 | @doc """ 315 | Macro for [paradedb.range](https://docs.paradedb.com/documentation/advanced/term/range) using the `date` data type. 316 | 317 | start = ~D[2024-10-09] 318 | stop = ~D[2024-10-10] 319 | 320 | query = 321 | from( 322 | c in Call, 323 | where: c.id ~> daterange("start_time", ^start, ^stop, "[]") 324 | ) 325 | """ 326 | @doc section: :range_queries 327 | defmacro daterange(field, min, max, bounds) do 328 | quote do 329 | fragment( 330 | "paradedb.range(field => ?::paradedb.fieldname, range => daterange(?, ?, ?))", 331 | unquote(field), 332 | unquote(min), 333 | unquote(max), 334 | unquote(bounds) 335 | ) 336 | end 337 | end 338 | 339 | @doc """ 340 | Macro for [paradedb.range](https://docs.paradedb.com/documentation/advanced/term/range) using the `timestamp` data type. 341 | 342 | begin = ~U[2024-10-09 08:00:00.00Z] 343 | 344 | query = 345 | from( 346 | c in Call, 347 | where: c.id ~> tsrange("start_time", ^begin, nil, "[)") 348 | ) 349 | """ 350 | @doc section: :range_queries 351 | defmacro tsrange(field, min, max, bounds) do 352 | quote do 353 | fragment( 354 | "paradedb.range(field => ?::paradedb.fieldname, range => tsrange(?, ?, ?))", 355 | unquote(field), 356 | unquote(min), 357 | unquote(max), 358 | unquote(bounds) 359 | ) 360 | end 361 | end 362 | 363 | ########################## 364 | ## Phrase-Level Queries ## 365 | ########################## 366 | 367 | @doc """ 368 | Macro for [paradedb.phrase](https://docs.paradedb.com/documentation/advanced/phrase/phrase). 369 | 370 | from( 371 | c in Call, 372 | where: c.id ~> phrase("transcript", ["bus", "stop"], 1) 373 | ) 374 | """ 375 | @doc section: :phrase_level_queries 376 | defmacro phrase(field, phrases, slop \\ 0) do 377 | quote do 378 | fragment( 379 | "paradedb.phrase(?::paradedb.fieldname, ?, ?)", 380 | unquote(field), 381 | unquote(phrases), 382 | unquote(slop) 383 | ) 384 | end 385 | end 386 | 387 | @doc """ 388 | Macro for [paradedb.phrase_prefix](https://docs.paradedb.com/documentation/advanced/phrase/phrase_prefix). 389 | 390 | from( 391 | c in Call, 392 | where: c.id ~> phrase_prefix("transcript", ["en"]) 393 | ) 394 | """ 395 | @doc section: :phrase_level_queries 396 | defmacro phrase_prefix(field, phrases, max_expansion \\ 0) do 397 | quote do 398 | fragment( 399 | "paradedb.phrase_prefix(?::paradedb.fieldname, ?, ?)", 400 | unquote(field), 401 | unquote(phrases), 402 | unquote(max_expansion) 403 | ) 404 | end 405 | end 406 | 407 | ###################### 408 | ## Compound Queries ## 409 | ###################### 410 | 411 | @doc """ 412 | Macro for [paradedb.all](https://docs.paradedb.com/documentation/advanced/compound/all). 413 | 414 | from( 415 | c in Call, 416 | where: c.id ~> all() 417 | ) 418 | """ 419 | @doc section: :compound_queries 420 | defmacro all() do 421 | quote do 422 | fragment("paradedb.all()") 423 | end 424 | end 425 | 426 | @doc """ 427 | Macro for [paradedb.boolean](https://docs.paradedb.com/documentation/advanced/compound/boolean). 
428 | 
429 |   The argument must be a literal keyword list at the top level, since it is inspected when the macro is expanded, and each value must be a list of queries:
430 |       x = "transcript:transfer"
431 | 
432 |       from(
433 |         c in Call,
434 |         where: c.id ~> boolean(
435 |           must: [parse(^x)],
436 |           must_not: [parse("transcript:station")]
437 |         )
438 |       )
439 | 
440 |   Keys other than `must`, `should`, and `must_not` are ignored, so be wary of misspelling.
441 |   """
442 |   @doc section: :compound_queries
443 |   defmacro boolean(queries) do
444 |     must = Keyword.get(queries, :must, [])
445 |     should = Keyword.get(queries, :should, [])
446 |     must_not = Keyword.get(queries, :must_not, [])
447 | 
448 |     quote do
449 |       fragment(
450 |         "paradedb.boolean(must => ?, should => ?, must_not => ?)",
451 |         unquote(must),
452 |         unquote(should),
453 |         unquote(must_not)
454 |       )
455 |     end
456 |   end
457 | 
458 |   @doc """
459 |   Macro for [paradedb.boost](https://docs.paradedb.com/documentation/advanced/compound/boost).
460 | 
461 |       from(
462 |         c in Call,
463 |         select: {c, score(c.id)},
464 |         where: c.id ~> boost(2.0, parse("transcript:bus"))
465 |       )
466 |   """
467 |   @doc section: :compound_queries
468 |   defmacro boost(boost, query) do
469 |     quote do
470 |       fragment(
471 |         "paradedb.boost(?::real, ?)",
472 |         unquote(boost),
473 |         unquote(query)
474 |       )
475 |     end
476 |   end
477 | 
478 |   @doc """
479 |   Macro for [paradedb.const_score](https://docs.paradedb.com/documentation/advanced/compound/const).
480 | 
481 |       from(
482 |         c in Call,
483 |         select: {c, score(c.id)},
484 |         where: c.id ~> const_score(2.0, parse("transcript:bus"))
485 |       )
486 |   """
487 |   @doc section: :compound_queries
488 |   defmacro const_score(score, query) do
489 |     quote do
490 |       fragment(
491 |         "paradedb.const_score(?::real, ?)",
492 |         unquote(score),
493 |         unquote(query)
494 |       )
495 |     end
496 |   end
497 | 
498 |   @doc """
499 |   Macro for [paradedb.disjunction_max](https://docs.paradedb.com/documentation/advanced/compound/disjunction_max).
500 | 
501 |       from(
502 |         c in Call,
503 |         where:
504 |           c.id ~> disjunction_max([
505 |             parse("transcript:bus"),
506 |             int4range("call_length", 10, nil, "[)")
507 |           ])
508 |       )
509 |   """
510 |   @doc section: :compound_queries
511 |   defmacro disjunction_max(disjuncts, tie_breaker \\ 0.0) do
512 |     quote do
513 |       fragment(
514 |         "paradedb.disjunction_max(?, ?::real)",
515 |         unquote(disjuncts),
516 |         unquote(tie_breaker)
517 |       )
518 |     end
519 |   end
520 | 
521 |   @doc """
522 |   Macro for [paradedb.empty](https://docs.paradedb.com/documentation/advanced/compound/empty).
523 | 
524 |       from(
525 |         c in Call,
526 |         where: c.id ~> empty()
527 |       )
528 |   """
529 |   @doc section: :compound_queries
530 |   defmacro empty() do
531 |     quote do
532 |       fragment("paradedb.empty()")
533 |     end
534 |   end
535 | 
536 |   @doc """
537 |   Macro for [paradedb.parse](https://docs.paradedb.com/documentation/advanced/compound/parse).
538 | 
539 |       from(
540 |         c in Call,
541 |         where: c.id ~> parse("transcript:bus")
542 |       )
543 |   """
544 |   @doc section: :compound_queries
545 |   defmacro parse(query, lenient \\ false, conjunction_mode \\ true) do
546 |     quote do
547 |       fragment(
548 |         "paradedb.parse(?, lenient => ?, conjunction_mode => ?)",
549 |         unquote(query),
550 |         unquote(lenient),
551 |         unquote(conjunction_mode)
552 |       )
553 |     end
554 |   end
555 | 
556 |   @doc """
557 |   Macro for [paradedb.parse_with_field](https://docs.paradedb.com/documentation/advanced/compound/parse#parse-with-field).
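  The optional `lenient` and `conjunction_mode` arguments default to `false` and `true`; they can be passed positionally to override, e.g. `parse_with_field("transcript", "traffic congestion", true, false)`.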
558 | 559 | from( 560 | c in Call, 561 | where: c.id ~> parse_with_field("transcript", "traffic congestion") 562 | ) 563 | """ 564 | @doc section: :compound_queries 565 | defmacro parse_with_field(field, query, lenient \\ false, conjunction_mode \\ true) do 566 | quote do 567 | fragment( 568 | "paradedb.parse_with_field(?::paradedb.fieldname, ?, lenient => ?, conjunction_mode => ?)", 569 | unquote(field), 570 | unquote(query), 571 | unquote(lenient), 572 | unquote(conjunction_mode) 573 | ) 574 | end 575 | end 576 | 577 | ######################### 578 | ## Specialized Queries ## 579 | ######################### 580 | 581 | # defmacro more_like_this() do 582 | # quote do 583 | # fragment("?") 584 | # end 585 | # end 586 | end 587 | --------------------------------------------------------------------------------
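For an end-to-end feel for how these macros compose, here is a sketch (not part of the library itself) that assumes the `ParadexApp.Repo` and `ParadexApp.Call` test schema from `test/support`. It combines `disjunction_max/2`, `boost/2`, `parse/3`, and `int4range/4`, and orders the results by BM25 score:

```elixir
import Ecto.Query
import Paradex

alias ParadexApp.Call
alias ParadexApp.Repo

# A boosted parser query on the transcript, combined with a call_length range
# filter via disjunction_max, ranked by the BM25 score exposed through
# score/1 (selected) and score/0 (referenced in order_by).
query =
  from(
    c in Call,
    select: {c.id, c.transcript, score(c.id)},
    where:
      c.id ~> disjunction_max([
        boost(2.0, parse("transcript:bus")),
        int4range("call_length", 10, nil, "[)")
      ]),
    order_by: [desc: score()],
    limit: 5
  )

Repo.all(query)
```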