From 316bd6918af73d313a0b1fa9280508e27ee82baf Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Sat, 7 Dec 2024 16:28:54 +0100 Subject: [PATCH 1/9] Add elixir bindings --- bindings/elixir/.formatter.exs | 5 + bindings/elixir/.github/workflows/ci.yml | 75 +++++++ bindings/elixir/.gitignore | 35 ++++ bindings/elixir/LICENSE | 21 ++ bindings/elixir/README.md | 60 ++++++ bindings/elixir/lib/sqlite_vec.ex | 21 ++ bindings/elixir/lib/sqlite_vec/bit.ex | 132 +++++++++++++ bindings/elixir/lib/sqlite_vec/downloader.ex | 86 ++++++++ bindings/elixir/lib/sqlite_vec/ecto/bit.ex | 24 +++ .../elixir/lib/sqlite_vec/ecto/float32.ex | 24 +++ bindings/elixir/lib/sqlite_vec/ecto/int8.ex | 24 +++ bindings/elixir/lib/sqlite_vec/ecto/query.ex | 187 ++++++++++++++++++ bindings/elixir/lib/sqlite_vec/float32.ex | 114 +++++++++++ bindings/elixir/lib/sqlite_vec/int8.ex | 107 ++++++++++ bindings/elixir/mix.exs | 96 +++++++++ bindings/elixir/mix.lock | 29 +++ .../elixir/notebooks/getting_started.livemd | 54 +++++ .../elixir/notebooks/usage_with_ecto.livemd | 177 +++++++++++++++++ bindings/elixir/priv/.gitkeep | 0 .../elixir/test/sqlite_vec/bit_ecto_test.exs | 167 ++++++++++++++++ bindings/elixir/test/sqlite_vec/bit_test.exs | 144 ++++++++++++++ .../test/sqlite_vec/float32_ecto_test.exs | 183 +++++++++++++++++ .../elixir/test/sqlite_vec/float32_test.exs | 121 ++++++++++++ .../elixir/test/sqlite_vec/int8_ecto_test.exs | 185 +++++++++++++++++ bindings/elixir/test/sqlite_vec/int8_test.exs | 113 +++++++++++ bindings/elixir/test/sqlite_vec_test.exs | 8 + bindings/elixir/test/test_helper.exs | 15 ++ 27 files changed, 2207 insertions(+) create mode 100644 bindings/elixir/.formatter.exs create mode 100644 bindings/elixir/.github/workflows/ci.yml create mode 100644 bindings/elixir/.gitignore create mode 100644 bindings/elixir/LICENSE create mode 100644 bindings/elixir/README.md create mode 100644 bindings/elixir/lib/sqlite_vec.ex create mode 100644 bindings/elixir/lib/sqlite_vec/bit.ex create mode 100644 
bindings/elixir/lib/sqlite_vec/downloader.ex create mode 100644 bindings/elixir/lib/sqlite_vec/ecto/bit.ex create mode 100644 bindings/elixir/lib/sqlite_vec/ecto/float32.ex create mode 100644 bindings/elixir/lib/sqlite_vec/ecto/int8.ex create mode 100644 bindings/elixir/lib/sqlite_vec/ecto/query.ex create mode 100644 bindings/elixir/lib/sqlite_vec/float32.ex create mode 100644 bindings/elixir/lib/sqlite_vec/int8.ex create mode 100644 bindings/elixir/mix.exs create mode 100644 bindings/elixir/mix.lock create mode 100644 bindings/elixir/notebooks/getting_started.livemd create mode 100644 bindings/elixir/notebooks/usage_with_ecto.livemd create mode 100644 bindings/elixir/priv/.gitkeep create mode 100644 bindings/elixir/test/sqlite_vec/bit_ecto_test.exs create mode 100644 bindings/elixir/test/sqlite_vec/bit_test.exs create mode 100644 bindings/elixir/test/sqlite_vec/float32_ecto_test.exs create mode 100644 bindings/elixir/test/sqlite_vec/float32_test.exs create mode 100644 bindings/elixir/test/sqlite_vec/int8_ecto_test.exs create mode 100644 bindings/elixir/test/sqlite_vec/int8_test.exs create mode 100644 bindings/elixir/test/sqlite_vec_test.exs create mode 100644 bindings/elixir/test/test_helper.exs diff --git a/bindings/elixir/.formatter.exs b/bindings/elixir/.formatter.exs new file mode 100644 index 00000000..5c0dd53c --- /dev/null +++ b/bindings/elixir/.formatter.exs @@ -0,0 +1,5 @@ +# Used by "mix format" +[ + plugins: [DoctestFormatter], + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/bindings/elixir/.github/workflows/ci.yml b/bindings/elixir/.github/workflows/ci.yml new file mode 100644 index 00000000..dd7800e2 --- /dev/null +++ b/bindings/elixir/.github/workflows/ci.yml @@ -0,0 +1,75 @@ +name: CI + +on: + pull_request: + push: + branches: + - "main" + +jobs: + lint: + runs-on: ${{ matrix.os }} + env: + MIX_ENV: dev + name: Lint + strategy: + matrix: + os: ["ubuntu-latest"] + elixir: ["1.17"] + otp: ["27"] + steps: + - uses: 
actions/checkout@v4 + - name: Install Erlang & Elixir + uses: erlef/setup-beam@v1 + with: + otp-version: ${{ matrix.otp }} + elixir-version: ${{ matrix.elixir }} + - uses: actions/cache@v3 + with: + path: deps + key: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_${{ hashFiles('**/mix.lock') }} + restore-keys: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_ + - name: Install mix dependencies + run: mix deps.get + - name: Compile mix dependencies + run: mix deps.compile + - name: Check formatting + run: mix format --check-formatted + - name: Check unused deps + run: mix deps.unlock --check-unused + - name: Credo check + run: mix credo --all + + test: + runs-on: ${{ matrix.os }} + env: + MIX_ENV: test + + name: Test Elixir ${{ matrix.elixir }}, OTP ${{ matrix.otp }}, OS ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + elixir: ["1.17"] + otp: ["27"] + + steps: + - uses: actions/checkout@v4 + - uses: erlef/setup-beam@v1 + with: + otp-version: ${{ matrix.otp }} + elixir-version: ${{ matrix.elixir }} + + - uses: actions/cache@v3 + with: + path: deps + key: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_${{ hashFiles('**/mix.lock') }} + restore-keys: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_ + - name: Install mix dependencies + run: mix deps.get --only test + - name: Compile mix dependencies + run: mix deps.compile + - name: Compile project + run: mix compile + - name: Test project + run: mix test --include slow diff --git a/bindings/elixir/.gitignore b/bindings/elixir/.gitignore new file mode 100644 index 00000000..06492ccd --- /dev/null +++ b/bindings/elixir/.gitignore @@ -0,0 +1,35 @@ +# The directory Mix will write compiled artifacts to. +/_build/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# The directory Mix downloads your dependencies sources to. 
+/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# Ignore .fetch files in case you like to edit your project deps locally. +/.fetch + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). +sqlite_vec-*.tar + +# Temporary files, for example, from tests. +/tmp/ + +# downloaded libraries +/priv/**/vec0.* + +/notebooks/*.db +/notebooks/*.db-* + +# test database +/test/*.db* diff --git a/bindings/elixir/LICENSE b/bindings/elixir/LICENSE new file mode 100644 index 00000000..9140d7b7 --- /dev/null +++ b/bindings/elixir/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Joel Koch + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/bindings/elixir/README.md b/bindings/elixir/README.md new file mode 100644 index 00000000..1ff4ae04 --- /dev/null +++ b/bindings/elixir/README.md @@ -0,0 +1,60 @@ +# SqliteVec + +[![Hex Package](https://img.shields.io/hexpm/v/sqlite_vec.svg?style=for-the-badge)](https://hex.pm/packages/sqlite_vec) +[![Hex Docs](https://img.shields.io/badge/hex-docs-blue.svg?style=for-the-badge)](https://hexdocs.pm/sqlite_vec) +[![Build Status](https://img.shields.io/github/actions/workflow/status/joelpaulkoch/sqlite_vec/ci.yml?label=Build%20Status&style=for-the-badge&branch=main)](https://github.com/joelpaulkoch/sqlite_vec/actions) + +A wrapper to use [sqlite-vec](https://github.com/asg017/sqlite-vec), a SQLite extension for working with vectors, in Elixir. +The configured version of the precompiled loadable library will be downloaded from the GitHub releases. +Moreover, this package provides structs and custom Ecto types for working with Float32, Int8, and Bit vectors. + +## Limitations +- it's currently not possible to create int8 and bit vectors using `Ecto`. You must directly use SQL to do so +- not implemented operations: `vec_each`, `vec_quantize_i8` + +## Installation + +The package can be installed by adding `sqlite_vec` to your list of dependencies in `mix.exs`: + +```elixir +def deps do + [ + {:sqlite_vec, "~> 0.1.0"} + ] +end +``` + +## Getting Started + +`SqliteVec.path/0` returns the path of the downloaded library. +Therefore, you can load the extension using this path. 
+ +For instance with `Exqlite`: +```elixir +{:ok, conn} = Basic.open(":memory:") +:ok = Basic.enable_load_extension(conn) + +Basic.load_extension(conn, SqliteVec.path()) +``` + +Or, with an `Ecto.Repo` and `ecto_sqlite3`: + +```elixir +defmodule MyApp.Repo do + use Ecto.Repo, + otp_app: :my_app, + adapter: Ecto.Adapters.SQLite3 +end + +config :my_app, MyApp.Repo, load_extensions: [SqliteVec.path()] +``` + +You can check out the [Getting Started](notebooks/getting_started.livemd) and [Usage with Ecto](notebooks/usage_with_ecto.livemd) notebooks. + +## Attribution + +Special thanks to these projects that helped to make this package: + +- [OctoFetch](https://hexdocs.pm/octo_fetch/readme.html) which does all the work for downloading the GitHub releases, and served as a blueprint for this package (yes, including this Attribution section :) ) +- [sqlite-vec](https://github.com/asg017/sqlite-vec), of course, which provides all of the functionality +- [pgvector](https://hexdocs.pm/pgvector/readme.html) provides something similar for postgres and quite some code could be reused diff --git a/bindings/elixir/lib/sqlite_vec.ex b/bindings/elixir/lib/sqlite_vec.ex new file mode 100644 index 00000000..3e57126a --- /dev/null +++ b/bindings/elixir/lib/sqlite_vec.ex @@ -0,0 +1,21 @@ +defmodule SqliteVec do + @moduledoc """ + Downloads the precompiled loadable library of `sqlite-vec` from GitHub releases. + """ + + @doc """ + Provides the path to the downloaded loadable library. + """ + def path() do + version = Application.get_env(:sqlite_vec, :version, SqliteVec.Downloader.default_version()) + + Application.app_dir(:sqlite_vec, "priv/#{version}/vec0") + end + + @doc """ + Downloads the specified `version` to `output_dir`. 
+  """
+  def download(output_dir, version) do
+    SqliteVec.Downloader.download(output_dir, override_version: version)
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/bit.ex b/bindings/elixir/lib/sqlite_vec/bit.ex
new file mode 100644
index 00000000..75ad06ac
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/bit.ex
@@ -0,0 +1,132 @@
+defmodule SqliteVec.Bit do
+  @moduledoc """
+  A vector struct for bit vectors.
+  Vectors are stored as binaries.
+
+  > ### Consider endianness {: .warning}
+  >
+  > When returned from `sqlite-vec` or created from `Nx.Tensor`, `SqliteVec.Bit` holds data in system endianness.
+  > You must consider endianness when converting the binary data to a list of numbers.
+
+      iex> v = SqliteVec.Bit.new(Nx.tensor([-1.0, 2.0], type: :f32))
+      ...> b = SqliteVec.Bit.to_binary(v)
+      ...> <<f1::float-32-big, f2::float-32-big>> = b
+      ...> [f1, f2]
+      case System.endianness() do
+        :big -> [-1.0, 2.0]
+        :little -> [4.618539608568165e-41, 8.96831017167883e-44]
+      end
+  """
+
+  @type t :: %__MODULE__{data: binary()}
+
+  defstruct [:data]
+
+  @doc """
+  Creates a new vector from a vector, list, or tensor.
+
+  The vector must be a `SqliteVec.Bit` vector.
+  The list must only contain values of 0 or 1 and must have a length that's divisible by 8.
+  The tensor must have a rank of 1 and a type size that's divisible by 8.
+
+  ## Examples
+      iex> SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])
+      %SqliteVec.Bit{data: <<0b00000001>>}
+
+      iex> v1 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])
+      ...> SqliteVec.Bit.new(v1)
+      %SqliteVec.Bit{data: <<0b00000001>>}
+
+      iex> SqliteVec.Bit.new(Nx.tensor([1, 2, 3], type: :u8))
+      %SqliteVec.Bit{data: <<1::signed-integer-8, 2::signed-integer-8, 3::signed-integer-8>>}
+  """
+  def new(vector_or_list_or_tensor)
+
+  def new(%SqliteVec.Bit{} = vector) do
+    vector
+  end
+
+  def new(list) when is_list(list) do
+    if list == [] do
+      raise ArgumentError, "list must not be empty"
+    end
+
+    if not length_divisible_by_8?(list) do
+      raise ArgumentError, "expected list length to be divisible by 8"
+    end
+
+    if Enum.any?(list, &(not bit?(&1))) do
+      raise ArgumentError, "expected list elements to be 0 or 1"
+    end
+
+    bin = for v <- list, into: <<>>, do: <<v::1>>
+
+    from_binary(<<bin::binary>>)
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    def new(tensor) when is_struct(tensor, Nx.Tensor) do
+      if Nx.rank(tensor) != 1 do
+        raise ArgumentError, "expected rank to be 1"
+      end
+
+      if not binary_type_size?(Nx.type(tensor)) do
+        raise ArgumentError, "expected type size to be divisible by 8"
+      end
+
+      bin = Nx.to_binary(tensor)
+      from_binary(<<bin::binary>>)
+    end
+
+    defp binary_type_size?({_type, size}), do: rem(size, 8) == 0
+  end
+
+  defp length_divisible_by_8?(list) do
+    rem(length(list), 8) == 0
+  end
+
+  defp bit?(0), do: true
+  defp bit?(1), do: true
+  defp bit?(_), do: false
+
+  @doc """
+  Creates a new vector from its binary representation
+  """
+  def from_binary(binary) when is_binary(binary) do
+    %SqliteVec.Bit{data: binary}
+  end
+
+  @doc """
+  Converts the vector to its binary representation
+  """
+  def to_binary(vector) when is_struct(vector, SqliteVec.Bit) do
+    vector.data
+  end
+
+  @doc """
+  Converts the vector to a list of bits
+  """
+  def to_list(vector) when is_struct(vector, SqliteVec.Bit) do
+    <<bin::binary>> = vector.data
+
+    for <<v::1 <- bin>>, do: v
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    @doc """
+    Converts the vector to a tensor
+    """
+    def to_tensor(vector) when is_struct(vector, SqliteVec.Bit) do
+      <<bin::binary>> = vector.data
+      Nx.from_binary(bin, :u8)
+    end
+  end
+end
+
+defimpl Inspect, for: SqliteVec.Bit do
+  import Inspect.Algebra
+
+  def inspect(vector, opts) do
+    concat(["vec_bit('", Inspect.List.inspect(SqliteVec.Bit.to_list(vector), opts), "')"])
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/downloader.ex b/bindings/elixir/lib/sqlite_vec/downloader.ex
new file mode 100644
index 00000000..9521c7b3
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/downloader.ex
@@ -0,0 +1,86 @@
+defmodule SqliteVec.Downloader do
+  @moduledoc """
+  The downloader module for `SqliteVec`.
+  If the configured version already exists, the download is skipped.
+  Otherwise, the corresponding GitHub release artifact will be downloaded.
+  Afterwards, all other previously downloaded versions will be deleted.
+  """
+
+  use OctoFetch,
+    latest_version: "0.1.6",
+    github_repo: "asg017/sqlite-vec",
+    download_versions: %{
+      "0.1.6" => [
+        {:darwin, :amd64, "35d014e5f7bcac52645a97f1f1ca34fdb51dcd61d81ac6e6ba1c712393fbf8fd"},
+        {:darwin, :arm64, "142e195b654092632fecfadbad2825f3140026257a70842778637597f6b8c827"},
+        {:linux, :amd64, "438e0df29f3f8db3525b3aa0dcc0a199869c0bcec9d7abc5b51850469caf867f"},
+        {:linux, :arm64, "d6e4ba12c5c0186eaab42fb4449b311008d86ffd943e6377d7d88018cffab3aa"},
+        {:windows, :amd64, "f1c615577ad2e692d1e2fe046fe65994dafd8a8cae43e9e864f5f682dc295964"}
+      ],
+      "0.1.5" => [
+        {:darwin, :amd64, "1daa90b7cdda7e873af4636a20a2b6daf0ebd4d664f2bbbcc2ffeae219bf34b6"},
+        {:darwin, :arm64, "348ea4ce39b4b4749b19ee93e5e9674d6ed7616e3e313cb20f6354cdecbebc75"},
+        {:linux, :amd64, "626bb9b66896269facdf7f87d94c308bf0523cb1e584ff7ff5b3f51936f21d24"},
+        {:linux, :arm64, "8ce460c1f2adcbbc709f5ca1d1a3578c34c62c131d1a044bd3ff7c0729be2137"},
+        {:windows, :amd64, "cfd31e96d2edf27749c4c2063134737fc98ac87b6e113acf204db57563b078bc"}
+      ],
+      "0.1.4" => [
+        {:darwin, :amd64,
"1be7676e9e63c427fe0ce84b738c1c9012f2bbb4b81ecc63719b5552f07e1b26"}, + {:darwin, :arm64, "e7962da8acd394ad95cfc4822d573d5b10ac9f93d2dd28b73e76841eb5da45ee"}, + {:linux, :amd64, "2d3855b9953f05aba033536efed3cd2a9cc4518ee009301b0c03b17f9d698819"}, + {:linux, :arm64, "b0b8d2b7b4beb9641417874689e737fe872d79e208c0c306565bd5fbfacb7124"}, + {:windows, :amd64, "39a5575c565af7c135b9f62db9d92aebd7af096cc2b952c8a31b40f674ccf2cf"} + ], + "0.1.3" => [ + {:darwin, :amd64, "8ef228a8935883f8b5c52f191a8123909ea48ab58f6eceb5d4c12ada654556cf"}, + {:darwin, :arm64, "c57a552c8a8df823a8deb937f81d8a9ec5c81377e66e86cd5db8508b74ef4068"}, + {:linux, :amd64, "5fa404f6d61de7b462d1f1504332a522a64331103603ca079714f078cdb28606"} + ], + "0.1.2" => [ + {:darwin, :amd64, "d2d4d312fac1d609723b75cc777df42f3ff0770903cd89d53ca201c6e10c25f9"}, + {:darwin, :arm64, "a449cb190366ee0080bcab132d788b0f792600bfa8dd7c0aba539444c6e126ba"}, + {:linux, :amd64, "539e6bb92612665e1fd1870df1b2c5db66e327bf5a98aee1666c57fb3c6e128d"} + ] + } + + @impl true + def download_name(version, :darwin, arch), do: download_name(version, :macos, arch) + def download_name(version, os, :amd64), do: download_name(version, os, :x86_64) + def download_name(version, os, :arm64), do: download_name(version, os, :aarch64) + + def download_name(version, os, arch), do: "sqlite-vec-#{version}-loadable-#{os}-#{arch}.tar.gz" + + def pre_download_hook(_file, output_dir) do + if library_exists?(output_dir) do + :skip + else + :cont + end + end + + defp library_exists?(output_dir) do + matches = + output_dir + |> Path.join("vec0.*") + |> Path.wildcard() + + matches != [] + end + + def post_write_hook(file) do + output_dir = file |> Path.dirname() |> Path.join("..") |> Path.expand() + current_version = file |> Path.dirname() |> Path.basename() + + remove_other_versions(output_dir, current_version) + + :ok + end + + defp remove_other_versions(output_dir, current_version) do + output_dir + |> Path.join("*") + |> Path.wildcard() + |> Enum.filter(fn 
path -> Path.basename(path) != current_version end)
+    |> Enum.map(&File.rm_rf(&1))
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/ecto/bit.ex b/bindings/elixir/lib/sqlite_vec/ecto/bit.ex
new file mode 100644
index 00000000..eadc34e2
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/ecto/bit.ex
@@ -0,0 +1,31 @@
+if Code.ensure_loaded?(Ecto) do
+  defmodule SqliteVec.Ecto.Bit do
+    @moduledoc """
+    `Ecto.Type` for `SqliteVec.Bit`
+    """
+    use Ecto.Type
+
+    # Vectors are persisted as their raw binary representation.
+    def type, do: :binary
+
+    # `SqliteVec.Bit.new/1` raises on unsupported or malformed input, while the
+    # `Ecto.Type` contract expects `:error`, so translate those failures.
+    def cast(value) do
+      {:ok, SqliteVec.Bit.new(value)}
+    rescue
+      [ArgumentError, FunctionClauseError] -> :error
+    end
+
+    # Wraps the stored binary in a `SqliteVec.Bit` struct.
+    def load(data) do
+      {:ok, SqliteVec.Bit.from_binary(data)}
+    end
+
+    # Only `SqliteVec.Bit` structs can be dumped; anything else is rejected.
+    def dump(%SqliteVec.Bit{} = vector) do
+      {:ok, SqliteVec.Bit.to_binary(vector)}
+    end
+
+    def dump(_), do: :error
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/ecto/float32.ex b/bindings/elixir/lib/sqlite_vec/ecto/float32.ex
new file mode 100644
index 00000000..aacfd72b
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/ecto/float32.ex
@@ -0,0 +1,31 @@
+if Code.ensure_loaded?(Ecto) do
+  defmodule SqliteVec.Ecto.Float32 do
+    @moduledoc """
+    `Ecto.Type` for `SqliteVec.Float32`
+    """
+    use Ecto.Type
+
+    # Vectors are persisted as their raw binary representation.
+    def type, do: :binary
+
+    # `SqliteVec.Float32.new/1` raises on unsupported or malformed input, while the
+    # `Ecto.Type` contract expects `:error`, so translate those failures.
+    def cast(value) do
+      {:ok, SqliteVec.Float32.new(value)}
+    rescue
+      [ArgumentError, FunctionClauseError] -> :error
+    end
+
+    # Wraps the stored binary in a `SqliteVec.Float32` struct.
+    def load(data) do
+      {:ok, SqliteVec.Float32.from_binary(data)}
+    end
+
+    # Only `SqliteVec.Float32` structs can be dumped; anything else is rejected.
+    def dump(%SqliteVec.Float32{} = vector) do
+      {:ok, SqliteVec.Float32.to_binary(vector)}
+    end
+
+    def dump(_), do: :error
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/ecto/int8.ex b/bindings/elixir/lib/sqlite_vec/ecto/int8.ex
new file mode 100644
index 00000000..7e144ddf
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/ecto/int8.ex
@@ -0,0 +1,31 @@
+if Code.ensure_loaded?(Ecto) do
+  defmodule SqliteVec.Ecto.Int8 do
+    @moduledoc """
+    `Ecto.Type` for `SqliteVec.Int8`
+    """
+    use Ecto.Type
+
+    # Vectors are persisted as their raw binary representation.
+    def type, do: :binary
+
+    # `SqliteVec.Int8.new/1` raises on unsupported or malformed input, while the
+    # `Ecto.Type` contract expects `:error`, so translate those failures.
+    def cast(value) do
+      {:ok, SqliteVec.Int8.new(value)}
+    rescue
+      [ArgumentError, FunctionClauseError] -> :error
+    end
+
+    # Wraps the stored binary in a `SqliteVec.Int8` struct.
+    def load(data) do
+      {:ok, SqliteVec.Int8.from_binary(data)}
+    end
+
+    # Only `SqliteVec.Int8` structs can be dumped; anything else is rejected.
+    def dump(%SqliteVec.Int8{} = vector) do
+      {:ok, SqliteVec.Int8.to_binary(vector)}
+    end
+
+    def dump(_), do: :error
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/ecto/query.ex b/bindings/elixir/lib/sqlite_vec/ecto/query.ex
new file mode 100644
index 00000000..881b4cba
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/ecto/query.ex
@@ -0,0 +1,187 @@
+if Code.ensure_loaded?(Ecto) do
+  defmodule SqliteVec.Ecto.Query do
+    @moduledoc """
+    Macros for Ecto
+    """
+
+    @doc """
+    Creates a bit vector
+    """
+    defmacro vec_bit(vector) do
+      quote do
+        fragment("vec_bit(?)", type(^unquote(vector).data, :binary))
+      end
+    end
+
+    @doc """
+    Creates an int8 vector
+    """
+    defmacro vec_int8(vector) do
+      quote do
+        fragment("vec_int8(?)", type(^unquote(vector).data, :binary))
+      end
+    end
+
+    @doc """
+    Creates a float32 vector
+    """
+    defmacro vec_f32(vector) do
+      quote do
+        fragment("vec_f32(?)", type(^unquote(vector).data, :binary))
+      end
+    end
+
+    @doc """
+    Performs a K-nearest-neighbors (KNN) query. You must specify a LIMIT or 'k = ?' constraint.
+    """
+    defmacro match(left, right) do
+      quote do
+        fragment("? match ?", unquote(left), unquote(right))
+      end
+    end
+
+    @doc """
+    Calculates the L2 Euclidean distance between vectors a and b. Only valid for float32 or int8 vectors.
+
+    Returns an error under the following conditions:
+    - a or b are invalid vectors
+    - a or b do not share the same vector element types (ex float32 or int8)
+    - a or b are bit vectors. Use vec_distance_hamming() for distance calculations between two bitvectors.
+    - a or b do not have the same length.
+    """
+    # credo:disable-for-next-line Credo.Check.Readability.FunctionNames
+    defmacro vec_distance_L2(a, b) do
+      quote do
+        fragment("vec_distance_L2(?, ?)", unquote(a), unquote(b))
+      end
+    end
+
+    @doc """
+    Calculates the cosine distance between vectors a and b. Only valid for float32 or int8 vectors.
+ + Returns an error under the following conditions: + - a or b are invalid vectors + - a or b do not share the same vector element types (ex float32 or int8) + - a or b are bit vectors. Use vec_distance_hamming() for distance calculations between two bitvectors. + - a or b do not have the same length + """ + defmacro vec_distance_cosine(a, b) do + quote do + fragment("vec_distance_cosine(?, ?)", unquote(a), unquote(b)) + end + end + + @doc """ + Calculates the hamming distance between two bitvectors a and b. Only valid for bitvectors. + + Returns an error under the following conditions: + - a or b are not bitvectors + - a and b do not share the same length + - Memory cannot be allocated + """ + defmacro vec_distance_hamming(a, b) do + quote do + fragment("vec_distance_hamming(?, ?)", unquote(a), unquote(b)) + end + end + + @doc """ + Returns the number of elements in the given vector + """ + defmacro vec_length(vector) do + quote do + fragment("vec_length(?)", unquote(vector)) + end + end + + @doc """ + Returns the name of the type of `vector` as text + """ + defmacro vec_type(vector) do + quote do + fragment("vec_type(?)", unquote(vector)) + end + end + + @doc """ + Adds every element in vector a with vector b, returning a new vector c. + Both vectors must be of the same type and same length. + Only float32 and int8 vectors are supported. + + An error is raised if either a or b are invalid, or if they are not the same type or same length. + """ + defmacro vec_add(a, b) do + quote do + fragment("vec_add(?, ?)", unquote(a), unquote(b)) + end + end + + @doc """ + Subtracts every element in vector a with vector b, returning a new vector c. + Both vectors must be of the same type and same length. + Only float32 and int8 vectors are supported. + + An error is raised if either a or b are invalid, or if they are not the same type or same length. 
+ """ + defmacro vec_sub(a, b) do + quote do + fragment("vec_sub(?, ?)", unquote(a), unquote(b)) + end + end + + @doc """ + Performs L2 normalization on the given vector. + Only float32 vectors are currently supported. + + Returns an error if the input is an invalid vector or not a float32 vector. + """ + defmacro vec_normalize(vector) do + quote do + fragment("vec_normalize(?)", unquote(vector)) + end + end + + @doc """ + Extract a subset of vector from the start element (inclusive) to the end element (exclusive). + + This is especially useful for Matryoshka embeddings, also known as "adaptive length" embeddings. + Use with vec_normalize() to get proper results. + + Returns an error in the following conditions: + - If vector is not a valid vector + - If start is less than zero or greater than or equal to end + - If end is greater than the length of vector, or less than or equal to start. + - If vector is a bitvector, start and end must be divisible by 8. + """ + defmacro vec_slice(vector, start_index, end_index) do + quote do + fragment("vec_slice(?, ?, ?)", unquote(vector), unquote(start_index), unquote(end_index)) + end + end + + @doc """ + Represents a vector as JSON text. + The input vector can be a vector BLOB or JSON text. + + Returns an error if vector is an invalid vector, or when memory cannot be allocated. + """ + defmacro vec_to_json(vector) do + quote do + fragment("vec_to_json(?)", unquote(vector)) + end + end + + @doc """ + Quantize a float32 or int8 vector into a bitvector. + For every element in the vector, a 1 is assigned to positive numbers and a 0 is assigned to negative numbers. + These values are then packed into a bit vector. + + Returns an error if vector is invalid, or if vector is not a float32 or int8 vector. 
+    """
+    defmacro vec_quantize_binary(vector) do
+      quote do
+        fragment("vec_quantize_binary(?)", unquote(vector))
+      end
+    end
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/float32.ex b/bindings/elixir/lib/sqlite_vec/float32.ex
new file mode 100644
index 00000000..f3a6cc76
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/float32.ex
@@ -0,0 +1,114 @@
+defmodule SqliteVec.Float32 do
+  @moduledoc """
+  A vector struct for float32 vectors.
+  Vectors are stored as binaries in the endianness of the system.
+
+  > ### Consider endianness {: .warning}
+  >
+  > `SqliteVec.Float32` holds data in system endianness.
+  > Therefore, the same vector data will be interpreted differently on another system with different endianness.
+  > Moreover, you must consider endianness when converting the binary data directly to a list of numbers.
+
+      iex> v = SqliteVec.Float32.new([-1.0, 2.0])
+      ...> b = SqliteVec.Float32.to_binary(v)
+      ...> <<f1::float-32-big, f2::float-32-big>> = b
+      ...> [f1, f2]
+      case System.endianness() do
+        :big -> [-1.0, 2.0]
+        :little -> [4.618539608568165e-41, 8.96831017167883e-44]
+      end
+  """
+
+  @type t :: %__MODULE__{data: binary()}
+
+  defstruct [:data]
+
+  @doc """
+  Creates a new vector from a vector, list, or tensor
+
+  The vector must be a `SqliteVec.Float32` vector.
+  The list may contain any number but the values will be converted to f32 format.
+  The tensor must have a rank of 1 and must be of type :f32.
+
+  ## Examples
+      iex> SqliteVec.Float32.new([1.0, 2.0])
+      %SqliteVec.Float32{data: <<1.0::float-32-native, 2.0::float-32-native>>}
+
+      iex> v1 = SqliteVec.Float32.new([1, 2])
+      ...> SqliteVec.Float32.new(v1)
+      %SqliteVec.Float32{data: <<1.0::float-32-native, 2.0::float-32-native>>}
+
+      iex> SqliteVec.Float32.new(Nx.tensor([1, 2], type: :f32))
+      %SqliteVec.Float32{data: <<1.0::float-32-native, 2.0::float-32-native>>}
+  """
+  def new(vector_or_list_or_tensor)
+
+  def new(%SqliteVec.Float32{} = vector) do
+    vector
+  end
+
+  def new(list) when is_list(list) do
+    if list == [] do
+      raise ArgumentError, "list must not be empty"
+    end
+
+    bin = for v <- list, into: <<>>, do: <<v::float-32-native>>
+    from_binary(<<bin::binary>>)
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    def new(tensor) when is_struct(tensor, Nx.Tensor) do
+      if Nx.rank(tensor) != 1 do
+        raise ArgumentError, "expected rank to be 1"
+      end
+
+      if Nx.type(tensor) != {:f, 32} do
+        raise ArgumentError, "expected type to be :f32"
+      end
+
+      bin = tensor |> Nx.to_binary()
+      from_binary(<<bin::binary>>)
+    end
+  end
+
+  @doc """
+  Creates a new vector from its binary representation
+  """
+  def from_binary(binary) when is_binary(binary) do
+    %SqliteVec.Float32{data: binary}
+  end
+
+  @doc """
+  Converts the vector to its binary representation
+  """
+  def to_binary(vector) when is_struct(vector, SqliteVec.Float32) do
+    vector.data
+  end
+
+  @doc """
+  Converts the vector to a list
+  """
+  def to_list(vector) when is_struct(vector, SqliteVec.Float32) do
+    <<bin::binary>> = vector.data
+
+    for <<v::float-32-native <- bin>>, do: v
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    @doc """
+    Converts the vector to a tensor
+    """
+    def to_tensor(vector) when is_struct(vector, SqliteVec.Float32) do
+      <<bin::binary>> = vector.data
+      Nx.from_binary(bin, :f32)
+    end
+  end
+end
+
+defimpl Inspect, for: SqliteVec.Float32 do
+  import Inspect.Algebra
+
+  def inspect(vector, opts) do
+    concat(["vec_f32('", Inspect.List.inspect(SqliteVec.Float32.to_list(vector), opts), "')"])
+  end
+end
diff --git a/bindings/elixir/lib/sqlite_vec/int8.ex
b/bindings/elixir/lib/sqlite_vec/int8.ex
new file mode 100644
index 00000000..745920cf
--- /dev/null
+++ b/bindings/elixir/lib/sqlite_vec/int8.ex
@@ -0,0 +1,107 @@
+defmodule SqliteVec.Int8 do
+  @moduledoc """
+  A vector struct for int8 vectors.
+  Vectors are stored as binaries.
+  """
+
+  @type t :: %__MODULE__{data: binary()}
+
+  defstruct [:data]
+
+  @doc """
+  Creates a new vector from a vector, list, or tensor
+
+  The vector must be a `SqliteVec.Int8` vector.
+  The list must only contain valid int8 values, i.e. values between and including -128 and 127.
+  The tensor must have a rank of 1 and must be of type :s8.
+
+  ## Examples
+      iex> SqliteVec.Int8.new([1, 2, 3])
+      %SqliteVec.Int8{data: <<1::integer-8, 2::integer-8, 3::integer-8>>}
+
+      iex> v1 = SqliteVec.Int8.new([1, 2, 3])
+      ...> SqliteVec.Int8.new(v1)
+      %SqliteVec.Int8{data: <<1::integer-8, 2::integer-8, 3::integer-8>>}
+
+      iex> SqliteVec.Int8.new(Nx.tensor([1, 2, 3], type: :s8))
+      %SqliteVec.Int8{data: <<1::integer-8, 2::integer-8, 3::integer-8>>}
+  """
+  def new(vector_or_list_or_tensor)
+
+  def new(%SqliteVec.Int8{} = vector) do
+    vector
+  end
+
+  def new(list) when is_list(list) do
+    if list == [] do
+      raise ArgumentError, "list must not be empty"
+    end
+
+    if Enum.any?(list, &(not valid_int8?(&1))) do
+      raise ArgumentError, "expected list elements to be valid int8 values"
+    end
+
+    bin = for v <- list, into: <<>>, do: <<v::signed-integer-8>>
+    from_binary(<<bin::binary>>)
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    def new(tensor) when is_struct(tensor, Nx.Tensor) do
+      if Nx.rank(tensor) != 1 do
+        raise ArgumentError, "expected rank to be 1"
+      end
+
+      if Nx.type(tensor) != {:s, 8} do
+        raise ArgumentError, "expected type to be :s8"
+      end
+
+      bin = Nx.to_binary(tensor)
+      from_binary(<<bin::binary>>)
+    end
+  end
+
+  defp valid_int8?(value) do
+    is_integer(value) and -128 <= value and value <= 127
+  end
+
+  @doc """
+  Creates a new vector from its binary representation
+  """
+  def from_binary(binary) when is_binary(binary) do
+    %SqliteVec.Int8{data: binary}
+  end
+
+  @doc """
+  Converts the vector to its binary representation
+  """
+  def to_binary(vector) when is_struct(vector, SqliteVec.Int8) do
+    vector.data
+  end
+
+  @doc """
+  Converts the vector to a list
+  """
+  def to_list(vector) when is_struct(vector, SqliteVec.Int8) do
+    <<bin::binary>> = vector.data
+
+    for <<v::signed-integer-8 <- bin>>, do: v
+  end
+
+  if Code.ensure_loaded?(Nx) do
+    @doc """
+    Converts the vector to a tensor
+    """
+    def to_tensor(vector) when is_struct(vector, SqliteVec.Int8) do
+      <<bin::binary>> = vector.data
+      Nx.from_binary(bin, :s8)
+    end
+  end
+end
+
+defimpl Inspect, for: SqliteVec.Int8 do
+  import Inspect.Algebra
+
+  def inspect(vector, opts) do
+    concat(["vec_int8('", Inspect.List.inspect(SqliteVec.Int8.to_list(vector), opts), "')"])
+  end
+end
diff --git a/bindings/elixir/mix.exs b/bindings/elixir/mix.exs
new file mode 100644
index 00000000..7a207ccc
--- /dev/null
+++ b/bindings/elixir/mix.exs
@@ -0,0 +1,96 @@
+defmodule SqliteVec.MixProject do
+  use Mix.Project
+
+  @source_url "https://github.com/joelpaulkoch/sqlite_vec"
+  @version "0.1.0"
+
+  def project do
+    [
+      app: :sqlite_vec,
+      version: @version,
+      elixir: "~> 1.17",
+      start_permanent: Mix.env() == :prod,
+      deps: deps(),
+      compilers: Mix.compilers() ++ [:download_sqlite_vec],
+      aliases: [
+        "compile.download_sqlite_vec": &download_sqlite_vec/1
+      ],
+      preferred_cli_env: [
+        "test.watch": :test
+      ],
+      name: "SqliteVec",
+      package: package(),
+      docs: docs(),
+      description: "A wrapper around sqlite-vec",
+      source_url: @source_url,
+      homepage_url: @source_url
+    ]
+  end
+
+  defp download_sqlite_vec(_) do
+    version = Application.get_env(:sqlite_vec, :version, SqliteVec.Downloader.default_version())
+
+    output_dir = Path.join(__DIR__, "priv/#{version}")
+    File.mkdir_p!(output_dir)
+
+    case SqliteVec.download(output_dir, version) do
+      :skip ->
+        :ok
+
+      {:ok, _successful_files, []} ->
+        :ok
+
+      {:ok, _successful_files, failed_files} ->
+        message = "failed to download: " <> Enum.join(failed_files, ", ")
+        raise(message)
+
+
{:error, message} -> + raise(message) + end + end + + defp deps do + [ + {:octo_fetch, "~> 0.4.0"}, + {:ecto, "~> 3.0", optional: true}, + {:nx, "~> 0.9", optional: true}, + {:ecto_sql, "~> 3.0", only: :test}, + {:ecto_sqlite3, "~> 0.17", only: :test}, + {:stream_data, "~> 1.0", only: :test}, + {:ex_doc, "~> 0.34", only: :dev, runtime: false}, + {:mix_test_watch, "~> 1.0", only: [:dev, :test], runtime: false}, + {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, + {:doctest_formatter, "~> 0.3.1", only: [:dev, :test], runtime: false} + ] + end + + defp package do + [ + maintainers: ["Joel Koch"], + licenses: ["MIT"], + links: %{ + GitHub: "https://github.com/joelpaulkoch/sqlite_vec" + }, + files: ~w(lib priv/.gitkeep .formatter.exs mix.exs README.md LICENSE) + ] + end + + defp docs do + [ + main: "readme", + source_ref: "v#{@version}", + source_url: @source_url, + extras: [ + {"README.md", title: "README"}, + "notebooks/getting_started.livemd", + "notebooks/usage_with_ecto.livemd" + ], + groups_for_extras: [ + Notebooks: [ + "notebooks/getting_started.livemd", + "notebooks/usage_with_ecto.livemd" + ] + ] + ] + end +end diff --git a/bindings/elixir/mix.lock b/bindings/elixir/mix.lock new file mode 100644 index 00000000..8ac6ff92 --- /dev/null +++ b/bindings/elixir/mix.lock @@ -0,0 +1,29 @@ +%{ + "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, + "castore": {:hex, :castore, "1.0.10", "43bbeeac820f16c89f79721af1b3e092399b3a1ecc8df1a472738fd853574911", [:mix], [], "hexpm", "1b0b7ea14d889d9ea21202c43a4fa015eb913021cb535e8ed91946f4b77a8848"}, + "cc_precompiler": {:hex, :cc_precompiler, "0.1.10", "47c9c08d8869cf09b41da36538f62bc1abd3e19e41701c2cea2675b53c704258", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "f6e046254e53cd6b41c6bacd70ae728011aa82b2742a80d6e2214855c6e06b22"}, + 
"complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"}, + "credo": {:hex, :credo, "1.7.10", "6e64fe59be8da5e30a1b96273b247b5cf1cc9e336b5fd66302a64b25749ad44d", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "71fbc9a6b8be21d993deca85bf151df023a3097b01e09a2809d460348561d8cd"}, + "db_connection": {:hex, :db_connection, "2.7.0", "b99faa9291bb09892c7da373bb82cba59aefa9b36300f6145c5f201c7adf48ec", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "dcf08f31b2701f857dfc787fbad78223d61a32204f217f15e881dd93e4bdd3ff"}, + "decimal": {:hex, :decimal, "2.2.0", "df3d06bb9517e302b1bd265c1e7f16cda51547ad9d99892049340841f3e15836", [:mix], [], "hexpm", "af8daf87384b51b7e611fb1a1f2c4d4876b65ef968fa8bd3adf44cff401c7f21"}, + "doctest_formatter": {:hex, :doctest_formatter, "0.3.1", "a3fd87c1f75e8a78e7737ec4a4494800ddda705998a59320b87fe4c59c030794", [:mix], [], "hexpm", "3c092540d8b73ffc526a92daa2dc2ecd50714f14325eeacbc7b4e790f890443a"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, + "ecto": {:hex, :ecto, "3.12.5", "4a312960ce612e17337e7cefcf9be45b95a3be6b36b6f94dfb3d8c361d631866", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6eb18e80bef8bb57e17f5a7f068a1719fbda384d40fc37acb8eb8aeca493b6ea"}, + "ecto_sql": {:hex, :ecto_sql, "3.12.1", 
"c0d0d60e85d9ff4631f12bafa454bc392ce8b9ec83531a412c12a0d415a3a4d0", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.12", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aff5b958a899762c5f09028c847569f7dfb9cc9d63bdb8133bff8a5546de6bf5"}, + "ecto_sqlite3": {:hex, :ecto_sqlite3, "0.17.5", "fbee5c17ff6afd8e9ded519b0abb363926c65d30b27577232bb066b2a79957b8", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.12", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.12", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:exqlite, "~> 0.22", [hex: :exqlite, repo: "hexpm", optional: false]}], "hexpm", "3b54734d998cbd032ac59403c36acf4e019670e8b6ceef9c6c33d8986c4e9704"}, + "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"}, + "ex_doc": {:hex, :ex_doc, "0.35.1", "de804c590d3df2d9d5b8aec77d758b00c814b356119b3d4455e4b8a8687aecaf", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2121c6402c8d44b05622677b761371a759143b958c6c19f6558ff64d0aed40df"}, + "exqlite": {:hex, :exqlite, "0.27.1", 
"73fc0b3dc3b058a77a2b3771f82a6af2ddcf370b069906968a34083d2ffd2884", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "79ef5756451cfb022e8013e1ed00d0f8f7d1333c19502c394dc16b15cfb4e9b4"}, + "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, + "makeup_elixir": {:hex, :makeup_elixir, "1.0.0", "74bb8348c9b3a51d5c589bf5aebb0466a84b33274150e3b6ece1da45584afc82", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "49159b7d7d999e836bedaf09dcf35ca18b312230cf901b725a64f3f42e407983"}, + "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, + "mix_test_watch": {:hex, :mix_test_watch, "1.2.0", "1f9acd9e1104f62f280e30fc2243ae5e6d8ddc2f7f4dc9bceb454b9a41c82b42", 
[:mix], [{:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}], "hexpm", "278dc955c20b3fb9a3168b5c2493c2e5cffad133548d307e0a50c7f2cfbf34f6"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, + "nx": {:hex, :nx, "0.9.2", "17563029c01bf749aad3c31234326d7665abd0acc33ee2acbe531a4759f29a8a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "914d74741617d8103de8ab1f8c880353e555263e1c397b8a1109f79a3716557f"}, + "octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"}, + "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, + "stream_data": {:hex, :stream_data, "1.1.2", "05499eaec0443349ff877aaabc6e194e82bda6799b9ce6aaa1aadac15a9fdb4d", [:mix], [], "hexpm", "129558d2c77cbc1eb2f4747acbbea79e181a5da51108457000020a906813a1a9"}, + "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, +} diff --git a/bindings/elixir/notebooks/getting_started.livemd b/bindings/elixir/notebooks/getting_started.livemd new file mode 100644 index 00000000..c3bbe787 --- /dev/null +++ b/bindings/elixir/notebooks/getting_started.livemd @@ -0,0 +1,54 @@ +# Getting Started 
+ +```elixir +Mix.install([ + {:sqlite_vec, github: "joelpaulkoch/sqlite_vec"}, + {:exqlite, "~> 0.25.0"} +]) +``` + +## Sample usage + +This example is taken directly from the original `sqlite-vec` [README](https://github.com/asg017/sqlite-vec/). + +First, we open a new connection, then we load the extension. +Now we are ready to create the virtual table where we can store vectors. + +```elixir +alias Exqlite.Basic + +{:ok, conn} = Basic.open(":memory:") +:ok = Basic.enable_load_extension(conn) + +Basic.load_extension(conn, SqliteVec.path()) +Basic.exec(conn, "create virtual table vec_examples using vec0(sample_embedding float[8]);", []) +``` + +We insert some vectors into the database. + +```elixir +Basic.exec(conn, """ +-- vectors can be provided as JSON or in a compact binary format +insert into vec_examples(rowid, sample_embedding) + values + (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'), + (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'), + (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'), + (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]'); +""") +``` + +Then, we can query for nearest neighbors. 
+ +```elixir +Basic.exec(conn, """ +-- KNN style query +select + rowid, + distance +from vec_examples +where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]' +order by distance +limit 2; +""") +``` diff --git a/bindings/elixir/notebooks/usage_with_ecto.livemd b/bindings/elixir/notebooks/usage_with_ecto.livemd new file mode 100644 index 00000000..c056064d --- /dev/null +++ b/bindings/elixir/notebooks/usage_with_ecto.livemd @@ -0,0 +1,177 @@ +# Usage with Ecto + +```elixir +Mix.install([ + {:sqlite_vec, github: "joelpaulkoch/sqlite_vec"}, + {:ecto, "~> 3.12"}, + {:ecto_sql, "~> 3.12"}, + {:ecto_sqlite3, "~> 0.17.2"}, + {:kino, "~> 0.14.1"}, + {:nx, "~> 0.9.1"} +]) +``` + +## Setup + +You can load the extension in the runtime configuration like so: + + + +```elixir +config :ecto_sqlite3, load_extensions: [SqliteVec.path()] +``` + + + +Next, we must define our Repo. + +```elixir +defmodule MyApp.Repo do + use Ecto.Repo, + otp_app: :my_app, + adapter: Ecto.Adapters.SQLite3 +end + +db_path = Path.join(System.tmp_dir!(), "demo.db") +Kino.start_child({MyApp.Repo, database: db_path, load_extensions: [SqliteVec.path()]}) +``` + +## With a regular table + +You can use the vector types in any regular table. Note however, that using virtual tables as described below will be [faster](https://alexgarcia.xyz/sqlite-vec/features/knn.html). 
+ +```elixir +defmodule MyApp.Repo.Migrations.CreateEmbeddingsTable do + use Ecto.Migration + + def up do + execute("CREATE TABLE embeddings(id INTEGER PRIMARY KEY, embedding float[4], metadata TEXT)") + end + + def down do + execute("DROP TABLE embeddings") + end +end +``` + +```elixir +Ecto.Migrator.up(MyApp.Repo, 1, MyApp.Repo.Migrations.CreateEmbeddingsTable) +``` + +Schema definition: + +```elixir +defmodule Embedding do + use Ecto.Schema + + schema "embeddings" do + field(:embedding, SqliteVec.Ecto.Float32) + field(:metadata, :string) + end +end +``` + +Insert some vectors: + +```elixir +MyApp.Repo.insert(%Embedding{ + embedding: SqliteVec.Float32.new([1, 2, 3, 4]) +}) + +MyApp.Repo.insert(%Embedding{ + embedding: SqliteVec.Float32.new([3, 4, 5, 6]) +}) + +MyApp.Repo.insert(%Embedding{ + embedding: SqliteVec.Float32.new(Nx.tensor([3, 4, 5, 6], type: :f32)), + metadata: "from tensor" +}) +``` + +Query them: + +```elixir +import Ecto.Query +import SqliteVec.Ecto.Query + +v = SqliteVec.Float32.new([2, 2, 3, 3]) + +MyApp.Repo.all( + from(i in Embedding, + order_by: vec_distance_L2(i.embedding, vec_f32(v)) + ) +) +``` + +## With a virtual table + +You can create a virtual table to enable fast KNN queries. Since version 0.1.6, auxiliary columns and metadata are available in virtual tables. +Please refer to [this](https://alexgarcia.xyz/sqlite-vec/features/vec0.html) documentation to decide what's best for your use case. 
+ +```elixir +defmodule MyApp.Repo.Migrations.CreateVirtualEmbeddingsTable do + use Ecto.Migration + + def up do + execute( + "CREATE VIRTUAL TABLE virtual_embeddings_table USING vec0(id INTEGER PRIMARY KEY, embedding float[2], metadata TEXT, +auxiliary_column TEXT)" + ) + end + + def down do + execute("DROP TABLE virtual_embeddings_table") + end +end +``` + +```elixir +Ecto.Migrator.up(MyApp.Repo, 2, MyApp.Repo.Migrations.CreateVirtualEmbeddingsTable) +``` + +```elixir +defmodule VirtualEmbedding do + use Ecto.Schema + + schema "virtual_embeddings_table" do + field(:embedding, SqliteVec.Ecto.Float32) + field(:metadata, :string) + field(:auxiliary_column, :string) + end +end +``` + +```elixir +MyApp.Repo.insert(%VirtualEmbedding{ + embedding: SqliteVec.Float32.new([0, 0]), + metadata: "first vector", + auxiliary_column: "first vector" +}) + +MyApp.Repo.insert(%VirtualEmbedding{ + embedding: SqliteVec.Float32.new([1, 1]), + metadata: "second vector", + auxiliary_column: "second vector" +}) + +MyApp.Repo.insert(%VirtualEmbedding{ + embedding: SqliteVec.Float32.new([2, 3]), + metadata: "third vector", + auxiliary_column: "third vector" +}) +``` + +You can perform a K-nearest-neighbors query using `match` and `limit`. 
+ +```elixir +import Ecto.Query +import SqliteVec.Ecto.Query + +v = SqliteVec.Float32.new([2, 2]) + +MyApp.Repo.all( + from(i in VirtualEmbedding, + where: match(i.embedding, vec_f32(v)), + limit: 3 + ) +) +``` diff --git a/bindings/elixir/priv/.gitkeep b/bindings/elixir/priv/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/bindings/elixir/test/sqlite_vec/bit_ecto_test.exs b/bindings/elixir/test/sqlite_vec/bit_ecto_test.exs new file mode 100644 index 00000000..05b70758 --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/bit_ecto_test.exs @@ -0,0 +1,167 @@ +defmodule BitItem do + use Ecto.Schema + + schema "bit_ecto_items" do + field(:embedding, SqliteVec.Ecto.Bit) + end +end + +defmodule BitEctoTest do + use ExUnit.Case, async: false + + import Ecto.Query + import SqliteVec.Ecto.Query + + setup_all do + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS test", []) + + Ecto.Adapters.SQL.query!(Repo, "CREATE TABLE test (some_column)", []) + + Ecto.Adapters.SQL.query!(Repo, "INSERT INTO test (some_column) VALUES ($1)", ["test dummy"]) + + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS bit_ecto_items", []) + + Ecto.Adapters.SQL.query!( + Repo, + "CREATE VIRTUAL TABLE bit_ecto_items USING vec0(id INTEGER PRIMARY KEY, embedding bit[8])", + [] + ) + + create_items() + :ok + end + + defp create_items do + Ecto.Adapters.SQL.query!( + Repo, + "insert into bit_ecto_items(id, embedding) values(1, vec_bit(X'FF')), (2, vec_bit(X'00')), (3, vec_bit(X'0A'))", + [] + ) + end + + test "match performs a KNN query" do + items = + Repo.all( + from(i in BitItem, + where: + match( + i.embedding, + vec_bit(SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])) + ), + limit: 3 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [2, 3, 1] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Bit.to_list() end) == [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1] + ] + end + + test "vector hamming distance" do + items = + 
Repo.all( + from(i in BitItem, + order_by: + vec_distance_hamming( + i.embedding, + vec_bit(SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])) + ), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [2, 3, 1] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Bit.to_list() end) == [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1] + ] + end + + test "vec_length returns number of elements of vector" do + vector = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 0] ++ [0, 0, 0, 0, 1, 0, 1, 0]) + assert Repo.one(from("test", select: vec_length(vec_bit(vector)))) == 16 + end + + test "vec_type returns vector type as string" do + vector = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 0]) + assert Repo.one(from("test", select: vec_type(vec_bit(vector)))) == "bit" + end + + test "vec_add errors" do + v1 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 0]) + v2 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1]) + + assert_raise Exqlite.Error, fn -> + Repo.one(from("test", select: vec_add(vec_bit(v1), vec_bit(v2)))) + end + end + + test "vec_sub errros" do + v1 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 0]) + v2 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1]) + + assert_raise Exqlite.Error, fn -> + Repo.one(from("test", select: vec_sub(vec_bit(v1), vec_bit(v2)))) + end + end + + test "vec_normalize errors" do + vector = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 0]) + + assert_raise Exqlite.Error, fn -> + Repo.one(from("test", select: vec_normalize(vec_bit(vector)))) + end + end + + test "vec_slice extracts subset of vector" do + vector = + SqliteVec.Bit.new( + [0, 0, 0, 0, 0, 0, 0, 1] ++ + [0, 0, 0, 0, 0, 0, 1, 0] ++ + [0, 0, 0, 0, 0, 0, 1, 1] ++ + [0, 0, 0, 0, 0, 1, 0, 0] + ) + + binary = Repo.one(from("test", select: vec_slice(vec_bit(vector), ^8, ^24))) + + assert SqliteVec.Bit.from_binary(binary) == + SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 1, 0] ++ [0, 0, 0, 0, 0, 0, 1, 1]) + end + + test "vec_to_json returns vector as json" do + vector = + 
SqliteVec.Bit.new( + [0, 0, 0, 0, 0, 0, 0, 1] ++ + [0, 0, 0, 0, 0, 0, 1, 0] ++ + [0, 0, 0, 0, 0, 0, 1, 1] + ) + + assert Repo.one(from("test", select: vec_to_json(vec_bit(vector)))) == + "[1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0]" + end + + test "vec_quantize_binary errors" do + vector = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1]) + + assert_raise Exqlite.Error, fn -> + Repo.one(from("test", select: vec_quantize_binary(vec_bit(vector)))) + end + end + + @tag :skip + test "cast" do + embedding = [1, 2] + items = Repo.all(from(i in BitItem, where: i.embedding == ^embedding)) + assert Enum.map(items, fn v -> v.id end) == [1] + end +end diff --git a/bindings/elixir/test/sqlite_vec/bit_test.exs b/bindings/elixir/test/sqlite_vec/bit_test.exs new file mode 100644 index 00000000..398a0853 --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/bit_test.exs @@ -0,0 +1,144 @@ +defmodule SqliteVec.Bit.Test do + use ExUnit.Case + use ExUnitProperties + + doctest SqliteVec.Bit + + defp shape_generator do + {StreamData.positive_integer()} + end + + defp type_generator do + [ + # {:u, 2}, + # {:u, 4}, + {:u, 8}, + {:u, 16}, + {:u, 32}, + {:u, 64}, + # {:s, 2}, + # {:s, 4}, + {:s, 8}, + {:s, 16}, + {:s, 32}, + {:s, 64}, + {:f, 8}, + {:f, 16}, + {:f, 32}, + {:f, 64}, + {:bf, 16}, + {:c, 64}, + {:c, 128} + ] + |> Enum.map(&StreamData.constant(&1)) + |> StreamData.one_of() + end + + defp tensor_generator do + gen all(seed <- StreamData.integer(), shape <- shape_generator(), type <- type_generator()) do + key = Nx.Random.key(seed) + + {tensor, _key} = + case type do + {:s, _} -> random_integer(type, shape, key) + {:u, _} -> random_integer(type, shape, key) + {:f, _} -> random_float(type, shape, key) + {:bf, _} -> random_float(type, shape, key) + {:c, _} -> random_complex(type, shape, key) + end + + tensor + end + |> StreamData.filter(&finite?(&1)) + end + + defp random_integer(type, shape, key) do + min = Nx.Constants.min_finite(type) |> Nx.to_number() + max = 
Nx.Constants.max_finite(type) |> Nx.to_number() + Nx.Random.randint(key, min, max, shape: shape, type: type) + end + + defp random_float(type, shape, key) do + min = (Nx.Constants.min_finite(type) |> Nx.to_number()) / 2 + max = (Nx.Constants.max_finite(type) |> Nx.to_number()) / 2 + Nx.Random.uniform(key, min, max, shape: shape, type: type) + end + + defp random_complex(type, shape, key) do + Nx.Random.uniform(key, shape: shape, type: type) + end + + defp finite?(tensor) do + tensor |> Nx.is_infinity() |> Nx.any() |> Nx.to_number() == 0 + end + + test "creating vector from vector works" do + vector = SqliteVec.Bit.new([1, 0, 1, 0, 1, 1, 1, 1]) + assert vector == vector |> SqliteVec.Bit.new() + end + + test "creating vector from list of bits works" do + list = [1, 0, 1, 0, 1, 1, 1, 1] + assert list == list |> SqliteVec.Bit.new() |> SqliteVec.Bit.to_list() + end + + test "creating vector from empty list errors" do + assert_raise ArgumentError, fn -> SqliteVec.Bit.new([]) end + end + + test "list length must be divisible by 8" do + list = [1, 0, 0, 0] + assert_raise ArgumentError, fn -> SqliteVec.Bit.new(list) end + end + + test "list elements are expected to be 0 or 1" do + list = [2, 1, 1, 1, 1, 1, 1, 1] + assert_raise ArgumentError, fn -> SqliteVec.Bit.new(list) end + end + + property "creating vector from list of bits and calling to_list/1 returns original list" do + check all( + bytes <- StreamData.positive_integer(), + bitlist <- StreamData.list_of(StreamData.integer(0..1), length: 8 * bytes) + ) do + assert bitlist == bitlist |> SqliteVec.Bit.new() |> SqliteVec.Bit.to_list() + end + end + + test "creating vector from tensor works" do + tensor = Nx.tensor([1, 2, 3], type: :u8) + + assert tensor == tensor |> SqliteVec.Bit.new() |> SqliteVec.Bit.to_tensor() + end + + test "creating vector from tensor of a type with size that's not divisible by 8 errors" do + types = [:u2, :u4, :s2, :s4] + + for type <- types do + assert_raise ArgumentError, fn -> 
SqliteVec.Bit.new(Nx.tensor([1], type: type)) end + end + end + + property "creating vector from tensor and calling to_tensor/1 returns u8 tensor with original binary value" do + check all(tensor <- tensor_generator()) do + assert Nx.to_binary(tensor) == + tensor + |> SqliteVec.Bit.new() + |> SqliteVec.Bit.to_tensor() + |> Nx.to_binary() + end + end + + test "inspect" do + vector = SqliteVec.Bit.new([1, 0, 1, 0, 1, 1, 1, 1]) + assert "vec_bit('[1, 0, 1, 0, 1, 1, 1, 1]')" == inspect(vector) + end + + test "equals" do + assert SqliteVec.Bit.new([0, 0, 0, 0, 1, 1, 1, 1]) == + SqliteVec.Bit.new([0, 0, 0, 0, 1, 1, 1, 1]) + + refute SqliteVec.Bit.new([1, 0, 0, 0, 1, 1, 1, 1]) == + SqliteVec.Bit.new([0, 0, 0, 0, 1, 1, 1, 1]) + end +end diff --git a/bindings/elixir/test/sqlite_vec/float32_ecto_test.exs b/bindings/elixir/test/sqlite_vec/float32_ecto_test.exs new file mode 100644 index 00000000..7925ffc0 --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/float32_ecto_test.exs @@ -0,0 +1,183 @@ +defmodule Float32Item do + use Ecto.Schema + + schema "float32_ecto_items" do + field(:embedding, SqliteVec.Ecto.Float32) + end +end + +defmodule EctoTest do + use ExUnit.Case, async: false + + import Ecto.Query + import SqliteVec.Ecto.Query + + setup_all do + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS test", []) + + Ecto.Adapters.SQL.query!(Repo, "CREATE TABLE test (some_column)", []) + + Ecto.Adapters.SQL.query!(Repo, "INSERT INTO test (some_column) VALUES ($1)", ["test dummy"]) + + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS float32_ecto_items", []) + + Ecto.Adapters.SQL.query!( + Repo, + "CREATE VIRTUAL TABLE float32_ecto_items USING vec0(id INTEGER PRIMARY KEY, embedding float[2])", + [] + ) + + create_items() + :ok + end + + defp create_items do + Repo.insert(%Float32Item{ + embedding: SqliteVec.Float32.new([1, 2]) + }) + + Repo.insert(%Float32Item{ + embedding: SqliteVec.Float32.new([52.0, 43.0]) + }) + + Repo.insert(%Float32Item{ + embedding: 
SqliteVec.Float32.new(Nx.tensor([3, 4], type: :f32)) + }) + end + + test "match performs a KNN query" do + v = SqliteVec.Float32.new([2, 2]) + + items = + Repo.all( + from(i in Float32Item, + where: match(i.embedding, vec_f32(v)), + limit: 3 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [1, 3, 2] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Float32.to_list() end) == [ + [1.0, 2.0], + [3.0, 4.0], + [52.0, 43.0] + ] + end + + test "vector l2 distance" do + v = SqliteVec.Float32.new([2, 2]) + + items = + Repo.all( + from(i in Float32Item, + order_by: vec_distance_L2(i.embedding, vec_f32(v)), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [1, 3, 2] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Float32.to_list() end) == [ + [1.0, 2.0], + [3.0, 4.0], + [52.0, 43.0] + ] + end + + test "vector cosine distance" do + items = + Repo.all( + from(i in Float32Item, + order_by: vec_distance_cosine(i.embedding, vec_f32(SqliteVec.Float32.new([1, 1]))), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> v.id end) == [2, 3, 1] + end + + test "vector cosine similarity" do + items = + Repo.all( + from(i in Float32Item, + order_by: 1 - vec_distance_cosine(i.embedding, vec_f32(SqliteVec.Float32.new([1, 1]))), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> v.id end) == [1, 3, 2] + end + + test "vec_length returns number of elements of vector" do + vector = SqliteVec.Float32.new([1, 2, 3]) + assert Repo.one(from("test", select: vec_length(vec_f32(vector)))) == 3 + end + + test "vec_type returns vector type as string" do + vector = SqliteVec.Float32.new([1, 2, 3]) + assert Repo.one(from("test", select: vec_type(vec_f32(vector)))) == "float32" + end + + test "vec_add adds two vectors element wise" do + v1 = SqliteVec.Float32.new([1, 2, 3]) + v2 = SqliteVec.Float32.new([4, 5, 6]) + + binary = Repo.one(from("test", select: vec_add(vec_f32(v1), vec_f32(v2)))) + + assert SqliteVec.Float32.from_binary(binary) == + 
SqliteVec.Float32.new([5, 7, 9]) + end + + test "vec_sub subtracts two vectors element wise" do + v1 = SqliteVec.Float32.new([1, 22, 3]) + v2 = SqliteVec.Float32.new([4, 15, 26]) + + binary = Repo.one(from("test", select: vec_sub(vec_f32(v1), vec_f32(v2)))) + + assert SqliteVec.Float32.from_binary(binary) == + SqliteVec.Float32.new([-3, 7, -23]) + end + + test "vec_normalize performs l2 normalization" do + vector = SqliteVec.Float32.new([3, 4]) + l2_norm = :math.sqrt(3 * 3 + 4 * 4) + binary = Repo.one(from("test", select: vec_normalize(vec_f32(vector)))) + + assert SqliteVec.Float32.from_binary(binary) == + SqliteVec.Float32.new([3 / l2_norm, 4 / l2_norm]) + end + + test "vec_slice extracts subset of vector" do + vector = SqliteVec.Float32.new([1, 2, 3, 4]) + + binary = Repo.one(from("test", select: vec_slice(vec_f32(vector), ^1, ^3))) + + assert SqliteVec.Float32.from_binary(binary) == + SqliteVec.Float32.new([2, 3]) + end + + test "vec_to_json returns vector as json" do + vector = SqliteVec.Float32.new([1, 2, 3]) + + assert Repo.one(from("test", select: vec_to_json(vec_f32(vector)))) == + "[1.000000,2.000000,3.000000]" + end + + test "vec_quantize_binary quantizes vector into bitvector" do + vector = SqliteVec.Float32.new([1, -2, 3, -4, -5, 6, -7, -8]) + binary = Repo.one(from("test", select: vec_quantize_binary(vec_f32(vector)))) + + assert binary == <<0b00100101>> + end + + test "cast" do + embedding = [1.0, 2.0] + items = Repo.all(from(i in Float32Item, where: i.embedding == ^embedding)) + assert Enum.map(items, fn v -> v.id end) == [1] + end +end diff --git a/bindings/elixir/test/sqlite_vec/float32_test.exs b/bindings/elixir/test/sqlite_vec/float32_test.exs new file mode 100644 index 00000000..2eb361aa --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/float32_test.exs @@ -0,0 +1,121 @@ +defmodule SqliteVec.Float32.Test do + use ExUnit.Case + use ExUnitProperties + + doctest SqliteVec.Float32 + + defp float32_generator do + gen all(float <- 
StreamData.float()) do + <> = <> + + float32 + end + end + + defp shape_generator do + {StreamData.positive_integer()} + end + + defp f32_tensor_generator do + gen all(seed <- StreamData.integer(), shape <- shape_generator()) do + type = {:f, 32} + key = Nx.Random.key(seed) + + min = (Nx.Constants.min_finite(type) |> Nx.to_number()) / 2 + max = (Nx.Constants.max_finite(type) |> Nx.to_number()) / 2 + + {tensor, _key} = Nx.Random.uniform(key, min, max, shape: shape, type: type) + + tensor + end + |> StreamData.filter(&finite?(&1)) + end + + defp finite?(tensor) do + tensor |> Nx.is_infinity() |> Nx.any() |> Nx.to_number() == 0 + end + + test "creating vector from vector works" do + vector = SqliteVec.Float32.new([1, 2, 3]) + assert vector == vector |> SqliteVec.Float32.new() + end + + test "creating vector from list works" do + list = [1.0, 2.0, 3.0] + assert list == list |> SqliteVec.Float32.new() |> SqliteVec.Float32.to_list() + end + + test "creating vector from empty list errors" do + assert_raise ArgumentError, fn -> SqliteVec.Float32.new([]) end + end + + property "creating vector from list of float32 and calling to_list/1 returns original list" do + check all(list <- StreamData.list_of(float32_generator(), min_length: 1)) do + assert list == list |> SqliteVec.Float32.new() |> SqliteVec.Float32.to_list() + end + end + + test "creating vector from tensor of type f32 works" do + tensor = Nx.tensor([1.0, 2.0, 3.0], type: :f32) + assert tensor == tensor |> SqliteVec.Float32.new() |> SqliteVec.Float32.to_tensor() + end + + test "creating vector from tensor that's not of type f32 errors" do + types = + [ + :u2, + :u4, + :u8, + :u16, + :u32, + :u64, + :s2, + :s4, + :s8, + :s16, + :s32, + :s64, + :f8, + :f16, + # :f32, + :f64, + :bf16, + :c64, + :c128 + ] + + for type <- types do + assert_raise ArgumentError, fn -> SqliteVec.Float32.new(Nx.tensor([1], type: type)) end + end + end + + property "creating vector from tensor of type :f32 and calling to_tensor/1 returns 
original tensor" do + check all(tensor <- f32_tensor_generator()) do + assert tensor == + tensor + |> SqliteVec.Float32.new() + |> SqliteVec.Float32.to_tensor() + |> Nx.as_type(Nx.type(tensor)) + end + end + + test "inspect" do + vector = SqliteVec.Float32.new([1, 2, 3]) + assert "vec_f32('[1.0, 2.0, 3.0]')" == inspect(vector) + end + + test "equals" do + assert SqliteVec.Float32.new([1, 2, 3]) == SqliteVec.Float32.new([1, 2, 3]) + refute SqliteVec.Float32.new([1, 2, 3]) == SqliteVec.Float32.new([1, 2, 4]) + end + + test "vectors are stored as binaries in system endianness" do + case System.endianness() do + :little -> + assert SqliteVec.Float32.new([2]).data == <<0x00, 0x00, 0x00, 0x40>> + + :big -> + assert SqliteVec.Float32.new([2]).data == <<0x40, 0x00, 0x00, 0x00>> + end + end +end diff --git a/bindings/elixir/test/sqlite_vec/int8_ecto_test.exs b/bindings/elixir/test/sqlite_vec/int8_ecto_test.exs new file mode 100644 index 00000000..416f2008 --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/int8_ecto_test.exs @@ -0,0 +1,185 @@ +defmodule Int8Item do + use Ecto.Schema + + schema "int8_ecto_items" do + field(:embedding, SqliteVec.Ecto.Int8) + end +end + +defmodule Int8EctoTest do + use ExUnit.Case, async: false + + import Ecto.Query + import SqliteVec.Ecto.Query + + setup_all do + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS test", []) + + Ecto.Adapters.SQL.query!(Repo, "CREATE TABLE test (some_column)", []) + + Ecto.Adapters.SQL.query!(Repo, "INSERT INTO test (some_column) VALUES ($1)", ["test dummy"]) + + Ecto.Adapters.SQL.query!(Repo, "DROP TABLE IF EXISTS int8_ecto_items", []) + + Ecto.Adapters.SQL.query!( + Repo, + "CREATE VIRTUAL TABLE int8_ecto_items USING vec0(id INTEGER PRIMARY KEY, embedding int8[2])", + [] + ) + + create_items() + :ok + end + + defp create_items do + Ecto.Adapters.SQL.query!( + Repo, + "insert into int8_ecto_items(id, embedding) values(1, vec_int8('[1, 2]')), (2, vec_int8('[52, 43]')), (3, vec_int8('[3, 4]'))", + [] + ) + + 
# Repo.insert(%Int8Item{ + # embedding: SqliteVec.Int8.new([1, 2]) + # }) + + # Repo.insert(%Int8Item{ + # embedding: SqliteVec.Int8.new([52, 43]) + # }) + + # Repo.insert(%Int8Item{ + # embedding: Nx.tensor([3, 4], type: :s8) + # }) + end + + test "vector l2 distance" do + items = + Repo.all( + from(i in Int8Item, + order_by: vec_distance_L2(i.embedding, vec_int8(SqliteVec.Int8.new([2, 2]))), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [1, 3, 2] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Int8.to_list() end) == [ + [1, 2], + [3, 4], + [52, 43] + ] + end + + test "match performs a KNN query" do + items = + Repo.all( + from(i in Int8Item, + where: match(i.embedding, vec_int8(SqliteVec.Int8.new([2, 2]))), + limit: 3 + ) + ) + + assert Enum.map(items, fn v -> + v.id + end) == [1, 3, 2] + + assert Enum.map(items, fn v -> v.embedding |> SqliteVec.Int8.to_list() end) == [ + [1, 2], + [3, 4], + [52, 43] + ] + end + + test "vector cosine distance" do + items = + Repo.all( + from(i in Int8Item, + order_by: vec_distance_cosine(i.embedding, vec_int8(SqliteVec.Int8.new([1, 1]))), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> v.id end) == [2, 3, 1] + end + + test "vector cosine similarity" do + items = + Repo.all( + from(i in Int8Item, + order_by: 1 - vec_distance_cosine(i.embedding, vec_int8(SqliteVec.Int8.new([1, 1]))), + limit: 5 + ) + ) + + assert Enum.map(items, fn v -> v.id end) == [1, 3, 2] + end + + test "vec_length returns number of elements of vector" do + vector = SqliteVec.Int8.new([1, 2, 3]) + assert Repo.one(from("test", select: vec_length(vec_int8(vector)))) == 3 + end + + test "vec_type returns vector type as string" do + vector = SqliteVec.Int8.new([1, 2, 3]) + assert Repo.one(from("test", select: vec_type(vec_int8(vector)))) == "int8" + end + + test "vec_add adds two vectors element wise" do + v1 = SqliteVec.Int8.new([1, 2, 3]) + v2 = SqliteVec.Int8.new([4, 5, 6]) + + binary = Repo.one(from("test", select: 
vec_add(vec_int8(v1), vec_int8(v2)))) + + assert SqliteVec.Int8.from_binary(binary) == + SqliteVec.Int8.new([5, 7, 9]) + end + + test "vec_sub subtracts two vectors element wise" do + v1 = SqliteVec.Int8.new([1, 22, 3]) + v2 = SqliteVec.Int8.new([4, 15, 26]) + + binary = Repo.one(from("test", select: vec_sub(vec_int8(v1), vec_int8(v2)))) + + assert SqliteVec.Int8.from_binary(binary) == + SqliteVec.Int8.new([-3, 7, -23]) + end + + test "vec_normalize errors" do + vector = SqliteVec.Int8.new([3, 4]) + + assert_raise Exqlite.Error, fn -> + Repo.one(from("test", select: vec_normalize(vec_int8(vector)))) + end + end + + test "vec_slice extracts subset of vector" do + vector = SqliteVec.Int8.new([1, 2, 3, 4]) + + binary = Repo.one(from("test", select: vec_slice(vec_int8(vector), ^1, ^3))) + + assert SqliteVec.Int8.from_binary(binary) == + SqliteVec.Int8.new([2, 3]) + end + + test "vec_to_json returns vector as json" do + vector = SqliteVec.Int8.new([1, 2, 3]) + + assert Repo.one(from("test", select: vec_to_json(vec_int8(vector)))) == + "[1,2,3]" + end + + test "vec_quantize_binary quantizes vector into bitvector" do + vector = SqliteVec.Int8.new([1, -2, 3, -4, -5, 6, -7, -8]) + binary = Repo.one(from("test", select: vec_quantize_binary(vec_int8(vector)))) + + assert binary == <<0b00100101>> + end + + @tag :skip + test "cast" do + embedding = [1, 2] + items = Repo.all(from(i in Int8Item, where: i.embedding == ^embedding)) + assert Enum.map(items, fn v -> v.id end) == [1] + end +end diff --git a/bindings/elixir/test/sqlite_vec/int8_test.exs b/bindings/elixir/test/sqlite_vec/int8_test.exs new file mode 100644 index 00000000..961251fb --- /dev/null +++ b/bindings/elixir/test/sqlite_vec/int8_test.exs @@ -0,0 +1,113 @@ +defmodule SqliteVec.Int8.Test do + use ExUnit.Case + use ExUnitProperties + + doctest SqliteVec.Int8 + + defp int8_generator do + gen all(integer <- StreamData.integer()) do + <<int8::signed-integer-size(8)>> = <<integer::size(8)>> # keep the low 8 bits, reread them as a signed int8 (-128..127) + + int8 + end + end + + defp shape_generator do +
{StreamData.positive_integer()} + end + + defp s8_tensor_generator do + gen all(seed <- StreamData.integer(), shape <- shape_generator()) do + type = {:s, 8} + key = Nx.Random.key(seed) + + min = Nx.Constants.min_finite(type) |> Nx.to_number() + max = Nx.Constants.max_finite(type) |> Nx.to_number() + + {tensor, _key} = Nx.Random.randint(key, min, max, shape: shape, type: type) + tensor + end + end + + test "creating vector from vector works" do + vector = SqliteVec.Int8.new([1, 2, 3]) + assert vector == vector |> SqliteVec.Int8.new() + end + + test "creating vector from list works" do + list = [1, 2, 3] + assert list == list |> SqliteVec.Int8.new() |> SqliteVec.Int8.to_list() + end + + test "creating vector from empty list errors" do + assert_raise ArgumentError, fn -> SqliteVec.Int8.new([]) end + end + + test "list elements are expected to be valid int8 values" do + assert_raise ArgumentError, fn -> SqliteVec.Int8.new([128]) end + assert_raise ArgumentError, fn -> SqliteVec.Int8.new([-129]) end + + assert SqliteVec.Int8.new([127]) + assert SqliteVec.Int8.new([-128]) + end + + property "creating vector from list of int8 and calling to_list/1 returns original list" do + check all(list <- StreamData.list_of(int8_generator(), min_length: 1)) do + assert list == list |> SqliteVec.Int8.new() |> SqliteVec.Int8.to_list() + end + end + + test "creating vector from tensor of type s8 works" do + tensor = Nx.tensor([1, 2, 3], type: :s8) + assert tensor == tensor |> SqliteVec.Int8.new() |> SqliteVec.Int8.to_tensor() + end + + test "creating vector from tensor that's not of type s8 errors" do + types = + [ + :u2, + :u4, + :u8, + :u16, + :u32, + :u64, + :s2, + :s4, + # :s8, + :s16, + :s32, + :s64, + :f8, + :f16, + :f32, + :f64, + :bf16, + :c64, + :c128 + ] + + for type <- types do + assert_raise ArgumentError, fn -> SqliteVec.Int8.new(Nx.tensor([1], type: type)) end + end + end + + property "creating vector from tensor of type :s8 and calling to_tensor/1 returns original tensor" 
do + check all(tensor <- s8_tensor_generator()) do + assert tensor == + tensor + |> SqliteVec.Int8.new() + |> SqliteVec.Int8.to_tensor() + |> Nx.as_type(Nx.type(tensor)) + end + end + + test "inspect" do + vector = SqliteVec.Int8.new([1, 2, 3]) + assert "vec_int8('[1, 2, 3]')" == inspect(vector) + end + + test "equals" do + assert SqliteVec.Int8.new([1, 2, 3]) == SqliteVec.Int8.new([1, 2, 3]) + refute SqliteVec.Int8.new([1, 2, 3]) == SqliteVec.Int8.new([1, 2, 4]) + end +end diff --git a/bindings/elixir/test/sqlite_vec_test.exs b/bindings/elixir/test/sqlite_vec_test.exs new file mode 100644 index 00000000..5b8b8584 --- /dev/null +++ b/bindings/elixir/test/sqlite_vec_test.exs @@ -0,0 +1,8 @@ +defmodule SqliteVecTest do + use ExUnit.Case + + @tag :slow + test "supported downloads should work" do + assert :ok = OctoFetch.Test.test_all_supported_downloads(SqliteVec.Downloader) + end +end diff --git a/bindings/elixir/test/test_helper.exs b/bindings/elixir/test/test_helper.exs new file mode 100644 index 00000000..1cd1f454 --- /dev/null +++ b/bindings/elixir/test/test_helper.exs @@ -0,0 +1,15 @@ +defmodule Repo do + use Ecto.Repo, + otp_app: :my_app, + adapter: Ecto.Adapters.SQLite3 +end + +Repo.start_link( + database: Path.join(__DIR__, "sqlite_vec_test#{System.get_env("MIX_TEST_PARTITION")}.db"), + pool: Ecto.Adapters.SQL.Sandbox, + pool_size: 5, + load_extensions: [SqliteVec.path()] +) + +ExUnit.configure(exclude: :slow) +ExUnit.start() From 79c370c31fd28543c0d606b136bb7d1fc689704f Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Sat, 7 Dec 2024 16:35:09 +0100 Subject: [PATCH 2/9] Add elixir example --- examples/simple-elixir/demo.exs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 examples/simple-elixir/demo.exs diff --git a/examples/simple-elixir/demo.exs b/examples/simple-elixir/demo.exs new file mode 100644 index 00000000..09273dfc --- /dev/null +++ b/examples/simple-elixir/demo.exs @@ -0,0 +1,33 @@ +Mix.install([ + 
{:sqlite_vec, path: Path.join(__DIR__, "../../bindings/elixir")}, + {:exqlite, "~> 0.25.0"} +]) + +alias Exqlite.Basic + +{:ok, conn} = Basic.open(":memory:") +:ok = Basic.enable_load_extension(conn) + +Basic.load_extension(conn, SqliteVec.path()) +Basic.exec(conn, "create virtual table vec_examples using vec0(sample_embedding float[8]);", []) + +Basic.exec(conn, """ +-- vectors can be provided as JSON or in a compact binary format +insert into vec_examples(rowid, sample_embedding) + values + (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'), + (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'), + (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'), + (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]'); +""") + +Basic.exec(conn, """ +-- KNN style query +select + rowid, + distance +from vec_examples +where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]' +order by distance +limit 2; +""") From 6866a5ee7f13c56d5dfbeea077db4d1c5195af80 Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Sat, 7 Dec 2024 16:39:12 +0100 Subject: [PATCH 3/9] Pull sqlite_vec from hex in notebooks --- bindings/elixir/notebooks/getting_started.livemd | 2 +- bindings/elixir/notebooks/usage_with_ecto.livemd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/elixir/notebooks/getting_started.livemd b/bindings/elixir/notebooks/getting_started.livemd index c3bbe787..726903ec 100644 --- a/bindings/elixir/notebooks/getting_started.livemd +++ b/bindings/elixir/notebooks/getting_started.livemd @@ -2,7 +2,7 @@ ```elixir Mix.install([ - {:sqlite_vec, github: "joelpaulkoch/sqlite_vec"}, + {:sqlite_vec, "~> 0.1"}, {:exqlite, "~> 0.25.0"} ]) ``` diff --git a/bindings/elixir/notebooks/usage_with_ecto.livemd b/bindings/elixir/notebooks/usage_with_ecto.livemd index c056064d..46ba4bfe 100644 --- a/bindings/elixir/notebooks/usage_with_ecto.livemd +++ 
b/bindings/elixir/notebooks/usage_with_ecto.livemd @@ -2,7 +2,7 @@ ```elixir Mix.install([ - {:sqlite_vec, github: "joelpaulkoch/sqlite_vec"}, + {:sqlite_vec, "~> 0.1"}, {:ecto, "~> 3.12"}, {:ecto_sql, "~> 3.12"}, {:ecto_sqlite3, "~> 0.17.2"}, From 6a4b640c18ae30408d71a1353053b455862e8832 Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:31:11 +0200 Subject: [PATCH 4/9] Remove octo_fetch --- bindings/elixir/lib/sqlite_vec.ex | 20 +---- bindings/elixir/lib/sqlite_vec/downloader.ex | 86 -------------------- bindings/elixir/mix.exs | 27 ------ bindings/elixir/priv/.gitkeep | 0 bindings/elixir/test/sqlite_vec_test.exs | 8 -- 5 files changed, 1 insertion(+), 140 deletions(-) delete mode 100644 bindings/elixir/lib/sqlite_vec/downloader.ex delete mode 100644 bindings/elixir/priv/.gitkeep delete mode 100644 bindings/elixir/test/sqlite_vec_test.exs diff --git a/bindings/elixir/lib/sqlite_vec.ex b/bindings/elixir/lib/sqlite_vec.ex index 3e57126a..5e093038 100644 --- a/bindings/elixir/lib/sqlite_vec.ex +++ b/bindings/elixir/lib/sqlite_vec.ex @@ -1,21 +1,3 @@ defmodule SqliteVec do - @moduledoc """ - Downloads the precompiled loadable library of `sqlite-vec` from GitHub releases. - """ - - @doc """ - Provides the path to the downloaded loadable library. - """ - def path() do - version = Application.get_env(:sqlite_vec, :version, SqliteVec.Downloader.default_version()) - - Application.app_dir(:sqlite_vec, "priv/#{version}/vec0") - end - - @doc """ - Downloads the specified `version` to `output_dir`. 
- """ - def download(output_dir, version) do - SqliteVec.Downloader.download(output_dir, override_version: version) - end + @moduledoc false end diff --git a/bindings/elixir/lib/sqlite_vec/downloader.ex b/bindings/elixir/lib/sqlite_vec/downloader.ex deleted file mode 100644 index 9521c7b3..00000000 --- a/bindings/elixir/lib/sqlite_vec/downloader.ex +++ /dev/null @@ -1,86 +0,0 @@ -defmodule SqliteVec.Downloader do - @moduledoc """ - The downloader module for `SqliteVec`. - If the configured version already exists, the download is skipped. - Otherwise, the corresponding GitHub release artifact will be downloaded. - Afterwards, all other previously downloaded versions will be deleted. - """ - - use OctoFetch, - latest_version: "0.1.6", - github_repo: "asg017/sqlite-vec", - download_versions: %{ - "0.1.6" => [ - {:darwin, :amd64, "35d014e5f7bcac52645a97f1f1ca34fdb51dcd61d81ac6e6ba1c712393fbf8fd"}, - {:darwin, :arm64, "142e195b654092632fecfadbad2825f3140026257a70842778637597f6b8c827"}, - {:linux, :amd64, "438e0df29f3f8db3525b3aa0dcc0a199869c0bcec9d7abc5b51850469caf867f"}, - {:linux, :arm64, "d6e4ba12c5c0186eaab42fb4449b311008d86ffd943e6377d7d88018cffab3aa"}, - {:windows, :amd64, "f1c615577ad2e692d1e2fe046fe65994dafd8a8cae43e9e864f5f682dc295964"} - ], - "0.1.5" => [ - {:darwin, :amd64, "1daa90b7cdda7e873af4636a20a2b6daf0ebd4d664f2bbbcc2ffeae219bf34b6"}, - {:darwin, :arm64, "348ea4ce39b4b4749b19ee93e5e9674d6ed7616e3e313cb20f6354cdecbebc75"}, - {:linux, :amd64, "626bb9b66896269facdf7f87d94c308bf0523cb1e584ff7ff5b3f51936f21d24"}, - {:linux, :arm64, "8ce460c1f2adcbbc709f5ca1d1a3578c34c62c131d1a044bd3ff7c0729be2137"}, - {:windows, :amd64, "cfd31e96d2edf27749c4c2063134737fc98ac87b6e113acf204db57563b078bc"} - ], - "0.1.4" => [ - {:darwin, :amd64, "1be7676e9e63c427fe0ce84b738c1c9012f2bbb4b81ecc63719b5552f07e1b26"}, - {:darwin, :arm64, "e7962da8acd394ad95cfc4822d573d5b10ac9f93d2dd28b73e76841eb5da45ee"}, - {:linux, :amd64, 
"2d3855b9953f05aba033536efed3cd2a9cc4518ee009301b0c03b17f9d698819"}, - {:linux, :arm64, "b0b8d2b7b4beb9641417874689e737fe872d79e208c0c306565bd5fbfacb7124"}, - {:windows, :amd64, "39a5575c565af7c135b9f62db9d92aebd7af096cc2b952c8a31b40f674ccf2cf"} - ], - "0.1.3" => [ - {:darwin, :amd64, "8ef228a8935883f8b5c52f191a8123909ea48ab58f6eceb5d4c12ada654556cf"}, - {:darwin, :arm64, "c57a552c8a8df823a8deb937f81d8a9ec5c81377e66e86cd5db8508b74ef4068"}, - {:linux, :amd64, "5fa404f6d61de7b462d1f1504332a522a64331103603ca079714f078cdb28606"} - ], - "0.1.2" => [ - {:darwin, :amd64, "d2d4d312fac1d609723b75cc777df42f3ff0770903cd89d53ca201c6e10c25f9"}, - {:darwin, :arm64, "a449cb190366ee0080bcab132d788b0f792600bfa8dd7c0aba539444c6e126ba"}, - {:linux, :amd64, "539e6bb92612665e1fd1870df1b2c5db66e327bf5a98aee1666c57fb3c6e128d"} - ] - } - - @impl true - def download_name(version, :darwin, arch), do: download_name(version, :macos, arch) - def download_name(version, os, :amd64), do: download_name(version, os, :x86_64) - def download_name(version, os, :arm64), do: download_name(version, os, :aarch64) - - def download_name(version, os, arch), do: "sqlite-vec-#{version}-loadable-#{os}-#{arch}.tar.gz" - - def pre_download_hook(_file, output_dir) do - if library_exists?(output_dir) do - :skip - else - :cont - end - end - - defp library_exists?(output_dir) do - matches = - output_dir - |> Path.join("vec0.*") - |> Path.wildcard() - - matches != [] - end - - def post_write_hook(file) do - output_dir = file |> Path.dirname() |> Path.join("..") |> Path.expand() - current_version = file |> Path.dirname() |> Path.basename() - - remove_other_versions(output_dir, current_version) - - :ok - end - - defp remove_other_versions(output_dir, current_version) do - output_dir - |> Path.join("*") - |> Path.wildcard() - |> Enum.filter(fn path -> Path.basename(path) != current_version end) - |> Enum.map(&File.rm_rf(&1)) - end -end diff --git a/bindings/elixir/mix.exs b/bindings/elixir/mix.exs index 
7a207ccc..8e703383 100644 --- a/bindings/elixir/mix.exs +++ b/bindings/elixir/mix.exs @@ -11,10 +11,6 @@ defmodule SqliteVec.MixProject do elixir: "~> 1.17", start_permanent: Mix.env() == :prod, deps: deps(), - compilers: Mix.compilers() ++ [:download_sqlite_vec], - aliases: [ - "compile.download_sqlite_vec": &download_sqlite_vec/1 - ], preferred_cli_env: [ "test.watch": :test ], @@ -27,31 +23,8 @@ defmodule SqliteVec.MixProject do ] end - defp download_sqlite_vec(_) do - version = Application.get_env(:sqlite_vec, :version, SqliteVec.Downloader.default_version()) - - output_dir = Path.join(__DIR__, "priv/#{version}") - File.mkdir_p!(output_dir) - - case SqliteVec.download(output_dir, version) do - :skip -> - :ok - - {:ok, _successful_files, []} -> - :ok - - {:ok, _successful_files, failed_files} -> - message = "failed to download: " <> Enum.join(failed_files, ", ") - raise(message) - - {:error, message} -> - raise(message) - end - end - defp deps do [ - {:octo_fetch, "~> 0.4.0"}, {:ecto, "~> 3.0", optional: true}, {:nx, "~> 0.9", optional: true}, {:ecto_sql, "~> 3.0", only: :test}, diff --git a/bindings/elixir/priv/.gitkeep b/bindings/elixir/priv/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/bindings/elixir/test/sqlite_vec_test.exs b/bindings/elixir/test/sqlite_vec_test.exs deleted file mode 100644 index 5b8b8584..00000000 --- a/bindings/elixir/test/sqlite_vec_test.exs +++ /dev/null @@ -1,8 +0,0 @@ -defmodule SqliteVecTest do - use ExUnit.Case - - @tag :slow - test "supported downloads should work" do - assert :ok = OctoFetch.Test.test_all_supported_downloads(SqliteVec.Downloader) - end -end From e7cde7acd69ff0d7b58ec3ee7e1d6568568379cc Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:33:24 +0200 Subject: [PATCH 5/9] Remove mix_test_watch and doctest_formatter --- bindings/elixir/.formatter.exs | 1 - bindings/elixir/mix.exs | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git 
a/bindings/elixir/.formatter.exs b/bindings/elixir/.formatter.exs index 5c0dd53c..d2cda26e 100644 --- a/bindings/elixir/.formatter.exs +++ b/bindings/elixir/.formatter.exs @@ -1,5 +1,4 @@ # Used by "mix format" [ - plugins: [DoctestFormatter], inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] ] diff --git a/bindings/elixir/mix.exs b/bindings/elixir/mix.exs index 8e703383..0814a814 100644 --- a/bindings/elixir/mix.exs +++ b/bindings/elixir/mix.exs @@ -11,9 +11,6 @@ defmodule SqliteVec.MixProject do elixir: "~> 1.17", start_permanent: Mix.env() == :prod, deps: deps(), - preferred_cli_env: [ - "test.watch": :test - ], name: "SqliteVec", package: package(), docs: docs(), @@ -31,9 +28,7 @@ defmodule SqliteVec.MixProject do {:ecto_sqlite3, "~> 0.17", only: :test}, {:stream_data, "~> 1.0", only: :test}, {:ex_doc, "~> 0.34", only: :dev, runtime: false}, - {:mix_test_watch, "~> 1.0", only: [:dev, :test], runtime: false}, - {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, - {:doctest_formatter, "~> 0.3.1", only: [:dev, :test], runtime: false} + {:credo, "~> 1.7", only: [:dev, :test], runtime: false} ] end From b9f56683ecb4f91d64c1d6b70c4ba0696ef2d855 Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:36:52 +0200 Subject: [PATCH 6/9] Compile with elixir_make --- bindings/elixir/Makefile | 1 + bindings/elixir/VERSION | 1 + bindings/elixir/lib/sqlite_vec.ex | 9 +++++++++ bindings/elixir/mix.exs | 16 +++++++++++++++- bindings/elixir/sqlite-vec.c | 1 + bindings/elixir/sqlite-vec.h.tmpl | 1 + 6 files changed, 28 insertions(+), 1 deletion(-) create mode 120000 bindings/elixir/Makefile create mode 120000 bindings/elixir/VERSION create mode 120000 bindings/elixir/sqlite-vec.c create mode 120000 bindings/elixir/sqlite-vec.h.tmpl diff --git a/bindings/elixir/Makefile b/bindings/elixir/Makefile new file mode 120000 index 00000000..94aaae2c --- /dev/null +++ b/bindings/elixir/Makefile @@ -0,0 +1 @@ +../../Makefile \ No newline at end of file diff 
--git a/bindings/elixir/VERSION b/bindings/elixir/VERSION new file mode 120000 index 00000000..558194c5 --- /dev/null +++ b/bindings/elixir/VERSION @@ -0,0 +1 @@ +../../VERSION \ No newline at end of file diff --git a/bindings/elixir/lib/sqlite_vec.ex b/bindings/elixir/lib/sqlite_vec.ex index 5e093038..8ff1d535 100644 --- a/bindings/elixir/lib/sqlite_vec.ex +++ b/bindings/elixir/lib/sqlite_vec.ex @@ -1,3 +1,12 @@ defmodule SqliteVec do @moduledoc false + + @target System.get_env("MIX_TARGET", "") + + @doc """ + Provides the path to the loadable library. + """ + def path() do + Path.join([:code.priv_dir(:sqlite_vec), @target, "vec0"]) + end end diff --git a/bindings/elixir/mix.exs b/bindings/elixir/mix.exs index 0814a814..0564c64f 100644 --- a/bindings/elixir/mix.exs +++ b/bindings/elixir/mix.exs @@ -2,7 +2,7 @@ defmodule SqliteVec.MixProject do use Mix.Project @source_url "https://github.com/joelpaulkoch/sqlite_vec" - @version "0.1.0" + @version __DIR__ |> Path.join("VERSION") |> File.read!() |> String.trim() # trim: a trailing newline is not a valid version def project do [ @@ -11,6 +11,19 @@ defmodule SqliteVec.MixProject do elixir: "~> 1.17", start_permanent: Mix.env() == :prod, deps: deps(), + compilers: [:elixir_make] ++ Mix.compilers(), + make_targets: ["loadable"], + make_makefile: "Makefile", + make_env: fn -> + prefix = + Path.join([ + System.get_env("MIX_APP_PATH", ""), + "priv", + System.get_env("MIX_TARGET", "") + ]) + + %{"PREFIX" => prefix} + end, name: "SqliteVec", package: package(), docs: docs(), @@ -22,6 +35,7 @@ defmodule SqliteVec.MixProject do defp deps do [ + {:elixir_make, "~> 0.9.0", runtime: false}, {:ecto, "~> 3.0", optional: true}, {:nx, "~> 0.9", optional: true}, {:ecto_sql, "~> 3.0", only: :test}, diff --git a/bindings/elixir/sqlite-vec.c b/bindings/elixir/sqlite-vec.c new file mode 120000 index 00000000..30dd5b20 --- /dev/null +++ b/bindings/elixir/sqlite-vec.c @@ -0,0 +1 @@ +../../sqlite-vec.c \ No newline at end of file diff --git a/bindings/elixir/sqlite-vec.h.tmpl b/bindings/elixir/sqlite-vec.h.tmpl new file mode 120000
index 00000000..68760938 --- /dev/null +++ b/bindings/elixir/sqlite-vec.h.tmpl @@ -0,0 +1 @@ +../../sqlite-vec.h.tmpl \ No newline at end of file From d15e5d42b3c158f2d061648a6878c2ec200a3025 Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:37:57 +0200 Subject: [PATCH 7/9] Make prefix overridable --- Makefile | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 43e1d980..00d3ce61 100644 --- a/Makefile +++ b/Makefile @@ -63,14 +63,17 @@ else RENAME_WHEELS_ARGS= endif -prefix=dist -$(prefix): - mkdir -p $(prefix) +ifndef PREFIX +PREFIX=dist +endif + +$(PREFIX): + mkdir -p $(PREFIX) -TARGET_LOADABLE=$(prefix)/vec0.$(LOADABLE_EXTENSION) -TARGET_STATIC=$(prefix)/libsqlite_vec0.a -TARGET_STATIC_H=$(prefix)/sqlite-vec.h -TARGET_CLI=$(prefix)/sqlite3 +TARGET_LOADABLE=$(PREFIX)/vec0.$(LOADABLE_EXTENSION) +TARGET_STATIC=$(PREFIX)/libsqlite_vec0.a +TARGET_STATIC_H=$(PREFIX)/sqlite-vec.h +TARGET_CLI=$(PREFIX)/sqlite3 loadable: $(TARGET_LOADABLE) static: $(TARGET_STATIC) @@ -78,21 +81,21 @@ cli: $(TARGET_CLI) all: loadable static cli -OBJS_DIR=$(prefix)/.objs -LIBS_DIR=$(prefix)/.libs -BUILD_DIR=$(prefix)/.build +OBJS_DIR=$(PREFIX)/.objs +LIBS_DIR=$(PREFIX)/.libs +BUILD_DIR=$(PREFIX)/.build -$(OBJS_DIR): $(prefix) +$(OBJS_DIR): $(PREFIX) mkdir -p $@ -$(LIBS_DIR): $(prefix) +$(LIBS_DIR): $(PREFIX) mkdir -p $@ -$(BUILD_DIR): $(prefix) +$(BUILD_DIR): $(PREFIX) mkdir -p $@ -$(TARGET_LOADABLE): sqlite-vec.c sqlite-vec.h $(prefix) +$(TARGET_LOADABLE): sqlite-vec.c sqlite-vec.h $(PREFIX) $(CC) \ -fPIC -shared \ -Wall -Wextra \ @@ -101,12 +104,12 @@ $(TARGET_LOADABLE): sqlite-vec.c sqlite-vec.h $(prefix) $(CFLAGS) \ $< -o $@ -$(TARGET_STATIC): sqlite-vec.c sqlite-vec.h $(prefix) $(OBJS_DIR) +$(TARGET_STATIC): sqlite-vec.c sqlite-vec.h $(PREFIX) $(OBJS_DIR) $(CC) -Ivendor/ $(CFLAGS) -DSQLITE_CORE -DSQLITE_VEC_STATIC \ -O3 -c $< -o $(OBJS_DIR)/vec.o $(AR) rcs $@ $(OBJS_DIR)/vec.o 
-$(TARGET_STATIC_H): sqlite-vec.h $(prefix) +$(TARGET_STATIC_H): sqlite-vec.h $(PREFIX) cp $< $@ @@ -136,7 +139,7 @@ $(LIBS_DIR)/sqlite-vec.a: $(OBJS_DIR)/sqlite-vec.o $(LIBS_DIR) $(AR) rcs $@ $< -$(TARGET_CLI): sqlite-vec.h $(LIBS_DIR)/sqlite-vec.a $(LIBS_DIR)/shell.a $(LIBS_DIR)/sqlite3.a examples/sqlite3-cli/core_init.c $(prefix) +$(TARGET_CLI): sqlite-vec.h $(LIBS_DIR)/sqlite-vec.a $(LIBS_DIR)/shell.a $(LIBS_DIR)/sqlite3.a examples/sqlite3-cli/core_init.c $(PREFIX) $(CC) -g3 \ -Ivendor/ -I./ \ -DSQLITE_CORE \ @@ -197,13 +200,13 @@ test-loadable-watch: watchexec --exts c,py,Makefile --clear -- make test-loadable test-unit: - $(CC) tests/test-unit.c sqlite-vec.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit + $(CC) tests/test-unit.c sqlite-vec.c -I./ -Ivendor -o $(PREFIX)/test-unit && $(PREFIX)/test-unit site-dev: npm --prefix site run dev site-build: npm --prefix site run build install: install -d $(INSTALL_LIB_DIR) @@ -229,9 +232,9 @@ uninstall: # ███████████████████████████████ WASM SECTION ███████████████████████████████ -WASM_DIR=$(prefix)/.wasm +WASM_DIR=$(PREFIX)/.wasm -$(WASM_DIR): $(prefix) +$(WASM_DIR): $(PREFIX) mkdir -p $@ SQLITE_WASM_VERSION=3450300 From ad9528f1efd348c8a4c5a9f74e02c2c63b69a39c Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:38:05 +0200 Subject: [PATCH 8/9] Update readme --- bindings/elixir/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bindings/elixir/README.md b/bindings/elixir/README.md index 1ff4ae04..f0f0cfb9 100644 --- a/bindings/elixir/README.md +++ b/bindings/elixir/README.md @@ -5,7 +5,7 @@ [![Build Status](https://img.shields.io/github/actions/workflow/status/joelpaulkoch/sqlite_vec/ci.yml?label=Build%20Status&style=for-the-badge&branch=main)](https://github.com/joelpaulkoch/sqlite_vec/actions) A wrapper to use [sqlite-vec](https://github.com/asg017/sqlite-vec), a SQLite extension for working with
vectors, in Elixir. -The configured version of the precompiled loadable library will be downloaded from the GitHub releases. +Builds the loadable library using `elixir_make`. Moreover, this package provides structs and custom Ecto types for working with Float32, Int8, and Bit vectors. ## Limitations @@ -26,7 +26,7 @@ end ## Getting Started -`SqliteVec.path/0` returns the path of the downloaded library. +`SqliteVec.path/0` returns the path of the loadable library. Therefore, you can load the extension using this path. For instance with `Exqlite`: @@ -55,6 +55,5 @@ You can check out the [Getting Started](notebooks/getting_started.livemd) and [U Special thanks to these projects that helped to make this package: -- [OctoFetch](https://hexdocs.pm/octo_fetch/readme.html) which does all the work for downloading the GitHub releases, and served as a blueprint for this package (yes, including this Attribution section :) ) - [sqlite-vec](https://github.com/asg017/sqlite-vec), of course, which provides all of the functionality - [pgvector](https://hexdocs.pm/pgvector/readme.html) provides something similar for postgres and quite some code could be reused From 1f9a09704bab72a2742dc4bd1ff06b32c6cd995e Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Tue, 30 Sep 2025 23:58:52 +0200 Subject: [PATCH 9/9] Add publish script to replace symlinks with content --- bindings/elixir/mix.exs | 3 ++- bindings/elixir/publish.sh | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 bindings/elixir/publish.sh diff --git a/bindings/elixir/mix.exs b/bindings/elixir/mix.exs index 0564c64f..48d95f55 100644 --- a/bindings/elixir/mix.exs +++ b/bindings/elixir/mix.exs @@ -53,7 +53,8 @@ defmodule SqliteVec.MixProject do links: %{ GitHub: "https://github.com/joelpaulkoch/sqlite_vec" }, - files: ~w(lib priv/.gitkeep .formatter.exs mix.exs README.md LICENSE) + files: + ~w(lib .formatter.exs mix.exs README.md LICENSE VERSION Makefile sqlite-vec.c sqlite-vec.h.tmpl) ] end 
diff --git a/bindings/elixir/publish.sh b/bindings/elixir/publish.sh
new file mode 100755
index 00000000..94fa42b8
--- /dev/null
+++ b/bindings/elixir/publish.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Replace the symlinked build inputs with copies of their targets so the Hex
+# package ships real files, then publish. This rewrites the working tree, so
+# restore the symlinks before committing.
+set -euo pipefail
+
+# Resolve the relative file names against this script's directory.
+cd "$(dirname "$0")"
+
+for file in Makefile VERSION sqlite-vec.c sqlite-vec.h.tmpl; do
+  # Resolve first: under `set -e` a failed readlink aborts before anything is removed.
+  target=$(readlink -f "$file")
+  rm "$file"
+  cp "$target" "$file"
+done
+
+mix hex.publish