Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Elixir bindings #151

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bindings/elixir/.formatter.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Used by "mix format"
[
  # Formatter plugin module loaded by `mix format`.
  # NOTE(review): presumably this formats the code inside doctests — confirm against the plugin's docs.
  plugins: [DoctestFormatter],
  # Glob patterns selecting the files `mix format` operates on:
  # mix.exs, this file, and every .ex/.exs file under config/, lib/ and test/.
  inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]
75 changes: 75 additions & 0 deletions bindings/elixir/.github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Continuous integration for the Elixir bindings: a lint job and a test job.
# NOTE(review): this file sits under bindings/elixir/.github/workflows; GitHub Actions
# normally only discovers workflows in the repository-root .github/workflows directory,
# and no step sets a working-directory — confirm this workflow actually runs here.
name: CI

# Triggered by every pull request and by pushes to the main branch.
on:
  pull_request:
  push:
    branches:
      - "main"

jobs:
  # Static checks: formatting, unused lockfile entries, and Credo.
  lint:
    runs-on: ${{ matrix.os }}
    env:
      MIX_ENV: dev
    name: Lint
    strategy:
      matrix:
        os: ["ubuntu-latest"]
        elixir: ["1.17"]
        otp: ["27"]
    steps:
      - uses: actions/checkout@v4
      - name: Install Erlang & Elixir
        uses: erlef/setup-beam@v1
        with:
          otp-version: ${{ matrix.otp }}
          elixir-version: ${{ matrix.elixir }}
      # Cache fetched dependencies. The key combines OS, OTP and Elixir versions with the
      # mix.lock hash; restore-keys lets a changed lockfile fall back to a cache for the
      # same toolchain.
      - uses: actions/cache@v3
        with:
          path: deps
          key: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_${{ hashFiles('**/mix.lock') }}
          restore-keys: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_
      - name: Install mix dependencies
        run: mix deps.get
      - name: Compile mix dependencies
        run: mix deps.compile
      # Fails when any file would be changed by `mix format`.
      - name: Check formatting
        run: mix format --check-formatted
      # Fails when mix.lock contains dependencies that are no longer used.
      - name: Check unused deps
        run: mix deps.unlock --check-unused
      - name: Credo check
        run: mix credo --all

  # Compiles the project and runs the test suite in MIX_ENV=test.
  test:
    runs-on: ${{ matrix.os }}
    env:
      MIX_ENV: test

    name: Test Elixir ${{ matrix.elixir }}, OTP ${{ matrix.otp }}, OS ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        elixir: ["1.17"]
        otp: ["27"]

    steps:
      - uses: actions/checkout@v4
      - uses: erlef/setup-beam@v1
        with:
          otp-version: ${{ matrix.otp }}
          elixir-version: ${{ matrix.elixir }}

      # Same dependency cache configuration as in the lint job.
      - uses: actions/cache@v3
        with:
          path: deps
          key: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_${{ hashFiles('**/mix.lock') }}
          restore-keys: ${{ matrix.os }}-otp_${{ matrix.otp }}-elixir_${{ matrix.elixir }}-mix_
      # Only fetch the dependencies needed in the test environment.
      - name: Install mix dependencies
        run: mix deps.get --only test
      - name: Compile mix dependencies
        run: mix deps.compile
      - name: Compile project
        run: mix compile
      # `--include slow` also runs tests tagged `slow`.
      - name: Test project
        run: mix test --include slow
35 changes: 35 additions & 0 deletions bindings/elixir/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# The directory Mix will write compiled artifacts to.
/_build/

# If you run "mix test --cover", coverage assets end up here.
/cover/

# The directory Mix downloads your dependencies sources to.
/deps/

# Where third-party dependencies like ExDoc output generated docs.
/doc/

# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch

# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez

# Ignore package tarball (built via "mix hex.build").
sqlite_vec-*.tar

# Temporary files, for example, from tests.
/tmp/

# downloaded libraries
/priv/**/vec0.*

/notebooks/*.db
/notebooks/*.db-*

# test database
/test/*.db*
21 changes: 21 additions & 0 deletions bindings/elixir/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Joel Koch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
60 changes: 60 additions & 0 deletions bindings/elixir/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SqliteVec

[![Hex Package](https://img.shields.io/hexpm/v/sqlite_vec.svg?style=for-the-badge)](https://hex.pm/packages/sqlite_vec)
[![Hex Docs](https://img.shields.io/badge/hex-docs-blue.svg?style=for-the-badge)](https://hexdocs.pm/sqlite_vec)
[![Build Status](https://img.shields.io/github/actions/workflow/status/joelpaulkoch/sqlite_vec/ci.yml?label=Build%20Status&style=for-the-badge&branch=main)](https://github.com/joelpaulkoch/sqlite_vec/actions)

A wrapper to use [sqlite-vec](https://github.com/asg017/sqlite-vec), a SQLite extension for working with vectors, in Elixir.
The configured version of the precompiled loadable library will be downloaded from the GitHub releases.
Moreover, this package provides structs and custom Ecto types for working with Float32, Int8, and Bit vectors.

## Limitations
- It's currently not possible to create int8 and bit vectors using `Ecto`; you must use SQL directly to do so.
- Operations not yet implemented: `vec_each`, `vec_quantize_i8`.

## Installation

The package can be installed by adding `sqlite_vec` to your list of dependencies in `mix.exs`:

```elixir
def deps do
[
{:sqlite_vec, "~> 0.1.0"}
]
end
```

## Getting Started

`SqliteVec.path/0` returns the path of the downloaded library.
Therefore, you can load the extension using this path.

For instance, with `Exqlite` (the example assumes `alias Exqlite.Basic`):
```elixir
{:ok, conn} = Basic.open(":memory:")
:ok = Basic.enable_load_extension(conn)

Basic.load_extension(conn, SqliteVec.path())
```

Or, with an `Ecto.Repo` and `ecto_sqlite3`:

```elixir
defmodule MyApp.Repo do
use Ecto.Repo,
otp_app: :my_app,
adapter: Ecto.Adapters.SQLite3
end

config :my_app, MyApp.Repo, load_extensions: [SqliteVec.path()]
```

You can check out the [Getting Started](notebooks/getting_started.livemd) and [Usage with Ecto](notebooks/usage_with_ecto.livemd) notebooks.

## Attribution

Special thanks to these projects that helped to make this package:

- [OctoFetch](https://hexdocs.pm/octo_fetch/readme.html) which does all the work for downloading the GitHub releases, and served as a blueprint for this package (yes, including this Attribution section :) )
- [sqlite-vec](https://github.com/asg017/sqlite-vec), of course, which provides all of the functionality
- [pgvector](https://hexdocs.pm/pgvector/readme.html), which provides something similar for Postgres; quite a bit of its code could be reused
21 changes: 21 additions & 0 deletions bindings/elixir/lib/sqlite_vec.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
defmodule SqliteVec do
  @moduledoc """
  Locates and downloads the precompiled loadable library of `sqlite-vec`
  (fetched from the GitHub releases).
  """

  alias SqliteVec.Downloader

  @doc """
  Returns the path to the downloaded loadable library.

  The version is read from the `:version` key of the `:sqlite_vec` application
  environment and falls back to `SqliteVec.Downloader.default_version/0`. The
  resulting path points at `priv/<version>/vec0` inside the `:sqlite_vec`
  application directory.
  """
  def path do
    fallback_version = Downloader.default_version()
    version = Application.get_env(:sqlite_vec, :version, fallback_version)
    relative_path = "priv/#{version}/vec0"

    Application.app_dir(:sqlite_vec, relative_path)
  end

  @doc """
  Downloads the given `version` of the library into `output_dir`.

  Delegates to `SqliteVec.Downloader.download/2`, passing `version` as the
  `:override_version` option.
  """
  def download(output_dir, version) do
    download_opts = [override_version: version]

    Downloader.download(output_dir, download_opts)
  end
end
132 changes: 132 additions & 0 deletions bindings/elixir/lib/sqlite_vec/bit.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
defmodule SqliteVec.Bit do
  @moduledoc """
  A vector struct for bit vectors.
  Vectors are stored as binaries.

  > ### Consider endianness {: .warning}
  >
  > When returned from `sqlite-vec` or created from `Nx.Tensor`, `SqliteVec.Bit` holds data in system endianness.
  > You must consider endianness when converting the binary data to a list of numbers.

      iex> v = SqliteVec.Bit.new(Nx.tensor([-1.0, 2.0], type: :f32))
      ...> b = SqliteVec.Bit.to_binary(v)
      ...> <<f1::float-32, f2::float-32>> = b
      ...> [f1, f2]
      case System.endianness() do
        :big -> [-1.0, 2.0]
        :little -> [4.618539608568165e-41, 8.96831017167883e-44]
      end
  """

  @type t :: %__MODULE__{data: binary()}

  defstruct [:data]

  @doc """
  Creates a new vector from a vector, list, or tensor.

  The vector must be a `SqliteVec.Bit` vector; it is returned unchanged.
  The list must not be empty, must only contain values of 0 or 1 and must have a length that's divisible by 8.
  The first list element becomes the most significant bit of the first byte.
  The tensor must have a rank of 1 and a type size that's divisible by 8.

  Raises `ArgumentError` if a list or tensor does not satisfy these constraints.

  ## Examples
      iex> SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])
      %SqliteVec.Bit{data: <<0b00000001>>}

      iex> v1 = SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1])
      ...> SqliteVec.Bit.new(v1)
      %SqliteVec.Bit{data: <<0b00000001>>}

      iex> SqliteVec.Bit.new(Nx.tensor([1, 2, 3], type: :u8))
      %SqliteVec.Bit{data: <<1::signed-integer-8, 2::signed-integer-8, 3::signed-integer-8>>}
  """
  # `struct()` stands in for `Nx.Tensor.t()` because Nx is an optional dependency.
  @spec new(t() | [0 | 1, ...] | struct()) :: t()
  def new(vector_or_list_or_tensor)

  def new(%__MODULE__{} = vector), do: vector

  def new(list) when is_list(list) do
    # The checks run in this order so the reported error stays the same as before
    # for inputs that violate several constraints at once.
    cond do
      list == [] ->
        raise ArgumentError, "list must not be empty"

      not length_divisible_by_8?(list) ->
        raise ArgumentError, "expected list length to be divisible by 8"

      not Enum.all?(list, &bit?/1) ->
        raise ArgumentError, "expected list elements to be 0 or 1"

      true ->
        # A multiple of 8 single-bit segments always yields a whole-byte binary,
        # so the result can be handed to from_binary/1 directly.
        from_binary(for bit <- list, into: <<>>, do: <<bit::1>>)
    end
  end

  if Code.ensure_loaded?(Nx) do
    def new(tensor) when is_struct(tensor, Nx.Tensor) do
      if Nx.rank(tensor) != 1 do
        raise ArgumentError, "expected rank to be 1"
      end

      if not binary_type_size?(Nx.type(tensor)) do
        raise ArgumentError, "expected type size to be divisible by 8"
      end

      tensor
      |> Nx.to_binary()
      |> from_binary()
    end

    # True when the element size (in bits) of an Nx type fills whole bytes.
    defp binary_type_size?({_type, size}), do: rem(size, 8) == 0
  end

  defp length_divisible_by_8?(list), do: rem(length(list), 8) == 0

  # Strict matches: only the integers 0 and 1 count as bits (1.0 does not).
  defp bit?(0), do: true
  defp bit?(1), do: true
  defp bit?(_), do: false

  @doc """
  Creates a new vector from its binary representation.

  ## Examples
      iex> SqliteVec.Bit.from_binary(<<0b00000001>>)
      %SqliteVec.Bit{data: <<0b00000001>>}
  """
  @spec from_binary(binary()) :: t()
  def from_binary(binary) when is_binary(binary) do
    %__MODULE__{data: binary}
  end

  @doc """
  Converts the vector to its binary representation.

  ## Examples
      iex> SqliteVec.Bit.to_binary(SqliteVec.Bit.new([0, 0, 0, 0, 0, 0, 0, 1]))
      <<0b00000001>>
  """
  @spec to_binary(t()) :: binary()
  def to_binary(%__MODULE__{data: data}), do: data

  @doc """
  Converts the vector to a list of bits.

  Bits are listed from the most significant bit of the first byte onwards.

  ## Examples
      iex> SqliteVec.Bit.to_list(SqliteVec.Bit.from_binary(<<0b00000101>>))
      [0, 0, 0, 0, 0, 1, 0, 1]
  """
  @spec to_list(t()) :: [0 | 1]
  def to_list(%__MODULE__{data: data}) do
    # Assert that the stored data is a whole-byte binary before unpacking it.
    <<bytes::binary>> = data

    for <<bit::1 <- bytes>>, do: bit
  end

  if Code.ensure_loaded?(Nx) do
    @doc """
    Converts the vector to a tensor of type `:u8`.
    """
    @spec to_tensor(t()) :: Nx.Tensor.t()
    def to_tensor(%__MODULE__{data: data}) do
      # Assert that the stored data is a whole-byte binary before handing it to Nx.
      <<bytes::binary>> = data

      Nx.from_binary(bytes, :u8)
    end
  end
end

defimpl Inspect, for: SqliteVec.Bit do
  # Renders a bit vector as `vec_bit('[...]')`, where the bracketed part is the
  # vector's list of bits inspected with the caller's options.
  def inspect(vector, opts) do
    bits = SqliteVec.Bit.to_list(vector)
    rendered_bits = Inspect.List.inspect(bits, opts)

    Inspect.Algebra.concat(["vec_bit('", rendered_bits, "')"])
  end
end
Loading