neighbor-0.6.0/0000755000004100000410000000000015024712502013340 5ustar www-datawww-dataneighbor-0.6.0/lib/0000755000004100000410000000000015024712502014106 5ustar www-datawww-dataneighbor-0.6.0/lib/generators/0000755000004100000410000000000015024712502016257 5ustar www-datawww-dataneighbor-0.6.0/lib/generators/neighbor/0000755000004100000410000000000015024712502020054 5ustar www-datawww-dataneighbor-0.6.0/lib/generators/neighbor/templates/0000755000004100000410000000000015024712502022052 5ustar www-datawww-dataneighbor-0.6.0/lib/generators/neighbor/templates/sqlite.rb.tt0000644000004100000410000000007515024712502024330 0ustar www-datawww-data# Load the sqlite-vec extension Neighbor::SQLite.initialize! neighbor-0.6.0/lib/generators/neighbor/templates/cube.rb.tt0000644000004100000410000000020715024712502023742 0ustar www-datawww-dataclass <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %> def change enable_extension "cube" end end neighbor-0.6.0/lib/generators/neighbor/templates/vector.rb.tt0000644000004100000410000000021115024712502024321 0ustar www-datawww-dataclass <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %> def change enable_extension "vector" end end neighbor-0.6.0/lib/generators/neighbor/vector_generator.rb0000644000004100000410000000104015024712502023744 0ustar www-datawww-datarequire "rails/generators" require "rails/generators/active_record" module Neighbor module Generators class VectorGenerator < Rails::Generators::Base include ActiveRecord::Generators::Migration source_root File.join(__dir__, "templates") def copy_migration migration_template "vector.rb", "db/migrate/install_neighbor_vector.rb", migration_version: migration_version end def migration_version "[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]" end end end end neighbor-0.6.0/lib/generators/neighbor/sqlite_generator.rb0000644000004100000410000000043415024712502023751 0ustar www-datawww-datarequire "rails/generators" module Neighbor module Generators class SqliteGenerator < Rails::Generators::Base source_root File.join(__dir__, "templates") def copy_templates template "sqlite.rb", "config/initializers/neighbor.rb" end end end end neighbor-0.6.0/lib/generators/neighbor/cube_generator.rb0000644000004100000410000000103215024712502023361 0ustar www-datawww-datarequire "rails/generators" require "rails/generators/active_record" module Neighbor module Generators class CubeGenerator < Rails::Generators::Base include ActiveRecord::Generators::Migration source_root File.join(__dir__, "templates") def copy_migration migration_template "cube.rb", "db/migrate/install_neighbor_cube.rb", migration_version: migration_version end def migration_version "[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]" end end end end neighbor-0.6.0/lib/neighbor/0000755000004100000410000000000015024712502015703 5ustar www-datawww-dataneighbor-0.6.0/lib/neighbor/sqlite.rb0000644000004100000410000000126415024712502017534 0ustar www-datawww-datamodule Neighbor module SQLite # note: this is a public API (unlike PostgreSQL and MySQL) def self.initialize! return if defined?(@initialized) require_relative "type/sqlite_vector" require_relative "type/sqlite_int8_vector" require "sqlite_vec" require "active_record/connection_adapters/sqlite3_adapter" ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(InstanceMethods) @initialized = true end module InstanceMethods def configure_connection super db = @raw_connection db.enable_load_extension(1) SqliteVec.load(db) db.enable_load_extension(0) end end end end neighbor-0.6.0/lib/neighbor/mysql.rb0000644000004100000410000000200315024712502017370 0ustar www-datawww-datamodule Neighbor module MySQL def self.initialize! require_relative "type/mysql_vector" require "active_record/connection_adapters/abstract_mysql_adapter" # ensure schema can be dumped ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter::NATIVE_DATABASE_TYPES[:vector] = {name: "vector"} # ensure schema can be loaded unless ActiveRecord::ConnectionAdapters::TableDefinition.method_defined?(:vector) ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :vector) end # prevent unknown OID warning ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter.singleton_class.prepend(RegisterTypes) end module RegisterTypes def initialize_type_map(m) super register_vector_type(m) end def register_vector_type(m) m.register_type %r(^vector)i do |sql_type| limit = extract_limit(sql_type) Type::MysqlVector.new(limit: limit) end end end end end neighbor-0.6.0/lib/neighbor/reranking.rb0000644000004100000410000000113515024712502020210 0ustar www-datawww-datamodule Neighbor module Reranking def self.rrf(first_ranking, *rankings, k: 60) rankings.unshift(first_ranking) ranks = [] results = [] rankings.each do |ranking| ranks << ranking.map.with_index.to_h { |v, i| [v, i + 1] } results.concat(ranking) end results = results.uniq.map do |result| score = ranks.sum do |rank| r = rank[result] r ? 1.0 / (k + r) : 0.0 end {result: result, score: score} end results.sort_by { |v| -v[:score] } end end end neighbor-0.6.0/lib/neighbor/type/0000755000004100000410000000000015024712502016664 5ustar www-datawww-dataneighbor-0.6.0/lib/neighbor/type/halfvec.rb0000644000004100000410000000106715024712502020625 0ustar www-datawww-datamodule Neighbor module Type class Halfvec < ActiveRecord::Type::Value def type :halfvec end def serialize(value) if Utils.array?(value) value = "[#{value.to_a.map(&:to_f).join(",")}]" end super(value) end private def cast_value(value) if value.is_a?(String) value[1..-1].split(",").map(&:to_f) elsif Utils.array?(value) value.to_a else raise "can't cast #{value.class.name} to halfvec" end end end end end neighbor-0.6.0/lib/neighbor/type/cube.rb0000644000004100000410000000173515024712502020135 0ustar www-datawww-datamodule Neighbor module Type class Cube < ActiveRecord::Type::Value def type :cube end def serialize(value) if Utils.array?(value) value = value.to_a if value.first.is_a?(Array) value = value.map { |v| serialize_point(v) }.join(", ") else value = serialize_point(value) end end super(value) end private def cast_value(value) if Utils.array?(value) value.to_a elsif value.is_a?(Numeric) [value] elsif value.is_a?(String) if value.include?("),(") value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) } else value[1..-1].split(",").map(&:to_f) end else raise "can't cast #{value.class.name} to cube" end end def serialize_point(value) "(#{value.map(&:to_f).join(", ")})" end end end end neighbor-0.6.0/lib/neighbor/type/sqlite_vector.rb0000644000004100000410000000113015024712502022067 0ustar www-datawww-datamodule Neighbor module Type class SqliteVector < ActiveRecord::Type::Binary def serialize(value) if Utils.array?(value) value = value.to_a.pack("f*") end super(value) end def deserialize(value) value = super cast_value(value) unless value.nil? end private def cast_value(value) if value.is_a?(String) value.unpack("f*") elsif Utils.array?(value) value.to_a else raise "can't cast #{value.class.name} to vector" end end end end end neighbor-0.6.0/lib/neighbor/type/sqlite_int8_vector.rb0000644000004100000410000000113415024712502023035 0ustar www-datawww-datamodule Neighbor module Type class SqliteInt8Vector < ActiveRecord::Type::Binary def serialize(value) if Utils.array?(value) value = value.to_a.pack("c*") end super(value) end def deserialize(value) value = super cast_value(value) unless value.nil? end private def cast_value(value) if value.is_a?(String) value.unpack("c*") elsif Utils.array?(value) value.to_a else raise "can't cast #{value.class.name} to vector" end end end end end neighbor-0.6.0/lib/neighbor/type/vector.rb0000644000004100000410000000106415024712502020514 0ustar www-datawww-datamodule Neighbor module Type class Vector < ActiveRecord::Type::Value def type :vector end def serialize(value) if Utils.array?(value) value = "[#{value.to_a.map(&:to_f).join(",")}]" end super(value) end private def cast_value(value) if value.is_a?(String) value[1..-1].split(",").map(&:to_f) elsif Utils.array?(value) value.to_a else raise "can't cast #{value.class.name} to vector" end end end end end neighbor-0.6.0/lib/neighbor/type/sparsevec.rb0000644000004100000410000000133215024712502021203 0ustar www-datawww-datamodule Neighbor module Type class Sparsevec < ActiveRecord::Type::Value def type :sparsevec end def serialize(value) if value.is_a?(SparseVector) value = "{#{value.indices.zip(value.values).map { |i, v| "#{i.to_i + 1}:#{v.to_f}" }.join(",")}}/#{value.dimensions.to_i}" end super(value) end private def cast_value(value) if value.is_a?(SparseVector) value elsif value.is_a?(String) SparseVector.from_text(value) elsif Utils.array?(value) value = SparseVector.new(value.to_a) else raise "can't cast #{value.class.name} to sparsevec" end end end end end neighbor-0.6.0/lib/neighbor/type/mysql_vector.rb0000644000004100000410000000120115024712502021732 0ustar www-datawww-datamodule Neighbor module Type class MysqlVector < ActiveRecord::Type::Binary def type :vector end def serialize(value) if Utils.array?(value) value = value.to_a.pack("e*") end super(value) end def deserialize(value) value = super cast_value(value) unless value.nil? end private def cast_value(value) if value.is_a?(String) value.unpack("e*") elsif Utils.array?(value) value.to_a else raise "can't cast #{value.class.name} to vector" end end end end end neighbor-0.6.0/lib/neighbor/sparse_vector.rb0000644000004100000410000000356215024712502021115 0ustar www-datawww-datamodule Neighbor class SparseVector attr_reader :dimensions, :indices, :values NO_DEFAULT = Object.new def initialize(value, dimensions = NO_DEFAULT) if value.is_a?(Hash) if dimensions == NO_DEFAULT raise ArgumentError, "missing dimensions" end from_hash(value, dimensions) else unless dimensions == NO_DEFAULT raise ArgumentError, "extra argument" end from_array(value) end end def to_s "{#{@indices.zip(@values).map { |i, v| "#{i.to_i + 1}:#{v.to_f}" }.join(",")}}/#{@dimensions.to_i}" end def to_a arr = Array.new(dimensions, 0.0) @indices.zip(@values) do |i, v| arr[i] = v end arr end private def from_hash(data, dimensions) elements = data.select { |_, v| v != 0 }.sort @dimensions = dimensions.to_i @indices = elements.map { |v| v[0].to_i } @values = elements.map { |v| v[1].to_f } end def from_array(arr) arr = arr.to_a @dimensions = arr.size @indices = [] @values = [] arr.each_with_index do |v, i| if v != 0 @indices << i @values << v.to_f end end end class << self def from_text(string) elements, dimensions = string.split("/", 2) indices = [] values = [] elements[1..-2].split(",").each do |e| index, value = e.split(":", 2) indices << index.to_i - 1 values << value.to_f end from_parts(dimensions.to_i, indices, values) end private def from_parts(dimensions, indices, values) vec = allocate vec.instance_variable_set(:@dimensions, dimensions) vec.instance_variable_set(:@indices, indices) vec.instance_variable_set(:@values, values) vec end end end end neighbor-0.6.0/lib/neighbor/model.rb0000644000004100000410000001504115024712502017331 0ustar www-datawww-datamodule Neighbor module Model def has_neighbors(*attribute_names, dimensions: nil, normalize: nil, type: nil) if attribute_names.empty? raise ArgumentError, "has_neighbors requires an attribute name" end attribute_names.map!(&:to_sym) class_eval do @neighbor_attributes ||= {} if @neighbor_attributes.empty? def self.neighbor_attributes parent_attributes = if superclass.respond_to?(:neighbor_attributes) superclass.neighbor_attributes else {} end parent_attributes.merge(@neighbor_attributes || {}) end end attribute_names.each do |attribute_name| raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name] @neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize, type: type&.to_sym} end if ActiveRecord::VERSION::STRING.to_f >= 7.2 decorate_attributes(attribute_names) do |name, cast_type| Neighbor::Attribute.new(cast_type: cast_type, model: self, type: type, attribute_name: name) end else attribute_names.each do |attribute_name| attribute attribute_name do |cast_type| Neighbor::Attribute.new(cast_type: cast_type, model: self, type: type, attribute_name: attribute_name) end end end if normalize attribute_names.each do |attribute_name| normalizes attribute_name, with: ->(v) { Neighbor::Utils.normalize(v, column_info: columns_hash[attribute_name.to_s]) } end end return if @neighbor_attributes.size != attribute_names.size validate do adapter = Utils.adapter(self.class) self.class.neighbor_attributes.each do |k, v| value = read_attribute(k) next if value.nil? column_info = self.class.columns_hash[k.to_s] dimensions = v[:dimensions] dimensions ||= column_info&.limit unless column_info&.type == :binary type = v[:type] || Utils.type(adapter, column_info&.type) if !Neighbor::Utils.validate_dimensions(value, type, dimensions, adapter).nil? errors.add(k, "must have #{dimensions} dimensions") end if !Neighbor::Utils.validate_finite(value, type) errors.add(k, "must have finite values") end end end scope :nearest_neighbors, ->(attribute_name, vector, distance:, precision: nil) { attribute_name = attribute_name.to_sym options = neighbor_attributes[attribute_name] raise ArgumentError, "Invalid attribute" unless options normalize = options[:normalize] dimensions = options[:dimensions] type = options[:type] return none if vector.nil? distance = distance.to_s column_info = columns_hash[attribute_name.to_s] column_type = column_info&.type adapter = Neighbor::Utils.adapter(klass) if type && adapter != :sqlite raise ArgumentError, "type only works with SQLite" end operator = Neighbor::Utils.operator(adapter, column_type, distance) raise ArgumentError, "Invalid distance: #{distance}" unless operator # ensure normalize set (can be true or false) normalize_required = Utils.normalize_required?(adapter, column_type) if distance == "cosine" && normalize_required && normalize.nil? raise Neighbor::Error, "Set normalize for cosine distance with cube" end column_attribute = klass.type_for_attribute(attribute_name) vector = column_attribute.cast(vector) dimensions ||= column_info&.limit unless column_info&.type == :binary Neighbor::Utils.validate(vector, dimensions: dimensions, type: type || Utils.type(adapter, column_info&.type), adapter: adapter) vector = Neighbor::Utils.normalize(vector, column_info: column_info) if normalize quoted_attribute = nil query = nil connection_pool.with_connection do |c| quoted_attribute = "#{c.quote_table_name(table_name)}.#{c.quote_column_name(attribute_name)}" query = c.quote(column_attribute.serialize(vector)) end if !precision.nil? if adapter != :postgresql || column_type != :vector raise ArgumentError, "Precision not supported for this type" end case precision.to_s when "half" cast_dimensions = dimensions || column_info&.limit raise ArgumentError, "Unknown dimensions" unless cast_dimensions quoted_attribute += "::halfvec(#{connection_pool.with_connection { |c| c.quote(cast_dimensions.to_i) }})" else raise ArgumentError, "Invalid precision" end end order = Utils.order(adapter, type, operator, quoted_attribute, query) # https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance # with normalized vectors: # cosine similarity = 1 - (euclidean distance)**2 / 2 # cosine distance = 1 - cosine similarity # this transformation doesn't change the order, so only needed for select neighbor_distance = if distance == "cosine" && normalize_required "POWER(#{order}, 2) / 2.0" elsif [:vector, :halfvec, :sparsevec].include?(column_type) && distance == "inner_product" "(#{order}) * -1" else order end # for select, use column_names instead of * to account for ignored columns select_columns = select_values.any? ? [] : column_names select(*select_columns, "#{neighbor_distance} AS neighbor_distance") .where.not(attribute_name => nil) .reorder(Arel.sql(order)) } def nearest_neighbors(attribute_name, **options) attribute_name = attribute_name.to_sym # important! check if neighbor attribute before accessing raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name] self.class .where.not(Array(self.class.primary_key).to_h { |k| [k, self[k]] }) .nearest_neighbors(attribute_name, self[attribute_name], **options) end end end end end neighbor-0.6.0/lib/neighbor/postgresql.rb0000644000004100000410000000422615024712502020437 0ustar www-datawww-datamodule Neighbor module PostgreSQL def self.initialize! require_relative "type/cube" require_relative "type/halfvec" require_relative "type/sparsevec" require_relative "type/vector" require "active_record/connection_adapters/postgresql_adapter" # ensure schema can be dumped ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:cube] = {name: "cube"} ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:halfvec] = {name: "halfvec"} ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:sparsevec] = {name: "sparsevec"} ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:vector] = {name: "vector"} # ensure schema can be loaded ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube, :halfvec, :sparsevec, :vector) # prevent unknown OID warning ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(RegisterTypes) # support vector[]/halfvec[] ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.prepend(ArrayMethods) end module RegisterTypes def initialize_type_map(m = type_map) super m.register_type "cube", Type::Cube.new m.register_type "halfvec" do |_, _, sql_type| limit = extract_limit(sql_type) Type::Halfvec.new(limit: limit) end m.register_type "sparsevec" do |_, _, sql_type| limit = extract_limit(sql_type) Type::Sparsevec.new(limit: limit) end m.register_type "vector" do |_, _, sql_type| limit = extract_limit(sql_type) Type::Vector.new(limit: limit) end end end ArrayWrapper = Struct.new(:to_a) module ArrayMethods def type_cast_array(value, method, ...) if (subtype.is_a?(Neighbor::Type::Vector) || subtype.is_a?(Neighbor::Type::Halfvec)) && method != :deserialize && value.is_a?(::Array) && value.all? { |v| v.is_a?(::Numeric) } super(ArrayWrapper.new(value), method, ...) else super end end end end end neighbor-0.6.0/lib/neighbor/utils.rb0000644000004100000410000001170315024712502017372 0ustar www-datawww-datamodule Neighbor module Utils def self.validate_dimensions(value, type, expected, adapter) dimensions = type == :sparsevec ? value.dimensions : value.size dimensions *= 8 if type == :bit && [:sqlite, :mysql].include?(adapter) if expected && dimensions != expected "Expected #{expected} dimensions, not #{dimensions}" end end def self.validate_finite(value, type) case type when :bit, :integer true when :sparsevec value.values.all?(&:finite?) else value.all?(&:finite?) end end def self.validate(value, dimensions:, type:, adapter:) if (message = validate_dimensions(value, type, dimensions, adapter)) raise Error, message end if !validate_finite(value, type) raise Error, "Values must be finite" end end def self.normalize(value, column_info:) return nil if value.nil? raise Error, "Normalize not supported for type" unless [:cube, :vector, :halfvec].include?(column_info&.type) norm = Math.sqrt(value.sum { |v| v * v }) # store zero vector as all zeros # since NaN makes the distance always 0 # could also throw error norm > 0 ? value.map { |v| v / norm } : value end def self.array?(value) !value.nil? && value.respond_to?(:to_a) end def self.adapter(model) case model.connection_db_config.adapter when /sqlite/i :sqlite when /mysql|trilogy/i model.connection_pool.with_connection { |c| c.try(:mariadb?) } ? :mariadb : :mysql else :postgresql end end def self.type(adapter, column_type) case adapter when :mysql if column_type == :binary :bit else column_type end else column_type end end def self.operator(adapter, column_type, distance) case adapter when :sqlite case distance when "euclidean" "vec_distance_L2" when "cosine" "vec_distance_cosine" when "taxicab" "vec_distance_L1" when "hamming" "vec_distance_hamming" end when :mariadb case column_type when :vector case distance when "euclidean" "VEC_DISTANCE_EUCLIDEAN" when "cosine" "VEC_DISTANCE_COSINE" end when :integer case distance when "hamming" "BIT_COUNT" end else raise ArgumentError, "Unsupported type: #{column_type}" end when :mysql case column_type when :vector case distance when "cosine" "COSINE" when "euclidean" "EUCLIDEAN" end when :binary case distance when "hamming" "BIT_COUNT" end else raise ArgumentError, "Unsupported type: #{column_type}" end else case column_type when :bit case distance when "hamming" "<~>" when "jaccard" "<%>" when "hamming2" "#" end when :vector, :halfvec, :sparsevec case distance when "inner_product" "<#>" when "cosine" "<=>" when "euclidean" "<->" when "taxicab" "<+>" end when :cube case distance when "taxicab" "<#>" when "chebyshev" "<=>" when "euclidean", "cosine" "<->" end else raise ArgumentError, "Unsupported type: #{column_type}" end end end def self.order(adapter, type, operator, quoted_attribute, query) case adapter when :sqlite case type when :int8 "#{operator}(vec_int8(#{quoted_attribute}), vec_int8(#{query}))" when :bit "#{operator}(vec_bit(#{quoted_attribute}), vec_bit(#{query}))" else "#{operator}(#{quoted_attribute}, #{query})" end when :mariadb if operator == "BIT_COUNT" "BIT_COUNT(#{quoted_attribute} ^ #{query})" else "#{operator}(#{quoted_attribute}, #{query})" end when :mysql if operator == "BIT_COUNT" "BIT_COUNT(#{quoted_attribute} ^ #{query})" elsif operator == "COSINE" "DISTANCE(#{quoted_attribute}, #{query}, 'COSINE')" else "DISTANCE(#{quoted_attribute}, #{query}, 'EUCLIDEAN')" end else if operator == "#" "bit_count(#{quoted_attribute} # #{query})" else "#{quoted_attribute} #{operator} #{query}" end end end def self.normalize_required?(adapter, column_type) case adapter when :postgresql column_type == :cube else false end end end end neighbor-0.6.0/lib/neighbor/version.rb0000644000004100000410000000005015024712502017710 0ustar www-datawww-datamodule Neighbor VERSION = "0.6.0" end neighbor-0.6.0/lib/neighbor/normalized_attribute.rb0000644000004100000410000000101115024712502022450 0ustar www-datawww-datamodule Neighbor class NormalizedAttribute < ActiveRecord::Type::Value delegate :type, :serialize, :deserialize, to: :@cast_type def initialize(cast_type:, model:, attribute_name:) @cast_type = cast_type @model = model @attribute_name = attribute_name.to_s end def cast(...) Neighbor::Utils.normalize(@cast_type.cast(...), column_info: @model.columns_hash[@attribute_name]) end private def cast_value(...) @cast_type.send(:cast_value, ...) end end end neighbor-0.6.0/lib/neighbor/railtie.rb0000644000004100000410000000076115024712502017665 0ustar www-datawww-datamodule Neighbor class Railtie < Rails::Railtie generators do require "rails/generators/generated_attribute" # rails generate model Item embedding:vector{3} Rails::Generators::GeneratedAttribute.singleton_class.prepend(Neighbor::GeneratedAttribute) end end module GeneratedAttribute def parse_type_and_options(type, *, **) if type =~ /\A(vector|halfvec|bit|sparsevec)\{(\d+)\}\z/ return $1, limit: $2.to_i end super end end end neighbor-0.6.0/lib/neighbor/attribute.rb0000644000004100000410000000224315024712502020234 0ustar www-datawww-datamodule Neighbor class Attribute < ActiveRecord::Type::Value delegate :type, :serialize, :deserialize, :cast, to: :new_cast_type def initialize(cast_type:, model:, type:, attribute_name:) @cast_type = cast_type @model = model @type = type @attribute_name = attribute_name end private def cast_value(...) new_cast_type.send(:cast_value, ...) end def new_cast_type @new_cast_type ||= begin if @cast_type.is_a?(ActiveModel::Type::Value) case Utils.adapter(@model) when :sqlite case @type&.to_sym when :int8 Type::SqliteInt8Vector.new when :bit @cast_type when :float32, nil Type::SqliteVector.new else raise ArgumentError, "Unsupported type" end when :mariadb if @model.columns_hash[@attribute_name.to_s]&.type == :integer @cast_type else Type::MysqlVector.new end else @cast_type end else @cast_type end end end end end neighbor-0.6.0/lib/neighbor.rb0000644000004100000410000000145015024712502016230 0ustar www-datawww-data# dependencies require "active_support" # adapter hooks require_relative "neighbor/mysql" require_relative "neighbor/postgresql" require_relative "neighbor/sqlite" # modules require_relative "neighbor/reranking" require_relative "neighbor/sparse_vector" require_relative "neighbor/utils" require_relative "neighbor/version" module Neighbor class Error < StandardError; end end ActiveSupport.on_load(:active_record) do require_relative "neighbor/attribute" require_relative "neighbor/model" require_relative "neighbor/normalized_attribute" extend Neighbor::Model begin Neighbor::PostgreSQL.initialize! rescue Gem::LoadError # tries to load pg gem, which may not be available end Neighbor::MySQL.initialize! end require_relative "neighbor/railtie" if defined?(Rails::Railtie) neighbor-0.6.0/LICENSE.txt0000644000004100000410000000207315024712502015165 0ustar www-datawww-dataThe MIT License (MIT) Copyright (c) 2021-2025 Andrew Kane Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. neighbor-0.6.0/neighbor.gemspec0000644000004100000410000000424115024712502016503 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: neighbor 0.6.0 ruby lib Gem::Specification.new do |s| s.name = "neighbor".freeze s.version = "0.6.0" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Andrew Kane".freeze] s.date = "1980-01-02" s.email = "andrew@ankane.org".freeze s.files = ["CHANGELOG.md".freeze, "LICENSE.txt".freeze, "README.md".freeze, "lib/generators/neighbor/cube_generator.rb".freeze, "lib/generators/neighbor/sqlite_generator.rb".freeze, "lib/generators/neighbor/templates/cube.rb.tt".freeze, "lib/generators/neighbor/templates/sqlite.rb.tt".freeze, "lib/generators/neighbor/templates/vector.rb.tt".freeze, "lib/generators/neighbor/vector_generator.rb".freeze, "lib/neighbor.rb".freeze, "lib/neighbor/attribute.rb".freeze, "lib/neighbor/model.rb".freeze, "lib/neighbor/mysql.rb".freeze, "lib/neighbor/normalized_attribute.rb".freeze, "lib/neighbor/postgresql.rb".freeze, "lib/neighbor/railtie.rb".freeze, "lib/neighbor/reranking.rb".freeze, "lib/neighbor/sparse_vector.rb".freeze, "lib/neighbor/sqlite.rb".freeze, "lib/neighbor/type/cube.rb".freeze, "lib/neighbor/type/halfvec.rb".freeze, "lib/neighbor/type/mysql_vector.rb".freeze, "lib/neighbor/type/sparsevec.rb".freeze, "lib/neighbor/type/sqlite_int8_vector.rb".freeze, "lib/neighbor/type/sqlite_vector.rb".freeze, "lib/neighbor/type/vector.rb".freeze, "lib/neighbor/utils.rb".freeze, "lib/neighbor/version.rb".freeze] s.homepage = "https://github.com/ankane/neighbor".freeze s.licenses = ["MIT".freeze] s.required_ruby_version = Gem::Requirement.new(">= 3.2".freeze) s.rubygems_version = "3.3.15".freeze s.summary = "Nearest neighbor search for Rails".freeze if s.respond_to? :specification_version then s.specification_version = 4 end if s.respond_to? :add_runtime_dependency then s.add_runtime_dependency(%q.freeze, [">= 7.1"]) else s.add_dependency(%q.freeze, [">= 7.1"]) end end neighbor-0.6.0/README.md0000644000004100000410000004524515024712502014631 0ustar www-datawww-data# Neighbor Nearest neighbor search for Rails Supports: - Postgres (cube and pgvector) - MariaDB 11.8 - MySQL 9 (searching requires HeatWave) - experimental - SQLite (sqlite-vec) - experimental [![Build Status](https://github.com/ankane/neighbor/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/neighbor/actions) ## Installation Add this line to your application’s Gemfile: ```ruby gem "neighbor" ``` ### For Postgres Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [pgvector](https://github.com/pgvector/pgvector). cube ships with Postgres, while pgvector supports more dimensions and approximate nearest neighbor search. For cube, run: ```sh rails generate neighbor:cube rails db:migrate ``` For pgvector, [install the extension](https://github.com/pgvector/pgvector#installation) and run: ```sh rails generate neighbor:vector rails db:migrate ``` ### For SQLite Add this line to your application’s Gemfile: ```ruby gem "sqlite-vec" ``` And run: ```sh rails generate neighbor:sqlite ``` ## Getting Started Create a migration ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change # cube add_column :items, :embedding, :cube # pgvector, MariaDB, and MySQL add_column :items, :embedding, :vector, limit: 3 # dimensions # sqlite-vec add_column :items, :embedding, :binary end end ``` Add to your model ```ruby class Item < ApplicationRecord has_neighbors :embedding end ``` Update the vectors ```ruby item.update(embedding: [1.0, 1.2, 0.5]) ``` Get the nearest neighbors to a record ```ruby item.nearest_neighbors(:embedding, distance: "euclidean").first(5) ``` Get the nearest neighbors to a vector ```ruby Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean").first(5) ``` Records returned from `nearest_neighbors` will have a `neighbor_distance` attribute ```ruby nearest_item = item.nearest_neighbors(:embedding, distance: "euclidean").first nearest_item.neighbor_distance ``` See the additional docs for: - [cube](#cube) - [pgvector](#pgvector) - [MariaDB](#mariadb) - [MySQL](#mysql) - [sqlite-vec](#sqlite-vec) Or check out some [examples](#examples) ## cube ### Distance Supported values are: - `euclidean` - `cosine` - `taxicab` - `chebyshev` For cosine distance with cube, vectors must be normalized before being stored. ```ruby class Item < ApplicationRecord has_neighbors :embedding, normalize: true end ``` For inner product with cube, see [this example](examples/disco/user_recs_cube.rb). ### Dimensions The `cube` type can have up to 100 dimensions by default. See the [Postgres docs](https://www.postgresql.org/docs/current/cube.html) for how to increase this. For cube, it’s a good idea to specify the number of dimensions to ensure all records have the same number. ```ruby class Item < ApplicationRecord has_neighbors :embedding, dimensions: 3 end ``` ## pgvector ### Distance Supported values are: - `euclidean` - `inner_product` - `cosine` - `taxicab` - `hamming` - `jaccard` ### Dimensions The `vector` type can have up to 16,000 dimensions, and vectors with up to 2,000 dimensions can be indexed. The `halfvec` type can have up to 16,000 dimensions, and half vectors with up to 4,000 dimensions can be indexed. The `bit` type can have up to 83 million dimensions, and bit vectors with up to 64,000 dimensions can be indexed. The `sparsevec` type can have up to 16,000 non-zero elements, and sparse vectors with up to 1,000 non-zero elements can be indexed. ### Indexing Add an approximate index to speed up queries. Create a migration with: ```ruby class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0] def change add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops # or add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops end end ``` Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product. Set the size of the dynamic candidate list with HNSW ```ruby Item.connection.execute("SET hnsw.ef_search = 100") ``` Or the number of probes with IVFFlat ```ruby Item.connection.execute("SET ivfflat.probes = 3") ``` ### Half-Precision Vectors Use the `halfvec` type to store half-precision vectors ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change add_column :items, :embedding, :halfvec, limit: 3 # dimensions end end ``` ### Half-Precision Indexing Index vectors at half precision for smaller indexes ```ruby class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0] def change add_index :items, "(embedding::halfvec(3)) halfvec_l2_ops", using: :hnsw end end ``` Get the nearest neighbors ```ruby Item.nearest_neighbors(:embedding, [0.9, 1.3, 1.1], distance: "euclidean", precision: "half").first(5) ``` ### Binary Vectors Use the `bit` type to store binary vectors ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change add_column :items, :embedding, :bit, limit: 3 # dimensions end end ``` Get the nearest neighbors by Hamming distance ```ruby Item.nearest_neighbors(:embedding, "101", distance: "hamming").first(5) ``` ### Binary Quantization Use expression indexing for binary quantization ```ruby class AddIndexToItemsEmbedding < ActiveRecord::Migration[8.0] def change add_index :items, "(binary_quantize(embedding)::bit(3)) bit_hamming_ops", using: :hnsw end end ``` ### Sparse Vectors Use the `sparsevec` type to store sparse vectors ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change add_column :items, :embedding, :sparsevec, limit: 3 # dimensions end end ``` Get the nearest neighbors ```ruby embedding = Neighbor::SparseVector.new({0 => 0.9, 1 => 1.3, 2 => 1.1}, 3) Item.nearest_neighbors(:embedding, embedding, distance: "euclidean").first(5) ``` ## MariaDB ### Distance Supported values are: - `euclidean` - `cosine` - `hamming` ### Indexing Vector columns must use `null: false` to add a vector index ```ruby class CreateItems < ActiveRecord::Migration[8.0] def change create_table :items do |t| t.vector :embedding, limit: 3, null: false t.index :embedding, type: :vector end end end ``` ### Binary Vectors Use the `bigint` type to store binary vectors ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change add_column :items, :embedding, :bigint end end ``` Note: Binary vectors can have up to 64 dimensions Get the nearest neighbors by Hamming distance ```ruby Item.nearest_neighbors(:embedding, 5, distance: "hamming").first(5) ``` ## MySQL ### Distance Supported values are: - `euclidean` - `cosine` - `hamming` Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysql.com/doc/refman/9.0/en/vector-functions.html) ### Binary Vectors Use the `binary` type to store binary vectors ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change add_column :items, :embedding, :binary end end ``` Get the nearest neighbors by Hamming distance ```ruby Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5) ``` ## sqlite-vec ### Distance Supported values are: - `euclidean` - `cosine` - `taxicab` - `hamming` ### Dimensions For sqlite-vec, it’s a good idea to specify the number of dimensions to ensure all records have the same number. ```ruby class Item < ApplicationRecord has_neighbors :embedding, dimensions: 3 end ``` ### Virtual Tables You can also use [virtual tables](https://alexgarcia.xyz/sqlite-vec/features/knn.html) ```ruby class AddEmbeddingToItems < ActiveRecord::Migration[8.0] def change # Rails 8+ create_virtual_table :items, :vec0, [ "id integer PRIMARY KEY AUTOINCREMENT NOT NULL", "embedding float[3] distance_metric=L2" ] # Rails < 8 execute <<~SQL CREATE VIRTUAL TABLE items USING vec0( id integer PRIMARY KEY AUTOINCREMENT NOT NULL, embedding float[3] distance_metric=L2 ) SQL end end ``` Use `distance_metric=cosine` for cosine distance You can optionally ignore any shadow tables that are created ```ruby ActiveRecord::SchemaDumper.ignore_tables += [ "items_chunks", "items_rowids", "items_vector_chunks00" ] ``` Get the `k` nearest neighbors ```ruby Item.where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance) ``` Filter by primary key ```ruby Item.where(id: [2, 3]).where("embedding MATCH ?", [1, 2, 3].to_s).where(k: 5).order(:distance) ``` ### Int8 Vectors Use the `type` option for int8 vectors ```ruby class Item < ApplicationRecord has_neighbors :embedding, dimensions: 3, type: :int8 end ``` ### Binary Vectors Use the `type` option for binary vectors ```ruby class Item < ApplicationRecord has_neighbors :embedding, dimensions: 8, type: :bit end ``` Get the nearest neighbors by Hamming distance ```ruby Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5) ``` ## Examples - [Embeddings](#openai-embeddings) with OpenAI - [Binary embeddings](#cohere-embeddings) with Cohere - [Sentence embeddings](#sentence-embeddings) with Informers - [Hybrid search](#hybrid-search) with Informers - [Sparse search](#sparse-search) with Transformers.rb - [Recommendations](#disco-recommendations) with Disco ### OpenAI Embeddings Generate a model ```sh rails generate model Document content:text embedding:vector{1536} rails db:migrate ``` And add `has_neighbors` ```ruby class Document < ApplicationRecord has_neighbors :embedding end ``` Create a method to call the [embeddings API](https://platform.openai.com/docs/guides/embeddings) ```ruby def embed(input) url = "https://api.openai.com/v1/embeddings" headers = { "Authorization" => "Bearer #{ENV.fetch("OPENAI_API_KEY")}", "Content-Type" => "application/json" } data = { input: input, model: "text-embedding-3-small" } response = Net::HTTP.post(URI(url), data.to_json, headers).tap(&:value) JSON.parse(response.body)["data"].map { |v| v["embedding"] } end ``` Pass your input ```ruby input = [ "The dog is barking", "The cat is purring", "The bear is growling" ] embeddings = embed(input) ``` Store the embeddings ```ruby documents = [] input.zip(embeddings) do |content, embedding| documents << {content: content, embedding: embedding} end Document.insert_all!(documents) ``` And get similar documents ```ruby document = Document.first document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content) ``` See the [complete code](examples/openai/example.rb) ### Cohere Embeddings Generate a model ```sh rails generate model Document content:text embedding:bit{1536} rails db:migrate ``` And add `has_neighbors` ```ruby class Document < ApplicationRecord has_neighbors :embedding end ``` Create a method to call the [embed API](https://docs.cohere.com/reference/embed) ```ruby def embed(input, input_type) url = "https://api.cohere.com/v2/embed" headers = { "Authorization" => "Bearer #{ENV.fetch("CO_API_KEY")}", "Content-Type" => "application/json" } data = { texts: input, model: "embed-v4.0", input_type: input_type, embedding_types: ["ubinary"] } response = Net::HTTP.post(URI(url), data.to_json, headers).tap(&:value) JSON.parse(response.body)["embeddings"]["ubinary"].map { |e| e.map { |v| v.chr.unpack1("B*") }.join } end ``` Pass your input ```ruby input = [ "The dog is barking", "The cat is purring", "The bear is growling" ] embeddings = embed(input, "search_document") ``` Store the embeddings ```ruby documents = [] input.zip(embeddings) do |content, embedding| documents << {content: content, embedding: embedding} end Document.insert_all!(documents) ``` Embed the search query ```ruby query = "forest" query_embedding = embed([query], "search_query")[0] ``` And search the documents ```ruby Document.nearest_neighbors(:embedding, query_embedding, distance: "hamming").first(5).map(&:content) ``` See the [complete code](examples/cohere/example.rb) ### Sentence Embeddings You can generate embeddings locally with [Informers](https://github.com/ankane/informers). Generate a model ```sh rails generate model Document content:text embedding:vector{384} rails db:migrate ``` And add `has_neighbors` ```ruby class Document < ApplicationRecord has_neighbors :embedding end ``` Load a [model](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) ```ruby model = Informers.pipeline("embedding", "sentence-transformers/all-MiniLM-L6-v2") ``` Pass your input ```ruby input = [ "The dog is barking", "The cat is purring", "The bear is growling" ] embeddings = model.(input) ``` Store the embeddings ```ruby documents = [] input.zip(embeddings) do |content, embedding| documents << {content: content, embedding: embedding} end Document.insert_all!(documents) ``` And get similar documents ```ruby document = Document.first document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content) ``` See the [complete code](examples/informers/example.rb) ### Hybrid Search You can use Neighbor for hybrid search with [Informers](https://github.com/ankane/informers). Generate a model ```sh rails generate model Document content:text embedding:vector{768} rails db:migrate ``` And add `has_neighbors` and a scope for keyword search ```ruby class Document < ApplicationRecord has_neighbors :embedding scope :search, ->(query) { where("to_tsvector(content) @@ plainto_tsquery(?)", query) .order(Arel.sql("ts_rank_cd(to_tsvector(content), plainto_tsquery(?)) DESC", query)) } end ``` Create some documents ```ruby Document.create!(content: "The dog is barking") Document.create!(content: "The cat is purring") Document.create!(content: "The bear is growling") ``` Generate an embedding for each document ```ruby embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5") embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model Document.find_each do |document| embedding = embed.(document.content, **embed_options) document.update!(embedding: embedding) end ``` Perform keyword search ```ruby query = "growling bear" keyword_results = Document.search(query).limit(20).load_async ``` And semantic search in parallel (the query prefix is specific to the [embedding model](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5)) ```ruby query_prefix = "Represent this sentence for searching relevant passages: " query_embedding = embed.(query_prefix + query, **embed_options) semantic_results = Document.nearest_neighbors(:embedding, query_embedding, distance: "cosine").limit(20).load_async ``` To combine the results, use Reciprocal Rank Fusion (RRF) ```ruby Neighbor::Reranking.rrf(keyword_results, semantic_results).first(5) ``` Or a reranking model ```ruby rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1") results = (keyword_results + semantic_results).uniq rerank.(query, results.map(&:content)).first(5).map { |v| results[v[:doc_id]] } ``` See the [complete code](examples/hybrid/example.rb) ### Sparse Search You can generate sparse embeddings locally with [Transformers.rb](https://github.com/ankane/transformers-ruby). Generate a model ```sh rails generate model Document content:text embedding:sparsevec{30522} rails db:migrate ``` And add `has_neighbors` ```ruby class Document < ApplicationRecord has_neighbors :embedding end ``` Load a [model](https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1) to generate embeddings ```ruby class EmbeddingModel def initialize(model_id) @model = Transformers::AutoModelForMaskedLM.from_pretrained(model_id) @tokenizer = Transformers::AutoTokenizer.from_pretrained(model_id) @special_token_ids = @tokenizer.special_tokens_map.map { |_, token| @tokenizer.vocab[token] } end def embed(input) feature = @tokenizer.(input, padding: true, truncation: true, return_tensors: "pt", return_token_type_ids: false) output = @model.(**feature)[0] values = Torch.max(output * feature[:attention_mask].unsqueeze(-1), dim: 1)[0] values = Torch.log(1 + Torch.relu(values)) values[0.., @special_token_ids] = 0 values.to_a end end model = EmbeddingModel.new("opensearch-project/opensearch-neural-sparse-encoding-v1") ``` Pass your input ```ruby input = [ "The dog is barking", "The cat is purring", "The bear is growling" ] embeddings = model.embed(input) ``` Store the embeddings ```ruby documents = [] input.zip(embeddings) do |content, embedding| documents << {content: content, embedding: Neighbor::SparseVector.new(embedding)} end Document.insert_all!(documents) ``` Embed the search query ```ruby query = "forest" query_embedding = model.embed([query])[0] ``` And search the documents ```ruby Document.nearest_neighbors(:embedding, Neighbor::SparseVector.new(query_embedding), distance: "inner_product").first(5).map(&:content) ``` See the [complete code](examples/sparse/example.rb) ### Disco Recommendations You can use Neighbor for online item-based recommendations with [Disco](https://github.com/ankane/disco). We’ll use MovieLens data for this example. Generate a model ```sh rails generate model Movie name:string factors:cube rails db:migrate ``` And add `has_neighbors` ```ruby class Movie < ApplicationRecord has_neighbors :factors, dimensions: 20, normalize: true end ``` Fit the recommender ```ruby data = Disco.load_movielens recommender = Disco::Recommender.new(factors: 20) recommender.fit(data) ``` Store the item factors ```ruby movies = [] recommender.item_ids.each do |item_id| movies << {name: item_id, factors: recommender.item_factors(item_id)} end Movie.create!(movies) ``` And get similar movies ```ruby movie = Movie.find_by(name: "Star Wars (1977)") movie.nearest_neighbors(:factors, distance: "cosine").first(5).map(&:name) ``` See the complete code for [cube](examples/disco/item_recs_cube.rb) and [pgvector](examples/disco/item_recs_vector.rb) ## History View the [changelog](https://github.com/ankane/neighbor/blob/master/CHANGELOG.md) ## Contributing Everyone is encouraged to help improve this project. Here are a few ways you can help: - [Report bugs](https://github.com/ankane/neighbor/issues) - Fix bugs and [submit pull requests](https://github.com/ankane/neighbor/pulls) - Write, clarify, or fix documentation - Suggest or add new features To get started with development: ```sh git clone https://github.com/ankane/neighbor.git cd neighbor bundle install # Postgres createdb neighbor_test bundle exec rake test:postgresql # SQLite bundle exec rake test:sqlite # MariaDB docker run -e MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 -e MARIADB_DATABASE=neighbor_test -p 3307:3306 mariadb:11.8 bundle exec rake test:mariadb # MySQL docker run -e MYSQL_ALLOW_EMPTY_PASSWORD=1 -e MYSQL_DATABASE=neighbor_test -p 3306:3306 mysql:9 bundle exec rake test:mysql ``` neighbor-0.6.0/CHANGELOG.md0000644000004100000410000000532515024712502015156 0ustar www-datawww-data## 0.6.0 (2025-06-12) - Added support for MariaDB 11.8 - Dropped experimental support for MariaDB 11.7 - Dropped support for Ruby < 3.2 and Active Record < 7.1 ## 0.5.2 (2025-01-05) - Improved support for Postgres arrays ## 0.5.1 (2024-12-03) - Added experimental support for MariaDB 11.7 - Dropped experimental support for MariaDB 11.6 Vector ## 0.5.0 (2024-10-07) - Added experimental support for SQLite (sqlite-vec) - Added experimental support for MariaDB 11.6 Vector - Added experimental support for MySQL 9 - Changed `normalize` option to use Active Record normalization - Fixed connection leasing for Active Record 7.2 - Dropped support for Active Record < 7 ## 0.4.3 (2024-09-02) - Added `rrf` method ## 0.4.2 (2024-08-27) - Fixed error with `nil` values ## 0.4.1 (2024-08-26) - Added `precision` option - Added support for `bit` dimensions to model generator - Fixed error with Numo arrays ## 0.4.0 (2024-06-25) - Added support for `halfvec` and `sparsevec` types - Added support for `taxicab`, `hamming`, and `jaccard` distances with `vector` extension - Added deserialization for `cube` and `vector` columns without `has_neighbor` - Added support for composite primary keys - Changed `nearest_neighbors` to replace previous `order` scopes - Changed `normalize` option to use `before_save` callback - Changed dimensions and finite values checks to use Active Record validations - Fixed issue with `nearest_neighbors` scope overriding `select` values - Removed default attribute name - Dropped support for Ruby < 3.1 ## 0.3.2 (2023-12-12) - Added deprecation warning for `has_neighbors` without an attribute name - Added deprecation warning for `nearest_neighbors` without an attribute name ## 0.3.1 (2023-09-25) - Added support for passing multiple attributes to `has_neighbors` - Fixed error with `nearest_neighbors` scope with Ruby 3.2 and Active Record 6.1 ## 0.3.0 (2023-07-24) - Dropped support for Ruby < 3 and Active Record < 6.1 ## 0.2.3 (2023-04-02) - Added support for dimensions to model generator ## 0.2.2 (2022-07-13) - Added support for configurable attribute name - Added support for multiple attributes per model ## 0.2.1 (2021-12-15) - Added support for Active Record 7 ## 0.2.0 (2021-04-21) - Added support for pgvector - Added `normalize` option - Made `dimensions` optional - Raise an error if `nearest_neighbors` already defined - Raise an error for non-finite values - Fixed NaN with zero vectors and cosine distance Breaking changes - The `distance` option has been moved from `has_neighbors` to `nearest_neighbors`, and there is no longer a default ## 0.1.2 (2021-02-21) - Added `nearest_neighbors` scope ## 0.1.1 (2021-02-16) - Fixed `Could not dump table` error ## 0.1.0 (2021-02-15) - First release