Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
746bf31
Add SQ8-to-SQ8 distance functions and optimizations
dor-forer Dec 28, 2025
8697a3e
Add SQ8-to-SQ8 benchmark tests and update related scripts
dor-forer Dec 28, 2025
e0ce268
Format
dor-forer Dec 28, 2025
ab6b077
Organizing
dor-forer Dec 28, 2025
931e339
Add full sq8 benchmarks
dor-forer Dec 28, 2025
a56474d
Optimize the sq8 sq8
dor-forer Dec 28, 2025
a25f45c
Optimize SQ8 distance functions for NEON by reducing operations and i…
dor-forer Dec 28, 2025
0ad941e
format
dor-forer Dec 28, 2025
68cd068
Add NEON DOTPROD-optimized distance functions for SQ8-to-SQ8 calculat…
dor-forer Dec 28, 2025
0b4b568
PR
dor-forer Dec 28, 2025
d0fd2e4
Remove NEON DOTPROD-optimized distance functions for INT8, UINT8, and…
dor-forer Dec 28, 2025
9de6163
Fix vector layout documentation by removing inv_norm from comments in…
dor-forer Dec 28, 2025
63a46a1
Remove 'constexpr' from ones vector declaration in NEON inner product…
dor-forer Dec 28, 2025
525f8da
Refactor distance functions to remove inv_norm parameter and update d…
dor-forer Dec 29, 2025
13a477b
Update SQ8 Cosine test to normalize both input vectors and adjust dis…
dor-forer Dec 29, 2025
c18000e
Rename 'compressed' to 'quantized' in SQ8 functions for clarity and c…
dor-forer Dec 29, 2025
bbf810e
Implement SQ8-to-SQ8 distance functions with precomputed sum and norm…
dor-forer Dec 29, 2025
dbbb7d9
Add edge case tests for SQ8-to-SQ8 precomputed cosine distance functions
dor-forer Dec 29, 2025
36ab068
Refactor SQ8 test cases to use CreateSQ8QuantizedVector for vector po…
dor-forer Dec 29, 2025
00617d7
Implement SQ8-to-SQ8 precomputed distance functions using ARM NEON, S…
dor-forer Dec 29, 2025
4331d91
Implement SQ8-to-SQ8 precomputed inner product and cosine functions; …
dor-forer Dec 29, 2025
2e7b30d
Refactor SQ8 distance functions and remove precomputed variants
dor-forer Dec 30, 2025
a111e36
Refactor SQ8 distance functions and tests for improved clarity and co…
dor-forer Dec 30, 2025
d510b8a
Refactor SQ8 benchmarks by removing precomputed variants and updating…
dor-forer Dec 30, 2025
ee26740
format
dor-forer Dec 30, 2025
afe1a4f
Remove serialization benchmark script for HNSW disk serialization
dor-forer Dec 30, 2025
a31f95c
Refactor SQ8 distance functions and tests to remove precomputed norm …
dor-forer Dec 31, 2025
f12ecf4
format
dor-forer Dec 31, 2025
0e36030
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer Dec 31, 2025
fdc16c6
Refactor SQ8 distance tests to use compressed vectors and improve nor…
dor-forer Dec 31, 2025
e5f519c
Update vector layout documentation to reflect removal of sum of squar…
dor-forer Dec 31, 2025
db1e671
Refactor SQ8 distance functions to remove norm computation
dor-forer Jan 1, 2026
d5b8587
Update SQ8-to-SQ8 distance function comment to remove norm reference
dor-forer Jan 1, 2026
91f48df
Refactor cosine similarity functions to remove unnecessary subtractio…
dor-forer Jan 1, 2026
b660111
Refactor cosine similarity functions to use specific SIMD implementat…
dor-forer Jan 1, 2026
9166cac
Refactor benchmark setup to allocate additional space for sum and sum…
dor-forer Jan 4, 2026
f28f4e7
Add CPU feature checks to disable optimizations for AArch64 in SQ8 di…
dor-forer Jan 4, 2026
e50dc45
Add CPU feature checks to disable optimizations for AArch64 in SQ8 di…
dor-forer Jan 4, 2026
6bbbc38
Fix formatting issues in SQ8 inner product function and clean up cond…
dor-forer Jan 4, 2026
66a5f88
Enhance SQ8 Inner Product Implementations with Optimized Dot Product …
dor-forer Jan 4, 2026
d7972e9
Fix header guard duplication and update test assertion for floating-p…
dor-forer Jan 4, 2026
a8075bf
Add missing pragma once directive in NEON header files
dor-forer Jan 4, 2026
cddc497
Refactor SQ8 distance functions for improved performance and clarity
dor-forer Jan 4, 2026
4f0fec7
Update SQ8 vector population functions to include metadata and adjust…
dor-forer Jan 4, 2026
8ab4192
Refactor SQ8 inner product functions for improved clarity and perform…
dor-forer Jan 4, 2026
8c59cb2
Rename inner product implementation functions for AVX2 and AVX512 for…
dor-forer Jan 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 64 additions & 7 deletions src/VecSim/spaces/IP/IP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@ using bfloat16 = vecsim_types::bfloat16;
using float16 = vecsim_types::float16;

float FLOAT_INTEGER_InnerProduct(const float *pVect1v, const uint8_t *pVect2v, size_t dimension,
float min_val, float delta, float inv_norm) {
float min_val, float delta) {
float res = 0;
for (size_t i = 0; i < dimension; i++) {
float dequantized_V2 = (pVect2v[i] * delta + min_val);
res += pVect1v[i] * dequantized_V2;
}
return res * inv_norm;
return res;
}

float SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
const auto *pVect1 = static_cast<const float *>(pVect1v);
const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);
// pVect2 is a vector of uint8_t, so we need to de-quantize it, normalize it and then multiply
// it. it is structured as [quantized values (int8_t * dim)][min_val (float)][delta
// (float)][inv_norm (float)] The last two values are used to dequantize the vector.
// (float)] The last two values are used to dequantize the vector.
const float min_val = *reinterpret_cast<const float *>(pVect2 + dimension);
const float delta = *reinterpret_cast<const float *>(pVect2 + dimension + sizeof(float));
// Compute inner product with dequantization
const float res = FLOAT_INTEGER_InnerProduct(pVect1, pVect2, dimension, min_val, delta, 1.0f);
const float res = FLOAT_INTEGER_InnerProduct(pVect1, pVect2, dimension, min_val, delta);
return 1.0f - res;
}

Expand All @@ -44,10 +44,67 @@ float SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
// Get quantization parameters
const float min_val = *reinterpret_cast<const float *>(pVect2 + dimension);
const float delta = *reinterpret_cast<const float *>(pVect2 + dimension + sizeof(float));
const float inv_norm = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));
// Compute inner product with dequantization
const float res =
FLOAT_INTEGER_InnerProduct(pVect1, pVect2, dimension, min_val, delta, inv_norm);
const float res = FLOAT_INTEGER_InnerProduct(pVect1, pVect2, dimension, min_val, delta);
return 1.0f - res;
}

// SQ8-to-SQ8: Both vectors are uint8 quantized with precomputed sum
// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
// Returns the inner-product *distance*: 1 - IP.
float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const auto *pVect1 = static_cast<const uint8_t *>(pVect1v);
    const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);

    // Inner product of the raw quantized values: Σ(q1[i]*q2[i]).
    // Accumulate in a 64-bit integer: each term fits in 16 bits (<= 255*255),
    // so integer accumulation is exact for any dimension, whereas a float
    // accumulator silently drops low-order bits once the partial sum passes
    // 2^24. This also matches the SIMD kernels (VNNI/DOTPROD), which
    // accumulate the quantized dot product in integer registers.
    uint64_t product = 0;
    for (size_t i = 0; i < dimension; i++) {
        product += static_cast<uint32_t>(pVect1[i]) * static_cast<uint32_t>(pVect2[i]);
    }

    // Quantization metadata stored immediately after the uint8 payload of pVect1.
    const float min_val1 = *reinterpret_cast<const float *>(pVect1 + dimension);
    const float delta1 = *reinterpret_cast<const float *>(pVect1 + dimension + sizeof(float));
    const float sum1 = *reinterpret_cast<const float *>(pVect1 + dimension + 2 * sizeof(float));

    // Quantization metadata stored immediately after the uint8 payload of pVect2.
    const float min_val2 = *reinterpret_cast<const float *>(pVect2 + dimension);
    const float delta2 = *reinterpret_cast<const float *>(pVect2 + dimension + sizeof(float));
    const float sum2 = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));

    // Algebraic expansion using the precomputed per-vector sums
    // (sum = Σ of the original float values):
    // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
    float res = min_val1 * sum2 + min_val2 * sum1 -
                static_cast<float>(dimension) * min_val1 * min_val2 +
                delta1 * delta2 * static_cast<float>(product);
    return 1.0f - res;
}

// SQ8-to-SQ8: Both vectors are uint8 quantized and normalized with precomputed sum
// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
// Assumes the original float vectors were normalized before quantization, so the
// inner product equals the cosine similarity; returns 1 - IP as the distance.
float SQ8_SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const auto *pVect1 = static_cast<const uint8_t *>(pVect1v);
    const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);

    // Inner product of the raw quantized values: Σ(q1[i]*q2[i]).
    // Accumulate in a 64-bit integer: each term fits in 16 bits (<= 255*255),
    // so integer accumulation is exact for any dimension, whereas a float
    // accumulator silently drops low-order bits once the partial sum passes
    // 2^24. This also matches the SIMD kernels (VNNI/DOTPROD), which
    // accumulate the quantized dot product in integer registers.
    uint64_t product = 0;
    for (size_t i = 0; i < dimension; i++) {
        product += static_cast<uint32_t>(pVect1[i]) * static_cast<uint32_t>(pVect2[i]);
    }

    // Extract metadata from the end of each vector.
    // Quantization parameters of pVect1.
    const float min_val1 = *reinterpret_cast<const float *>(pVect1 + dimension);
    const float delta1 = *reinterpret_cast<const float *>(pVect1 + dimension + sizeof(float));
    const float sum1 = *reinterpret_cast<const float *>(pVect1 + dimension + 2 * sizeof(float));

    // Quantization parameters of pVect2.
    const float min_val2 = *reinterpret_cast<const float *>(pVect2 + dimension);
    const float delta2 = *reinterpret_cast<const float *>(pVect2 + dimension + sizeof(float));
    const float sum2 = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));

    // Algebraic expansion using the precomputed per-vector sums
    // (sum = Σ of the original float values):
    // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
    float res = min_val1 * sum2 + min_val2 * sum1 -
                static_cast<float>(dimension) * min_val1 * min_val2 +
                delta1 * delta2 * static_cast<float>(product);
    return 1.0f - res;
}

Expand Down
8 changes: 8 additions & 0 deletions src/VecSim/spaces/IP/IP.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ float SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimensio
// pVect1v vector of type fp32 and pVect2v vector of type uint8
float SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension);

// SQ8-to-SQ8: Both vectors are uint8 quantized with precomputed sum
// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension);

// SQ8-to-SQ8: Both vectors are uint8 quantized and normalized with precomputed sum
// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
float SQ8_SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension);

float FP32_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);

double FP64_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
Expand Down
9 changes: 1 addition & 8 deletions src/VecSim/spaces/IP/IP_AVX2_FMA_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,7 @@ float SQ8_InnerProductSIMD16_AVX2_FMA(const void *pVect1v, const void *pVect2v,

template <unsigned char residual> // 0..15
float SQ8_CosineSIMD16_AVX2_FMA(const void *pVect1v, const void *pVect2v, size_t dimension) {
// Get dequantization parameters from the end of quantized vector
const uint8_t *pVect2 = static_cast<const uint8_t *>(pVect2v);
const float inv_norm = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));

// Calculate inner product using common implementation with normalization
float ip = SQ8_InnerProductImp_FMA<residual>(pVect1v, pVect2v, dimension);

// For cosine, we need to account for the vector norms
// The inv_norm parameter is stored after min_val and delta in the quantized vector
return 1.0f - ip * inv_norm;
return 1.0f - ip;
}
15 changes: 4 additions & 11 deletions src/VecSim/spaces/IP/IP_AVX2_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static inline void InnerProductStepSQ8(const float *&pVect1, const uint8_t *&pVe
}

template <unsigned char residual> // 0..15
float SQ8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
float SQ8_InnerProductImp_AVX2(const void *pVect1v, const void *pVect2v, size_t dimension) {
const float *pVect1 = static_cast<const float *>(pVect1v);
// pVect2 is a quantized uint8_t vector
const uint8_t *pVect2 = static_cast<const uint8_t *>(pVect2v);
Expand Down Expand Up @@ -89,19 +89,12 @@ float SQ8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimen

template <unsigned char residual> // 0..15
float SQ8_InnerProductSIMD16_AVX2(const void *pVect1v, const void *pVect2v, size_t dimension) {
return 1.0f - SQ8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);
return 1.0f - SQ8_InnerProductImp_AVX2<residual>(pVect1v, pVect2v, dimension);
}

template <unsigned char residual> // 0..15
float SQ8_CosineSIMD16_AVX2(const void *pVect1v, const void *pVect2v, size_t dimension) {
// Get dequantization parameters from the end of quantized vector
const uint8_t *pVect2 = static_cast<const uint8_t *>(pVect2v);
const float inv_norm = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));

// Calculate inner product using common implementation with normalization
float ip = SQ8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);

// For cosine, we need to account for the vector norms
// The inv_norm parameter is stored after min_val and delta in the quantized vector
return 1.0f - ip * inv_norm;
float ip = SQ8_InnerProductImp_AVX2<residual>(pVect1v, pVect2v, dimension);
return 1.0f - ip;
}
78 changes: 78 additions & 0 deletions src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_UINT8.h"
#include <immintrin.h>

/**
* SQ8-to-SQ8 distance functions using AVX512 VNNI with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
* where BOTH vectors are uint8 quantized.
*
* Uses precomputed sum stored in the vector data,
* eliminating the need to compute them during distance calculation.
*
* Uses algebraic optimization to leverage integer VNNI instructions:
*
* With sum = Σv[i] (sum of original float values), the formula is:
* IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1[i]*q2[i]) - dim*min1*min2
*
* Since sum is precomputed, we only need to compute the dot product Σ(q1[i]*q2[i]).
* The dot product is computed using the efficient UINT8_InnerProductImp which uses
* VNNI instructions (_mm512_dpwssd_epi32) for native integer dot product computation.
*
* Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
*/

// Shared inner-product core for two SQ8 vectors carrying a precomputed sum.
// Delegates the integer dot product to the UINT8 AVX512 VNNI kernel.
template <unsigned char residual> // 0..63
float SQ8_SQ8_InnerProductImp(const void *pVec1v, const void *pVec2v, size_t dimension) {
    // Raw dot product of the quantized bytes, Σ(q1[i]*q2[i]), computed with
    // VNNI instructions (_mm512_dpwssd_epi32) inside UINT8_InnerProductImp.
    const int qdot = UINT8_InnerProductImp<residual>(pVec1v, pVec2v, dimension);

    // Each vector stores three floats right after its uint8 payload:
    // [min (dequantization offset)] [delta (dequantization scale)] [sum of original floats]
    const auto *bytes1 = static_cast<const uint8_t *>(pVec1v);
    const auto *bytes2 = static_cast<const uint8_t *>(pVec2v);
    const float *meta1 = reinterpret_cast<const float *>(bytes1 + dimension);
    const float *meta2 = reinterpret_cast<const float *>(bytes2 + dimension);
    const float min1 = meta1[0], delta1 = meta1[1], sum1 = meta1[2];
    const float min2 = meta2[0], delta2 = meta2[1], sum2 = meta2[2];

    // Algebraic expansion with precomputed sums (sum = Σ of original floats):
    // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
    return min1 * sum2 + min2 * sum1 + delta1 * delta2 * static_cast<float>(qdot) -
           static_cast<float>(dimension) * min1 * min2;
}

// SQ8-to-SQ8 Inner Product distance function
// Returns 1 - inner_product (distance form)
template <unsigned char residual> // 0..63
float SQ8_SQ8_InnerProductSIMD64_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *pVec2v,
                                                    size_t dimension) {
    const float ip = SQ8_SQ8_InnerProductImp<residual>(pVec1v, pVec2v, dimension);
    return 1.0f - ip;
}

// SQ8-to-SQ8 Cosine distance function
// Returns 1 - (inner_product)
template <unsigned char residual> // 0..63
float SQ8_SQ8_CosineSIMD64_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *pVec2v,
                                              size_t dimension) {
    // Vectors are assumed pre-normalized, so cosine distance coincides with the
    // inner-product distance; delegate to the IP entry point.
    return SQ8_SQ8_InnerProductSIMD64_AVX512F_BW_VL_VNNI<residual>(pVec1v, pVec2v, dimension);
}
1 change: 1 addition & 0 deletions src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_UINT8.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once
#include "VecSim/spaces/space_includes.h"

static inline void InnerProductStep(uint8_t *&pVect1, uint8_t *&pVect2, __m512i &sum) {
Expand Down
11 changes: 3 additions & 8 deletions src/VecSim/spaces/IP/IP_AVX512F_SQ8_BW_VL_VNNI.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ static inline void SQ8_InnerProductStep(const float *&pVec1, const uint8_t *&pVe

// Common implementation for both inner product and cosine similarity
template <unsigned char residual> // 0..15
float SQ8_InnerProductImp(const void *pVec1v, const void *pVec2v, size_t dimension,
float inv_norm = 1.0f) {
float SQ8_InnerProductImp_AVX512(const void *pVec1v, const void *pVec2v, size_t dimension) {
const float *pVec1 = static_cast<const float *>(pVec1v);
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);
const float *pEnd1 = pVec1 + dimension;
Expand Down Expand Up @@ -92,7 +91,7 @@ template <unsigned char residual> // 0..15
float SQ8_InnerProductSIMD16_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *pVec2v,
size_t dimension) {
// Calculate inner product using common implementation
float ip = SQ8_InnerProductImp<residual>(pVec1v, pVec2v, dimension);
float ip = SQ8_InnerProductImp_AVX512<residual>(pVec1v, pVec2v, dimension);

// The inner product similarity is 1 - ip
return 1.0f - ip;
Expand All @@ -101,12 +100,8 @@ float SQ8_InnerProductSIMD16_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *
template <unsigned char residual> // 0..15
float SQ8_CosineSIMD16_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *pVec2v,
size_t dimension) {
// Get the inverse norm factor stored after min_val and delta
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);
const float inv_norm = *reinterpret_cast<const float *>(pVec2 + dimension + 2 * sizeof(float));

// Calculate inner product using common implementation with normalization
float ip = SQ8_InnerProductImp<residual>(pVec1v, pVec2v, dimension, inv_norm);
float ip = SQ8_InnerProductImp_AVX512<residual>(pVec1v, pVec2v, dimension);

// The cosine similarity is 1 - ip
return 1.0f - ip;
Expand Down
77 changes: 77 additions & 0 deletions src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_NEON_DOTPROD_UINT8.h"
#include <arm_neon.h>

/**
* SQ8-to-SQ8 distance functions using ARM NEON DOTPROD with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
* where BOTH vectors are uint8 quantized.
*
* Uses precomputed sum stored in the vector data,
* eliminating the need to compute them during distance calculation.
*
* Uses algebraic optimization with DOTPROD instruction:
*
* With sum = Σv[i] (sum of original float values), the formula is:
* IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1[i]*q2[i]) - dim*min1*min2
*
* Since sum is precomputed, we only need to compute the dot product Σ(q1[i]*q2[i]).
* The dot product is computed using the efficient UINT8_InnerProductImp which uses
* the DOTPROD instruction (vdotq_u32) for native uint8 dot product computation.
*
* Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
*/

// Shared inner-product core for two SQ8 vectors carrying a precomputed sum.
// Delegates the quantized dot product to the UINT8 NEON DOTPROD kernel.
template <unsigned char residual> // 0..63
float SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD_IMP(const void *pVec1v, const void *pVec2v,
                                                  size_t dimension) {
    // Raw dot product of the quantized bytes, Σ(q1[i]*q2[i]), computed with the
    // DOTPROD instruction (vdotq_u32) inside UINT8_InnerProductImp.
    // NOTE(review): result is held as float here, while the AVX512 variant uses
    // int — presumably matching UINT8_InnerProductImp's return type; confirm.
    const float qdot = UINT8_InnerProductImp<residual>(pVec1v, pVec2v, dimension);

    // Each vector stores three floats right after its uint8 payload:
    // [min (dequantization offset)] [delta (dequantization scale)] [sum of original floats]
    const auto *bytes1 = static_cast<const uint8_t *>(pVec1v);
    const auto *bytes2 = static_cast<const uint8_t *>(pVec2v);
    const float *meta1 = reinterpret_cast<const float *>(bytes1 + dimension);
    const float *meta2 = reinterpret_cast<const float *>(bytes2 + dimension);
    const float min1 = meta1[0], delta1 = meta1[1], sum1 = meta1[2];
    const float min2 = meta2[0], delta2 = meta2[1], sum2 = meta2[2];

    // Algebraic expansion with precomputed sums (sum = Σ of original floats):
    // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
    return min1 * sum2 + min2 * sum1 + delta1 * delta2 * qdot -
           static_cast<float>(dimension) * min1 * min2;
}

// SQ8-to-SQ8 Inner Product distance function
// Returns 1 - inner_product (distance form)
template <unsigned char residual> // 0..63
float SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD(const void *pVec1v, const void *pVec2v,
                                              size_t dimension) {
    const float ip =
        SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD_IMP<residual>(pVec1v, pVec2v, dimension);
    return 1.0f - ip;
}

// SQ8-to-SQ8 Cosine distance function
// Returns 1 - inner_product (assumes vectors are pre-normalized)
template <unsigned char residual> // 0..63
float SQ8_SQ8_CosineSIMD64_NEON_DOTPROD(const void *pVec1v, const void *pVec2v, size_t dimension) {
    // With normalized inputs, cosine distance equals the IP distance; delegate.
    return SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD<residual>(pVec1v, pVec2v, dimension);
}
1 change: 1 addition & 0 deletions src/VecSim/spaces/IP/IP_NEON_DOTPROD_UINT8.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once
#include "VecSim/spaces/space_includes.h"
#include <arm_neon.h>

Expand Down
10 changes: 1 addition & 9 deletions src/VecSim/spaces/IP/IP_NEON_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,7 @@ float SQ8_InnerProductSIMD16_NEON(const void *pVect1v, const void *pVect2v, size

template <unsigned char residual> // 0..15
float SQ8_CosineSIMD16_NEON(const void *pVect1v, const void *pVect2v, size_t dimension) {
const uint8_t *pVect2 = static_cast<const uint8_t *>(pVect2v);

// Get quantization parameters
const float inv_norm = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));

// Compute inner product with dequantization using the common function
const float res = SQ8_InnerProductSIMD16_NEON_IMP<residual>(pVect1v, pVect2v, dimension);

// For cosine, we need to account for the vector norms
// The inv_norm parameter is stored after min_val and delta in the quantized vector
return 1.0f - res * inv_norm;
return 1.0f - res;
}
Loading
Loading