This commit is contained in:
Iliyan Angelov
2025-09-19 11:58:53 +03:00
parent 306b20e24a
commit 6b247e5b9f
11423 changed files with 1500615 additions and 778 deletions

View File

@@ -0,0 +1,46 @@
import json
from redis._parsers.helpers import pairs_to_dict
from redis.commands.vectorset.utils import (
parse_vemb_result,
parse_vlinks_result,
parse_vsim_result,
)
from ..helpers import get_protocol_version
from .commands import (
VEMB_CMD,
VGETATTR_CMD,
VINFO_CMD,
VLINKS_CMD,
VSIM_CMD,
VectorSetCommands,
)
class VectorSet(VectorSetCommands):
    """VectorSet commands bound to a concrete Redis client."""

    def __init__(self, client, **kwargs):
        """
        Create a new VectorSet client.

        ``client`` is the Redis client whose ``execute_command`` is reused and
        on which the response callbacks are registered.
        Additional keyword arguments are accepted but not used here.
        """

        def _attributes_or_none(reply):
            # Empty replies and empty JSON documents are both reported as None.
            if not reply:
                return None
            return json.loads(reply) or None

        def _info_or_none(reply):
            # Empty replies and empty ``pairs_to_dict`` results become None.
            if not reply:
                return None
            return pairs_to_dict(reply) or None

        # Callbacks registered regardless of the protocol version.
        self._MODULE_CALLBACKS = {
            VEMB_CMD: parse_vemb_result,
            VGETATTR_CMD: _attributes_or_none,
        }
        # Callbacks registered only when the client does not use protocol 3.
        self._RESP2_MODULE_CALLBACKS = {
            VINFO_CMD: _info_or_none,
            VSIM_CMD: parse_vsim_result,
            VLINKS_CMD: parse_vlinks_result,
        }
        # Protocol 3 currently needs no extra callbacks.
        self._RESP3_MODULE_CALLBACKS = {}

        self.client = client
        self.execute_command = client.execute_command

        uses_resp3 = get_protocol_version(self.client) in ["3", 3]
        protocol_callbacks = (
            self._RESP3_MODULE_CALLBACKS
            if uses_resp3
            else self._RESP2_MODULE_CALLBACKS
        )
        self._MODULE_CALLBACKS.update(protocol_callbacks)

        for command, callback in self._MODULE_CALLBACKS.items():
            self.client.set_response_callback(command, callback)

View File

@@ -0,0 +1,374 @@
import json
from enum import Enum
from typing import Awaitable, Dict, List, Optional, Union
from redis.client import NEVER_DECODE
from redis.commands.helpers import get_protocol_version
from redis.exceptions import DataError
from redis.typing import CommandsProtocol, EncodableT, KeyT, Number
# Names of the Redis vector set commands issued by ``VectorSetCommands``.

# Commands that modify a vector set or its elements.
VADD_CMD = "VADD"
VREM_CMD = "VREM"
VSETATTR_CMD = "VSETATTR"

# Commands that read from a vector set.
VSIM_CMD = "VSIM"
VEMB_CMD = "VEMB"
VLINKS_CMD = "VLINKS"
VGETATTR_CMD = "VGETATTR"
VRANDMEMBER_CMD = "VRANDMEMBER"

# Commands that report properties of the vector set itself.
VDIM_CMD = "VDIM"
VCARD_CMD = "VCARD"
VINFO_CMD = "VINFO"
class QuantizationOptions(Enum):
    """
    Quantization options for the VADD command.

    The value of the selected member is sent to the server as-is.
    """

    # No quantization.
    NOQUANT = "NOQUANT"
    # Binary quantization.
    BIN = "BIN"
    # Signed 8-bit quantization.
    Q8 = "Q8"
class CallbacksOptions(Enum):
    """
    Options that can be set for the commands callbacks.

    The member values are used as keyword-option names handed to
    ``execute_command`` and read back by the response parsers.
    """

    # The reply is in the ``RAW`` layout (set by ``vemb`` when ``raw`` is used).
    RAW = "RAW"
    # The reply carries scores next to the elements.
    WITHSCORES = "WITHSCORES"
    # The parser may decode bytes fields of a raw reply to ``str``.
    ALLOW_DECODING = "ALLOW_DECODING"
    # The client uses protocol version 3.
    RESP3 = "RESP3"
class VectorSetCommands(CommandsProtocol):
    """
    Redis VectorSet commands.

    Every method builds the argument list of one vector set command and hands
    it to ``self.execute_command``. ``vemb`` additionally reads ``self.client``.
    """

    def vadd(
        self,
        key: KeyT,
        vector: Union[List[float], bytes],
        element: str,
        reduce_dim: Optional[int] = None,
        cas: Optional[bool] = False,
        quantization: Optional[QuantizationOptions] = None,
        ef: Optional[Number] = None,
        attributes: Optional[Union[dict, str]] = None,
        numlinks: Optional[int] = None,
    ) -> Union[Awaitable[int], int]:
        """
        Add vector ``vector`` for element ``element`` to a vector set ``key``.

        ``vector`` is either a list of floats (sent as ``VALUES``) or a bytes
        blob (sent as ``FP32``).

        ``reduce_dim`` sets the dimensions to reduce the vector to.
        If not provided, the vector is not reduced.

        ``cas`` is a boolean flag that indicates whether to use CAS
        (check-and-set style) when adding the vector.
        If not provided, CAS is not used.

        ``quantization`` sets the quantization type to use.
        If not provided, int8 quantization is used.
        The options are:
        - NOQUANT: No quantization
        - BIN: Binary quantization
        - Q8: Signed 8-bit quantization

        ``ef`` sets the exploration factor to use.
        If not provided, the default exploration factor is used.

        ``attributes`` is a dictionary or json string that contains the
        attributes to set for the vector.
        If not provided, no attributes are set.

        ``numlinks`` sets the number of links to create for the vector.
        If not provided, the default number of links is used.

        Raises ``DataError`` if ``vector`` or ``element`` is empty.

        For more information see https://redis.io/commands/vadd
        """
        if not vector or not element:
            raise DataError("Both vector and element must be provided")

        pieces = []
        if reduce_dim:
            pieces.extend(["REDUCE", reduce_dim])

        if isinstance(vector, bytes):
            pieces.extend(["FP32", vector])
        else:
            pieces.extend(["VALUES", len(vector)])
            pieces.extend(vector)

        pieces.append(element)

        if cas:
            pieces.append("CAS")

        if quantization:
            pieces.append(quantization.value)

        if ef:
            pieces.extend(["EF", ef])

        if attributes:
            if isinstance(attributes, dict):
                # transform attributes to json string
                attributes_json = json.dumps(attributes)
            else:
                attributes_json = attributes
            pieces.extend(["SETATTR", attributes_json])

        if numlinks:
            pieces.extend(["M", numlinks])

        return self.execute_command(VADD_CMD, key, *pieces)

    def vsim(
        self,
        key: KeyT,
        input: Union[List[float], bytes, str],
        with_scores: Optional[bool] = False,
        count: Optional[int] = None,
        ef: Optional[Number] = None,
        filter: Optional[str] = None,
        filter_ef: Optional[str] = None,
        truth: Optional[bool] = False,
        no_thread: Optional[bool] = False,
        epsilon: Optional[Number] = None,
    ) -> Union[
        Awaitable[Optional[List[Union[List[EncodableT], Dict[EncodableT, Number]]]]],
        Optional[List[Union[List[EncodableT], Dict[EncodableT, Number]]]],
    ]:
        """
        Compare a vector or element ``input`` with the other vectors in a
        vector set ``key``.

        ``input`` is sent as ``FP32`` when it is bytes, as ``VALUES`` when it
        is a list, and as an element name (``ELE``) otherwise.

        ``with_scores`` sets if the results should be returned with the
        similarity scores of the elements in the result.

        ``count`` sets the number of results to return.

        ``ef`` sets the exploration factor.

        ``filter`` sets filter that should be applied for the search.

        ``filter_ef`` sets the max filtering effort.

        ``truth`` when enabled forces the command to perform linear scan.

        ``no_thread`` when enabled forces the command to execute the search
        on the data structure in the main thread.

        ``epsilon`` floating point between 0 and 1, if specified will return
        only elements with distance no further than the specified one.

        Raises ``DataError`` if ``input`` is empty.

        For more information see https://redis.io/commands/vsim
        """
        if not input:
            raise DataError("'input' should be provided")

        pieces = []
        options = {}

        if isinstance(input, bytes):
            pieces.extend(["FP32", input])
        elif isinstance(input, list):
            pieces.extend(["VALUES", len(input)])
            pieces.extend(input)
        else:
            pieces.extend(["ELE", input])

        if with_scores:
            pieces.append("WITHSCORES")
            # tell the response callback that scores are part of the reply
            options[CallbacksOptions.WITHSCORES.value] = True

        if count:
            pieces.extend(["COUNT", count])

        if epsilon:
            pieces.extend(["EPSILON", epsilon])

        if ef:
            pieces.extend(["EF", ef])

        if filter:
            pieces.extend(["FILTER", filter])

        if filter_ef:
            pieces.extend(["FILTER-EF", filter_ef])

        if truth:
            pieces.append("TRUTH")

        if no_thread:
            pieces.append("NOTHREAD")

        return self.execute_command(VSIM_CMD, key, *pieces, **options)

    def vdim(self, key: KeyT) -> Union[Awaitable[int], int]:
        """
        Get the dimension of a vector set.

        In the case of vectors that were populated using the `REDUCE`
        option, for random projection, the vector set will report the size of
        the projected (reduced) dimension.

        Raises `redis.exceptions.ResponseError` if the vector set doesn't exist.

        For more information see https://redis.io/commands/vdim
        """
        return self.execute_command(VDIM_CMD, key)

    def vcard(self, key: KeyT) -> Union[Awaitable[int], int]:
        """
        Get the cardinality (the number of elements) of a vector set with key ``key``.

        Raises `redis.exceptions.ResponseError` if the vector set doesn't exist.

        For more information see https://redis.io/commands/vcard
        """
        return self.execute_command(VCARD_CMD, key)

    def vrem(self, key: KeyT, element: str) -> Union[Awaitable[int], int]:
        """
        Remove an element from a vector set.

        For more information see https://redis.io/commands/vrem
        """
        return self.execute_command(VREM_CMD, key, element)

    def vemb(
        self, key: KeyT, element: str, raw: Optional[bool] = False
    ) -> Union[
        Awaitable[Optional[Union[List[EncodableT], Dict[str, EncodableT]]]],
        Optional[Union[List[EncodableT], Dict[str, EncodableT]]],
    ]:
        """
        Get the approximated vector of an element ``element`` from vector set ``key``.

        ``raw`` is a boolean flag that indicates whether to return the
        internal representation used by the vector.

        For more information see https://redis.io/commands/vemb
        """
        options = {}
        pieces = [key, element]

        if get_protocol_version(self.client) in ["3", 3]:
            options[CallbacksOptions.RESP3.value] = True

        if raw:
            pieces.append("RAW")

            # the raw reply must reach the callback undecoded
            options[NEVER_DECODE] = True
            # ``decode_responses`` may be missing from ``connection_kwargs``
            # (e.g. a pool created without that argument), so look it up
            # with ``.get`` instead of indexing to avoid a KeyError.
            if (
                hasattr(self.client, "connection_pool")
                and self.client.connection_pool.connection_kwargs.get(
                    "decode_responses"
                )
            ) or (
                hasattr(self.client, "nodes_manager")
                and self.client.nodes_manager.connection_kwargs.get(
                    "decode_responses"
                )
            ):
                # allow decoding in the postprocessing callback
                # if the user set decode_responses=True
                # in the connection pool
                options[CallbacksOptions.ALLOW_DECODING.value] = True

            options[CallbacksOptions.RAW.value] = True

        return self.execute_command(VEMB_CMD, *pieces, **options)

    def vlinks(
        self, key: KeyT, element: str, with_scores: Optional[bool] = False
    ) -> Union[
        Awaitable[
            Optional[
                List[Union[List[Union[str, bytes]], Dict[Union[str, bytes], Number]]]
            ]
        ],
        Optional[List[Union[List[Union[str, bytes]], Dict[Union[str, bytes], Number]]]],
    ]:
        """
        Returns the neighbors for each level the element ``element`` exists in
        the vector set ``key``.

        The result is a list of lists, where each list contains the neighbors
        for one level.
        If the element does not exist, or if the vector set does not exist,
        None is returned.

        If the ``WITHSCORES`` option is provided, the result is a list of dicts,
        where each dict contains the neighbors for one level, with the scores
        as values.

        For more information see https://redis.io/commands/vlinks
        """
        options = {}
        pieces = [key, element]

        if with_scores:
            pieces.append("WITHSCORES")
            # tell the response callback that scores are part of the reply
            options[CallbacksOptions.WITHSCORES.value] = True

        return self.execute_command(VLINKS_CMD, *pieces, **options)

    def vinfo(self, key: KeyT) -> Union[Awaitable[dict], dict]:
        """
        Get information about a vector set.

        For more information see https://redis.io/commands/vinfo
        """
        return self.execute_command(VINFO_CMD, key)

    def vsetattr(
        self, key: KeyT, element: str, attributes: Optional[Union[dict, str]] = None
    ) -> Union[Awaitable[int], int]:
        """
        Associate or remove JSON attributes ``attributes`` of element ``element``
        for vector set ``key``.

        ``attributes`` may be a dictionary (serialized to JSON here) or an
        already serialized string. When it is ``None``, ``"{}"`` is sent.

        For more information see https://redis.io/commands/vsetattr
        """
        if attributes is None:
            attributes_json = "{}"
        elif isinstance(attributes, dict):
            # transform attributes to json string
            attributes_json = json.dumps(attributes)
        else:
            attributes_json = attributes

        return self.execute_command(VSETATTR_CMD, key, element, attributes_json)

    def vgetattr(
        self, key: KeyT, element: str
    ) -> Union[Optional[Awaitable[dict]], Optional[dict]]:
        """
        Retrieve the JSON attributes of an element ``element`` for vector set ``key``.

        If the element does not exist, or if the vector set does not exist, None is
        returned.

        For more information see https://redis.io/commands/vgetattr
        """
        return self.execute_command(VGETATTR_CMD, key, element)

    def vrandmember(
        self, key: KeyT, count: Optional[int] = None
    ) -> Union[
        Awaitable[Optional[Union[List[str], str]]], Optional[Union[List[str], str]]
    ]:
        """
        Returns random elements from a vector set ``key``.

        ``count`` is the number of elements to return.
        If ``count`` is not provided, a single element is returned as a
        single string.
        If ``count`` is positive (smaller than the number of elements
        in the vector set), the command returns a list with up to ``count``
        distinct elements from the vector set.
        If ``count`` is negative, the command returns a list with ``count``
        random elements, potentially with duplicates.
        If ``count`` is greater than the number of elements in the vector set,
        only the entire set is returned as a list.

        If the vector set does not exist, ``None`` is returned.

        For more information see https://redis.io/commands/vrandmember
        """
        pieces = [key]
        # ``count`` is compared against None so that 0 and negative values
        # are still forwarded to the server.
        if count is not None:
            pieces.append(count)
        return self.execute_command(VRANDMEMBER_CMD, *pieces)

View File

@@ -0,0 +1,94 @@
from redis._parsers.helpers import pairs_to_dict
from redis.commands.vectorset.commands import CallbacksOptions
def parse_vemb_result(response, **options):
    """
    Post-process a VEMB reply, whose layout depends on the options the
    command was issued with.

    - ``None`` replies are passed through.
    - With the ``RAW`` option a dict is built from the reply items:
      ``quantization`` (item 0, decoded as UTF-8 only when ``ALLOW_DECODING``
      is set), ``raw`` (item 1), ``l2`` (item 2 as float) and, when a fourth
      item is present, ``range`` (item 3 as float).
    - Without ``RAW`` and with the ``RESP3`` option the reply is returned as is.
    - Otherwise every item is converted to ``int`` when possible and to
      ``float`` when ``int`` raises ``ValueError``.
    """
    if response is None:
        return response

    if options.get(CallbacksOptions.RAW.value):
        quantization = response[0]
        if options.get(CallbacksOptions.ALLOW_DECODING.value):
            quantization = quantization.decode("utf-8")

        raw_result = {
            "quantization": quantization,
            "raw": response[1],
            "l2": float(response[2]),
        }
        if len(response) > 3:
            raw_result["range"] = float(response[3])
        return raw_result

    if options.get(CallbacksOptions.RESP3.value):
        return response

    def _as_number(item):
        # Prefer an integer; anything int() rejects is expected to be a float.
        try:
            return int(item)
        except ValueError:
            return float(item)

    return [_as_number(item) for item in response]
def parse_vlinks_result(response, **options):
    """
    Post-process a VLINKS reply, whose layout depends on the options the
    command was issued with.

    - ``None`` replies are passed through.
    - Without the ``WITHSCORES`` option the reply (one entry per level) is
      returned unchanged.
    - With ``WITHSCORES`` each level entry is turned into a dict via
      ``pairs_to_dict`` and every value is converted to ``float``, giving a
      list with one dict per level.
    """
    if response is None:
        return response

    if not options.get(CallbacksOptions.WITHSCORES.value):
        # nothing to convert: hand back the per-level entries untouched
        return response

    return [
        {neighbor: float(score) for neighbor, score in pairs_to_dict(level).items()}
        for level in response
    ]
def parse_vsim_result(response, **options):
    """
    Post-process a VSIM reply, whose layout depends on the options the
    command was issued with.

    - ``None`` replies are passed through.
    - Without the ``WITHSCORES`` option the reply is returned unchanged.
    - With ``WITHSCORES`` the reply is turned into a single dict via
      ``pairs_to_dict`` and every value is converted to ``float``.
    """
    if response is None:
        return response

    if not options.get(CallbacksOptions.WITHSCORES.value):
        # no scores requested: the reply needs no conversion
        return response

    scored = pairs_to_dict(response)
    return {element: float(score) for element, score in scored.items()}