Class `swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore.PersistentQdrantVectorStore`

swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore.PersistentQdrantVectorStore

PersistentQdrantVectorStore(**kwargs)

Bases: VectorStoreSaveLoadMixin, VectorStoreRetrieveMixin, VectorStorePersistentMixin, VectorStoreBase

PersistentQdrantVectorStore is a concrete implementation that integrates functionality for saving, loading, storing, and retrieving vector documents, leveraging a locally hosted Qdrant instance as the backend.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def __init__(self, **kwargs):
    """
    Initialize the store and attach its embedding and distance helpers.

    Parameters:
        **kwargs: Field values forwarded unchanged to the parent initializer.
    """
    super().__init__(**kwargs)

    # vector_size is read only after the parent initializer has run.
    embedding_dim = self.vector_size
    self._embedder = Doc2VecEmbedding(vector_size=embedding_dim)
    self._distance = CosineDistance()

type `class-attribute` `instance-attribute`

type = 'PersistentQdrantVectorStore'

model_config `class-attribute` `instance-attribute`

model_config = ConfigDict(arbitrary_types_allowed=True)

client `class-attribute` `instance-attribute`

client = Field(default=None, init=False)

id `class-attribute` `instance-attribute`

id = Field(default_factory=generate_id)

members `class-attribute` `instance-attribute`

members = None

owners `class-attribute` `instance-attribute`

owners = None

host `class-attribute` `instance-attribute`

host = None

default_logger `class-attribute`

default_logger = None

logger `class-attribute` `instance-attribute`

logger = None

name `class-attribute` `instance-attribute`

name = None

resource `class-attribute` `instance-attribute`

resource = Field(default=VECTOR_STORE.value)

version `class-attribute` `instance-attribute`

version = '0.1.0'

documents `class-attribute` `instance-attribute`

documents = []

embedder `property`

embedder

collection_name `instance-attribute`

collection_name

collection `class-attribute` `instance-attribute`

collection = Field(
    None,
    description="Collection object for interacting with the persistent-based store",
)

path `class-attribute` `instance-attribute`

path = Field(
    None,
    description="URL of the persistent-based store to connect to",
)

vector_size `class-attribute` `instance-attribute`

vector_size = Field(
    None,
    description="Size of the vectors used in the store",
)

vectorizer `class-attribute` `instance-attribute`

vectorizer = Field(
    None,
    description="Vectorizer object for converting documents to vectors",
)

connect

connect()

Connects to the local Qdrant store at the configured `path` and creates the collection if it does not exist yet.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def connect(self) -> None:
    """
    Open the Qdrant client if needed and make sure the collection exists.

    A client is created from ``self.path`` only when ``self.client`` is
    ``None``; an already-assigned client is reused. When
    ``self.collection_name`` is not among the collections reported by the
    client, it is created with ``self.vector_size`` dimensions and cosine
    distance.
    """
    if self.client is None:
        self.client = QdrantClient(path=self.path)

    existing_names = {
        collection.name for collection in self.client.get_collections().collections
    }
    if self.collection_name in existing_names:
        return

    # The collection is known to be absent at this point, so a plain create is
    # sufficient; recreate_collection (delete + create) is deprecated in
    # qdrant-client.
    self.client.create_collection(
        collection_name=self.collection_name,
        vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE),
    )

disconnect

disconnect()

Disconnects from the Qdrant vector store.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def disconnect(self) -> None:
    """
    Drop the reference to the Qdrant client, if one is held.
    """
    if self.client is None:
        # Already disconnected; leave the attribute untouched.
        return
    self.client = None

add_document

add_document(document)

Add a single document to the document store.

PARAMETER	DESCRIPTION
`document`	The document to be added to the store. TYPE: `Document`

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def add_document(self, document: Document) -> None:
    """
    Add a single document to the document store.

    When the document has no embedding, the embedder is fitted on this
    document's content and the resulting vector is used. A supplied
    embedding is used instead. In both cases a vector object exposing
    ``to_numpy`` is converted to a plain list before it is stored, which is
    the same conversion ``update_document`` applies.

    Parameters:
        document (Document): The document to be added to the store.
    """
    if not document.embedding:
        # NOTE: the embedder is re-fitted on every call that lacks an embedding.
        self._embedder.fit([document.content])
        embedding = self._embedder.transform([document.content])[0]
    else:
        embedding = document.embedding

    # Hand the client a plain list rather than a vector object.
    if hasattr(embedding, "to_numpy"):
        embedding = embedding.to_numpy().tolist()

    payload = {
        "content": document.content,
        "metadata": document.metadata,
    }
    point = PointStruct(id=document.id, vector=embedding, payload=payload)

    self.client.upsert(
        collection_name=self.collection_name,
        points=[point],
    )

add_documents

add_documents(documents)

Add multiple documents to the document store in a batch operation.

PARAMETER	DESCRIPTION
`documents`	A list of documents to be added to the store. TYPE: `List[Document]`

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def add_documents(self, documents: List[Document]) -> None:
    """
    Add multiple documents to the document store in a batch operation.

    Each document contributes one point. A document without an embedding is
    embedded with ``fit_transform`` on its own content; a supplied embedding
    is used as-is. Vector objects exposing ``to_numpy`` are converted to
    plain lists before being stored. An empty ``documents`` list is a no-op
    and does not contact the client.

    Parameters:
        documents (List[Document]): A list of documents to be added to the store.
    """
    points = []
    for doc in documents:
        vector = doc.embedding or self._embedder.fit_transform([doc.content])[0]
        # Hand the client a plain list rather than a vector object.
        if hasattr(vector, "to_numpy"):
            vector = vector.to_numpy().tolist()
        points.append(
            PointStruct(
                id=doc.id,
                vector=vector,
                payload={"content": doc.content, "metadata": doc.metadata},
            )
        )

    if not points:
        # Nothing to store; skip the round trip for an empty batch.
        return

    self.client.upsert(self.collection_name, points=points)

get_document

get_document(id)

Retrieve a single document by its identifier.

PARAMETER	DESCRIPTION
`id`	The unique identifier of the document to retrieve. TYPE: `str`

RETURNS	DESCRIPTION
`Union[Document, None]`	Union[Document, None]: The requested document if found; otherwise, None.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def get_document(self, id: str) -> Union[Document, None]:
    """
    Look up one stored document by its point identifier.

    Parameters:
        id (str): The unique identifier of the document to retrieve.

    Returns:
        Union[Document, None]: A document rebuilt from the stored payload's
        ``content`` and ``metadata``, or None when nothing is returned for
        the identifier.
    """
    matches = self.client.retrieve(
        collection_name=self.collection_name,
        ids=[id],
    )
    if not matches:
        return None

    stored = matches[0].payload
    return Document(id=id, content=stored["content"], metadata=stored["metadata"])

get_all_documents

get_all_documents()

Retrieve all documents stored in the document store.

RETURNS	DESCRIPTION
`List[Document]`	List[Document]: A list of all documents in the store.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def get_all_documents(self) -> List[Document]:
    """
    Retrieve all documents stored in the document store.

    The collection is read page by page: each ``scroll`` call returns a
    batch of points plus the offset of the next page, and reading continues
    until that offset is ``None``. A single ``scroll`` call would return
    only the first page.

    Returns:
        List[Document]: A list of all documents in the store.
    """
    page_size = 256  # points requested per scroll call
    documents = []
    offset = None

    while True:
        points, offset = self.client.scroll(
            collection_name=self.collection_name,
            limit=page_size,
            offset=offset,
        )
        documents.extend(
            Document(
                id=point.id,
                content=point.payload["content"],
                metadata=point.payload["metadata"],
            )
            for point in points
        )
        if offset is None:
            # No further page was reported.
            break

    return documents

delete_document

delete_document(id)

Delete a document from the document store by its identifier.

PARAMETER	DESCRIPTION
`id`	The unique identifier of the document to delete. TYPE: `str`

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def delete_document(self, id: str) -> None:
    """
    Remove the point with the given identifier from the collection.

    Parameters:
        id (str): The unique identifier of the document to delete.
    """
    selector = [id]
    self.client.delete(self.collection_name, points_selector=selector)

update_document

update_document(id, updated_document)

Update a document in the document store.

PARAMETER	DESCRIPTION
`id`	The unique identifier of the document to update. TYPE: `str`
`updated_document`	The updated document instance. TYPE: `Document`

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def update_document(self, id: str, updated_document: Document) -> None:
    """
    Update a document in the document store.

    The point stored under ``id`` is overwritten (upserted) with the updated
    document's content, metadata and vector. When the document has no
    embedding, its content is transformed with the already-fitted embedder.

    Parameters:
        id (str): The unique identifier of the document to update.
        updated_document (Document): The updated document instance.
    """
    if not updated_document.embedding:
        # Transform without refitting to avoid vocabulary issues.
        document_vector = self._embedder.transform([updated_document.content])[0]
    else:
        document_vector = updated_document.embedding

    # Vector objects are unwrapped to a list; an embedding that is already a
    # plain sequence is passed through instead of failing on ``to_numpy``.
    if hasattr(document_vector, "to_numpy"):
        document_vector = document_vector.to_numpy().tolist()

    self.client.upsert(
        self.collection_name,
        points=[
            PointStruct(
                id=id,
                vector=document_vector,
                payload={
                    "content": updated_document.content,
                    "metadata": updated_document.metadata,
                },
            )
        ],
    )

clear_documents

clear_documents()

Deletes all documents from the vector store.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def clear_documents(self) -> None:
    """
    Remove every document by deleting the backing Qdrant collection.
    """
    drop_collection = self.client.delete_collection
    drop_collection(self.collection_name)

document_count

document_count()

Returns the number of documents in the store.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def document_count(self) -> int:
    """
    Returns the number of documents in the store.

    The count is requested from the client directly. Taking ``len()`` of a
    ``scroll`` response would measure the ``(points, next_offset)`` tuple
    rather than the number of stored points.

    Returns:
        int: Exact number of points in the collection.
    """
    result = self.client.count(
        collection_name=self.collection_name,
        exact=True,
    )
    return result.count

retrieve

retrieve(query, top_k=5)

Retrieve the top_k most relevant documents based on the given query. For the purpose of this example, this method performs a basic search.

PARAMETER	DESCRIPTION
`query`	The query string used for document retrieval. TYPE: `str`
`top_k`	The number of top relevant documents to retrieve. TYPE: `int` DEFAULT: `5`

RETURNS	DESCRIPTION
`List[Document]`	List[Document]: A list of the top_k most relevant documents.

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
    """
    Return the documents whose vectors best match the query.

    The query is embedded with the store's embedder and the resulting
    vector is passed to the client's search; each hit is rebuilt from its
    stored payload.

    Args:
        query (str): The query string used for document retrieval.
        top_k (int): The number of top relevant documents to retrieve.

    Returns:
        List[Document]: A list of the top_k most relevant documents.
    """
    query_vector = self._embedder.infer_vector(query).value
    hits = self.client.search(
        collection_name=self.collection_name, query_vector=query_vector, limit=top_k
    )

    documents = []
    for hit in hits:
        stored = hit.payload
        documents.append(
            Document(
                id=hit.id,
                content=stored["content"],
                metadata=stored["metadata"],
            )
        )
    return documents

model_dump_json

model_dump_json(*args, **kwargs)

Source code in swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py

def model_dump_json(self, *args, **kwargs) -> str:
    """
    Serialize the model to JSON after dropping the Qdrant client.

    ``disconnect()`` runs first, so the store is left disconnected once
    this returns; the serialization itself is delegated to the parent class
    with the given arguments.
    """
    # The client reference is cleared before the parent serializer runs.
    self.disconnect()

    serialized = super().model_dump_json(*args, **kwargs)
    return serialized

register_model `classmethod`

register_model()

Decorator to register a base model in the unified registry.

RETURNS	DESCRIPTION
`Callable`	A decorator function that registers the model class. TYPE: `Callable[[Type[BaseModel]], Type[BaseModel]]`

Source code in swarmauri_base/DynamicBase.py

@classmethod
def register_model(cls) -> Callable[[Type[BaseModel]], Type[BaseModel]]:
    """
    Build a decorator that records a base model in the unified registry.

    Returns:
        Callable: A decorator that registers the decorated class under its
        ``__name__`` and returns the class unchanged.
    """

    def decorator(model_cls: Type[BaseModel]):
        """Register ``model_cls`` as a base model unless its name is taken."""
        model_name = model_cls.__name__
        already_known = model_name in cls._registry

        if not already_known:
            cls._registry[model_name] = {"model_cls": model_cls, "subtypes": {}}
            glogger.debug("Registered base model '%s'.", model_name)
            DynamicBase._recreate_models()
        else:
            # Duplicate names are ignored; the first registration wins.
            glogger.warning(
                "Model '%s' is already registered; skipping duplicate.", model_name
            )

        return model_cls

    return decorator

register_type `classmethod`

register_type(resource_type=None, type_name=None)

Decorator to register a subtype under one or more base models in the unified registry.

PARAMETER	DESCRIPTION
`resource_type`	The base model(s) under which to register the subtype. If None, all direct base classes (except DynamicBase) are used. TYPE: `Optional[Union[Type[T], List[Type[T]]]]` DEFAULT: `None`
`type_name`	An optional custom type name for the subtype. TYPE: `Optional[str]` DEFAULT: `None`

RETURNS	DESCRIPTION
`Callable`	A decorator function that registers the subtype. TYPE: `Callable[[Type[DynamicBase]], Type[DynamicBase]]`

Source code in swarmauri_base/DynamicBase.py

@classmethod
def register_type(
    cls,
    resource_type: Optional[Union[Type[T], List[Type[T]]]] = None,
    type_name: Optional[str] = None,
) -> Callable[[Type["DynamicBase"]], Type["DynamicBase"]]:
    """
    Build a decorator that records a subtype under one or more base models.

    Parameters:
        resource_type (Optional[Union[Type[T], List[Type[T]]]]):
            The base model(s) to register under. When None, every direct
            base class of the decorated class other than ``cls`` is used.
        type_name (Optional[str]): Optional custom name for the subtype;
            otherwise the class's ``_type`` attribute or ``__name__`` is used.

    Returns:
        Callable: A decorator that registers the decorated class and returns
        it unchanged.
    """

    def decorator(subclass: Type["DynamicBase"]):
        """Register ``subclass`` under each resolved base model."""
        if isinstance(resource_type, list):
            resource_types = resource_type
        elif resource_type is None:
            resource_types = [
                base for base in subclass.__bases__ if base is not cls
            ]
        else:
            resource_types = [resource_type]

        for base_model in resource_types:
            if not issubclass(subclass, base_model):
                raise TypeError(
                    f"'{subclass.__name__}' must be a subclass of '{base_model.__name__}'."
                )
            final_type_name = type_name or getattr(
                subclass, "_type", subclass.__name__
            )
            base_model_name = base_model.__name__

            # Base models not seen before get a registry entry on first use.
            if base_model_name not in cls._registry:
                cls._registry[base_model_name] = {
                    "model_cls": base_model,
                    "subtypes": {},
                }
                glogger.debug(
                    "Created new registry entry for base model '%s'.",
                    base_model_name,
                )

            known_subtypes = cls._registry[base_model_name]["subtypes"]
            if final_type_name not in known_subtypes:
                known_subtypes[final_type_name] = subclass
                glogger.debug(
                    "Registered '%s' as '%s' under '%s'.",
                    subclass.__name__,
                    final_type_name,
                    base_model_name,
                )
            else:
                # An existing entry with the same type name is kept as-is.
                glogger.warning(
                    "Type '%s' already exists under '%s'; skipping duplicate.",
                    final_type_name,
                    base_model_name,
                )

        DynamicBase._recreate_models()
        return subclass

    return decorator

model_validate_toml `classmethod`

model_validate_toml(toml_data)

Validate a model from a TOML string.

Source code in swarmauri_base/TomlMixin.py

@classmethod
def model_validate_toml(cls, toml_data: str):
    """Validate a model from a TOML string."""
    try:
        # Parse TOML into a Python dictionary
        toml_content = tomllib.loads(toml_data)

        # Convert the dictionary to JSON and validate using Pydantic
        return cls.model_validate_json(json.dumps(toml_content))
    except tomllib.TOMLDecodeError as e:
        raise ValueError(f"Invalid TOML data: {e}")
    except ValidationError as e:
        raise ValueError(f"Validation failed: {e}")

model_dump_toml

model_dump_toml(
    fields_to_exclude=None, api_key_placeholder=None
)

Return a TOML representation of the model.

Source code in swarmauri_base/TomlMixin.py

def model_dump_toml(self, fields_to_exclude=None, api_key_placeholder=None):
    """
    Return a TOML representation of the model.

    The model is first dumped to JSON and reloaded as plain Python data.
    Keys listed in ``fields_to_exclude`` are dropped at every nesting level,
    and when ``api_key_placeholder`` is not None every ``"api_key"`` value is
    replaced by it before the data is rendered as TOML.
    """
    excluded = [] if fields_to_exclude is None else fields_to_exclude

    # Round-trip through JSON to obtain plain dicts and lists.
    json_data = json.loads(self.model_dump_json())

    def process_fields(data, fields_to_exclude):
        """Recursively filter fields and apply placeholders."""
        if isinstance(data, list):
            return [process_fields(item, fields_to_exclude) for item in data]
        if not isinstance(data, dict):
            return data

        cleaned = {}
        for key, value in data.items():
            if key in fields_to_exclude:
                continue
            if key == "api_key" and api_key_placeholder is not None:
                cleaned[key] = api_key_placeholder
            else:
                cleaned[key] = process_fields(value, fields_to_exclude)
        return cleaned

    return toml.dumps(process_fields(json_data, excluded))

model_validate_yaml `classmethod`

model_validate_yaml(yaml_data)

Validate a model from a YAML string.

Source code in swarmauri_base/YamlMixin.py

@classmethod
def model_validate_yaml(cls, yaml_data: str):
    """Validate a model from a YAML string.

    Parameters:
        yaml_data (str): YAML document describing the model.

    Returns:
        The result of ``cls.model_validate_json`` for the parsed data.

    Raises:
        ValueError: If the YAML cannot be parsed or validation fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        # Parse YAML into a Python dictionary
        yaml_content = yaml.safe_load(yaml_data)

        # Convert the dictionary to JSON and validate using Pydantic
        return cls.model_validate_json(json.dumps(yaml_content))
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML data: {e}") from e
    except ValidationError as e:
        raise ValueError(f"Validation failed: {e}") from e

model_dump_yaml

model_dump_yaml(
    fields_to_exclude=None, api_key_placeholder=None
)

Return a YAML representation of the model.

Source code in swarmauri_base/YamlMixin.py

def model_dump_yaml(self, fields_to_exclude=None, api_key_placeholder=None):
    """
    Return a YAML representation of the model.

    The model is first dumped to JSON and reloaded as plain Python data.
    Keys listed in ``fields_to_exclude`` are dropped at every nesting level,
    and when ``api_key_placeholder`` is not None every ``"api_key"`` value is
    replaced by it before the data is rendered with ``yaml.safe_dump``.
    """
    excluded = [] if fields_to_exclude is None else fields_to_exclude

    # Round-trip through JSON to obtain plain dicts and lists.
    json_data = json.loads(self.model_dump_json())

    def process_fields(data, fields_to_exclude):
        """Recursively filter fields and apply placeholders."""
        if isinstance(data, list):
            return [process_fields(item, fields_to_exclude) for item in data]
        if not isinstance(data, dict):
            return data

        cleaned = {}
        for key, value in data.items():
            if key in fields_to_exclude:
                continue
            if key == "api_key" and api_key_placeholder is not None:
                cleaned[key] = api_key_placeholder
            else:
                cleaned[key] = process_fields(value, fields_to_exclude)
        return cleaned

    filtered_data = process_fields(json_data, excluded)
    return yaml.safe_dump(filtered_data, default_flow_style=False)

model_post_init

model_post_init(logger=None)

Assign a logger instance after model initialization.

Source code in swarmauri_base/LoggerMixin.py

def model_post_init(self, logger: Optional[FullUnion[LoggerBase]] = None) -> None:
    """
    Settle which logger the instance uses once initialization is done.

    Precedence: a truthy logger already set on the instance, then the
    ``logger`` argument, then the class-level ``default_logger``.
    """
    chosen = self.logger
    if not chosen:
        chosen = logger
    if not chosen:
        # Final fallback; may itself be unset.
        chosen = self.default_logger
    self.logger = chosen

document_dumps

document_dumps()

Serializes the store's documents to a JSON string.

Source code in swarmauri_base/vector_stores/VectorStoreBase.py

def document_dumps(self) -> str:
    """
    Serialize the store's documents to a JSON string.

    Returns:
        str: JSON array holding each document's ``to_dict()`` result.
    """
    serialized = []
    for document in self.documents:
        serialized.append(document.to_dict())
    return json.dumps(serialized)

document_dump

document_dump(file_path)

Writes the store's documents to a JSON file at `file_path`.

Source code in swarmauri_base/vector_stores/VectorStoreBase.py

def document_dump(self, file_path: str) -> None:
    """
    Write the store's documents to a JSON file.

    Parameters:
        file_path (str): Destination file; opened for writing as UTF-8.
    """
    with open(file_path, "w", encoding="utf-8") as handle:
        records = []
        for document in self.documents:
            records.append(document.to_dict())
        # Non-ASCII text is written as-is, indented by four spaces.
        json.dump(records, handle, ensure_ascii=False, indent=4)

document_loads

document_loads(json_data)

Replaces the store's documents with those parsed from a JSON string.

Source code in swarmauri_base/vector_stores/VectorStoreBase.py

def document_loads(self, json_data: str) -> None:
    """
    Replace the store's documents with those parsed from a JSON string.

    Each entry's ``"type"`` value is looked up by name in this module's
    globals and the entry is passed to that object's ``from_dict``.

    Parameters:
        json_data (str): JSON array of serialized documents.
    """
    loaded = []
    for each in json.loads(json_data):
        document_cls = globals()[each["type"]]
        loaded.append(document_cls.from_dict(each))
    self.documents = loaded

document_load

document_load(file_path)

Replaces the store's documents with those read from a JSON file at `file_path`.

Source code in swarmauri_base/vector_stores/VectorStoreBase.py

def document_load(self, file_path: str) -> None:
    """
    Replace the store's documents with those read from a JSON file.

    Each entry's ``"type"`` value is looked up by name in this module's
    globals and the entry is passed to that object's ``from_dict``.

    Parameters:
        file_path (str): Path of a UTF-8 JSON file holding an array of
            serialized documents.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # json.load needs the open file object, not the path string.
        self.documents = [
            globals()[each["type"]].from_dict(each) for each in json.load(f)
        ]

save_store

save_store(directory_path)

Saves both the vectorizer's model and the documents.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py

def save_store(self, directory_path: str) -> None:
    """
    Saves both the vectorizer's model and the documents.

    The model is saved under ``<directory_path>/embedding_model`` and the
    documents are written to ``<directory_path>/documents.json``.

    Parameters:
        directory_path (str): Target directory; created if it is missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory_path, exist_ok=True)

    # Save the vectorizer model
    model_path = os.path.join(directory_path, "embedding_model")
    self._vectorizer.save_model(model_path)

    # Save documents
    documents_path = os.path.join(directory_path, "documents.json")
    with open(documents_path, "w", encoding="utf-8") as f:
        json.dump(
            [each.to_dict() for each in self.documents],
            f,
            ensure_ascii=False,
            indent=4,
        )

load_store

load_store(directory_path)

Loads both the vectorizer's model and the documents.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py

def load_store(self, directory_path: str) -> None:
    """
    Restore the vectorizer model and the documents from a directory.

    The model is loaded from ``<directory_path>/embedding_model`` and the
    documents from ``<directory_path>/documents.json``; each stored entry
    is passed through ``self._load_document``.

    Parameters:
        directory_path (str): Directory to load from.
    """
    self.vectorizer.load_model(os.path.join(directory_path, "embedding_model"))

    with open(
        os.path.join(directory_path, "documents.json"), "r", encoding="utf-8"
    ) as source:
        raw_documents = json.load(source)
        self.documents = list(map(self._load_document, raw_documents))

save_parts

save_parts(directory_path, chunk_size=10485760)

Splits the file into parts if it's too large and saves those parts individually.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py

def save_parts(self, directory_path: str, chunk_size: int = 10485760) -> None:
    """
    Splits the saved model file into parts and saves those parts individually.

    ``<directory_path>/embedding_model/model.safetensors`` is read in
    ``chunk_size``-byte pieces, each written to
    ``<directory_path>/parts/model.safetensors.partNNN`` (numbered from 001).
    Splitting of the documents is then delegated to ``self._split_json_file``.

    Parameters:
        directory_path (str): Directory holding the saved store.
        chunk_size (int): Maximum size of each part in bytes.
    """
    model_file = os.path.join(directory_path, "embedding_model", "model.safetensors")
    parts_directory = os.path.join(directory_path, "parts")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(parts_directory, exist_ok=True)

    with open(model_file, "rb") as source:
        file_number = 1
        chunk = source.read(chunk_size)
        while chunk:
            part_path = os.path.join(
                parts_directory, f"model.safetensors.part{file_number:03}"
            )
            with open(part_path, "wb") as chunk_file:
                chunk_file.write(chunk)
            file_number += 1
            chunk = source.read(chunk_size)

    # Split the documents into parts and save them
    self._split_json_file(directory_path, chunk_size=chunk_size)

load_parts

load_parts(directory_path, file_pattern='*.part*')

Combines file parts from a directory back into a single file and loads it.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py

def load_parts(self, directory_path: str, file_pattern: str = "*.part*") -> None:
    """
    Reassemble the model file from its parts, then load model and documents.

    Files in ``<directory_path>/parts`` matching ``file_pattern`` are
    concatenated in sorted name order into
    ``<directory_path>/embedding_model/model.safetensors``. The model
    directory is then handed to the vectorizer, and the documents are loaded
    through ``self._load_documents``.

    Parameters:
        directory_path (str): Directory holding the saved store.
        file_pattern (str): Glob pattern selecting the part files.
    """
    model_path = os.path.join(directory_path, "embedding_model")
    part_glob = os.path.join(directory_path, "parts", file_pattern)

    ordered_parts = glob.glob(part_glob)
    ordered_parts.sort()

    with open(os.path.join(model_path, "model.safetensors"), "wb") as combined:
        for part_path in ordered_parts:
            with open(part_path, "rb") as piece:
                combined.write(piece.read())

    # Load the combined model now
    self._vectorizer.load_model(model_path)

    # Load document files
    self._load_documents(directory_path)

Class swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore.PersistentQdrantVectorStore

swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore.PersistentQdrantVectorStore

type class-attribute instance-attribute

model_config class-attribute instance-attribute

client class-attribute instance-attribute

id class-attribute instance-attribute

members class-attribute instance-attribute

owners class-attribute instance-attribute

host class-attribute instance-attribute

default_logger class-attribute

logger class-attribute instance-attribute

name class-attribute instance-attribute

resource class-attribute instance-attribute

version class-attribute instance-attribute

documents class-attribute instance-attribute

embedder property

collection_name instance-attribute

collection class-attribute instance-attribute

path class-attribute instance-attribute

vector_size class-attribute instance-attribute

vectorizer class-attribute instance-attribute

connect

disconnect

add_document

add_documents

get_document

get_all_documents

delete_document

update_document

clear_documents

document_count

retrieve

model_dump_json

register_model classmethod

register_type classmethod

model_validate_toml classmethod

model_dump_toml

model_validate_yaml classmethod

model_dump_yaml

model_post_init

document_dumps

document_dump

document_loads

document_load

save_store

load_store

save_parts

load_parts

Class `swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore.PersistentQdrantVectorStore`

type `class-attribute` `instance-attribute`

model_config `class-attribute` `instance-attribute`

client `class-attribute` `instance-attribute`

id `class-attribute` `instance-attribute`

members `class-attribute` `instance-attribute`

owners `class-attribute` `instance-attribute`

host `class-attribute` `instance-attribute`

default_logger `class-attribute`

logger `class-attribute` `instance-attribute`

name `class-attribute` `instance-attribute`

resource `class-attribute` `instance-attribute`

version `class-attribute` `instance-attribute`

documents `class-attribute` `instance-attribute`

embedder `property`

collection_name `instance-attribute`

collection `class-attribute` `instance-attribute`

path `class-attribute` `instance-attribute`

vector_size `class-attribute` `instance-attribute`

vectorizer `class-attribute` `instance-attribute`

register_model `classmethod`

register_type `classmethod`

model_validate_toml `classmethod`

model_validate_yaml `classmethod`