Skip to content

Class swarmauri_base.vector_stores.VectorStoreSaveLoadMixin.VectorStoreSaveLoadMixin

swarmauri_base.vector_stores.VectorStoreSaveLoadMixin.VectorStoreSaveLoadMixin

Bases: IVectorStoreSaveLoad, BaseModel

Base class for vector stores with built-in support for saving and loading the vectorizer's model and the documents.

save_store

save_store(directory_path)

Saves both the vectorizer's model and the documents.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def save_store(self, directory_path: str) -> None:
    """
    Save the vectorizer's model and the documents under ``directory_path``.

    The model is written by ``self._vectorizer.save_model`` to
    ``<directory_path>/embedding_model``. Every entry of ``self.documents``
    is converted with ``to_dict()`` and the resulting list is dumped as
    UTF-8 JSON to ``<directory_path>/documents.json``.

    Args:
        directory_path: Target directory; created (with parents) if missing.
    """
    # exist_ok=True avoids the race between a separate exists() check and
    # the directory creation when several writers target the same path.
    os.makedirs(directory_path, exist_ok=True)

    # Save the vectorizer model
    model_path = os.path.join(directory_path, "embedding_model")
    self._vectorizer.save_model(model_path)

    # Save documents
    documents_path = os.path.join(directory_path, "documents.json")
    serialized_documents = [document.to_dict() for document in self.documents]
    with open(documents_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable in the JSON file.
        json.dump(serialized_documents, f, ensure_ascii=False, indent=4)

load_store

load_store(directory_path)

Loads both the vectorizer's model and the documents.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py
38
39
40
41
42
43
44
45
46
47
48
49
def load_store(self, directory_path: str) -> None:
    """
    Load the vectorizer's model and the documents from ``directory_path``.

    The model is read from ``<directory_path>/embedding_model`` via the
    vectorizer's ``load_model``. The documents are read from
    ``<directory_path>/documents.json`` and each JSON entry is passed
    through ``self._load_document`` before being stored in
    ``self.documents``.

    Args:
        directory_path: Directory previously populated by ``save_store``.
    """
    # Load the vectorizer model
    model_path = os.path.join(directory_path, "embedding_model")
    # The sibling save/load methods all go through ``self._vectorizer``;
    # prefer it here so the model is loaded into the same object it was
    # saved from, and fall back to the public ``vectorizer`` attribute this
    # method used originally so existing callers keep working.
    vectorizer = getattr(self, "_vectorizer", None)
    if vectorizer is None:
        vectorizer = self.vectorizer
    vectorizer.load_model(model_path)

    # Load documents
    documents_path = os.path.join(directory_path, "documents.json")
    with open(documents_path, "r", encoding="utf-8") as f:
        raw_documents = json.load(f)
    self.documents = [self._load_document(entry) for entry in raw_documents]

save_parts

save_parts(directory_path, chunk_size=10485760)

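Splits the saved model file (embedding_model/model.safetensors) into parts of at most chunk_size bytes, saves each part individually under the parts directory, and then splits the documents file.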

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def save_parts(self, directory_path: str, chunk_size: int = 10485760) -> None:
    """
    Split the saved model file into part files and split the documents.

    Reads ``<directory_path>/embedding_model/model.safetensors`` in chunks
    of ``chunk_size`` bytes and writes each chunk to
    ``<directory_path>/parts/model.safetensors.partNNN`` (numbered from
    001). Afterwards the documents are handed to ``self._split_json_file``
    with the same ``chunk_size``.

    Args:
        directory_path: Store directory containing ``embedding_model``.
        chunk_size: Maximum size in bytes of each model part
            (default 10485760, i.e. 10 MiB).
    """
    model_file = os.path.join(directory_path, "embedding_model", "model.safetensors")
    parts_directory = os.path.join(directory_path, "parts")
    part_prefix = "model.safetensors.part"

    # exist_ok=True avoids the exists()-then-create race.
    os.makedirs(parts_directory, exist_ok=True)

    with open(model_file, "rb") as source:
        # Only after the source opened successfully: drop model parts left
        # over from an earlier save. A previous run that produced more parts
        # would otherwise leave extra files behind, and a later glob-based
        # merge of the parts directory would append them to the model.
        for existing_name in os.listdir(parts_directory):
            existing_path = os.path.join(parts_directory, existing_name)
            if existing_name.startswith(part_prefix) and os.path.isfile(existing_path):
                os.remove(existing_path)

        # iter(callable, sentinel) stops as soon as read() returns b"".
        chunks = iter(lambda: source.read(chunk_size), b"")
        for file_number, chunk in enumerate(chunks, start=1):
            part_path = os.path.join(
                parts_directory, f"{part_prefix}{file_number:03}"
            )
            with open(part_path, "wb") as chunk_file:
                chunk_file.write(chunk)

    # Split the documents into parts and save them
    self._split_json_file(directory_path, chunk_size=chunk_size)

load_parts

load_parts(directory_path, file_pattern='*.part*')

Combines file parts from a directory back into a single file and loads it.

Source code in swarmauri_base/vector_stores/VectorStoreSaveLoadMixin.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def load_parts(self, directory_path: str, file_pattern: str = "*.part*") -> None:
    """
    Reassemble the model file from its parts, then load model and documents.

    Part files matching ``file_pattern`` in ``<directory_path>/parts`` are
    concatenated in sorted-name order into
    ``<directory_path>/embedding_model/model.safetensors``. The model is
    then loaded with ``self._vectorizer.load_model`` and the documents with
    ``self._load_documents``.

    Args:
        directory_path: Store directory containing the ``parts`` directory.
        file_pattern: Glob pattern selecting the part files
            (default ``"*.part*"``).
    """
    model_path = os.path.join(directory_path, "embedding_model")
    parts_directory = os.path.join(directory_path, "parts")
    output_file_path = os.path.join(model_path, "model.safetensors")

    # Lexicographic order matches the zero-padded part numbering.
    parts = sorted(glob.glob(os.path.join(parts_directory, file_pattern)))

    # Only rewrite the combined file when there is something to combine;
    # opening it for writing with no parts would truncate an existing model
    # file to zero bytes.
    if parts:
        # The target directory may not exist yet when restoring from parts.
        os.makedirs(model_path, exist_ok=True)
        with open(output_file_path, "wb") as output_file:
            for part in parts:
                with open(part, "rb") as file_part:
                    output_file.write(file_part.read())

    # Load the combined model now
    self._vectorizer.load_model(model_path)

    # Load document files
    self._load_documents(directory_path)