Advanced Usage / Custom Components

This page shows how to use custom components with LocalSearch.

Custom Embeddings

You can provide a custom embedding class by subclassing LocalSearch.backend.embeddings.BaseEmbeddings and passing an instance of it to SearchEngine through the embedder argument.

from LocalSearch.backend.embeddings import BaseEmbeddings
from LocalSearch.backend.engine import SearchEngine
from LocalSearch.backend.llms import GroqLLM

class MyEmbeddings(BaseEmbeddings):
    """Example embedder that returns a fixed placeholder vector per document."""

    def embed_documents(self, docs):
        """Return one placeholder embedding for each entry in ``docs``.

        Args:
            docs: Iterable of documents to embed. The content of each
                document is ignored by this placeholder implementation.

        Returns:
            A list containing one independent ``[0.1, 0.2]`` list per
            document.
        """
        # custom embedding logic
        # Build a fresh inner list per document. ``[[0.1, 0.2]] * n`` would
        # repeat a reference to a single shared list, so mutating one
        # embedding would silently change every other one.
        return [[0.1, 0.2] for _ in docs]

# Masked placeholder value; substitute a real API key before running.
llm = GroqLLM(api_key="gsk_********************************")

# Hand the custom embedder to the engine through the ``embedder`` argument.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    embedder=MyEmbeddings(),
)
result = engine.search("custom embedding test")

Custom Text Extractor

from LocalSearch.backend.textExtractor import BaseTextExtractor

class MyTextExtractor(BaseTextExtractor):
    """Example extractor that returns a placeholder string for any file."""

    def extract_text(self, filepath):
        """Return placeholder text derived from ``filepath``.

        Args:
            filepath: Path of the file to extract text from, given as a
                ``str`` or any object whose ``str()`` form is the path
                (for example ``pathlib.Path``). The file is not opened by
                this placeholder implementation.

        Returns:
            The string ``"text from "`` followed by the path.
        """
        # custom extraction logic
        # Coerce to str so path objects work too; plain ``+`` concatenation
        # raises TypeError when ``filepath`` is not already a str.
        return "text from " + str(filepath)

# Hand the custom extractor to the engine through ``text_extractor``.
# ``llm`` is the GroqLLM instance created in the Custom Embeddings example.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    text_extractor=MyTextExtractor(),
)

Custom Vector Store

import numpy as np
from LocalSearch.backend.vector_store import BaseVectorStore
from typing import List, Set
from LocalSearch.backend.engine import SearchEngine

class MyVectorStore(BaseVectorStore):
    """Minimal in-memory vector store example backed by a plain dict.

    Entries live in ``self.store``, keyed by vector id. Each value is a dict
    with a ``'vector'`` entry and a ``'metadata'`` entry.
    """

    def __init__(self):
        # Maps vector id -> {'vector': ..., 'metadata': ...}.
        self.store = {}
        # Initialised to 0; none of the methods in this example read or
        # update it.
        self.current_id = 0

    def add(self, vectors: np.ndarray, ids: np.ndarray, metadata: List[dict]):
        """Store each vector with its metadata under the matching id.

        The three arguments are consumed in lockstep. An id that is already
        present has its entry overwritten.
        """
        for v, i, m in zip(vectors, ids, metadata):
            self.store[i] = {'vector': v, 'metadata': m}

    def search(self, query_vector: np.ndarray, top_k: int):
        """Return up to ``top_k`` stored ``(id, entry)`` pairs.

        ``query_vector`` is ignored: this placeholder performs no similarity
        ranking and returns entries in insertion order.
        """
        # simple placeholder: return first top_k items
        return list(self.store.items())[:top_k]

    def remove_by_id(self, vector_id: int) -> None:
        """Delete the entry for ``vector_id``; unknown ids are ignored."""
        self.store.pop(vector_id, None)

    def save(self, path: str):
        """Persist the store to ``path`` (not implemented in this example)."""
        pass  # implement saving logic

    def load(self, path: str):
        """Load the store from ``path`` (not implemented in this example)."""
        pass  # implement loading logic

    def dimension(self) -> int:
        """Return the length of the first stored vector, or 0 when empty."""
        # Supplying a default to ``next`` avoids leaking StopIteration out
        # of this method when nothing has been added yet.
        # NOTE(review): confirm that 0 is what SearchEngine expects for an
        # empty store.
        first_entry = next(iter(self.store.values()), None)
        if first_entry is None:
            return 0
        return len(first_entry['vector'])

    def get_all_ids(self) -> Set[int]:
        """Return the set of ids currently present in the store."""
        return set(self.store.keys())

    def prepare_index(self, directory_path: str, recursive: bool = True):
        """Return an empty index description.

        Both arguments are ignored by this placeholder. The result is a dict
        with ``'index'`` set to ``None`` and empty ``'current_files'`` and
        ``'used_ids'`` sets.
        """
        return {'index': None, 'current_files': set(), 'used_ids': set()}

# Hand the custom vector store to the engine through ``vector_store``.
# ``llm`` is the GroqLLM instance created in the Custom Embeddings example.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    vector_store=MyVectorStore(),
)

Custom Metadata Store

from LocalSearch.backend.metadata_store import BaseMetadataStore
from typing import Dict, List
from LocalSearch.backend.engine import SearchEngine

class MyMetadataStore(BaseMetadataStore):
    """In-memory metadata store example.

    File metadata is held in the ``metadata`` dict (keyed by file path) and
    the chunk mapping in the ``chunk_mapping`` list. Nothing is written to
    disk.
    """

    def __init__(self):
        self.chunk_mapping = []
        self.metadata = {}

    def load_metadata(self) -> Dict:
        """Return the metadata dict currently held in memory."""
        current = self.metadata
        return current

    def save_metadata(self, metadata: Dict) -> None:
        """Replace the held metadata with ``metadata`` (stored by reference)."""
        self.metadata = metadata

    def get_file_info(self, file_path: str):
        """Return the stored info for ``file_path``, or ``{}`` if unknown."""
        info = self.metadata.get(file_path, {})
        return info

    def is_modified(self, file_path: str, current_info: Dict) -> bool:
        """Report whether ``current_info`` differs from the stored info.

        A path with no stored entry is compared as ``None``.
        """
        stored_info = self.metadata.get(file_path)
        return stored_info != current_info

    def update(self, file_path: str, file_info: Dict) -> None:
        """Record ``file_info`` as the stored info for ``file_path``."""
        self.metadata[file_path] = file_info

    def load_chunk_mapping(self) -> List:
        """Return the chunk mapping currently held in memory."""
        mapping = self.chunk_mapping
        return mapping

    def save_chunk_mapping(self, chunk_mapping: List) -> None:
        """Replace the held chunk mapping with ``chunk_mapping``."""
        self.chunk_mapping = chunk_mapping

# Hand the custom metadata store to the engine through ``metadata_store``.
# ``llm`` is the GroqLLM instance created in the Custom Embeddings example.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    metadata_store=MyMetadataStore(),
)