Advanced Usage / Custom Components
This page shows how to plug your own components (embeddings, text extractor, vector store, and metadata store) into LocalSearch by passing them to SearchEngine.
Custom Embeddings
You can provide a custom embedding class by subclassing LocalSearch.backend.embeddings.BaseEmbeddings.
from LocalSearch.backend.embeddings import BaseEmbeddings
from LocalSearch.backend.engine import SearchEngine
from LocalSearch.backend.llms import GroqLLM
class MyEmbeddings(BaseEmbeddings):
    """Example embedder that returns a fixed 2-element vector per document."""

    def embed_documents(self, docs):
        """Return one placeholder embedding for each item in ``docs``.

        Replace the body with your own embedding logic.
        """
        # custom embedding logic
        # Build a fresh inner list per document: ``[[0.1, 0.2]] * len(docs)``
        # would repeat a single shared list, so mutating one embedding
        # would silently change every other one.
        return [[0.1, 0.2] for _ in docs]
# Create the LLM client, then hand the custom embedder to the engine.
llm = GroqLLM(api_key='gsk_********************************')
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    embedder=MyEmbeddings(),
)
result = engine.search("custom embedding test")
Custom Text Extractor
from LocalSearch.backend.textExtractor import BaseTextExtractor
class MyTextExtractor(BaseTextExtractor):
    """Example extractor that derives placeholder text from the file path."""

    def extract_text(self, filepath):
        """Return placeholder text for ``filepath``.

        Put your own extraction logic here.
        """
        # custom extraction logic
        prefix = "text from "
        return prefix + filepath
# Pass the custom extractor to the engine via ``text_extractor``.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    text_extractor=MyTextExtractor(),
)
Custom Vector Store
import numpy as np
from LocalSearch.backend.vector_store import BaseVectorStore
from typing import List, Set
from LocalSearch.backend.engine import SearchEngine
class MyVectorStore(BaseVectorStore):
    """Minimal in-memory vector store example, keyed by vector id."""

    def __init__(self):
        # Maps vector id -> {'vector': ..., 'metadata': ...}.
        self.store = {}
        self.current_id = 0

    def add(self, vectors: np.ndarray, ids: np.ndarray, metadata: List[dict]):
        """Store each vector and its metadata under the matching id.

        An existing entry with the same id is overwritten.
        """
        for v, i, m in zip(vectors, ids, metadata):
            self.store[i] = {'vector': v, 'metadata': m}

    def search(self, query_vector: np.ndarray, top_k: int):
        """Return the first ``top_k`` ``(id, entry)`` pairs.

        Placeholder only: ``query_vector`` is ignored and no similarity
        ranking is performed.
        """
        # simple placeholder: return first top_k items
        return list(self.store.items())[:top_k]

    def remove_by_id(self, vector_id: int) -> None:
        """Remove the entry for ``vector_id``; unknown ids are ignored."""
        self.store.pop(vector_id, None)

    def save(self, path: str):
        """Persist the store to ``path`` (not implemented in this example)."""
        pass  # implement saving logic

    def load(self, path: str):
        """Load the store from ``path`` (not implemented in this example)."""
        pass  # implement loading logic

    def dimension(self) -> int:
        """Return the length of the first stored vector, or 0 when empty.

        NOTE(review): 0 for an empty store is an assumption; confirm what
        the engine expects before relying on it.
        """
        # A bare ``next(iter(...))`` raises StopIteration on an empty store,
        # which is confusing to callers (and becomes RuntimeError inside a
        # generator); use a default instead.
        first_entry = next(iter(self.store.values()), None)
        if first_entry is None:
            return 0
        return len(first_entry['vector'])

    def get_all_ids(self) -> Set[int]:
        """Return the set of ids currently in the store."""
        return set(self.store.keys())

    def prepare_index(self, directory_path: str, recursive: bool = True):
        """Return an empty index description.

        Placeholder only: both arguments are ignored.
        """
        return {'index': None, 'current_files': set(), 'used_ids': set()}
# Pass the custom vector store to the engine via ``vector_store``.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    vector_store=MyVectorStore(),
)
Custom Metadata Store
from LocalSearch.backend.metadata_store import BaseMetadataStore
from typing import Dict, List
from LocalSearch.backend.engine import SearchEngine
class MyMetadataStore(BaseMetadataStore):
    """In-memory metadata store example; nothing is written to disk."""

    def __init__(self):
        # Per-file info keyed by file path, plus the chunk mapping list.
        self.metadata = {}
        self.chunk_mapping = []

    def load_metadata(self) -> Dict:
        """Return the metadata dictionary currently held in memory."""
        current = self.metadata
        return current

    def save_metadata(self, metadata: Dict) -> None:
        """Replace the held metadata with ``metadata``."""
        self.metadata = metadata

    def get_file_info(self, file_path: str):
        """Return the stored info for ``file_path``, or ``{}`` if absent."""
        fallback = {}
        return self.metadata.get(file_path, fallback)

    def is_modified(self, file_path: str, current_info: Dict) -> bool:
        """Return True when the stored info differs from ``current_info``.

        A file with no stored info compares as ``None`` against
        ``current_info``.
        """
        stored_info = self.metadata.get(file_path)
        return stored_info != current_info

    def update(self, file_path: str, file_info: Dict) -> None:
        """Record ``file_info`` as the stored info for ``file_path``."""
        self.metadata[file_path] = file_info

    def load_chunk_mapping(self) -> List:
        """Return the chunk mapping currently held in memory."""
        mapping = self.chunk_mapping
        return mapping

    def save_chunk_mapping(self, chunk_mapping: List) -> None:
        """Replace the held chunk mapping with ``chunk_mapping``."""
        self.chunk_mapping = chunk_mapping
# Pass the custom metadata store to the engine via ``metadata_store``.
engine = SearchEngine(
    directory_path=r"C:\docs",
    llm=llm,
    metadata_store=MyMetadataStore(),
)