import time
from typing import Dict, List, Optional, Tuple

import faiss  # third-party (pip install faiss-cpu); referenced throughout this module
import numpy as np
class OptimizedVectorSearch:
    """Production-ready vector search with HNSW indexing.

    Optimized for sub-50ms latency at scale. Wraps a FAISS
    ``IndexHNSWFlat`` with parameters tuned for production workloads,
    plus helpers for batched ingestion and recall-driven tuning.
    """

    def __init__(self, dimension: int, max_elements: int = 1000000):
        """Configure the search wrapper (no index is built yet).

        Args:
            dimension: Dimensionality of the vectors to be indexed.
            max_elements: Upper bound on indexed vectors; used for the
                memory estimate and enforced by ``add_vectors``.
        """
        self.dimension = dimension
        self.max_elements = max_elements
        # HNSW parameters tuned for production
        # M=16 provides good recall with reasonable memory
        self.M = 16
        self.ef_construction = 200  # Quality during build
        self.ef_search = 64  # Quality during query
        # Created lazily by create_index(); guards below raise until then.
        self.index = None

    # Return annotation is quoted so this module can be imported even in
    # environments where faiss is absent (the hard dependency is only hit
    # when the method is actually called).
    def create_index(self) -> "faiss.IndexHNSWFlat":
        """Create HNSW index with optimized parameters.

        Returns:
            The newly created FAISS index (also stored on ``self.index``).

        Raises:
            Exception: re-raised from FAISS after logging the failure.
        """
        try:
            index = faiss.IndexHNSWFlat(self.dimension, self.M)
            # Set construction parameters
            index.hnsw.efConstruction = self.ef_construction
            # Set search parameters (can be adjusted per query)
            index.hnsw.efSearch = self.ef_search
            # 4 bytes per float32 component + 2*M int32 links per node.
            bytes_per_vector = (self.dimension * 4 + self.M * 2 * 4)
            memory_gb = (bytes_per_vector * self.max_elements) / (1024**3)
            print(f"✓ HNSW Index created:")
            print(f"  Dimension: {self.dimension}")
            print(f"  M (links/node): {self.M}")
            print(f"  ef_construction: {self.ef_construction}")
            print(f"  ef_search: {self.ef_search}")
            print(f"  Max elements: {self.max_elements:,}")
            print(f"  Estimated memory: {memory_gb:.2f} GB")
            self.index = index
            return index
        except Exception as e:
            print(f"✗ Index creation failed: {e}")
            raise

    def add_vectors(self, vectors: np.ndarray, batch_size: int = 50000) -> None:
        """Add vectors in batches to avoid memory issues.

        Args:
            vectors: Array of shape (n, dimension); cast to float32.
            batch_size: Number of vectors pushed to FAISS per call.

        Raises:
            ValueError: If the index has not been created, or if the
                vector count exceeds ``max_elements``.
        """
        if self.index is None:
            raise ValueError("Index not initialized")
        if vectors.shape[0] > self.max_elements:
            raise ValueError(f"Vector count {vectors.shape[0]} exceeds max_elements {self.max_elements}")
        total_added = 0
        start_time = time.time()
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i:i+batch_size]
            self.index.add(batch.astype('float32'))
            total_added += len(batch)
            # Periodic progress report on large ingests.
            if i % 100000 == 0 and i > 0:
                print(f"  Progress: {total_added:,} vectors added")
        elapsed = time.time() - start_time
        print(f"✓ Added {total_added:,} vectors in {elapsed:.2f}s "
              f"({total_added/elapsed:.0f} vectors/sec)")

    def search(self, query: np.ndarray, k: int = 10,
               ef_search: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
        """Search for k nearest neighbors.

        Args:
            query: Query vector of shape (1, dimension).
            k: Number of neighbors to return.
            ef_search: Search width (higher = better recall, slower).
                When None, the index's current efSearch is kept —
                previously None was assigned unconditionally, which
                broke subsequent searches.

        Returns:
            Tuple of (distances, indices) arrays from FAISS.

        Raises:
            ValueError: If the index is missing or the query dimension
                does not match the index dimension.
        """
        if self.index is None:
            raise ValueError("Index not initialized")
        # Adjust search width only if explicitly requested.
        if ef_search is not None:
            self.index.hnsw.efSearch = ef_search
        if query.shape[1] != self.dimension:
            raise ValueError(f"Query dimension mismatch: expected {self.dimension}")
        start_time = time.time()
        distances, indices = self.index.search(query.astype('float32'), k)
        elapsed = time.time() - start_time
        print(f"Query: {elapsed*1000:.2f}ms, k={k}, ef={self.index.hnsw.efSearch}")
        return distances, indices

    def tune_for_recall(self, test_queries: np.ndarray,
                        test_neighbors: np.ndarray,
                        target_recall: float = 0.95) -> Dict:
        """Auto-tune ef_search to achieve target recall.

        Requires ground truth neighbors for validation.

        Args:
            test_queries: Query vectors, shape (q, dimension).
            test_neighbors: Ground-truth neighbor ids, shape (q, k);
                only column 0 (the true nearest neighbor) is checked.
            target_recall: Recall@k threshold to aim for.

        Returns:
            Dict with 'optimal_ef' (None if target never reached) and
            'recall' (best recall observed at that setting).

        Raises:
            ValueError: If the index has not been created.
        """
        if self.index is None:
            raise ValueError("Index not initialized")
        print(f"\nTuning for {target_recall:.0%} recall...")
        best_ef = None
        best_recall = 0.0
        for ef in [32, 64, 128, 256, 512]:
            self.index.hnsw.efSearch = ef
            correct = 0
            for i, query in enumerate(test_queries):
                query = query.reshape(1, -1)
                _, indices = self.search(query, k=test_neighbors.shape[1])
                # Check if ground truth neighbors are in results
                if test_neighbors[i][0] in indices[0]:
                    correct += 1
            recall = correct / len(test_queries)
            print(f"  ef={ef}: recall={recall:.3f}")
            if recall >= target_recall and recall > best_recall:
                best_ef = ef
                best_recall = recall
                # ef values are tried in increasing order, so the first
                # one that hits the target is also the cheapest: stop.
                break
        if best_ef is not None:
            self.index.hnsw.efSearch = best_ef
            print(f"✓ Optimal ef_search: {best_ef} (recall: {best_recall:.3f})")
        else:
            print("⚠ Could not achieve target recall with available ef values")
        return {'optimal_ef': best_ef, 'recall': best_recall}
# Production usage example
if __name__ == "__main__":
    # Configuration for 1M vectors, 768-dim
    DIM = 768
    N_VECTORS = 100000  # Demo with 100k

    print("=" * 60)
    print("PRODUCTION VECTOR SEARCH SETUP")
    print("=" * 60)
    search = OptimizedVectorSearch(dimension=DIM, max_elements=N_VECTORS)
    # The index must exist before add_vectors(); the original demo
    # skipped this step and would always raise "Index not initialized".
    search.create_index()

    print("\nGenerating sample vectors...")
    vectors = np.random.random((N_VECTORS, DIM)).astype('float32')

    print("\nAdding vectors to index...")
    search.add_vectors(vectors)

    print("=" * 60)
    print("SEARCH PERFORMANCE")
    print("=" * 60)
    query = np.random.random((1, DIM)).astype('float32')
    distances, indices = search.search(query, k=10)