/**
 * Vector search over the V2 embeddings.
 * Cosine similarity + top-K.
 *
 * Used by : server/api/chatbot-v2.post.ts
 * Data    : server/data/embeddings-v2.json (generated by scripts/vectorize-v2.js)
 */

import { readFileSync, existsSync } from 'fs'
import { fileURLToPath } from 'url'
import { resolve, dirname } from 'path'

// ── Types ──────────────────────────────────────────────────────────────────────

/** One record of the embeddings file. */
export interface EmbeddingEntry {
  fiche_id: string
  nom: string
  famille: number
  hashtags: string[]
  embedding: number[]
  text_preview: string
}

/** An entry's metadata together with its similarity score against the query. */
export interface SearchResult {
  fiche_id: string
  nom: string
  famille: number
  hashtags: string[]
  score: number
  text_preview: string
}

/** Top-level fields of embeddings-v2.json that this module reads. */
interface EmbeddingsFile {
  embeddings?: unknown
  meta?: { model?: string } | null
}

// ── Cosine similarity ──────────────────────────────────────────────────────────

/**
 * Cosine similarity between two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns dot(a, b) / (|a| * |b|), or 0 when the vectors are not comparable:
 *   different lengths, a zero-norm (or empty) vector, or a non-finite result
 *   (NaN / overflowing components).
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) return 0

  let dot = 0
  let normA = 0
  let normB = 0
  for (let i = 0; i < a.length; i++) {
    const x = a[i] ?? 0
    const y = b[i] ?? 0
    dot += x * y
    normA += x * x
    normB += y * y
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB)
  if (denom === 0) return 0

  // A NaN score would make the ranking comparator inconsistent; treat it as
  // "no similarity", like the other non-comparable cases.
  const similarity = dot / denom
  return Number.isFinite(similarity) ? similarity : 0
}

// ── Top-K search ───────────────────────────────────────────────────────────────

/**
 * Scores every entry against the query and returns the best `k`.
 *
 * @param embeddings - Entries to rank. The array is not mutated.
 * @param queryEmbedding - Query vector, compared with each entry's `embedding`.
 * @param k - Maximum number of results (default 5). Fractions are truncated;
 *   negative or NaN values yield no results; `Infinity` returns every entry.
 * @returns Results sorted by descending score, at most `k` of them.
 */
export function topKSearch(
  embeddings: readonly EmbeddingEntry[],
  queryEmbedding: readonly number[],
  k: number = 5
): SearchResult[] {
  // slice(0, k) reads a negative k as "all but the last |k|", which is never
  // what a caller asking for "top k" means, so clamp to zero.
  const limit = Number.isNaN(k) ? 0 : Math.max(0, Math.trunc(k))

  return embeddings
    .map((e): SearchResult => ({
      fiche_id: e.fiche_id,
      nom: e.nom,
      famille: e.famille,
      hashtags: e.hashtags,
      score: cosineSimilarity(e.embedding, queryEmbedding),
      text_preview: e.text_preview
    }))
    .sort((x, y) => y.score - x.score)
    .slice(0, limit)
}

// ── Lazy loading of the embeddings (module-level cache) ────────────────────────

let _embeddingsV2: EmbeddingEntry[] | null = null

/**
 * Loads embeddings-v2.json once and caches the result for later calls.
 *
 * Never throws: when the file is missing, unreadable, or malformed, a warning
 * is logged and an empty list is cached and returned, so V2 vector search is
 * simply disabled.
 *
 * @returns The cached list of embedding entries (possibly empty).
 */
export function loadEmbeddingsV2(): EmbeddingEntry[] {
  if (_embeddingsV2 !== null) return _embeddingsV2

  try {
    // Resolve the path from server/utils/ to server/data/
    const currentDir = dirname(fileURLToPath(import.meta.url))
    const embPath = resolve(currentDir, '..', 'data', 'embeddings-v2.json')

    if (!existsSync(embPath)) {
      console.warn('[vectorSearch] embeddings-v2.json absent - V2 vector search désactivé')
      console.warn('[vectorSearch] Lancer : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js')
      _embeddingsV2 = []
      return _embeddingsV2
    }

    const raw = readFileSync(embPath, 'utf-8')
    const parsed: unknown = JSON.parse(raw)
    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('contenu JSON inattendu (objet attendu)')
    }

    const data = parsed as EmbeddingsFile
    const entries = data.embeddings ?? []
    if (!Array.isArray(entries)) {
      // Caching a non-array here would only fail later, inside topKSearch.
      throw new Error('champ "embeddings" invalide (tableau attendu)')
    }

    // Individual entries are trusted: the file is produced by our own script.
    const loaded = entries as EmbeddingEntry[]
    _embeddingsV2 = loaded
    console.log(`[vectorSearch] ${loaded.length} embeddings V2 chargés (${data.meta?.model ?? 'unknown'})`)
    return loaded
  } catch (e: unknown) {
    console.warn('[vectorSearch] Erreur chargement embeddings-v2.json :', e instanceof Error ? e.message : e)
    _embeddingsV2 = []
    return _embeddingsV2
  }
}