Files
nav-carte/server/utils/vectorSearch.ts
Jules Neny cf60d4b973 feat(aep-v2): restore V2 cascade composants récupérés depuis vault history
- Récupérés depuis commit vault b700612^ (état pré-chirurgie git)
- FicheFamilleModal.vue (284L) — PV2-5g
- FicheModalV2.vue (341L) + NavMapV2.vue (243L) — PV2-5
- HashtagFilter.vue (97L) + IntentionBanner.vue (76L) — PV2-5
- GraphView.vue (860L) — PV2-5b+5e+5f+5g complet
- ChatbotPlaceholder.vue (423L) — version chatbot-v2
- pages/index.vue (517L) — carte unifiée 3 onglets
- types/structure-v2.ts, assets/css/v2-bifurcation.css
- server/api/chatbot-v2.post.ts, server/utils/vectorSearch.ts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 23:16:45 +02:00

97 lines
3.2 KiB
TypeScript

/**
* Recherche vectorielle sur les embeddings V2
* Cosine similarity + top-K
*
* Utilisé par : server/api/chatbot-v2.post.ts
* Données : server/data/embeddings-v2.json (généré par scripts/vectorize-v2.js)
*/
import { readFileSync, existsSync } from 'fs'
import { fileURLToPath } from 'url'
import { resolve, dirname } from 'path'
// ── Types ──────────────────────────────────────────────────────────────────────
/**
 * One record of the V2 embeddings data set.
 *
 * `loadEmbeddingsV2` reads these records from the `embeddings` array of
 * embeddings-v2.json, and `topKSearch` scores them against a query vector.
 */
export interface EmbeddingEntry {
/** Identifier of the fiche this record describes (presumably unique; not enforced in this module). */
fiche_id: string
/** Name of the fiche; copied unchanged into search results. */
nom: string
/** Family number of the fiche. NOTE(review): looks like a family index/ID; confirm against the generator script. */
famille: number
/** Hashtags attached to the fiche; copied unchanged into search results. */
hashtags: string[]
/** Embedding vector; compared with the query vector by `cosineSimilarity`, so both must have the same length to score non-zero. */
embedding: number[]
/** Text preview carried along with the record. NOTE(review): presumably an excerpt of the embedded text; confirm. */
text_preview: string
}
/**
 * One ranked hit produced by `topKSearch`: the metadata of an
 * `EmbeddingEntry` (without its vector) plus the similarity score.
 */
export interface SearchResult {
/** Identifier copied from the matching entry. */
fiche_id: string
/** Name copied from the matching entry. */
nom: string
/** Family number copied from the matching entry. */
famille: number
/** Hashtags copied (by reference) from the matching entry. */
hashtags: string[]
/** Cosine similarity between the entry's vector and the query vector; 0 when the lengths differ or either vector has zero norm. */
score: number
/** Text preview copied from the matching entry. */
text_preview: string
}
// ── Cosine similarity ──────────────────────────────────────────────────────────
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when the vectors differ in length
 *          or when the product of their norms is 0 (this includes two empty vectors).
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  const size = a.length
  if (size !== b.length) {
    return 0
  }

  let dotProduct = 0
  let squaredNormA = 0
  let squaredNormB = 0

  // Single pass: accumulate the dot product and both squared norms together.
  let idx = 0
  while (idx < size) {
    const x = a[idx]
    const y = b[idx]
    dotProduct += x * y
    squaredNormA += x * x
    squaredNormB += y * y
    idx += 1
  }

  const magnitude = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB)
  if (magnitude === 0) {
    // At least one zero vector: similarity is undefined, reported as 0.
    return 0
  }
  return dotProduct / magnitude
}
// ── Top-K search ───────────────────────────────────────────────────────────────
/**
 * Scores every entry against the query vector with `cosineSimilarity` and
 * returns the best-scoring ones.
 *
 * The input array is left untouched: scoring builds a new array, which is
 * then sorted and truncated.
 *
 * @param embeddings - Candidate entries to score.
 * @param queryEmbedding - Query vector; entries whose vector length differs score 0.
 * @param k - Maximum number of results (default 5). Zero, negative or NaN yields an empty array.
 * @returns At most `k` results, highest score first; results with a NaN score rank last.
 */
export function topKSearch(
  embeddings: EmbeddingEntry[],
  queryEmbedding: number[],
  k: number = 5
): SearchResult[] {
  // `slice(0, k)` treats a negative `k` as an offset from the end, which would
  // return "all but the last |k|" results; clamp so a negative `k` means none.
  const limit = Math.max(0, k)
  if (!(limit > 0)) return []

  return embeddings
    .map(e => ({
      fiche_id: e.fiche_id,
      nom: e.nom,
      famille: e.famille,
      hashtags: e.hashtags,
      score: cosineSimilarity(e.embedding, queryEmbedding),
      text_preview: e.text_preview
    }))
    .sort((a, b) => {
      // A plain `b.score - a.score` yields NaN when a score is NaN, which makes
      // the ordering inconsistent; push NaN scores to the end explicitly.
      const aIsNaN = Number.isNaN(a.score)
      const bIsNaN = Number.isNaN(b.score)
      if (aIsNaN || bIsNaN) return aIsNaN === bIsNaN ? 0 : aIsNaN ? 1 : -1
      return b.score - a.score
    })
    .slice(0, limit)
}
// ── Chargement lazy des embeddings (cache module-level) ────────────────────────
/**
 * Module-level cache for the loaded entries. `null` means "not loaded yet";
 * an empty array is a valid cached result (file missing or unreadable).
 */
let _embeddingsV2: EmbeddingEntry[] | null = null

/** Reports whether a parsed JSON value is an object carrying an array-valued `embedding` field. */
function hasEmbeddingVector(value: unknown): boolean {
  return (
    typeof value === 'object' &&
    value !== null &&
    Array.isArray((value as { embedding?: unknown }).embedding)
  )
}

/**
 * Loads the V2 embeddings from `../data/embeddings-v2.json` (resolved relative
 * to this module) and caches the result for subsequent calls.
 *
 * The function never throws. A missing file, unreadable file, invalid JSON or
 * a non-array `embeddings` field logs a warning and yields an empty array.
 * That empty result is cached too, so the load is attempted only once.
 * Entries without an array-valued `embedding` are dropped, because scoring
 * them would throw; the other fields are trusted as-is.
 *
 * @returns The cached entries, or an empty array when nothing usable could be loaded.
 */
export function loadEmbeddingsV2(): EmbeddingEntry[] {
  if (_embeddingsV2 !== null) return _embeddingsV2

  let loaded: EmbeddingEntry[] = []
  try {
    // Resolve the path from server/utils/ to server/data/.
    const currentDir = dirname(fileURLToPath(import.meta.url))
    const embPath = resolve(currentDir, '..', 'data', 'embeddings-v2.json')
    if (!existsSync(embPath)) {
      console.warn('[vectorSearch] embeddings-v2.json absent - V2 vector search désactivé')
      console.warn('[vectorSearch] Lancer : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js')
    } else {
      const parsed: unknown = JSON.parse(readFileSync(embPath, 'utf-8'))
      const root = (parsed ?? {}) as {
        embeddings?: unknown
        meta?: { model?: string } | null
      }
      const rawEntries = root.embeddings ?? []
      if (!Array.isArray(rawEntries)) {
        // Without this check a non-array value would be cached and crash callers on `.map`.
        console.warn('[vectorSearch] embeddings-v2.json invalide : champ "embeddings" non tableau')
      } else {
        // Only the vector is checked here; see the function documentation.
        const usable = rawEntries.filter(hasEmbeddingVector) as EmbeddingEntry[]
        const dropped = rawEntries.length - usable.length
        if (dropped > 0) {
          console.warn(`[vectorSearch] ${dropped} entrées ignorées (champ "embedding" absent ou invalide)`)
        }
        loaded = usable
        console.log(`[vectorSearch] ${loaded.length} embeddings V2 chargés (${root.meta?.model ?? 'unknown'})`)
      }
    }
  } catch (e: unknown) {
    console.warn(
      '[vectorSearch] Erreur chargement embeddings-v2.json :',
      e instanceof Error ? e.message : e
    )
  }

  _embeddingsV2 = loaded
  return loaded
}