wip: snapshot V2 cascade onglet 2 (sauvegarde avant chirurgie git-hygiene)
This commit is contained in:
96
server/utils/vectorSearch.ts
Normal file
96
server/utils/vectorSearch.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
/**
 * Vector search over the V2 embeddings
 * Cosine similarity + top-K
 *
 * Used by: server/api/chatbot-v2.post.ts
 * Data: server/data/embeddings-v2.json (generated by scripts/vectorize-v2.js)
 */
|
||||
|
||||
import { readFileSync, existsSync } from 'fs'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { resolve, dirname } from 'path'
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One record of the embeddings file: a document ("fiche") together with its
 * embedding vector.
 *
 * NOTE(review): field meanings below are inferred from names and from how
 * `topKSearch` uses them; confirm against scripts/vectorize-v2.js.
 */
export interface EmbeddingEntry {
  /** Identifier of the fiche this entry belongs to. */
  fiche_id: string
  /** Name of the fiche (presumably a display name). */
  nom: string
  /** Numeric family of the fiche; the numbering scheme is not visible in this file. */
  famille: number
  /** Hashtags attached to the fiche. */
  hashtags: string[]
  /** Embedding vector; compared to the query vector with cosine similarity. */
  embedding: number[]
  /** Preview text for the fiche (presumably an excerpt of the embedded text). */
  text_preview: string
}
|
||||
|
||||
/**
 * One ranked hit returned by `topKSearch`: the metadata of an embedding entry
 * (without the vector itself) plus its similarity score.
 */
export interface SearchResult {
  /** Identifier of the matched fiche. */
  fiche_id: string
  /** Name of the matched fiche. */
  nom: string
  /** Numeric family of the matched fiche. */
  famille: number
  /** Hashtags attached to the matched fiche. */
  hashtags: string[]
  /** Cosine similarity between the entry's embedding and the query embedding. */
  score: number
  /** Preview text of the matched fiche. */
  text_preview: string
}
|
||||
|
||||
// ── Cosine similarity ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Computes the cosine similarity of two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`. Returns `0` instead when the vectors
 *   have different lengths, when either vector has zero norm (this includes
 *   empty vectors), or when the result is not a finite number (for example
 *   because an input contains `NaN`).
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) return 0

  let dot = 0
  let normA = 0
  let normB = 0
  for (let i = 0; i < a.length; i++) {
    const x = a[i]
    const y = b[i]
    dot += x * y
    normA += x * x
    normB += y * y
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB)
  if (denom === 0) return 0

  // A NaN/Infinity score would make score-based sort comparators inconsistent,
  // so treat it like the other "not comparable" cases and return 0.
  const similarity = dot / denom
  return Number.isFinite(similarity) ? similarity : 0
}
|
||||
|
||||
// ── Top-K search ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Ranks every embedding entry by cosine similarity to the query vector and
 * returns the best `k` of them.
 *
 * @param embeddings - Entries to search; the array is not mutated.
 * @param queryEmbedding - Query vector to compare each entry against.
 * @param k - Maximum number of results to return (default 5). Any value that
 *   is not greater than zero (0, negative, `NaN`) yields an empty result.
 * @returns At most `k` results, sorted by descending score.
 */
export function topKSearch(
  embeddings: readonly EmbeddingEntry[],
  queryEmbedding: number[],
  k: number = 5
): SearchResult[] {
  // Guard first: `slice(0, k)` with a negative k counts from the END of the
  // array and would return "all but the last |k|" results instead of none.
  if (!(k > 0)) return []

  return embeddings
    .map((entry): SearchResult => ({
      fiche_id: entry.fiche_id,
      nom: entry.nom,
      famille: entry.famille,
      hashtags: entry.hashtags,
      score: cosineSimilarity(entry.embedding, queryEmbedding),
      text_preview: entry.text_preview
    }))
    // `map` produced a fresh array, so the in-place sort does not touch the input.
    .sort((left, right) => right.score - left.score)
    .slice(0, k)
}
|
||||
|
||||
// ── Chargement lazy des embeddings (cache module-level) ────────────────────────
|
||||
|
||||
/**
 * Module-level cache: `null` until the first load attempt, then the loaded
 * list (possibly empty) for the lifetime of the module.
 */
let _embeddingsV2: EmbeddingEntry[] | null = null

/**
 * Loads the V2 embeddings from `server/data/embeddings-v2.json` (resolved
 * from `process.cwd()`) on first call and caches the result.
 *
 * This function never throws. A missing file, unreadable or invalid JSON, or
 * an `embeddings` field that is not an array all log a warning and produce an
 * empty list, and that empty list is cached too, so the file is read at most
 * once. Individual entries are not validated.
 *
 * @returns The cached array of embedding entries (the same instance on every call).
 */
export function loadEmbeddingsV2(): EmbeddingEntry[] {
  if (_embeddingsV2 !== null) return _embeddingsV2

  let loaded: EmbeddingEntry[] = []

  try {
    // Path resolution: process.cwd() points at the project root under Nitro
    // in dev and prod (import.meta.url breaks in the compiled .nuxt bundle).
    const embPath = resolve(process.cwd(), 'server', 'data', 'embeddings-v2.json')

    if (!existsSync(embPath)) {
      console.warn('[vectorSearch] embeddings-v2.json absent - V2 vector search désactivé')
      console.warn('[vectorSearch] Lancer : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js')
    } else {
      const parsed: unknown = JSON.parse(readFileSync(embPath, 'utf-8'))
      // Only read fields off a real object; anything else is treated as an empty document.
      const data = (typeof parsed === 'object' && parsed !== null ? parsed : {}) as {
        embeddings?: unknown
        meta?: { model?: unknown }
      }
      const entries = data.embeddings

      if (entries != null && !Array.isArray(entries)) {
        // Without this check a non-array value would be cached and returned as an array.
        console.warn('[vectorSearch] embeddings-v2.json : champ "embeddings" invalide (tableau attendu) - V2 vector search désactivé')
      } else {
        loaded = entries ?? []
        console.log(`[vectorSearch] ${loaded.length} embeddings V2 chargés (${data.meta?.model ?? 'unknown'})`)
      }
    }
  } catch (e: unknown) {
    console.warn('[vectorSearch] Erreur chargement embeddings-v2.json :', e instanceof Error ? e.message : e)
  }

  _embeddingsV2 = loaded
  return loaded
}
|
||||
Reference in New Issue
Block a user