- ChatbotPlaceholder.vue : fetch /api/chatbot -> /api/chatbot-v2 (utilise les embeddings Mistral)
- scripts/vectorize-v2.js renommé en .cjs (package.json type=module incompatible avec require)
- Fix payload Mistral : 'inputs' (deprecated) -> 'input' (API actuelle)
- Vectorisation testée : 120 embeddings -> server/data/embeddings-v2.json (3.4 MB, gitignored)
- Clé Mistral en rotation : nouvelle clé dans .env local (non commitée) + à synchroniser sur le VPS
128 lines
3.8 KiB
JavaScript
128 lines
3.8 KiB
JavaScript
// scripts/vectorize-v2.js
// Usage:  MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js
// Output: server/data/embeddings-v2.json
//
// SETUP BEFORE DEPLOY:
//   cd nav-carte && MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js
//   Estimated cost: ~0.10 EUR for 120 records
//
// Requirements: Node >= 18 (native fetch available)
//
// NOTE(review): the accompanying commit message says this script was renamed
// to .cjs; confirm the file name used in the commands above.
|
|
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
|
|
// The Mistral API key comes from the environment. Without it the script
// prints an error plus a usage reminder on stderr and exits with code 1.
const { MISTRAL_API_KEY } = process.env

if (!MISTRAL_API_KEY) {
  const missingKeyLines = [
    'Erreur : MISTRAL_API_KEY manquante',
    'Usage : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js'
  ]
  for (const line of missingKeyLines) {
    console.error(line)
  }
  process.exit(1)
}
|
|
|
|
// Input and output locations are resolved against the current working
// directory, so the script must be launched from the directory that
// contains public/ and server/.
const workingDir = process.cwd()
const dataPath = path.join(workingDir, 'public', 'data', 'reseaux-bifurcation.json')
const outPath = path.join(workingDir, 'server', 'data', 'embeddings-v2.json')

// Create server/data/ when it does not exist yet
const outDir = path.dirname(outPath)
const outDirExists = fs.existsSync(outDir)
if (!outDirExists) {
  fs.mkdirSync(outDir, { recursive: true })
}
|
|
|
|
// Load the source catalogue. A missing or malformed file is reported in the
// same "Erreur : ..." style as the other fatal checks instead of surfacing as
// an uncaught exception: the path is relative to the working directory, so
// launching the script from the wrong directory is an easy mistake.
let rawData
let data
try {
  rawData = fs.readFileSync(dataPath, 'utf-8')
  data = JSON.parse(rawData)
} catch (err) {
  console.error(`Erreur : lecture impossible de ${dataPath} : ${err.message}`)
  process.exit(1)
}

// Optional chaining lets a JSON root of `null` reach the validation below
// rather than throwing a TypeError on property access.
const structures = data?.structures

// The rest of the script needs a non-empty array of structures
if (!Array.isArray(structures) || structures.length === 0) {
  console.error('Erreur : aucune structure trouvée dans reseaux-bifurcation.json')
  process.exit(1)
}
|
|
|
|
/**
 * Requests embeddings for a batch of texts from the Mistral embeddings
 * endpoint (model `mistral-embed`), authenticating with MISTRAL_API_KEY.
 *
 * @param {string[]} texts - Texts sent together in a single request.
 * @returns {Promise<Array[]>} The `embedding` array of each response item, in
 *   response order; exactly one per input text (presumably arrays of numbers).
 * @throws {Error} When the HTTP status is not OK, or when the response does
 *   not contain exactly one embedding array per input text.
 */
async function embedBatch(texts) {
  // Nothing to embed: skip the network round-trip
  if (texts.length === 0) return []

  const res = await fetch('https://api.mistral.ai/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${MISTRAL_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'mistral-embed',
      input: texts
    })
  })
  if (!res.ok) {
    const err = await res.text()
    throw new Error(`Mistral API error ${res.status}: ${err}`)
  }

  const json = await res.json()
  const items = json?.data

  // Callers pair results with inputs by position, so a count mismatch would
  // silently produce entries without an embedding. Fail loudly instead.
  if (!Array.isArray(items) || items.length !== texts.length) {
    const received = Array.isArray(items) ? items.length : 0
    throw new Error(`Mistral API error: expected ${texts.length} embeddings, got ${received}`)
  }

  return items.map((item, position) => {
    if (!Array.isArray(item?.embedding)) {
      throw new Error(`Mistral API error: missing embedding at position ${position}`)
    }
    return item.embedding
  })
}
|
|
|
|
/**
 * Builds the text that gets embedded for one structure.
 *
 * Segments, in order: name, short description, first 800 characters of the
 * long description, hashtags, source titles, thought labels. Empty segments
 * are dropped and the remaining ones are separated by a blank line.
 *
 * @param {object} s - Structure record; every field except `nom` may be
 *   missing or null.
 * @returns {string} The concatenated text ('' when nothing is usable).
 */
function buildText(s) {
  // Joins the usable entries of an optional list with single spaces. Entries
  // that are missing or empty are skipped so they leave no stray spaces.
  const joinList = (items, pick = (item) => item) =>
    (items ?? [])
      .map(pick)
      .filter((value) => value != null && value !== '')
      .join(' ')

  const parts = [
    s.nom,
    s.description_courte ?? '',
    (s.description_longue ?? '').slice(0, 800),
    joinList(s.hashtags),
    joinList(s.sources, (src) => src?.titre),
    joinList(s.pensees, (p) => p?.label)
  ]

  // Whitespace-only segments are truthy, so test the trimmed content too
  return parts
    .filter((part) => part && String(part).trim() !== '')
    .join('\n\n')
}
|
|
|
|
/**
 * Embeds every structure batch by batch and writes the result to `outPath`.
 *
 * Each output entry carries the record id, name, main family, hashtags, the
 * embedding and a 300-character preview of the embedded text. The file also
 * holds a `meta` header (count, model, ISO date, source file name).
 * Any batch failure is logged and ends the process with exit code 1, so no
 * partial file is written.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const embeddings = []
  const BATCH_SIZE = 8 // Mistral embed: cautious batch size for the rate limit
  const PAUSE_MS = 200
  // Loop-invariant, so computed once up front
  const totalBatches = Math.ceil(structures.length / BATCH_SIZE)

  console.log(`Vectorisation de ${structures.length} structures (modele : mistral-embed)...`)
  console.log(`Sortie : ${outPath}`)
  console.log()

  for (let i = 0; i < structures.length; i += BATCH_SIZE) {
    const batch = structures.slice(i, i + BATCH_SIZE)
    const texts = batch.map(buildText)

    try {
      const batchEmbeddings = await embedBatch(texts)
      // Embeddings are paired with their structure by position in the batch
      batch.forEach((s, j) => {
        embeddings.push({
          fiche_id: s.id,
          nom: s.nom,
          famille: s.famille_principale,
          hashtags: s.hashtags ?? [],
          embedding: batchEmbeddings[j],
          text_preview: texts[j].slice(0, 300)
        })
      })
      const batchNum = Math.floor(i / BATCH_SIZE) + 1
      console.log(` Batch ${batchNum}/${totalBatches} OK (${batch.length} fiches)`)
      // Rate-limit pause between batches; there is nothing to wait for after
      // the last one
      if (batchNum < totalBatches) {
        await new Promise(r => setTimeout(r, PAUSE_MS))
      }
    } catch (err) {
      // Upper bound is clamped to the records actually in this batch
      console.error(` Erreur batch ${i}-${i + batch.length}:`, err.message)
      process.exit(1)
    }
  }

  const output = {
    meta: {
      total: embeddings.length,
      model: 'mistral-embed',
      date: new Date().toISOString(),
      source: 'reseaux-bifurcation.json'
    },
    embeddings
  }

  fs.writeFileSync(outPath, JSON.stringify(output, null, 2), 'utf-8')

  // Report the size of the file that was just written
  const sizeKb = Math.round(fs.statSync(outPath).size / 1024)
  console.log()
  console.log(`Done : ${embeddings.length} embeddings -> ${outPath}`)
  console.log(`Taille : ${sizeKb} KB`)
  console.log()
  console.log('Prochaine etape : deployer le fichier sur le VPS avec les autres assets.')
}
|
|
|
|
// Entry point: any rejection that escapes main() is logged in full and
// turned into a non-zero exit code.
function reportFatalError(err) {
  console.error('Erreur fatale :', err)
  process.exit(1)
}

main().catch(reportFatalError)
|