fix(chatbot): switch endpoint v1 -> v2 + script vectorize CJS + payload Mistral input
- ChatbotPlaceholder.vue : fetch /api/chatbot -> /api/chatbot-v2 (utilise embeddings Mistral) - scripts/vectorize-v2.js renomme en .cjs (package.json type=module incompat avec require) - Fix payload Mistral : 'inputs' (deprecated) -> 'input' (API actuelle) - Vectorisation testee : 120 embeddings -> server/data/embeddings-v2.json (3.4MB, gitignored) - Cle Mistral en rotation : nouvelle dans .env local (pas commit) + a synchroniser sur VPS
This commit is contained in:
127
scripts/vectorize-v2.cjs
Normal file
127
scripts/vectorize-v2.cjs
Normal file
@@ -0,0 +1,127 @@
|
||||
// scripts/vectorize-v2.cjs
|
||||
// Usage : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.cjs
|
||||
// Génère : server/data/embeddings-v2.json
|
||||
//
|
||||
// SETUP AVANT DEPLOY :
|
||||
// cd nav-carte && MISTRAL_API_KEY=xxx node scripts/vectorize-v2.cjs
|
||||
// Coût estimé : ~0.10 EUR pour 120 fiches
|
||||
//
|
||||
// Prérequis : Node >= 18 (fetch natif disponible)
|
||||
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
|
||||
// Mistral API key, read from the environment. The script cannot do anything
// without it, so abort immediately with a usage hint when it is missing.
const MISTRAL_API_KEY = process.env.MISTRAL_API_KEY
if (!MISTRAL_API_KEY) {
  console.error('Erreur : MISTRAL_API_KEY manquante')
  // The script file is named vectorize-v2.cjs; the hint must show a command
  // that actually runs.
  console.error('Usage : MISTRAL_API_KEY=xxx node scripts/vectorize-v2.cjs')
  process.exit(1)
}
|
||||
|
||||
// Input dataset and output file, both resolved from the current working
// directory (the header comment expects the script to be launched from the
// project root).
const dataPath = path.join(process.cwd(), 'public', 'data', 'reseaux-bifurcation.json')
const outPath = path.join(process.cwd(), 'server', 'data', 'embeddings-v2.json')

// Make sure the output directory exists. With `recursive: true`, mkdirSync
// does not fail when the directory is already there, so no prior existence
// check is needed.
const outDir = path.dirname(outPath)
fs.mkdirSync(outDir, { recursive: true })

const rawData = fs.readFileSync(dataPath, 'utf-8')
const data = JSON.parse(rawData)
// Optional chaining: a file whose JSON content is `null` falls through to the
// explicit error below instead of throwing a TypeError here.
const structures = data?.structures

// Nothing to vectorise: stop before any API call is made.
if (!Array.isArray(structures) || structures.length === 0) {
  console.error('Erreur : aucune structure trouvée dans reseaux-bifurcation.json')
  process.exit(1)
}
|
||||
|
||||
/**
 * Requests embeddings for a batch of texts from the Mistral embeddings
 * endpoint (model `mistral-embed`).
 *
 * @param {string[]} texts - Texts to embed; sent as the `input` field.
 * @returns {Promise<Array>} One embedding per input text, in input order.
 * @throws {Error} When the HTTP response is not OK, or when the payload does
 *   not contain exactly one embedding per input text.
 */
async function embedBatch(texts) {
  // Nothing to embed: skip the network round-trip entirely.
  if (texts.length === 0) return []

  const res = await fetch('https://api.mistral.ai/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${MISTRAL_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'mistral-embed',
      input: texts
    })
  })

  if (!res.ok) {
    const err = await res.text()
    throw new Error(`Mistral API error ${res.status}: ${err}`)
  }

  const json = await res.json()
  const items = json?.data

  // Callers pair embeddings with inputs by position, so a short or malformed
  // payload must fail loudly rather than yield `undefined` embeddings.
  if (!Array.isArray(items) || items.length !== texts.length) {
    const received = Array.isArray(items) ? items.length : 0
    throw new Error(`Mistral API error: expected ${texts.length} embeddings, received ${received}`)
  }

  // When every item carries an integer `index`, use it to restore input order
  // instead of relying on the order of the response array.
  const hasIndexes = items.every(item => Number.isInteger(item?.index))
  const ordered = hasIndexes ? [...items].sort((a, b) => a.index - b.index) : items

  return ordered.map(item => item.embedding)
}
|
||||
|
||||
/**
 * Builds the text that gets embedded for one structure.
 *
 * Concatenates, in this order and separated by blank lines: the name, the
 * short description, the first 800 characters of the long description, the
 * hashtags, the source titles and the "pensees" labels. Empty segments are
 * left out.
 *
 * @param {object} s - Structure record. Reads `nom`, `description_courte`,
 *   `description_longue`, `hashtags`, `sources[].titre`, `pensees[].label`.
 * @returns {string} The concatenated text.
 */
function buildText(s) {
  const MAX_LONG_DESCRIPTION_CHARS = 800

  // Space-joins the usable values of a list. Missing or blank entries are
  // dropped so the result is '' (and later filtered out) rather than a run
  // of spaces when items lack the expected field.
  const joinValues = (values) =>
    (values ?? [])
      .filter(value => value != null && String(value).trim() !== '')
      .join(' ')

  const parts = [
    s.nom,
    s.description_courte ?? '',
    (s.description_longue ?? '').slice(0, MAX_LONG_DESCRIPTION_CHARS),
    joinValues(s.hashtags),
    // `?.` tolerates null entries in the lists.
    joinValues((s.sources ?? []).map(src => src?.titre)),
    joinValues((s.pensees ?? []).map(p => p?.label))
  ]

  return parts.filter(Boolean).join('\n\n')
}
|
||||
|
||||
/**
 * Embeds every entry of `structures` in batches and writes the result, with a
 * small metadata header, as pretty-printed JSON to `outPath`.
 *
 * Progress is logged per batch. Any failure while embedding a batch is
 * reported on stderr and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const embeddings = []
  const BATCH_SIZE = 8 // Mistral embed: deliberately small batches (rate limit)
  const PAUSE_MS = 200 // delay between two consecutive batches
  // Loop-invariant, so computed once up front.
  const totalBatches = Math.ceil(structures.length / BATCH_SIZE)

  console.log(`Vectorisation de ${structures.length} structures (modele : mistral-embed)...`)
  console.log(`Sortie : ${outPath}`)
  console.log()

  for (let i = 0; i < structures.length; i += BATCH_SIZE) {
    const batch = structures.slice(i, i + BATCH_SIZE)
    const texts = batch.map(buildText)
    const isLastBatch = i + BATCH_SIZE >= structures.length

    try {
      const batchEmbeddings = await embedBatch(texts)

      // Embeddings are matched to structures by position; refuse to write
      // records whose embedding would be missing.
      if (!Array.isArray(batchEmbeddings) || batchEmbeddings.length !== batch.length) {
        const received = Array.isArray(batchEmbeddings) ? batchEmbeddings.length : 0
        throw new Error(`expected ${batch.length} embeddings, received ${received}`)
      }

      batch.forEach((s, j) => {
        embeddings.push({
          fiche_id: s.id,
          nom: s.nom,
          famille: s.famille_principale,
          hashtags: s.hashtags ?? [],
          embedding: batchEmbeddings[j],
          text_preview: texts[j].slice(0, 300)
        })
      })

      const batchNum = Math.floor(i / BATCH_SIZE) + 1
      console.log(` Batch ${batchNum}/${totalBatches} OK (${batch.length} fiches)`)

      // Rate-limit pause between batches; pointless once the last one is done.
      if (!isLastBatch) {
        await new Promise(r => setTimeout(r, PAUSE_MS))
      }
    } catch (err) {
      // Report the range actually covered by this batch (the last batch may
      // be shorter than BATCH_SIZE).
      console.error(` Erreur batch ${i}-${i + batch.length}:`, err.message)
      process.exit(1)
    }
  }

  const output = {
    meta: {
      total: embeddings.length,
      model: 'mistral-embed',
      date: new Date().toISOString(),
      source: 'reseaux-bifurcation.json'
    },
    embeddings
  }

  fs.writeFileSync(outPath, JSON.stringify(output, null, 2), 'utf-8')

  const sizeKb = Math.round(fs.statSync(outPath).size / 1024)
  console.log()
  console.log(`Done : ${embeddings.length} embeddings -> ${outPath}`)
  console.log(`Taille : ${sizeKb} KB`)
  console.log()
  console.log('Prochaine etape : deployer le fichier sur le VPS avec les autres assets.')
}
|
||||
|
||||
// Script entry point: run the vectorisation; any rejection escaping main() is
// printed and turned into a non-zero exit code.
function reportFatalError(fatalError) {
  console.error('Erreur fatale :', fatalError)
  process.exit(1)
}

main().catch(reportFatalError)
|
||||
Reference in New Issue
Block a user