// nav-carte/server/api/chatbot-v2.post.ts

/**
 * POST /api/chatbot-v2
 *
 * Chatbot V2 - embedding-based search over the "bifurcation" structures.
 * Coexists with /api/chatbot (NocoDB keyword search) during the transition.
 *
 * SETUP BEFORE DEPLOY:
 * cd nav-carte && MISTRAL_API_KEY=xxx node scripts/vectorize-v2.js
 * Estimated cost: ~0.10 EUR for 120 fiches
 *
 * Flow:
 *   1. Rate limit (reuses checkRateLimitJson, 10 req/IP/day)
 *   2. Validate the request body (question of at least 3 characters)
 *   3. Load embeddings-v2.json; if empty, return a graceful response (v2_ready: false)
 *   4. Embed the query via Mistral Embed (mistral-embed)
 *   5. Top-5 cosine similarity against the loaded embeddings
 *   6. Build the RAG context from the candidate fiches
 *   7. Generate the answer with Nebius DeepSeek-V3.2 (json_object)
 *   8. Return { reponse_texte, fiches_recommandees, sources, v2_ready }
 *
 * Environment variables:
 *   MISTRAL_API_KEY         - Mistral key (shared with chatbot v1), used for the query embedding
 *   Nebius API key          - read as runtimeConfig.nebiusApiKey, used for answer generation
 *                             (presumably NEBIUS_API_KEY - confirm in nuxt.config)
 *   RAG_V1_ENABLED          - true/false (default: true) - coexistence during the transition
 *                             (not read by this handler)
 *   RAG_V1_DEPRECATION_DATE - Planned v1 deprecation date (e.g. 2026-05-18)
 *                             (not read by this handler)
 */

import { checkRateLimitJson } from '~/server/utils/rateLimitJson'
import { loadEmbeddingsV2, topKSearch } from '~/server/utils/vectorSearch'

// ── System prompt V2 ───────────────────────────────────────────────────────────

/**
 * System prompt template sent to the generation model.
 *
 * The text is kept in French because it is the runtime instruction given to
 * the model. The trailing `{{CONTEXTE_RAG}}` marker is a placeholder that the
 * handler substitutes with the retrieved fiches before calling the model.
 */
const SYSTEM_PROMPT_V2 = [
  // Role and scope
  'Tu es un assistant pour la carte des réseaux de bifurcation en architecture (projet AEP).',
  'Tu réponds aux questions sur les structures, les pratiques, les pensées écologiques.',
  '',
  // Answering rules
  'Règles :',
  '- Cite chaque structure par son nom exact et son fiche_id',
  '- Indique la famille (1-5) entre parenthèses après chaque nom',
  '- Reste sobre et descriptif - pas militant agressif',
  '- Tirets longs interdits : utilise des - ou des ;',
  '- Max 200 mots par réponse',
  '- Si hors-scope (pas archi/habiter/écologie), redirige poliment vers la carte',
  '- Retourne UNIQUEMENT un JSON valide, sans texte avant ou après',
  '',
  // Family legend referenced by the rules above
  'Familles :',
  '1 - Réemploi et filières',
  '2 - Frugalité et low-tech',
  '3 - Architecture sociale et précarités',
  '4 - Collectifs, écolieux et AMO',
  '5 - Urbanisme de transition et territoires',
  '',
  // Expected JSON output shape
  'FORMAT DE SORTIE :',
  '{',
  '  "reponse_texte": "Ta réponse en prose (max 200 mots)",',
  '  "fiches_recommandees": [',
  '    { "fiche_id": "f1-rotor", "nom": "Rotor", "explication": "1-2 phrases pourquoi cette fiche" }',
  '  ]',
  '}',
  '',
  // Placeholder replaced with the RAG context at request time
  'CONTEXTE - Structures disponibles :',
  '{{CONTEXTE_RAG}}'
].join('\n')

// ── Handler ────────────────────────────────────────────────────────────────────

/** Shape of the JSON answer the generation model is asked to produce. */
interface ChatbotV2Answer {
  reponse_texte: string
  // Items come straight from the model and are not validated individually;
  // kept as `any[]` so the route's response type stays unchanged for callers.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  fiches_recommandees: any[]
}

/**
 * Turns the raw model output into a usable answer.
 *
 * Falls back to a generic "could not parse" answer when the output is not
 * valid JSON, is not an object, or has no non-empty string `reponse_texte`.
 * A missing or non-array `fiches_recommandees` is normalised to `[]`.
 *
 * @param raw - Raw `content` string returned by the chat completion.
 * @returns A validated answer; never throws.
 */
function parseChatbotV2Answer(raw: string): ChatbotV2Answer {
  const fallback: ChatbotV2Answer = {
    reponse_texte: "Impossible d'analyser la réponse.",
    fiches_recommandees: []
  }

  let candidate: unknown
  try {
    candidate = JSON.parse(raw)
  } catch {
    return fallback
  }

  if (typeof candidate !== 'object' || candidate === null) return fallback

  const { reponse_texte: texte, fiches_recommandees: fiches } = candidate as Record<string, unknown>
  if (typeof texte !== 'string' || texte.trim() === '') return fallback

  return {
    reponse_texte: texte,
    fiches_recommandees: Array.isArray(fiches) ? fiches : []
  }
}

/**
 * POST /api/chatbot-v2 handler.
 *
 * Rate-limits the caller, validates the question, embeds it with Mistral,
 * retrieves the 5 closest fiches, and asks Nebius DeepSeek-V3.2 for a JSON answer.
 *
 * @returns `{ reponse_texte, fiches_recommandees, sources, v2_ready }`.
 *   `v2_ready` is `false` (with empty lists) when no V2 embeddings are loaded.
 * @throws 429 when the rate limit is exceeded, 400 when the question is missing,
 *   not a string, or shorter than 3 characters, 500 when an API key is not
 *   configured, 502 when an upstream API call fails.
 */
export default defineEventHandler(async (event) => {
  const config = useRuntimeConfig()

  // 1. Rate limit, keyed on the first x-forwarded-for entry, then the socket address.
  // NOTE(review): x-forwarded-for is client-controlled unless a trusted proxy
  // overwrites it - confirm the deployment sits behind one.
  const forwardedFor = getHeader(event, 'x-forwarded-for')
  const ip =
    forwardedFor?.split(',')[0]?.trim() ||
    event.node.req.socket?.remoteAddress ||
    '0.0.0.0'

  if (!checkRateLimitJson(ip, 'chatbot-v2', 10)) {
    throw createError({
      statusCode: 429,
      statusMessage: 'Limite de 10 questions par jour atteinte.'
    })
  }

  // 2. Body validation. A non-string `question` (number, object, ...) is treated
  // as empty so it yields a 400 instead of a TypeError on `.trim()`.
  const body = await readBody<{ question?: unknown } | null | undefined>(event)
  const rawQuestion = body?.question
  const question = typeof rawQuestion === 'string' ? rawQuestion.trim() : ''

  if (question.length < 3) {
    throw createError({ statusCode: 400, statusMessage: 'Question trop courte.' })
  }

  const mistralApiKey = config.mistralApiKey as string
  if (!mistralApiKey) {
    throw createError({ statusCode: 500, statusMessage: 'Clé API Mistral manquante.' })
  }

  // 3. Load the V2 embeddings.
  const embeddingsV2 = loadEmbeddingsV2()

  // Graceful fallback while the vectorize-v2.js script has not been run yet.
  if (embeddingsV2.length === 0) {
    return {
      reponse_texte: "La base vectorielle V2 est en cours de préparation. Merci d'utiliser le chatbot classique en attendant.",
      fiches_recommandees: [],
      sources: [],
      v2_ready: false
    }
  }

  // Check the generation key before any upstream call so a misconfiguration
  // does not cost an embedding request first.
  const nebiusApiKey = config.nebiusApiKey as string
  if (!nebiusApiKey) {
    throw createError({ statusCode: 500, statusMessage: 'Clé API Nebius manquante.' })
  }

  // 4. Embed the query via Mistral Embed.
  let queryEmbedding: number[]
  try {
    const embedRes = await $fetch<{ data: { embedding: number[] }[] }>(
      'https://api.mistral.ai/v1/embeddings',
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${mistralApiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          model: 'mistral-embed',
          // The REST API field is `input`; `inputs` is the SDK-side parameter name.
          input: [question]
        })
      }
    )
    const embedding = embedRes?.data?.[0]?.embedding
    if (!Array.isArray(embedding) || embedding.length === 0) {
      throw new Error('empty embedding in Mistral response')
    }
    queryEmbedding = embedding
  } catch (e: unknown) {
    console.error('[chatbot-v2] Erreur embedding Mistral :', e instanceof Error ? e.message : e)
    throw createError({ statusCode: 502, statusMessage: 'Erreur embedding Mistral.' })
  }

  // 5. Top-5 nearest fiches.
  const v2Results = topKSearch(embeddingsV2, queryEmbedding, 5)

  // 6. RAG context. `candidatesContext` is also returned to the client as `sources`.
  const candidatesContext = v2Results.map(r => ({
    fiche_id: r.fiche_id,
    nom: r.nom,
    famille: r.famille,
    hashtags: r.hashtags,
    score: r.score,
    preview: r.text_preview
  }))

  const contextStr = candidatesContext
    .map(c => `[${c.fiche_id}] ${c.nom} (famille ${c.famille}, score: ${c.score.toFixed(2)})\n${c.preview}`)
    .join('\n\n---\n\n')

  // Replacer function: with a plain string, `$&`, `$'`, `$$`... inside the
  // fiche previews would be interpreted as replacement patterns.
  const systemPrompt = SYSTEM_PROMPT_V2.replace('{{CONTEXTE_RAG}}', () => contextStr)

  // 7. Nebius DeepSeek-V3.2 - answer generation.
  let rawAnswer: string
  try {
    const nebiusRes = await $fetch<{
      choices: { message: { content: string } }[]
    }>('https://api.tokenfactory.nebius.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${nebiusApiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'deepseek-ai/DeepSeek-V3.2',
        temperature: 0.3,
        max_tokens: 600,
        response_format: { type: 'json_object' },
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: question }
        ]
      })
    })
    // An absent completion becomes '{}', which the parser maps to the fallback answer.
    rawAnswer = nebiusRes?.choices?.[0]?.message?.content ?? '{}'
  } catch (e: unknown) {
    console.error('[chatbot-v2] Erreur Nebius DeepSeek :', e instanceof Error ? e.message : e)
    throw createError({ statusCode: 502, statusMessage: 'Erreur appel Nebius DeepSeek.' })
  }

  // 8. Parse and validate the model's JSON answer.
  const answer = parseChatbotV2Answer(rawAnswer)

  return {
    reponse_texte: answer.reponse_texte,
    fiches_recommandees: answer.fiches_recommandees,
    sources: candidatesContext,
    v2_ready: true
  }
})