Files
astro-site-cerveau/scripts/build-carte-o.js
Jules Neny 32bdc9a2e5 feat: PC3 mindmap Carte O (D3 force-directed) + scrape AEP/Articles + tabs centre HAUT
- scripts/build-carte-o.js : scan recursif AEP/Articles/, parse YAML + legacy header, extract wikilinks, infer 5 famille
- src/components/vue/CarteO.vue : D3 v7 force-directed avec drag, zoom + pan, click handler, tooltips, ResizeObserver
- src/components/vue/CarteOModal.vue : modal recap intention avec Teleport, Esc + backdrop close, transitions
- src/components/vue/CarteOWrapper.vue : fetch /data/carte-o.json, etat selectionne, fallback mobile (msg + miniature SVG)
- src/components/astro/ColCentre.astro : tabs Carte O / Chatbot, panneaux ARIA
- package.json : prebuild + predev hooks, build:carte-o script
- public/data/carte-o.json : 84 nodes / 94 edges sur 21 themes, distribution familles equilibree

Drill-down V1 = zoom + pan seul (V2 recursif backlog).
Pattern adapte de nav-carte/components/codev/CodevGraph.vue (sans coupling Nuxt).
Build Astro 6.3.1 OK, bundle CarteOWrapper 69KB.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 00:59:23 +02:00

304 lines
9.4 KiB
JavaScript

#!/usr/bin/env node
// Scrape AEP/Articles/ thematic folders -> public/data/carte-o.json
// Two frontmatter formats supported :
// 1. YAML standard between --- delimiters
// 2. Legacy "MOC : [[X]]\nSource : ...\nTags : ...\nDate : ...\n***" header
//
// Wikilinks [[X]] in body -> edges (resolved by label match against scraped nodes).
// Family inferred from theme directory name (5 AEP families).
// V1 cap : keep the top 150 nodes by degree when the scrape yields more than 150 nodes.
import fs from 'node:fs/promises'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
import matter from 'gray-matter'
import { globby } from 'globby'
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
const REPO_ROOT = path.resolve(__dirname, '..')
const ARTICLES_ROOT = 'C:/Users/jules/Dropbox/ATIS - IPCJRA/2 CASQUETTES/Penseur politique/AEP/Articles'
const OUTPUT = path.join(REPO_ROOT, 'public/data/carte-o.json')
const NODE_CAP_V1 = 150
// 5 AEP families : palette refined after first scrape.
const FAMILY_COLORS = {
penseur: '#3b82f6', // blue
concept: '#10b981', // green
methode: '#f59e0b', // amber
collectif: '#ef4444', // red
ressource: '#8b5cf6', // violet
}
function slugify(str) {
return String(str || '')
.normalize('NFD').replace(/[̀-ͯ]/g, '')
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.slice(0, 80) || 'untitled'
}
function inferFamily(signals) {
// signals = { title, theme, path, tags, content }
const haystack = [
signals.title,
signals.theme,
signals.path,
Array.isArray(signals.tags) ? signals.tags.join(' ') : signals.tags,
(signals.content || '').slice(0, 800),
].filter(Boolean).join(' ').toLowerCase()
// Order matters : check most specific first.
// METHODE : process, outils, comment-faire
if (/m[ée]thode|outil|pratique|community.organizing|alinsky\b|comment\b|process|protocole|recette|guide|how.to|chantier|d[ée]marche/.test(haystack)) {
return 'methode'
}
// PENSEUR : noms propres, auteurs, figures
if (/penseur|auteur|figure|harari|alinsky|piven|chouard|branco|mamdani|shift|graeber|bourdieu|lordon|stiegler|d[ée]bord|illich|gorz|servigne|vidal|haupt|pisani|lalo|rosa/.test(haystack)) {
return 'penseur'
}
// COLLECTIF : organisations, mouvements, réseaux
if (/collectif|r[ée]seau|asso|union|coop|mouvement|piraterie|sociale|syndicat|comit[ée]|crise.de.la.profession|nyc|mamdani|chantier/.test(haystack)) {
return 'collectif'
}
// CONCEPT : notions, théories, critiques
if (/concept|notion|th[ée]orie|critique|fiction|kakistocratie|imp[ée]rialisme|robustesse|sycophan|d[ée]construction|paradoxe|dialectique|ontologie|capitalisme|n[ée]olib[ée]ral|d[ée]mocratie|biais|illusion/.test(haystack)) {
return 'concept'
}
// RESSOURCE : par défaut (articles brouillon, idées, agendas)
return 'ressource'
}
// Fallback parser for legacy "MOC : [[X]]\nSource : ...\nDate : ...\n***\n" headers.
function parseLegacyHeader(raw) {
const lines = raw.split(/\r?\n/)
const fm = {}
let bodyStart = 0
let foundHeader = false
for (let i = 0; i < Math.min(lines.length, 30); i++) {
const line = lines[i]
if (/^\s*\*\*\*\s*$/.test(line)) {
bodyStart = i + 1
foundHeader = true
break
}
const m = line.match(/^([A-Za-zÀ-ÿ ]+)\s*:\s*(.+)$/)
if (m) {
const key = m[1].trim().toLowerCase()
fm[key] = m[2].trim()
}
}
if (!foundHeader) return { data: {}, content: raw }
// Parse tags (space-separated #tag tokens).
if (fm.tags) {
fm.tags = fm.tags.match(/#[\w/-]+/g) || []
}
return {
data: fm,
content: lines.slice(bodyStart).join('\n'),
}
}
function safeParseFrontmatter(raw) {
// Try YAML first.
try {
const parsed = matter(raw)
if (Object.keys(parsed.data).length > 0) return parsed
} catch (_) {
// YAML parse failed, fall through.
}
return parseLegacyHeader(raw)
}
function extractFirstParagraph(content, maxLen = 220) {
// Skip headings, code blocks, callout/quote markers.
const cleaned = content
.replace(/^---[\s\S]*?---\n/, '')
.replace(/```[\s\S]*?```/g, '')
.split(/\n\n+/)
.map(p => p.trim())
.filter(p => p && !p.startsWith('#') && !p.startsWith('---') && !p.startsWith('|'))
// Prefer first paragraph that looks like prose (not a list).
const prose = cleaned.find(p => !/^\s*[->*•\d+\.\s]/.test(p) && p.length > 30)
const first = prose || cleaned[0] || ''
// Strip wikilinks, bold, italics, callout markers.
return first
.replace(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g, '$1')
.replace(/[*_>]+/g, '')
.replace(/\s+/g, ' ')
.trim()
.slice(0, maxLen)
}
function extractWikilinks(content) {
// Match [[Target]], [[Target|alias]], [[Target#section]]
// Skip image embeds ![[...]]
const matches = [...content.matchAll(/(?<!!)\[\[([^\]|#]+)(?:[#|][^\]]*)?\]\]/g)]
return matches.map(m => m[1].trim()).filter(Boolean)
}
async function main() {
console.log('[carte-o] Scraping', ARTICLES_ROOT)
// Glob all .md recursively under Articles/.
const mdFiles = await globby(['**/*.md'], {
cwd: ARTICLES_ROOT,
absolute: true,
gitignore: false,
})
console.log(`[carte-o] Found ${mdFiles.length} markdown files`)
const nodes = []
const edgesRaw = []
const themeStats = {}
for (const mdFile of mdFiles) {
let raw
try {
raw = await fs.readFile(mdFile, 'utf-8')
} catch (e) {
console.warn(`[carte-o] Skip unreadable ${mdFile}`)
continue
}
const relPath = path.relative(ARTICLES_ROOT, mdFile).replace(/\\/g, '/')
const segments = relPath.split('/')
const baseName = path.basename(mdFile, '.md')
// Theme = first or second segment depending on structure.
// E.g. "AEP ARTICLES, BROUILLON/AEP IA/file.md" -> theme = "AEP IA"
// "AEP ARTICLES, BROUILLON/file.md" -> theme = "AEP ARTICLES, BROUILLON"
// "Livre - le nouveau contrat social/file.md" -> theme = "Livre"
let theme = segments[0]
if (segments.length >= 3 && segments[0].startsWith('AEP ARTICLES')) {
theme = segments[1]
}
const { data: fm, content } = safeParseFrontmatter(raw)
const title = fm.titre || fm.title || baseName.replace(/^!\s*/, '').trim()
const slug = slugify(title)
const family = inferFamily({
title,
theme,
path: relPath,
tags: fm.tags,
content,
})
const intention = fm.intention || extractFirstParagraph(content)
nodes.push({
id: slug,
label: title,
family,
intention,
slug,
theme,
path: relPath,
})
themeStats[theme] = (themeStats[theme] || 0) + 1
// Collect wikilinks.
const wikilinks = extractWikilinks(content)
for (const target of wikilinks) {
edgesRaw.push({ source: slug, targetLabel: target })
}
}
// Deduplicate nodes by id (collisions on same slug).
const nodeById = new Map()
for (const n of nodes) {
if (!nodeById.has(n.id)) {
nodeById.set(n.id, n)
}
}
const dedupNodes = [...nodeById.values()]
// Resolve edges : match targetLabel against node label or id.
const labelToId = new Map()
for (const n of dedupNodes) {
labelToId.set(slugify(n.label), n.id)
labelToId.set(n.label.toLowerCase(), n.id)
}
const edgesResolved = []
const edgeSet = new Set()
for (const e of edgesRaw) {
const candidates = [
slugify(e.targetLabel),
e.targetLabel.toLowerCase(),
]
let targetId = null
for (const c of candidates) {
if (labelToId.has(c)) {
targetId = labelToId.get(c)
break
}
}
if (!targetId || targetId === e.source) continue
const key = e.source < targetId ? `${e.source}${targetId}` : `${targetId}${e.source}`
if (edgeSet.has(key)) continue
edgeSet.add(key)
edgesResolved.push({ source: e.source, target: targetId })
}
// Compute degree for each node.
const degree = new Map()
for (const e of edgesResolved) {
degree.set(e.source, (degree.get(e.source) || 0) + 1)
degree.set(e.target, (degree.get(e.target) || 0) + 1)
}
// V1 cap : if > NODE_CAP_V1 nodes, keep top N by degree.
let finalNodes = dedupNodes
if (dedupNodes.length > NODE_CAP_V1) {
finalNodes = [...dedupNodes]
.sort((a, b) => (degree.get(b.id) || 0) - (degree.get(a.id) || 0))
.slice(0, NODE_CAP_V1)
console.log(`[carte-o] Capped from ${dedupNodes.length} to ${NODE_CAP_V1} nodes (top by degree)`)
}
const finalNodeIds = new Set(finalNodes.map(n => n.id))
const finalEdges = edgesResolved.filter(e => finalNodeIds.has(e.source) && finalNodeIds.has(e.target))
// Family distribution stats.
const familyDist = {}
for (const n of finalNodes) {
familyDist[n.family] = (familyDist[n.family] || 0) + 1
}
// Ensure output dir exists.
await fs.mkdir(path.dirname(OUTPUT), { recursive: true })
await fs.writeFile(
OUTPUT,
JSON.stringify({
meta: {
generated: new Date().toISOString(),
source: 'AEP/Articles',
nodeCount: finalNodes.length,
edgeCount: finalEdges.length,
familyDistribution: familyDist,
familyColors: FAMILY_COLORS,
themeStats,
},
nodes: finalNodes,
edges: finalEdges,
}, null, 2),
'utf-8',
)
console.log(`[carte-o] OK : ${finalNodes.length} nodes / ${finalEdges.length} edges`)
console.log(`[carte-o] Families :`, familyDist)
console.log(`[carte-o] Output : ${OUTPUT}`)
}
main().catch(err => {
console.error('[carte-o] FAIL', err)
process.exit(1)
})