- scripts/build-carte-o.js : scan recursif AEP/Articles/, parse YAML + legacy header, extract wikilinks, infer 5 famille - src/components/vue/CarteO.vue : D3 v7 force-directed avec drag, zoom + pan, click handler, tooltips, ResizeObserver - src/components/vue/CarteOModal.vue : modal recap intention avec Teleport, Esc + backdrop close, transitions - src/components/vue/CarteOWrapper.vue : fetch /data/carte-o.json, etat selectionne, fallback mobile (msg + miniature SVG) - src/components/astro/ColCentre.astro : tabs Carte O / Chatbot, panneaux ARIA - package.json : prebuild + predev hooks, build:carte-o script - public/data/carte-o.json : 84 nodes / 94 edges sur 21 themes, distribution familles equilibree Drill-down V1 = zoom + pan seul (V2 recursif backlog). Pattern adapte de nav-carte/components/codev/CodevGraph.vue (sans coupling Nuxt). Build Astro 6.3.1 OK, bundle CarteOWrapper 69KB. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
304 lines
9.4 KiB
JavaScript
304 lines
9.4 KiB
JavaScript
#!/usr/bin/env node
|
|
// Scrape AEP/Articles/ thematic folders -> public/data/carte-o.json
|
|
// Two frontmatter formats supported :
|
|
// 1. YAML standard between --- delimiters
|
|
// 2. Legacy "MOC : [[X]]\nSource : ...\nTags : ...\nDate : ...\n***" header
|
|
//
|
|
// Wikilinks [[X]] in body -> edges (resolved by label match against scraped nodes).
|
|
// Family inferred from theme directory name (5 AEP families).
|
|
// V1 cap : keep only the top 150 nodes by degree when the scrape yields more than 150 nodes.
|
|
|
|
import fs from 'node:fs/promises'
|
|
import path from 'node:path'
|
|
import { fileURLToPath } from 'node:url'
|
|
import matter from 'gray-matter'
|
|
import { globby } from 'globby'
|
|
|
|
// ES modules expose no __filename/__dirname; rebuild them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Repository root = parent of the directory that contains this script.
const REPO_ROOT = path.resolve(__dirname, '..')

// Folder scanned for markdown articles. The default is the original author's
// local path; set the CARTE_O_ARTICLES_ROOT environment variable to point the
// scrape at another location (other machine, CI) without editing this file.
// An empty variable falls back to the default.
const DEFAULT_ARTICLES_ROOT = 'C:/Users/jules/Dropbox/ATIS - IPCJRA/2 CASQUETTES/Penseur politique/AEP/Articles'
const ARTICLES_ROOT = process.env.CARTE_O_ARTICLES_ROOT || DEFAULT_ARTICLES_ROOT

// Generated graph file, relative to the repository root.
const OUTPUT = path.join(REPO_ROOT, 'public/data/carte-o.json')

// Maximum number of nodes kept in the V1 output.
const NODE_CAP_V1 = 150
|
|
|
|
// 5 AEP families : palette refined after first scrape.
// Maps each family key returned by inferFamily() to its hex display color.
// Frozen because it is shared module state that is only read and serialized.
const FAMILY_COLORS = Object.freeze({
  penseur: '#3b82f6', // blue
  concept: '#10b981', // green
  methode: '#f59e0b', // amber
  collectif: '#ef4444', // red
  ressource: '#8b5cf6', // violet
})
|
|
|
|
/**
 * Turn an arbitrary label into a lowercase ASCII slug made of [a-z0-9-].
 *
 * Accents are removed (NFD decomposition + stripping combining marks), every
 * run of other characters becomes a single hyphen, and the result is limited
 * to 80 characters with no leading or trailing hyphen.
 *
 * @param {*} str - Value to slugify; falsy values are treated as ''.
 * @returns {string} The slug, or 'untitled' when nothing usable remains.
 */
function slugify(str) {
  const MAX_LEN = 80
  const slug = String(str || '')
    .normalize('NFD')
    // Combining diacritical marks, written as escapes so the range survives
    // editors and encodings that mangle raw combining characters.
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, MAX_LEN)
    // Truncation can cut right after a separator; drop the dangling hyphen.
    .replace(/-+$/, '')
  return slug || 'untitled'
}
|
|
|
|
/**
 * Guess which of the 5 AEP families an article belongs to, using keyword
 * matching over its title, theme, path, tags and the first 800 characters
 * of its content (all lowercased and joined with spaces).
 *
 * @param {{title: *, theme: *, path: *, tags: *, content: *}} signals
 * @returns {string} 'methode', 'penseur', 'collectif', 'concept', or
 *   'ressource' when no keyword matches.
 */
function inferFamily(signals) {
  const { title, theme, path: relPath, tags, content } = signals
  const tagText = Array.isArray(tags) ? tags.join(' ') : tags
  const excerpt = (content || '').slice(0, 800)

  // Keep only the truthy signals, in a fixed order.
  const parts = []
  for (const part of [title, theme, relPath, tagText, excerpt]) {
    if (part) parts.push(part)
  }
  const haystack = parts.join(' ').toLowerCase()

  // Evaluated top to bottom, first match wins. Because of that ordering some
  // alternatives further down (e.g. "chantier", "mamdani" under collectif)
  // are already captured by an earlier rule.
  const rules = [
    // METHODE : process, tools, how-to
    ['methode', /m[ée]thode|outil|pratique|community.organizing|alinsky\b|comment\b|process|protocole|recette|guide|how.to|chantier|d[ée]marche/],
    // PENSEUR : proper names, authors, figures
    ['penseur', /penseur|auteur|figure|harari|alinsky|piven|chouard|branco|mamdani|shift|graeber|bourdieu|lordon|stiegler|d[ée]bord|illich|gorz|servigne|vidal|haupt|pisani|lalo|rosa/],
    // COLLECTIF : organisations, movements, networks
    ['collectif', /collectif|r[ée]seau|asso|union|coop|mouvement|piraterie|sociale|syndicat|comit[ée]|crise.de.la.profession|nyc|mamdani|chantier/],
    // CONCEPT : notions, theories, critiques
    ['concept', /concept|notion|th[ée]orie|critique|fiction|kakistocratie|imp[ée]rialisme|robustesse|sycophan|d[ée]construction|paradoxe|dialectique|ontologie|capitalisme|n[ée]olib[ée]ral|d[ée]mocratie|biais|illusion/],
  ]

  const hit = rules.find(([, pattern]) => pattern.test(haystack))
  // RESSOURCE : default bucket (drafts, ideas, agendas)
  return hit ? hit[0] : 'ressource'
}
|
|
|
|
// Fallback parser for legacy "MOC : [[X]]\nSource : ...\nDate : ...\n***\n" headers.
/**
 * Scans at most the first 30 lines for "Key : value" pairs, stopping at the
 * first line that contains only "***". Keys are trimmed and lowercased.
 *
 * @param {string} raw - Full file text (LF or CRLF line endings).
 * @returns {{data: Object, content: string}} When a "***" terminator is
 *   found: the collected pairs (with `tags` converted to an array of "#tag"
 *   tokens) and the text after the terminator, joined with "\n". Otherwise
 *   `{ data: {}, content: raw }`.
 */
function parseLegacyHeader(raw) {
  const HEADER_SCAN_LIMIT = 30
  const lines = raw.split(/\r?\n/)
  const fm = {}
  let bodyStart = 0
  let foundHeader = false

  for (let i = 0; i < Math.min(lines.length, HEADER_SCAN_LIMIT); i++) {
    const line = lines[i]
    // A line made only of "***" closes the header.
    if (/^\s*\*\*\*\s*$/.test(line)) {
      bodyStart = i + 1
      foundHeader = true
      break
    }
    const m = line.match(/^([A-Za-zÀ-ÿ ]+)\s*:\s*(.+)$/)
    if (m) {
      const key = m[1].trim().toLowerCase()
      fm[key] = m[2].trim()
    }
  }

  // Without the terminator, any pairs collected above are discarded.
  if (!foundHeader) return { data: {}, content: raw }

  // Parse tags (space-separated #tag tokens). Unicode-aware so accented tags
  // such as "#démocratie" are kept whole instead of being cut at the first
  // non-ASCII letter; \p{M} also covers decomposed (NFD) accents.
  if (fm.tags) {
    fm.tags = fm.tags.match(/#[\p{L}\p{M}\p{N}_/-]+/gu) || []
  }

  return {
    data: fm,
    content: lines.slice(bodyStart).join('\n'),
  }
}
|
|
|
|
/**
 * Parse a file's metadata, preferring YAML frontmatter and falling back to
 * the legacy header format.
 *
 * @param {string} raw - Full file text.
 * @returns {{data: Object, content: string}} The gray-matter result when it
 *   yields at least one key; otherwise whatever parseLegacyHeader returns.
 */
function safeParseFrontmatter(raw) {
  let yamlResult = null

  try {
    const candidate = matter(raw)
    const hasData = Object.keys(candidate.data).length !== 0
    if (hasData) yamlResult = candidate
  } catch (_) {
    // Deliberate best-effort: a YAML failure just means we try the legacy format.
  }

  return yamlResult !== null ? yamlResult : parseLegacyHeader(raw)
}
|
|
|
|
/**
 * Produce a short plain-text excerpt from markdown content.
 *
 * Removes a leading `---` frontmatter block and fenced code blocks, splits on
 * blank lines, and ignores paragraphs starting with '#', '---' or '|'.
 * The first paragraph longer than 30 characters that does not start like a
 * list/quote item is preferred; otherwise the first remaining paragraph is used.
 *
 * @param {string} content - Markdown text (LF, CRLF or CR line endings).
 * @param {number} [maxLen=220] - Maximum length of the returned excerpt.
 * @returns {string} Excerpt with wikilinks reduced to their target and
 *   emphasis/quote markers removed; '' when no paragraph qualifies.
 */
function extractFirstParagraph(content, maxLen = 220) {
  const paragraphs = String(content ?? '')
    // Normalize line endings first: paragraph detection below relies on "\n\n",
    // which never occurs in CRLF text ("\r\n\r\n").
    .replace(/\r\n?/g, '\n')
    // Skip headings, code blocks, callout/quote markers.
    .replace(/^---[\s\S]*?---\n/, '')
    .replace(/```[\s\S]*?```/g, '')
    .split(/\n\n+/)
    .map(p => p.trim())
    .filter(p => p && !p.startsWith('#') && !p.startsWith('---') && !p.startsWith('|'))

  // Prefer first paragraph that looks like prose (not a list).
  const prose = paragraphs.find(p => !/^\s*[->*•\d+\.\s]/.test(p) && p.length > 30)
  const first = prose || paragraphs[0] || ''

  // Strip wikilinks, bold, italics, callout markers.
  return first
    .replace(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g, '$1')
    .replace(/[*_>]+/g, '')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, maxLen)
}
|
|
|
|
/**
 * List the targets of every wikilink found in the given text.
 *
 * Recognizes [[Target]], [[Target|alias]] and [[Target#section]]; only the
 * Target part is returned, trimmed. Embeds written as ![[...]] are ignored,
 * as are links whose target is blank. Duplicates are kept, in document order.
 *
 * @param {string} content - Markdown text to scan.
 * @returns {string[]} Link targets.
 */
function extractWikilinks(content) {
  const wikilinkPattern = /(?<!!)\[\[([^\]|#]+)(?:[#|][^\]]*)?\]\]/g
  const targets = []
  for (const hit of content.matchAll(wikilinkPattern)) {
    const target = hit[1].trim()
    if (target) {
      targets.push(target)
    }
  }
  return targets
}
|
|
|
|
/**
 * Build the Carte O graph file.
 *
 * Scans every markdown file under ARTICLES_ROOT, turns each file into a node
 * and each wikilink that resolves to another node into an edge, then writes
 * nodes, edges and summary metadata to OUTPUT as pretty-printed JSON.
 *
 * - Unreadable files are skipped with a warning.
 * - Nodes sharing the same slug are collapsed (the first one wins).
 * - Edges are deduplicated regardless of direction; self-links are dropped.
 * - When more than NODE_CAP_V1 nodes remain, only the NODE_CAP_V1 nodes with
 *   the highest degree, and the edges between them, are kept.
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 */
async function main() {
  console.log('[carte-o] Scraping', ARTICLES_ROOT)

  // Glob all .md recursively under Articles/.
  const mdFiles = await globby(['**/*.md'], {
    cwd: ARTICLES_ROOT,
    absolute: true,
    gitignore: false,
  })
  console.log(`[carte-o] Found ${mdFiles.length} markdown files`)
  if (mdFiles.length === 0) {
    // Make the most likely misconfiguration (wrong or missing source folder) visible.
    console.warn(`[carte-o] No markdown files found under ${ARTICLES_ROOT} : output will be an empty graph`)
  }

  const nodes = []
  const edgesRaw = []
  const themeStats = {}

  for (const mdFile of mdFiles) {
    let raw
    try {
      raw = await fs.readFile(mdFile, 'utf-8')
    } catch (e) {
      // Best-effort scrape: report why the file was skipped and keep going.
      console.warn(`[carte-o] Skip unreadable ${mdFile}: ${e.message}`)
      continue
    }

    const relPath = path.relative(ARTICLES_ROOT, mdFile).replace(/\\/g, '/')
    const segments = relPath.split('/')
    const baseName = path.basename(mdFile, '.md')

    // Theme = first path segment, or the second one for files nested at least
    // one folder deep inside an "AEP ARTICLES..." folder.
    // E.g. "AEP ARTICLES, BROUILLON/AEP IA/file.md" -> theme = "AEP IA"
    //      "AEP ARTICLES, BROUILLON/file.md" -> theme = "AEP ARTICLES, BROUILLON"
    //      "Livre - le nouveau contrat social/file.md" -> theme = "Livre - le nouveau contrat social"
    // NOTE(review): a file sitting directly in ARTICLES_ROOT gets its own file
    // name as theme - confirm this is intended.
    let theme = segments[0]
    if (segments.length >= 3 && segments[0].startsWith('AEP ARTICLES')) {
      theme = segments[1]
    }

    const { data: fm, content } = safeParseFrontmatter(raw)
    // YAML may parse a title as a number or a date (e.g. `title: 1984`);
    // coerce to a string so later string operations on the label cannot throw.
    const title = String(fm.titre || fm.title || baseName.replace(/^!\s*/, '').trim())
    const slug = slugify(title)
    const family = inferFamily({
      title,
      theme,
      path: relPath,
      tags: fm.tags,
      content,
    })
    const intention = fm.intention || extractFirstParagraph(content)

    nodes.push({
      id: slug,
      label: title,
      family,
      intention,
      slug,
      theme,
      path: relPath,
    })

    // Counted per scraped file, i.e. before slug deduplication and capping.
    themeStats[theme] = (themeStats[theme] || 0) + 1

    // Collect wikilinks.
    for (const target of extractWikilinks(content)) {
      edgesRaw.push({ source: slug, targetLabel: target })
    }
  }

  // Deduplicate nodes by id (collisions on same slug) : first occurrence wins.
  const nodeById = new Map()
  for (const n of nodes) {
    if (!nodeById.has(n.id)) {
      nodeById.set(n.id, n)
    }
  }
  const dedupNodes = [...nodeById.values()]

  // Resolve edges : match targetLabel against node label or id.
  const labelToId = new Map()
  for (const n of dedupNodes) {
    labelToId.set(slugify(n.label), n.id)
    labelToId.set(n.label.toLowerCase(), n.id)
  }

  const edgesResolved = []
  const edgeSet = new Set()
  for (const e of edgesRaw) {
    // Try the slugified target first, then the lowercased raw target.
    const candidates = [slugify(e.targetLabel), e.targetLabel.toLowerCase()]
    const matchedKey = candidates.find(c => labelToId.has(c))
    const targetId = matchedKey === undefined ? null : labelToId.get(matchedKey)
    if (!targetId || targetId === e.source) continue

    // Direction-independent key so A->B and B->A count as one edge.
    const key = e.source < targetId ? `${e.source}→${targetId}` : `${targetId}→${e.source}`
    if (edgeSet.has(key)) continue
    edgeSet.add(key)
    edgesResolved.push({ source: e.source, target: targetId })
  }

  // Compute degree for each node.
  const degree = new Map()
  for (const e of edgesResolved) {
    degree.set(e.source, (degree.get(e.source) || 0) + 1)
    degree.set(e.target, (degree.get(e.target) || 0) + 1)
  }

  // V1 cap : if > NODE_CAP_V1 nodes, keep top N by degree.
  let finalNodes = dedupNodes
  if (dedupNodes.length > NODE_CAP_V1) {
    finalNodes = [...dedupNodes]
      .sort((a, b) => (degree.get(b.id) || 0) - (degree.get(a.id) || 0))
      .slice(0, NODE_CAP_V1)
    console.log(`[carte-o] Capped from ${dedupNodes.length} to ${NODE_CAP_V1} nodes (top by degree)`)
  }

  // Drop edges that lost an endpoint to the cap.
  const finalNodeIds = new Set(finalNodes.map(n => n.id))
  const finalEdges = edgesResolved.filter(e => finalNodeIds.has(e.source) && finalNodeIds.has(e.target))

  // Family distribution stats.
  const familyDist = {}
  for (const n of finalNodes) {
    familyDist[n.family] = (familyDist[n.family] || 0) + 1
  }

  // Ensure output dir exists.
  await fs.mkdir(path.dirname(OUTPUT), { recursive: true })
  await fs.writeFile(
    OUTPUT,
    JSON.stringify({
      meta: {
        generated: new Date().toISOString(),
        source: 'AEP/Articles',
        nodeCount: finalNodes.length,
        edgeCount: finalEdges.length,
        familyDistribution: familyDist,
        familyColors: FAMILY_COLORS,
        themeStats,
      },
      nodes: finalNodes,
      edges: finalEdges,
    }, null, 2),
    'utf-8',
  )

  console.log(`[carte-o] OK : ${finalNodes.length} nodes / ${finalEdges.length} edges`)
  console.log(`[carte-o] Families :`, familyDist)
  console.log(`[carte-o] Output : ${OUTPUT}`)
}
|
|
|
|
// Script entry point: run the scrape; any rejection is logged and reported
// to the caller (npm hook, shell) through a non-zero exit code.
function reportFatalError(err) {
  console.error('[carte-o] FAIL', err)
  process.exit(1)
}

main().catch(reportFatalError)
|