astro-site-cerveau/docs/n8n-workflow-journal-aggregate-v2-rsshub.json

{
  "name": "journal-aggregate-v2-rsshub",
  "_notes": "V1.5-E (2026-05-13) — remplace Behold par RSSHub auto-hébergé (rss.trans-former.fr) + ajoute Substack natif. Cron décalé à 4h UTC (anti-rate-limit Insta). Si /instagram/user/* renvoie 503 (config Insta absente), le node n'échoue pas (neverError) et le JSON final aura 0 item insta — ColInsta affichera fallbackBio. À importer dans n8n via l'UI (Workflows -> Import from File) ou l'API.",
  "nodes": [
    {
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "cronExpression",
              "expression": "0 4 * * *"
            }
          ]
        }
      },
      "id": "schedule-trigger",
      "name": "Cron-4h-UTC",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.1,
      "position": [240, 320]
    },
    {
      "parameters": {
        "url": "https://git.trans-former.fr/jules.atom",
        "options": {
          "response": { "response": { "responseFormat": "text" } },
          "timeout": 15000
        }
      },
      "id": "fetch-gitea",
      "name": "Fetch-gitea",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [460, 160]
    },
    {
      "parameters": {
        "url": "https://julesneny.substack.com/feed",
        "options": {
          "response": { "response": { "responseFormat": "text" } },
          "timeout": 15000
        }
      },
      "id": "fetch-substack",
      "name": "Fetch-substack-natif",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [460, 280]
    },
    {
      "parameters": {
        "url": "https://rss.trans-former.fr/instagram/user/aep.politique",
        "options": {
          "response": { "response": { "neverError": true, "responseFormat": "text" } },
          "timeout": 20000
        }
      },
      "id": "fetch-rsshub-aep",
      "name": "Fetch-rsshub-insta-aep",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [460, 400]
    },
    {
      "parameters": {
        "url": "https://rss.trans-former.fr/instagram/user/julesneny",
        "options": {
          "response": { "response": { "neverError": true, "responseFormat": "text" } },
          "timeout": 20000
        }
      },
      "id": "fetch-rsshub-julesneny",
      "name": "Fetch-rsshub-insta-julesneny",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [460, 520]
    },
    {
      "parameters": {
        "jsCode": "// V1.5-E: normalise 4 feeds into one unified JSON document.\n// Sources: Gitea Atom (XML), native Substack feed (RSS XML), RSSHub Instagram x2 (RSS XML).\n\nconst items = [];\n\n// Convert a feed date string to ISO 8601; returns null when it is missing or unparseable.\n// Date.prototype.toISOString() throws a RangeError on an invalid Date, which would abort\n// the whole run because of a single malformed entry.\nfunction toIso(value) {\n  const d = new Date(value);\n  return Number.isNaN(d.getTime()) ? null : d.toISOString();\n}\n\n// Decode the XML entities seen in these feeds, then strip tags and collapse whitespace.\n// Includes the numeric forms &#34; &#39; &#x9; &#xA; &#xD; that Go's encoding/xml emits.\n// &amp; is decoded last so an escaped entity such as '&amp;lt;' is decoded only once.\nfunction decodeText(s) {\n  return String(s || '')\n    .replace(/&lt;/g, '<').replace(/&gt;/g, '>')\n    .replace(/&quot;/g, '\"').replace(/&#34;/g, '\"')\n    .replace(/&#39;/g, \"'\").replace(/&apos;/g, \"'\")\n    .replace(/&#x[9AD];/gi, ' ').replace(/&amp;/g, '&')\n    .replace(/<[^>]+>/g, ' ').replace(/\\s+/g, ' ').trim();\n}\n\n// URLs taken from XML (attribute values, <link> text) carry '&' escaped as '&amp;'.\nfunction decodeUrl(s) {\n  return String(s || '').replace(/&amp;/g, '&');\n}\n\n// ---- Atom parser (Gitea) ----\n// Returns one normalised item per <entry>; entries whose <updated> is missing or invalid are skipped.\nfunction parseAtomGitea(xml) {\n  const out = [];\n  if (!xml || typeof xml !== 'string') return out;\n  const entries = xml.split(/<entry>/i).slice(1);\n  for (const raw of entries) {\n    const block = '<entry>' + raw.split(/<\\/entry>/i)[0] + '</entry>';\n    const title = (block.match(/<title>([\\s\\S]*?)<\\/title>/i) || [])[1] || '';\n    const updated = (block.match(/<updated>([^<]+)<\\/updated>/i) || [])[1] || '';\n    const link = decodeUrl((block.match(/<link[^>]*href=\"([^\"]+)\"/i) || [])[1]);\n    const summary = (block.match(/<summary[^>]*>([\\s\\S]*?)<\\/summary>/i) || [])[1] || '';\n    const id = (block.match(/<id>([^<]+)<\\/id>/i) || [])[1] || link || updated;\n    const date = toIso(updated);\n    if (!date) continue;\n    out.push({\n      id: 'gitea-' + (id.slice(0, 80) || updated),\n      platform: 'gitea',\n      hashtag: '#stack',\n      date,\n      titre: decodeText(title).slice(0, 140),\n      extrait: decodeText(summary).slice(0, 280),\n      url: link,\n      thumbnail: null,\n    });\n  }\n  return out;\n}\n\n// ---- RSS 2.0 parser (Substack, RSSHub) ----\n// Returns one normalised item per <item>; items with neither pubDate nor title are skipped.\nfunction parseRss(xml, platform, hashtag, fallbackUrl) {\n  const out = [];\n  if (!xml || typeof xml !== 'string') return out;\n  // Named 'chunks' so the outer 'items' accumulator is not shadowed.\n  const chunks = xml.split(/<item[\\s>]/i).slice(1);\n  for (const raw of chunks) {\n    const block = '<item ' + raw.split(/<\\/item>/i)[0] + '</item>';\n    // First capture group of 're' with any CDATA wrapper removed ('' when no match).\n    const cdata = (re) => {\n      const m = block.match(re);\n      if (!m) return '';\n      return (m[1] || '').replace(/<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>/g, '$1');\n    };\n    // URL attribute captured by 're', XML-unescaped (null when no match).\n    const urlAttr = (re) => {\n      const m = block.match(re);\n      return m ? decodeUrl(m[1]) : null;\n    };\n    const title = cdata(/<title[^>]*>([\\s\\S]*?)<\\/title>/i);\n    const link = decodeUrl(cdata(/<link[^>]*>([\\s\\S]*?)<\\/link>/i).trim());\n    const guid = cdata(/<guid[^>]*>([\\s\\S]*?)<\\/guid>/i).trim();\n    const desc = cdata(/<description[^>]*>([\\s\\S]*?)<\\/description>/i);\n    const pub = cdata(/<pubDate[^>]*>([\\s\\S]*?)<\\/pubDate>/i).trim();\n    if (!pub && !title) continue;\n    const enclosure = urlAttr(/<enclosure[^>]+url=\"([^\"]+)\"/i);\n    const mediaThumb = urlAttr(/<media:thumbnail[^>]+url=\"([^\"]+)\"/i);\n    out.push({\n      // Title is the last-resort key so date-less, link-less items do not all collapse onto one id.\n      id: platform + '-' + (guid || link || pub || title).slice(0, 80),\n      platform,\n      hashtag,\n      // A missing or unparseable pubDate falls back to the run time.\n      date: toIso(pub) || new Date().toISOString(),\n      titre: decodeText(title).slice(0, 140) || platform,\n      extrait: decodeText(desc).slice(0, 280),\n      url: link || fallbackUrl,\n      thumbnail: mediaThumb || enclosure,\n    });\n  }\n  return out;\n}\n\n// ---- Fetch upstream payloads ----\n// Returns the text body of the first item output by the named node, or '' when unavailable.\nfunction safeText(nodeName) {\n  try {\n    const its = $(nodeName).all();\n    if (!its.length || !its[0].json) return '';\n    const j = its[0].json;\n    return String(j.data || j.body || '');\n  } catch (e) {\n    console.log(nodeName + ' missing:', e.message);\n    return '';\n  }\n}\n\nconst giteaXml = safeText('Fetch-gitea');\nconst subXml = safeText('Fetch-substack-natif');\nconst rsshubAepXml = safeText('Fetch-rsshub-insta-aep');\nconst rsshubJnXml = safeText('Fetch-rsshub-insta-julesneny');\n\nfor (const it of parseAtomGitea(giteaXml)) items.push(it);\nfor (const it of parseRss(subXml, 'substack', '#substack', 'https://julesneny.substack.com')) items.push(it);\nfor (const it of parseRss(rsshubAepXml, 'instagram', '#aep-politique', 'https://instagram.com/aep.politique')) items.push(it);\nfor (const it of parseRss(rsshubJnXml, 'instagram', '#peinture', 'https://instagram.com/julesneny')) items.push(it);\n\n// ---- Dedup by id, sort newest first, keep top 100 ----\nconst seen = new Set();\nconst uniq = items.filter((it) => {\n  if (seen.has(it.id)) return false;\n  seen.add(it.id);\n  return true;\n});\nuniq.sort((a, b) => b.date.localeCompare(a.date));\nconst top = uniq.slice(0, 100);\n\nconst counts = {\n  total: top.length,\n  gitea: top.filter((i) => i.platform === 'gitea').length,\n  substack: top.filter((i) => i.platform === 'substack').length,\n  instagram: top.filter((i) => i.platform === 'instagram').length,\n};\n\nreturn [{ json: { generatedAt: new Date().toISOString(), items: top, counts } }];"
      },
      "id": "normalise",
      "name": "Normalise",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [720, 340]
    },
    {
      "parameters": {
        "operation": "toJson",
        "fieldName": "data",
        "options": { "format": true }
      },
      "id": "to-json",
      "name": "To-json-string",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [940, 340]
    },
    {
      "parameters": {
        "operation": "write",
        "fileName": "/home/node/.n8n/journal/journal.json",
        "dataPropertyName": "data",
        "options": {}
      },
      "id": "write-file",
      "name": "Write-journal-json",
      "type": "n8n-nodes-base.readWriteFile",
      "typeVersion": 1,
      "position": [1160, 340]
    }
  ],
  "connections": {
    "Cron-4h-UTC": {
      "main": [[
        { "node": "Fetch-gitea", "type": "main", "index": 0 },
        { "node": "Fetch-substack-natif", "type": "main", "index": 0 },
        { "node": "Fetch-rsshub-insta-aep", "type": "main", "index": 0 },
        { "node": "Fetch-rsshub-insta-julesneny", "type": "main", "index": 0 }
      ]]
    },
    "Fetch-gitea": { "main": [[{ "node": "Normalise", "type": "main", "index": 0 }]] },
    "Fetch-substack-natif": { "main": [[{ "node": "Normalise", "type": "main", "index": 0 }]] },
    "Fetch-rsshub-insta-aep": { "main": [[{ "node": "Normalise", "type": "main", "index": 0 }]] },
    "Fetch-rsshub-insta-julesneny": { "main": [[{ "node": "Normalise", "type": "main", "index": 0 }]] },
    "Normalise": { "main": [[{ "node": "To-json-string", "type": "main", "index": 0 }]] },
    "To-json-string": { "main": [[{ "node": "Write-journal-json", "type": "main", "index": 0 }]] }
  },
  "settings": {
    "executionOrder": "v1",
    "saveExecutionProgress": true,
    "saveManualExecutions": true,
    "timezone": "UTC"
  },
  "tags": [
    { "name": "page-cerveau" },
    { "name": "V1.5-E" }
  ]
}