wip: snapshot V2 cascade onglet 2 (sauvegarde avant chirurgie git-hygiene)

This commit is contained in:
Jules Neny
2026-05-06 15:37:13 +02:00
parent 5878c56888
commit e63d02a351
101 changed files with 188900 additions and 3959 deletions

View File

@@ -0,0 +1,111 @@
# Folder that holds the five per-family seed lists read below; the consolidated
# seed file is written to the same folder at the end of the script.
$base = "C:/Users/jules/Dropbox/ATIS - IPCJRA/1 PROJETS/TECH - infra VPS, website pro, RAG/nav-carte/V2-cascade/seed-cibles"
# Every list is read explicitly as UTF-8: without -Encoding, Windows PowerShell 5.1
# decodes BOM-less files with the ANSI code page and corrupts accented characters.
# -ErrorAction Stop aborts on a missing/unreadable file instead of silently
# consolidating a partial data set.
# Family 1 is read through its "fiches" property; families 2-5 are used as parsed.
$f1raw = Get-Content "$base/liste-famille-1.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f1 = $f1raw.fiches
$f2 = Get-Content "$base/liste-famille-2.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f3 = Get-Content "$base/liste-famille-3.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f4 = Get-Content "$base/liste-famille-4.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f5 = Get-Content "$base/liste-famille-5.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
# Concatenate all families. Null placeholders are dropped because @($null) is a
# one-element array: an empty source would otherwise add a null "fiche" that
# inflates the raw count and ends up in the output.
$all = @(@($f1) + @($f2) + @($f3) + @($f4) + @($f5) | Where-Object { $null -ne $_ })
$total_brut = $all.Count
Write-Host "Brut: $total_brut fiches"
# Reduce a URL to its deduplication key: lower-cased, with the leading
# "http://" or "https://" scheme and an optional "www." prefix removed, and
# without trailing slashes. Returns $null when $url is null or empty.
function Norm($url) {
    if (!$url) { return $null }
    # ToLowerInvariant rather than ToLower: the key must not depend on the
    # machine's culture (Turkic cultures lower-case "I" to a dotless "i").
    return ($url.ToLowerInvariant() -replace '^https?://(www\.)?','').TrimEnd('/')
}
# Deduplicate the raw fiches by normalised URL.
#   $url_map      : key -> the fiche kept for that URL. The first fiche seen is
#                   kept unless a later one has already_in_v1 = $false while the
#                   stored one has already_in_v1 = $true.
#   $families_map : key -> distinct famille_principale values seen for that URL.
$url_map = @{}
$families_map = @{}
foreach ($fiche in $all) {
    $key = Norm $fiche.url
    # A fiche whose URL yields no key is not entered in either map.
    if ($key) {
        # 0 stands in for a missing famille_principale.
        $famille = if ($fiche.famille_principale) { $fiche.famille_principale } else { 0 }

        if (-not $families_map.ContainsKey($key)) {
            $families_map[$key] = [System.Collections.Generic.List[int]]::new()
        }
        $seen = $families_map[$key]
        if ($seen -notcontains $famille) {
            [void]$seen.Add($famille)
        }

        if (-not $url_map.ContainsKey($key)) {
            $url_map[$key] = $fiche
        }
        elseif ($fiche.already_in_v1 -eq $false -and $url_map[$key].already_in_v1 -eq $true) {
            # The non-V1 fiche replaces the V1 stub stored earlier.
            $url_map[$key] = $fiche
        }
    }
}
# Collect the fiches that have no usable URL key. The test is the same one the
# dedup loop uses to skip a fiche (Norm returns nothing), so a URL that
# normalises to an empty string (e.g. "https://" or "/") is kept here instead of
# disappearing from the output and being counted as a merged duplicate.
$null_url_fiches = @($all | Where-Object { -not (Norm $_.url) })
# Build the final list: one fiche per deduplicated URL, flagged with
# multi_famille and its merged familles_secondaires_suggested, followed by the
# fiches that have no URL.
$deduped = [System.Collections.ArrayList]::new()
# Merged duplicates = raw fiches that are neither a kept URL nor a no-URL fiche.
$n_dup = $total_brut - $url_map.Count - $null_url_fiches.Count
# Hashtable key order is unspecified, so the keys are sorted to keep the order
# of the generated file stable from one run to the next.
foreach ($nu in @($url_map.Keys | Sort-Object)) {
    $f = $url_map[$nu]
    # Families seen for this URL; 0 is the "no famille_principale" placeholder.
    $fams = @($families_map[$nu] | Where-Object { $_ -gt 0 } | Sort-Object -Unique)
    $main = if ($f.famille_principale) { $f.famille_principale } else { 0 }
    $secondaires = @($fams | Where-Object { $_ -ne $main })
    $is_multi = $secondaires.Count -gt 0
    # Add properties. A failure is reported instead of being silently ignored;
    # the fiche is still added to the output.
    try {
        $f | Add-Member -Force NoteProperty multi_famille $is_multi
    } catch {
        Write-Warning "Echec ajout multi_famille pour '$nu': $($_.Exception.Message)"
    }
    try {
        $existing_sug = @()
        if ($f.familles_secondaires_suggested) {
            # ConvertFrom-Json yields Int32 in Windows PowerShell 5.1 but Int64 in
            # PowerShell 7, so both are accepted and normalised to [int]; testing
            # only "-is [int]" would drop every existing suggestion on PowerShell 7.
            $existing_sug = @(
                $f.familles_secondaires_suggested |
                    Where-Object { $_ -is [int] -or $_ -is [long] } |
                    ForEach-Object { [int]$_ }
            )
        }
        $merged_sug = @($secondaires + $existing_sug | Sort-Object -Unique)
        $f | Add-Member -Force NoteProperty familles_secondaires_suggested $merged_sug
    } catch {
        Write-Warning "Echec fusion familles_secondaires_suggested pour '$nu': $($_.Exception.Message)"
    }
    [void]$deduped.Add($f)
}
# Fiches without a URL cannot be deduplicated; they are appended unchanged.
foreach ($f in $null_url_fiches) {
    [void]$deduped.Add($f)
}
# Hard-coded V1 entries (four in family 4, plus White Arkitekter in family 2).
# Each one is appended only when its normalised URL is not already a key of $url_map.
$missing_v1 = @(
    [PSCustomObject]@{
        id = "f4-tepop-v1"
        nom = "Tepop"
        url = "https://tepop.fr"
        famille_principale = 4
        famille_principale_label = "Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested = @(3)
        hashtags = @("#scop-scic","#collectif-horizontal","#participation-usagers","#amo-bifurcation")
        type_juridique = "SCOP"
        already_in_v1 = $true
        v1_id = 7
        multi_famille = $true
        email_contact = "tepop.asso@gmail.com"
        email_source = "scrape_pv2-1"
        email_confidence = "high"
        badge_centre_ressources_suggested = $false
        pays = "FR"
        ville = "Bordeaux"
    },
    [PSCustomObject]@{
        id = "f4-lacol-v1"
        nom = "Lacol Arquitectura Cooperativa"
        url = "https://lacol.coop"
        famille_principale = 4
        famille_principale_label = "Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested = @()
        hashtags = @("#scop-scic","#habitat-cooperatif","#collectif-horizontal")
        type_juridique = "cooperative"
        already_in_v1 = $true
        v1_id = 14
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "ES"
        ville = "Barcelone"
    },
    [PSCustomObject]@{
        id = "f4-einszueins-v1"
        nom = "einszueins architektur"
        url = "https://einszueins.at"
        famille_principale = 4
        famille_principale_label = "Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested = @(3)
        hashtags = @("#baugruppe-cohousing","#habitat-participatif","#gouvernance-explicite")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 34
        multi_famille = $true
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "AT"
        ville = "Vienne"
    },
    [PSCustomObject]@{
        id = "f4-diverserighe-v1"
        nom = "Diverserighestudio"
        url = "https://diverserighestudio.it"
        famille_principale = 4
        famille_principale_label = "Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested = @()
        hashtags = @("#habitat-participatif","#baugruppe-cohousing")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 39
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "IT"
        ville = "Bologne"
    },
    [PSCustomObject]@{
        id = "f2-white-arkitekter-v1"
        nom = "White Arkitekter"
        url = "https://whitearkitekter.com"
        famille_principale = 2
        famille_principale_label = "Frugalite et low-tech"
        familles_secondaires_suggested = @()
        hashtags = @("#sobriete-energetique","#bois-local")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 29
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "SE"
        ville = "Stockholm"
    }
)
foreach ($entry in $missing_v1) {
    # Skip an entry whose URL was already present in the family lists.
    if ($url_map.ContainsKey((Norm $entry.url))) { continue }
    [void]$deduped.Add($entry)
}
# Summary counters over the consolidated list, computed in a single pass.
$total_consolide = $deduped.Count
$n_multi = 0
$n_centres = 0
foreach ($fiche in $deduped) {
    if ($fiche.multi_famille -eq $true) { $n_multi++ }
    if ($fiche.badge_centre_ressources_suggested -eq $true) { $n_centres++ }
}
Write-Host "Consolide: $total_consolide | Doublons fusionnes: $n_dup | Multi-famille: $n_multi | Centres ressources: $n_centres"

# Metadata stored next to the fiches in the generated file.
$meta = [PSCustomObject]@{
    total_fiches_brut = $total_brut
    total_fiches_consolide = $total_consolide
    doublons_fusionnes = $n_dup
    multi_famille = $n_multi
    centres_ressources_flag = $n_centres
    date = "2026-05-02"
    sources = @(
        "liste-famille-1.json",
        "liste-famille-2.json",
        "liste-famille-3.json",
        "liste-famille-4.json",
        "liste-famille-5.json"
    )
    notes = "PV2-2bis. 5 V1 absents ajoutes: Tepop, Lacol, einszueins, Diverserighestudio, White Arkitekter."
}
$output = [PSCustomObject]@{
    meta = $meta
    fiches = @($deduped)
}

# Serialise (nesting depth 10) and write the result as UTF-8 into the seed folder.
$json = ConvertTo-Json -InputObject $output -Depth 10
$seed_path = "$base/seed-v2-consolide.json"
[System.IO.File]::WriteAllText($seed_path, $json, [System.Text.Encoding]::UTF8)
Write-Host "seed-v2-consolide.json ecrit."