# Consolidates the five per-family fiche lists (liste-famille-1..5.json) into a
# single seed file, seed-v2-consolide.json:
#   1. load every list and concatenate the fiches;
#   2. de-duplicate on a normalised URL (a fiche with already_in_v1=false
#      replaces a V1 stub that shares its URL);
#   3. flag fiches whose URL was listed under more than one family;
#   4. append the hard-coded V1 entries that are absent from the lists;
#   5. write the fiches plus a meta summary as BOM-less UTF-8 JSON.

$base = "C:/Users/jules/Dropbox/ATIS - IPCJRA/1 PROJETS/TECH - infra VPS, website pro, RAG/nav-carte/V2-cascade/seed-cibles"

# Shared Get-Content options.
# - Encoding UTF8: Windows PowerShell 5.1 otherwise decodes BOM-less files with
#   the ANSI code page, which corrupts accented characters.
# - ErrorAction Stop: a missing list aborts the run instead of silently
#   producing a truncated seed file.
$read_opts = @{ Raw = $true; Encoding = 'UTF8'; ErrorAction = 'Stop' }

# Family 1 wraps its fiches in a "fiches" property; families 2-5 are read as-is.
$f1raw = Get-Content "$base/liste-famille-1.json" @read_opts | ConvertFrom-Json
$f1 = $f1raw.fiches
$f2 = Get-Content "$base/liste-famille-2.json" @read_opts | ConvertFrom-Json
$f3 = Get-Content "$base/liste-famille-3.json" @read_opts | ConvertFrom-Json
$f4 = Get-Content "$base/liste-famille-4.json" @read_opts | ConvertFrom-Json
$f5 = Get-Content "$base/liste-famille-5.json" @read_opts | ConvertFrom-Json

# @($null) is a one-element array, so drop nulls: an empty/missing list must
# not be counted (or written out) as a fiche.
$all = @(@($f1) + @($f2) + @($f3) + @($f4) + @($f5) | Where-Object { $null -ne $_ })
$total_brut = $all.Count
Write-Host "Brut: $total_brut fiches"

# Returns the dedup key for a URL: lower-cased, without the http(s):// scheme,
# without a leading "www.", and without trailing slashes.
# Returns $null when $url is empty or missing.
function Norm($url) {
    if (!$url) { return $null }
    return (([string]$url).ToLower() -replace '^https?://(www\.)?','').TrimEnd('/')
}

# Build dedup map: normUrl -> fiche (richest wins = already_in_v1=false preferred).
# Ordered, so the output keeps first-seen input order; a plain hashtable
# enumerates in an arbitrary order that may change from run to run.
$url_map = [ordered]@{}
$families_map = @{}  # normUrl -> list of famille_principale seen
foreach ($f in $all) {
    $nu = Norm $f.url
    if (-not $nu) { continue }

    # 0 stands for "no main family"; it is recorded here and filtered out later.
    $fp = if ($f.famille_principale) { $f.famille_principale } else { 0 }
    if (-not $families_map.ContainsKey($nu)) {
        $families_map[$nu] = [System.Collections.Generic.List[int]]::new()
    }
    if ($fp -notin $families_map[$nu]) { [void]$families_map[$nu].Add($fp) }

    if ($url_map.Contains($nu)) {
        $existing = $url_map[$nu]
        # Prefer richer (not already_in_v1) over V1 stub
        if ($f.already_in_v1 -eq $false -and $existing.already_in_v1 -eq $true) {
            $url_map[$nu] = $f
        }
    } else {
        $url_map[$nu] = $f
    }
}

# Collect fiches without a usable URL separately. The test mirrors the skip
# condition of the loop above (empty *normalised* URL); testing the raw url
# would lose fiches such as url="https://", which normalise to an empty key.
$null_url_fiches = @($all | Where-Object { -not (Norm $_.url) })

# Build final list with multi_famille flags
$deduped = [System.Collections.ArrayList]::new()
$n_dup = $total_brut - $url_map.Count - $null_url_fiches.Count
foreach ($nu in $url_map.Keys) {
    $f = $url_map[$nu]
    $fams = @($families_map[$nu] | Where-Object { $_ -gt 0 } | Sort-Object -Unique)
    $main = if ($f.famille_principale) { $f.famille_principale } else { 0 }
    $secondaires = @($fams | Where-Object { $_ -ne $main })
    $is_multi = $secondaires.Count -gt 0

    # Add properties. Best effort: a failure is reported but does not stop the run.
    try {
        $f | Add-Member -Force NoteProperty multi_famille $is_multi -ErrorAction Stop
    } catch {
        Write-Warning "multi_famille non defini pour '$nu': $_"
    }
    try {
        # ConvertFrom-Json yields Int32 on Windows PowerShell 5.1 but Int64 on
        # PowerShell 6+, so accept both and normalise to [int].
        $existing_sug = if ($f.familles_secondaires_suggested) {
            @($f.familles_secondaires_suggested |
                Where-Object { $_ -is [int] -or $_ -is [long] } |
                ForEach-Object { [int]$_ })
        } else {
            @()
        }
        $merged_sug = @($secondaires + $existing_sug | Sort-Object -Unique)
        $f | Add-Member -Force NoteProperty familles_secondaires_suggested $merged_sug -ErrorAction Stop
    } catch {
        Write-Warning "familles_secondaires_suggested non defini pour '$nu': $_"
    }
    [void]$deduped.Add($f)
}
foreach ($f in $null_url_fiches) { [void]$deduped.Add($f) }

# Add missing V1 entries (F4 + White Arkitekter)
$missing_v1 = @(
    [PSCustomObject]@{
        id="f4-tepop-v1"; nom="Tepop"; url="https://tepop.fr"
        famille_principale=4; famille_principale_label="Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested=@(3)
        hashtags=@("#scop-scic","#collectif-horizontal","#participation-usagers","#amo-bifurcation")
        type_juridique="SCOP"; already_in_v1=$true; v1_id=7; multi_famille=$true
        email_contact="tepop.asso@gmail.com"; email_source="scrape_pv2-1"; email_confidence="high"
        badge_centre_ressources_suggested=$false; pays="FR"; ville="Bordeaux"
    },
    [PSCustomObject]@{
        id="f4-lacol-v1"; nom="Lacol Arquitectura Cooperativa"; url="https://lacol.coop"
        famille_principale=4; famille_principale_label="Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested=@()
        hashtags=@("#scop-scic","#habitat-cooperatif","#collectif-horizontal")
        type_juridique="cooperative"; already_in_v1=$true; v1_id=14; multi_famille=$false
        email_contact=$null
        badge_centre_ressources_suggested=$false; pays="ES"; ville="Barcelone"
    },
    [PSCustomObject]@{
        id="f4-einszueins-v1"; nom="einszueins architektur"; url="https://einszueins.at"
        famille_principale=4; famille_principale_label="Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested=@(3)
        hashtags=@("#baugruppe-cohousing","#habitat-participatif","#gouvernance-explicite")
        type_juridique="agence"; already_in_v1=$true; v1_id=34; multi_famille=$true
        email_contact=$null
        badge_centre_ressources_suggested=$false; pays="AT"; ville="Vienne"
    },
    [PSCustomObject]@{
        id="f4-diverserighe-v1"; nom="Diverserighestudio"; url="https://diverserighestudio.it"
        famille_principale=4; famille_principale_label="Collectifs ecolieux et AMO bifurcation"
        familles_secondaires_suggested=@()
        hashtags=@("#habitat-participatif","#baugruppe-cohousing")
        type_juridique="agence"; already_in_v1=$true; v1_id=39; multi_famille=$false
        email_contact=$null
        badge_centre_ressources_suggested=$false; pays="IT"; ville="Bologne"
    },
    [PSCustomObject]@{
        id="f2-white-arkitekter-v1"; nom="White Arkitekter"; url="https://whitearkitekter.com"
        famille_principale=2; famille_principale_label="Frugalite et low-tech"
        familles_secondaires_suggested=@()
        hashtags=@("#sobriete-energetique","#bois-local")
        type_juridique="agence"; already_in_v1=$true; v1_id=29; multi_famille=$false
        email_contact=$null
        badge_centre_ressources_suggested=$false; pays="SE"; ville="Stockholm"
    }
)
# Only append a V1 entry when no loaded fiche already covers its URL.
foreach ($mv in $missing_v1) {
    $nu = Norm $mv.url
    if (-not $url_map.Contains($nu)) {
        [void]$deduped.Add($mv)
    }
}

$total_consolide = $deduped.Count
$n_multi = @($deduped | Where-Object { $_.multi_famille -eq $true }).Count
$n_centres = @($deduped | Where-Object { $_.badge_centre_ressources_suggested -eq $true }).Count
Write-Host "Consolide: $total_consolide | Doublons fusionnes: $n_dup | Multi-famille: $n_multi | Centres ressources: $n_centres"

$meta = [PSCustomObject]@{
    total_fiches_brut = $total_brut
    total_fiches_consolide = $total_consolide
    doublons_fusionnes = $n_dup
    multi_famille = $n_multi
    centres_ressources_flag = $n_centres
    date = "2026-05-02"
    sources = @("liste-famille-1.json","liste-famille-2.json","liste-famille-3.json","liste-famille-4.json","liste-famille-5.json")
    notes = "PV2-2bis. 5 V1 absents ajoutes: Tepop, Lacol, einszueins, Diverserighestudio, White Arkitekter."
}
$output = [PSCustomObject]@{
    meta = $meta
    fiches = @($deduped)
}

$json = $output | ConvertTo-Json -Depth 10
# UTF8Encoding($false) = no byte-order mark. [System.Text.Encoding]::UTF8 would
# prepend a BOM, which strict JSON parsers reject.
[System.IO.File]::WriteAllText("$base/seed-v2-consolide.json", $json, [System.Text.UTF8Encoding]::new($false))
Write-Host "seed-v2-consolide.json ecrit."