# PowerShell script (112 lines, 6.2 KiB)
# Folder that holds the five per-family seed lists.
$base = "C:/Users/jules/Dropbox/ATIS - IPCJRA/1 PROJETS/TECH - infra VPS, website pro, RAG/nav-carte/V2-cascade/seed-cibles"

# Read every list explicitly as UTF-8: without -Encoding, Windows PowerShell 5.1
# decodes BOM-less files with the system ANSI code page, which corrupts
# non-ASCII text. -ErrorAction Stop raises a terminating error when a file
# cannot be read instead of letting a $null entry flow into $all.
# The family-1 file keeps its entries under a "fiches" property; the other
# four files are used as returned by ConvertFrom-Json.
$f1raw = Get-Content "$base/liste-famille-1.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f1 = $f1raw.fiches
$f2 = Get-Content "$base/liste-famille-2.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f3 = Get-Content "$base/liste-famille-3.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f4 = Get-Content "$base/liste-famille-4.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
$f5 = Get-Content "$base/liste-famille-5.json" -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json

# Concatenate the five lists; @() makes each operand an array even when a
# file holds a single entry.
$all = @($f1) + @($f2) + @($f3) + @($f4) + @($f5)
$total_brut = $all.Count
Write-Host "Brut: $total_brut fiches"
# Turn a URL into a comparison key: lower-case it, strip a leading
# "http://" or "https://" (plus an optional "www."), and drop trailing slashes.
# Returns $null when $url is null or empty.
function Norm($url) {
    if (!$url) { return $null }
    # Invariant casing keeps the key independent of the session culture
    # (under tr-TR, ToLower() maps "I" to a dotless i and keys stop matching).
    return ($url.ToLowerInvariant() -replace '^https?://(www\.)?','').TrimEnd('/')
}
# Deduplicate by normalized URL.
#   $url_map      : normalized URL -> the fiche kept for that URL
#   $families_map : normalized URL -> distinct famille_principale values seen
#                   (0 stands for "no famille_principale")
# The first fiche seen for a URL is kept, unless it is flagged
# already_in_v1 = true and a later one is flagged already_in_v1 = false.
$url_map = @{}
$families_map = @{}

foreach ($fiche in $all) {
    $key = Norm $fiche.url
    if (-not $key) { continue }   # no usable URL: not part of the dedup map

    $famille = 0
    if ($fiche.famille_principale) { $famille = $fiche.famille_principale }

    if (-not $families_map.ContainsKey($key)) {
        $families_map[$key] = [System.Collections.Generic.List[int]]::new()
    }
    if ($famille -notin $families_map[$key]) {
        [void]$families_map[$key].Add($famille)
    }

    if (-not $url_map.ContainsKey($key)) {
        # First time this URL shows up.
        $url_map[$key] = $fiche
        continue
    }

    # Duplicate URL: replace the kept fiche only when it is a V1 entry and
    # the new one is not.
    $kept = $url_map[$key]
    if ($fiche.already_in_v1 -eq $false -and $kept.already_in_v1 -eq $true) {
        $url_map[$key] = $fiche
    }
}
# Collect every fiche that the dedup loop skips, using the same test the loop
# uses (the normalized URL is empty). Testing the raw url instead would miss
# values such as "https://" that normalize to an empty string: those fiches
# would vanish from the output and be counted as merged duplicates.
$null_url_fiches = @($all | Where-Object { -not (Norm $_.url) })
# Build the final list and stamp each kept fiche with:
#   multi_famille                  : $true when the same URL was seen under
#                                    another famille_principale
#   familles_secondaires_suggested : those other families merged with the
#                                    integer suggestions the fiche already had
$deduped = [System.Collections.ArrayList]::new()

# Merged duplicates = raw total - unique URLs - fiches without a usable URL.
$n_dup = $total_brut - $url_map.Count - $null_url_fiches.Count

# Hashtable enumeration order is unspecified; walking the keys in sorted order
# makes the generated file reproducible from run to run.
foreach ($nu in @($url_map.Keys | Sort-Object)) {
    $f = $url_map[$nu]
    # Families seen for this URL, ignoring the 0 placeholder.
    $fams = @($families_map[$nu] | Where-Object { $_ -gt 0 } | Sort-Object -Unique)
    $main = if ($f.famille_principale) { $f.famille_principale } else { 0 }
    $secondaires = @($fams | Where-Object { $_ -ne $main })

    $is_multi = $secondaires.Count -gt 0

    try {
        $f | Add-Member -Force NoteProperty multi_famille $is_multi
    } catch {
        # Keep going, but do not hide the failure.
        Write-Warning "multi_famille not set for '$nu': $_"
    }

    try {
        # Keep only integer suggestions. ConvertFrom-Json yields [int] on
        # Windows PowerShell 5.1 but [long] on PowerShell 6+, so accept both
        # and cast to [int] so Sort-Object -Unique compares one type.
        $existing_sug = @()
        if ($f.familles_secondaires_suggested) {
            $existing_sug = @(
                $f.familles_secondaires_suggested |
                    Where-Object { $_ -is [int] -or $_ -is [long] } |
                    ForEach-Object { [int]$_ }
            )
        }
        $merged_sug = @($secondaires + $existing_sug | Sort-Object -Unique)
        $f | Add-Member -Force NoteProperty familles_secondaires_suggested $merged_sug
    } catch {
        Write-Warning "familles_secondaires_suggested not set for '$nu': $_"
    }

    [void]$deduped.Add($f)
}
# Fiches without a URL cannot be deduplicated; append them unchanged.
for ($i = 0; $i -lt $null_url_fiches.Count; $i++) {
    [void]$deduped.Add($null_url_fiches[$i])
}
# V1 entries to add by hand (four family-4 entries plus White Arkitekter).
# Each one is appended only when its normalized URL is not already a key of
# $url_map.
$label_f4 = "Collectifs ecolieux et AMO bifurcation"

$missing_v1 = @(
    [PSCustomObject]@{
        id = "f4-tepop-v1"
        nom = "Tepop"
        url = "https://tepop.fr"
        famille_principale = 4
        famille_principale_label = $label_f4
        familles_secondaires_suggested = @(3)
        hashtags = @("#scop-scic","#collectif-horizontal","#participation-usagers","#amo-bifurcation")
        type_juridique = "SCOP"
        already_in_v1 = $true
        v1_id = 7
        multi_famille = $true
        email_contact = "tepop.asso@gmail.com"
        email_source = "scrape_pv2-1"
        email_confidence = "high"
        badge_centre_ressources_suggested = $false
        pays = "FR"
        ville = "Bordeaux"
    },
    [PSCustomObject]@{
        id = "f4-lacol-v1"
        nom = "Lacol Arquitectura Cooperativa"
        url = "https://lacol.coop"
        famille_principale = 4
        famille_principale_label = $label_f4
        familles_secondaires_suggested = @()
        hashtags = @("#scop-scic","#habitat-cooperatif","#collectif-horizontal")
        type_juridique = "cooperative"
        already_in_v1 = $true
        v1_id = 14
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "ES"
        ville = "Barcelone"
    },
    [PSCustomObject]@{
        id = "f4-einszueins-v1"
        nom = "einszueins architektur"
        url = "https://einszueins.at"
        famille_principale = 4
        famille_principale_label = $label_f4
        familles_secondaires_suggested = @(3)
        hashtags = @("#baugruppe-cohousing","#habitat-participatif","#gouvernance-explicite")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 34
        multi_famille = $true
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "AT"
        ville = "Vienne"
    },
    [PSCustomObject]@{
        id = "f4-diverserighe-v1"
        nom = "Diverserighestudio"
        url = "https://diverserighestudio.it"
        famille_principale = 4
        famille_principale_label = $label_f4
        familles_secondaires_suggested = @()
        hashtags = @("#habitat-participatif","#baugruppe-cohousing")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 39
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "IT"
        ville = "Bologne"
    },
    [PSCustomObject]@{
        id = "f2-white-arkitekter-v1"
        nom = "White Arkitekter"
        url = "https://whitearkitekter.com"
        famille_principale = 2
        famille_principale_label = "Frugalite et low-tech"
        familles_secondaires_suggested = @()
        hashtags = @("#sobriete-energetique","#bois-local")
        type_juridique = "agence"
        already_in_v1 = $true
        v1_id = 29
        multi_famille = $false
        email_contact = $null
        badge_centre_ressources_suggested = $false
        pays = "SE"
        ville = "Stockholm"
    }
)

foreach ($entry in $missing_v1) {
    $key = Norm $entry.url
    if ($url_map.ContainsKey($key)) { continue }   # already covered by a loaded fiche
    [void]$deduped.Add($entry)
}
# Totals for the console summary and the meta block: size of the final list,
# number of fiches flagged multi_famille, and number flagged
# badge_centre_ressources_suggested.
$total_consolide = $deduped.Count

$n_multi = 0
$n_centres = 0
foreach ($item in $deduped) {
    if ($item.multi_famille -eq $true) { $n_multi++ }
    if ($item.badge_centre_ressources_suggested -eq $true) { $n_centres++ }
}

Write-Host "Consolide: $total_consolide | Doublons fusionnes: $n_dup | Multi-famille: $n_multi | Centres ressources: $n_centres"
# Header of the generated document: run counters, a fixed date stamp, the
# list of source files and a free-text note. An ordered dictionary keeps the
# properties in the order they are added.
$meta_fields = [ordered]@{}
$meta_fields['total_fiches_brut'] = $total_brut
$meta_fields['total_fiches_consolide'] = $total_consolide
$meta_fields['doublons_fusionnes'] = $n_dup
$meta_fields['multi_famille'] = $n_multi
$meta_fields['centres_ressources_flag'] = $n_centres
$meta_fields['date'] = "2026-05-02"
$meta_fields['sources'] = @("liste-famille-1.json","liste-famille-2.json","liste-famille-3.json","liste-famille-4.json","liste-famille-5.json")
$meta_fields['notes'] = "PV2-2bis. 5 V1 absents ajoutes: Tepop, Lacol, einszueins, Diverserighestudio, White Arkitekter."
$meta = [PSCustomObject]$meta_fields

# Root object: the meta header followed by the consolidated fiches as an array.
$output_fields = [ordered]@{}
$output_fields['meta'] = $meta
$output_fields['fiches'] = @($deduped)
$output = [PSCustomObject]$output_fields
# Serialize the root object and write it next to the source lists.
$json = $output | ConvertTo-Json -Depth 10

# [System.Text.Encoding]::UTF8 makes WriteAllText prepend a byte-order mark,
# which JSON files should not carry and which some parsers reject.
# UTF8Encoding($false) writes the same UTF-8 bytes without the BOM.
$utf8_no_bom = [System.Text.UTF8Encoding]::new($false)
[System.IO.File]::WriteAllText("$base/seed-v2-consolide.json", $json, $utf8_no_bom)

Write-Host "seed-v2-consolide.json ecrit."