#!/usr/bin/env python3 """ Verify that every kommune has its full set of screenshots in Azure storage. Reads sales/kommune/*-meta.json to know which kommuner exist and which ones have a real eiendomFokusBilder URL (those need the aerial-photo shot). Output: /tmp/kommune-screenshot-audit.txt per-kommune missing-file list /tmp/kommune-screenshot-bad-slugs.txt slug list ready for re-shoot Usage (after sourcing .env so AZURE_STORAGE_CONNECTION_STRING is set): python3 scripts/kommune/verify-screenshots.py """ from __future__ import annotations import json import os import sys from pathlib import Path REPO = Path(__file__).resolve().parent.parent.parent KOMMUNE_DIR = REPO / 'sales' / 'kommune' # Every kommune is expected to have these. eiendom-fokus-bilder.png is # captured only when the picker found a real cadastre, so it's checked # conditionally below. BASE_SHOTS = [ 'kommune.png', 'kommuneplaner.png', 'trafikk.png', 'trafikkulykker.png', 'natur.png', 'matching.png', 'matching-zoom.png', 'eiendom.png', 'eiendom-fokus.png', 'eiendom-3d.png', 'eiendom-leietagere.png', 'eiendom-leietagere-tabell.png', 'vapen.svg', ] CONDITIONAL_SHOTS = ['eiendom-fokus-bilder.png'] # only when meta has real URL def has_real_aerial_url(meta: dict) -> bool: fb = meta.get('eiendomFokusBilder') or '' match = meta.get('matching') or '' return bool(fb) and fb != match def main() -> int: conn = os.environ.get('AZURE_STORAGE_CONNECTION_STRING') if not conn: print('ERROR: AZURE_STORAGE_CONNECTION_STRING not set; source .env first', file=sys.stderr) return 2 try: from azure.storage.blob import BlobServiceClient except ImportError: print('ERROR: azure-storage-blob not installed.', file=sys.stderr) print(' pip install -r scripts/spellcheck/requirements.txt', file=sys.stderr) return 2 svc = BlobServiceClient.from_connection_string(conn) container = svc.get_container_client('$web') print('Listing all kommune/img/* blobs...') have: dict[str, set[str]] = {} for blob in container.list_blobs(name_starts_with='kommune/img/'): # blob.name e.g. "kommune/img/bardu/eiendom-fokus.png" parts = blob.name.split('/') if len(parts) < 4: continue slug = parts[2] fname = parts[3] have.setdefault(slug, set()).add(fname) print(f' found shots for {len(have)} slugs') metas = sorted(KOMMUNE_DIR.glob('*-meta.json')) audit_lines: list[str] = [] bad_slugs: list[str] = [] missing_total = 0 for meta_path in metas: slug = meta_path.name.removesuffix('-meta.json') try: meta = json.loads(meta_path.read_text()) except Exception: continue expected = list(BASE_SHOTS) if has_real_aerial_url(meta): expected.extend(CONDITIONAL_SHOTS) present = have.get(slug, set()) missing = [s for s in expected if s not in present] if missing: audit_lines.append(f'{slug}: missing {", ".join(missing)}') bad_slugs.append(slug) missing_total += len(missing) audit_path = Path('/tmp/kommune-screenshot-audit.txt') bad_path = Path('/tmp/kommune-screenshot-bad-slugs.txt') audit_path.write_text('\n'.join(audit_lines) + ('\n' if audit_lines else '')) bad_path.write_text(','.join(bad_slugs) + ('\n' if bad_slugs else '')) print() print(f'Total kommuner audited: {len(metas)}') print(f'Kommuner with at least one missing shot: {len(bad_slugs)}') print(f'Missing files (cumulative): {missing_total}') print(f' → {audit_path}') print(f' → {bad_path} (slug list for re-shoot)') if not bad_slugs: return 0 print() print('Top 20 missing files:') for line in audit_lines[:20]: print(f' {line}') if len(audit_lines) > 20: print(f' ... +{len(audit_lines) - 20} more (see audit file)') return 0 if __name__ == '__main__': sys.exit(main())