#!/usr/bin/env bash
# Wholesale re-shoot of kommuner whose meta.json still has old s=2400
# matching-zoom URLs OR fallback eiendomFokus URLs (eiendomFokus ==
# matching). Runs sharded; after every BATCH_SIZE kommuner finish, it
# regenerates affected mdx, uploads images, and deploys the site so
# the live pages update incrementally rather than in one big swap at
# the end.
#
# Designed to run unattended on the same beefy machine as
# finish-followup.sh; total wall clock for ~330 kommuner with
# WORKERS=8 is ~2-3 h.
#
# Usage:
#   scripts/kommune/wholesale-polish.sh [num-workers] [batch-size]
#
# Defaults: WORKERS=8, BATCH_SIZE=30 (every ~30 kommuner = redeploy).
#
# Logs: /tmp/wholesale-polish.stage.log + per-shard logs under /tmp/.

# No -e on purpose: shard/deploy failures are logged and the unattended
# run keeps going; critical preconditions are checked explicitly below.
set -uo pipefail

REPO="$(cd "$(dirname "$0")/../.." && pwd)"
WORKERS="${1:-8}"
BATCH_SIZE="${2:-30}"

# Both must be positive integers: BATCH_SIZE feeds an awk "% n"
# expression later, so 0 (or garbage) would be a division by zero.
if ! [[ "$WORKERS" =~ ^[1-9][0-9]*$ && "$BATCH_SIZE" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: num-workers and batch-size must be positive integers (got '$WORKERS', '$BATCH_SIZE')" >&2
  exit 1
fi

STAGE_LOG=/tmp/wholesale-polish.stage.log
echo "$(date): wholesale-polish started (WORKERS=$WORKERS, BATCH_SIZE=$BATCH_SIZE)" > "$STAGE_LOG"

# .env supplies AZURE_STORAGE_CONNECTION_STRING for the image upload;
# fail with a clear message rather than a raw 'source' error.
if [[ ! -f "$REPO/.env" ]]; then
  echo "ERROR: $REPO/.env not found" | tee -a "$STAGE_LOG" >&2
  exit 1
fi
source "$REPO/.env"
if [[ -z "${AZURE_STORAGE_CONNECTION_STRING:-}" ]]; then
  echo "ERROR: AZURE_STORAGE_CONNECTION_STRING not set in .env" | tee -a "$STAGE_LOG" >&2
  exit 1
fi

# --- Step A: identify slugs needing polish ---
SLUGS_FILE=$(mktemp -t polish-slugs.XXXXXX)
trap 'rm -f "$SLUGS_FILE"' EXIT

# Pass $REPO explicitly: this step runs before the "cd $REPO" below, so
# a cwd-relative glob would silently match nothing (and stage zero
# kommuner) whenever the script is invoked from outside the repo root.
python3 - "$SLUGS_FILE" "$REPO" <<'PY' 2>&1 | tee -a "$STAGE_LOG"
import glob, json, os, sys
out, repo = sys.argv[1], sys.argv[2]
slugs = []
for p in sorted(glob.glob(os.path.join(repo, 'sales', 'kommune', '*-meta.json'))):
    with open(p) as f:
        m = json.load(f)
    fokus = m.get('eiendomFokus', '')
    matching = m.get('matching', '')
    zoom = m.get('matchingZoom', '')
    # Old-style zoom shot: still on s=2400, never re-captured at s=20000.
    needs_zoom = 's=2400' in zoom and 's=20000' not in zoom
    # Fallback cadastre shot: eiendomFokus is just a copy of matching.
    needs_eiendom = bool(fokus) and fokus == matching
    if needs_zoom or needs_eiendom:
        slugs.append(os.path.basename(p).removesuffix('-meta.json'))  # py3.9+
with open(out, 'w') as f:
    # One slug per line WITH a trailing newline: '\n'.join leaves the
    # last line unterminated, which makes the later "wc -l" undercount.
    f.writelines(s + '\n' for s in slugs)
print(f'Step A: {len(slugs)} kommuner staged for re-shoot')
PY

cd "$REPO" || { echo "ERROR: cannot cd to $REPO" | tee -a "$STAGE_LOG" >&2; exit 1; }
# grep -c . counts non-empty lines, so the total is correct even if the
# slugs file lacks a trailing newline (wc -l would undercount by one
# and report "nothing to polish" when exactly one kommune is staged).
TOTAL=$(grep -c . "$SLUGS_FILE")
if (( TOTAL == 0 )); then
  echo "$(date): nothing to polish - all kommuner already on s=20000 + real cadastre" | tee -a "$STAGE_LOG"
  exit 0
fi

echo "$(date): processing $TOTAL kommuner in batches of $BATCH_SIZE" | tee -a "$STAGE_LOG"

# --- Step B: re-shoot in batches with incremental deploys ---
batch_idx=0
while read -r batch_csv; do
  [[ -z "$batch_csv" ]] && continue
  batch_idx=$((batch_idx + 1))
  IFS=',' read -ra batch_slugs <<< "$batch_csv"
  echo "$(date): === Batch $batch_idx (${#batch_slugs[@]} kommuner): $batch_csv" | tee -a "$STAGE_LOG"

  # Re-shoot this batch (skip kartlag - already captured for everyone).
  # Each child command gets its stdin redirected from /dev/null so
  # nothing consumes the batch stream the outer while-read loop is
  # iterating - npm run deploy in particular reads stdin during the
  # az afd purge confirmation, which would silently swallow the next
  # batch line and end the loop one batch in.
  STAGGER=5 SKIP_KARTLAG=1 \
    "$REPO/scripts/tour/run-kommune-shards.sh" "$WORKERS" "$batch_csv" \
    < /dev/null 2>&1 | tee -a "$STAGE_LOG"

  # Regen mdx for just the slugs in this batch. gen-kommune-pages.ts
  # is idempotent - it only writes mdx files that don't already exist -
  # so delete the affected mdx first to force regeneration.
  for slug in "${batch_slugs[@]}"; do
    rm -f "$REPO/sales/kommune/$slug.mdx"
  done
  (cd "$REPO/scripts" && npx tsx tour/gen-kommune-pages.ts) < /dev/null 2>&1 | tee -a "$STAGE_LOG"

  # Touch the just-shot screenshots so azcopy sync detects them as
  # newer than the blob (azcopy compares mtime, and a previous deploy
  # may have set the blob LMT to a time later than the local file).
  for slug in "${batch_slugs[@]}"; do
    [[ -d "$REPO/sales/kommune/img/$slug" ]] && \
      find "$REPO/sales/kommune/img/$slug" -type f -name '*.png' -exec touch {} +
  done

  "$REPO/scripts/kommune/upload-images-only.sh" < /dev/null 2>&1 | tee -a "$STAGE_LOG"
  (cd "$REPO/site" && SKIP_SCREENSHOTS=1 npm run deploy) < /dev/null 2>&1 | tee -a "$STAGE_LOG"
  remaining=$(( TOTAL - batch_idx * BATCH_SIZE ))
  (( remaining < 0 )) && remaining=0   # last batch may be partial
  echo "$(date): === Batch $batch_idx deployed; $remaining kommuner remaining" | tee -a "$STAGE_LOG"
done < <(
  # Fold every n lines into one comma-separated batch. "(NR-1) % n == 0"
  # starts a batch on lines 1, n+1, 2n+1, ... and - unlike the tempting
  # "NR % n == 1" - also works for n=1, where x % 1 is always 0 and the
  # start-of-batch rule would otherwise never fire (producing one giant
  # batch with a leading comma). The END guard skips empty input.
  awk -v n="$BATCH_SIZE" \
    '(NR-1) % n == 0 { if (NR > 1) print buf; buf = $0; next } { buf = buf "," $0 } END { if (NR > 0) print buf }' \
    "$SLUGS_FILE"
)

# --- Step C: final verify ---
echo "$(date): final verify-screenshots.py" | tee -a "$STAGE_LOG"
PY_BIN="$REPO/scripts/spellcheck/.venv/bin/python"
if [[ -x "$PY_BIN" ]]; then
  "$PY_BIN" "$REPO/scripts/kommune/verify-screenshots.py" 2>&1 | tee -a "$STAGE_LOG"
else
  # Don't fail the unattended run over a missing venv, but make the
  # skipped verification visible instead of silently dropping it.
  echo "WARN: $PY_BIN not found/executable - skipping final verify" | tee -a "$STAGE_LOG" >&2
fi

echo "$(date): wholesale-polish complete" | tee -a "$STAGE_LOG"
