#!/bin/bash
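# run_benchmark_comprehensive.sh
#
# Benchmarks four ways of maintaining an invoice summary (baseline with
# triggers off, full refresh, partial refresh, materialized table) by driving
# pgbench at several data scales, batch sizes and client counts. Writes one CSV
# row per run, prints a table of TPS relative to the baseline, and, when
# python3 is available, renders a graph under graphs/.
#
# Expects setup.sql, workload_insert.sql, workload_update.sql and
# workload_delete.sql in the current directory.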

# Abort the script as soon as a command fails outside a conditional context.
set -e

# Database that every psql and pgbench invocation below connects to.
DB_NAME="testdb"
# Length of each pgbench run in seconds (passed to pgbench as -T).
DURATION_SECS=20
# CSV file that receives the header plus one row per benchmark run.
RESULTS_FILE="benchmark_results.csv"
# Space-separated client counts; each value is used for both pgbench -c and -j.
CONCURRENCY_LEVELS="1 4 8"
# Space-separated invoice counts; each is passed to mv_benchmark.generate_data()
# and to the workload scripts as the pgbench variable "scale".
SCALE_FACTORS="20000 400000"
# Space-separated values passed to the workload scripts as the pgbench
# variable "batch_size".
BATCH_SIZES="1 50 1000"

# Pre-flight check: refuse to start unless the target database exists.
# A failure to list databases at all (server down, authentication error) is
# reported separately so it is not mistaken for a missing database.
if ! db_list=$(psql -lqtA); then
    echo "ERROR: Could not list databases with 'psql -l'." >&2
    exit 1
fi
# -A produces unaligned "name|owner|..." rows, so field 1 is the bare database
# name and can be matched literally (-F) against the whole line (-x) instead of
# as a word inside padded output.
if ! cut -d '|' -f 1 <<< "$db_list" | grep -Fxq -- "$DB_NAME"; then
    echo "ERROR: Database '$DB_NAME' does not exist." >&2
    exit 1
fi

# Start a fresh results file containing only the CSV column header.
printf '%s\n' "strategy,scale,batch_size,clients,tps,avg_latency_ms" > "$RESULTS_FILE"

# Banner summarising the run configuration.
printf '%s\n' \
    "MV BENCHMARK: Baseline vs Strategies" \
    "DB: $DB_NAME | Duration: ${DURATION_SECS}s | Clients: $CONCURRENCY_LEVELS"

#######################################
# VACUUM ANALYZE the four mv_benchmark relations and reset the statistics
# counters, printing a single "  Vacuuming... done" progress line.
# Globals:   DB_NAME (read)
# Arguments: none
# Outputs:   progress line on stdout; psql diagnostics on stderr
#######################################
vacuum_all_tables() {
    printf '  Vacuuming... '
    # stdout is discarded because -q does not suppress the result set of
    # SELECT pg_stat_reset(), which would otherwise land in the middle of the
    # progress line. Errors still reach the terminal via stderr.
    psql -d "$DB_NAME" -q > /dev/null << 'EOF'
SET client_min_messages TO warning;
VACUUM ANALYZE mv_benchmark.invoices;
VACUUM ANALYZE mv_benchmark.invoice_lines;
VACUUM ANALYZE mv_benchmark.invoice_summary;
VACUUM ANALYZE mv_benchmark.invoice_summary_table;
SELECT pg_stat_reset();
EOF
    echo "done"
}

#######################################
# Run mv_benchmark.verify_consistency() for one check type and report whether
# any check came back with status 'FAIL'.
# Globals:   DB_NAME (read)
# Arguments: $1 - check type forwarded to verify_consistency(); the callers in
#                 this script pass "mv" or "table"
# Outputs:   "PASS", or "FAILED!" followed by the failing details and a
#            warning, on stdout
# Returns:   0 whenever the query itself succeeds; a failed check only warns
#            and does not stop the benchmark
#######################################
verify_data() {
    local check_type=$1
    local failed_checks

    echo -n "  Verifying consistency ($check_type)... "
    # -A -t yields bare values, so the result is empty when nothing failed.
    failed_checks=$(psql -d "$DB_NAME" -A -t -c "SELECT details FROM mv_benchmark.verify_consistency('$check_type') WHERE status = 'FAIL';")

    if [[ -n "$failed_checks" ]]; then
        echo "FAILED!"
        echo "$failed_checks"
        echo "WARNING: Data inconsistency detected. Benchmark validity compromised."
    else
        echo "PASS"
    fi
}

#######################################
# Run one pgbench measurement and append its result to the CSV.
# Globals:   DB_NAME, DURATION_SECS, RESULTS_FILE (read)
# Arguments: $1 - strategy label written to the CSV
#            $2 - scale, passed to the workload scripts as "scale"
#            $3 - batch size, passed as "batch_size"
#            $4 - full-refresh flag, passed as "full_refresh"
#            $5 - client count, used for both -c and -j
# Outputs:   one progress line on stdout; one CSV row appended to
#            $RESULTS_FILE; a warning on stderr if pgbench fails
# Returns:   0 even when pgbench fails; the row is then recorded with
#            tps and latency set to "ERROR"
#######################################
run_test() {
    local strategy=$1
    local scale=$2
    local batch=$3
    local full_refresh=$4
    local clients=$5
    local output tps latency
    local rc=0

    echo -n "  [$strategy | S:$scale | B:$batch | C:$clients] "

    # 45% ins, 10% upd, 45% del
    local -a workload_args=(
        -f workload_insert.sql@45
        -f workload_update.sql@10
        -f workload_delete.sql@45
    )

    # "|| rc=$?" keeps `set -e` from aborting the whole benchmark when a single
    # run fails. The database name is given as the trailing positional argument
    # because -d means --debug on older pgbench releases.
    output=$(pgbench -n "${workload_args[@]}" \
        -D "scale=$scale" -D "batch_size=$batch" -D "full_refresh=$full_refresh" \
        -T "$DURATION_SECS" -c "$clients" -j "$clients" \
        "$DB_NAME" 2>&1) || rc=$?

    if (( rc != 0 )); then
        # Results from a failed run are not trustworthy, so none are parsed.
        echo "WARNING: pgbench exited with status $rc:" >&2
        tail -n 5 <<< "$output" >&2
        tps=""
        latency=""
    else
        # First overall "tps = N" line and first "latency average = N ms" line.
        tps=$(awk '/^(excluding|tps)/ && /tps = / { print $3; exit }' <<< "$output")
        latency=$(awk '/latency average/ { print $4; exit }' <<< "$output")
    fi

    # Sanitize: keep digits and dots only; anything that ends up empty is an error.
    tps=${tps//[^0-9.]/}
    latency=${latency//[^0-9.]/}
    [[ -n "$tps" ]] || tps="ERROR"
    [[ -n "$latency" ]] || latency="ERROR"

    echo "$strategy,$scale,$batch,$clients,$tps,$latency" >> "$RESULTS_FILE"
    echo "-> TPS: $tps"
}

# --- Main Execution Loop ---

# Run psql quietly against $DB_NAME with the given extra arguments
# (e.g. -c "SQL" or -f file.sql).
run_sql() {
    psql -d "$DB_NAME" -q "$@"
}

#######################################
# Benchmark one strategy for every client-count / batch-size combination.
# Globals:   CONCURRENCY_LEVELS, BATCH_SIZES (read)
# Arguments: $1 - strategy label, $2 - scale, $3 - full-refresh flag
#######################################
run_strategy_matrix() {
    local strategy=$1
    local scale=$2
    local full_refresh=$3
    local clients batch

    # The two lists are space-separated strings, so word splitting is intended.
    for clients in $CONCURRENCY_LEVELS; do
        for batch in $BATCH_SIZES; do
            run_test "$strategy" "$scale" "$batch" "$full_refresh" "$clients"
        done
    done
}

for SCALE_ROWS in $SCALE_FACTORS; do
    echo "--- SCALE: $SCALE_ROWS invoices ---"

    # Init Data
    run_sql -f setup.sql
    run_sql -c "SELECT mv_benchmark.generate_data($SCALE_ROWS);"
    vacuum_all_tables

    # 1. BASELINE (Triggers OFF)
    # No verification needed (no summary to check)
    run_sql -c "SELECT mv_benchmark.disable_all_triggers();"
    run_strategy_matrix "baseline" "$SCALE_ROWS" 0

    # 2. FULL REFRESH (Triggers OFF, Manual Refresh in workload)
    # Verification optional (built-in PG refresh), skipping to save time
    run_sql -c "SELECT mv_benchmark.generate_data($SCALE_ROWS);"
    run_sql -c "SELECT mv_benchmark.disable_all_triggers();"
    vacuum_all_tables
    run_strategy_matrix "full_refresh" "$SCALE_ROWS" 1

    # 3. PARTIAL REFRESH (Triggers ON, Auto Refresh)
    # This tests the patch: concurrent updates should be fast AND correct.
    run_sql -c "SELECT mv_benchmark.generate_data($SCALE_ROWS);"
    run_sql -c "SELECT mv_benchmark.enable_partial_refresh_triggers();"
    vacuum_all_tables
    run_strategy_matrix "partial_refresh" "$SCALE_ROWS" 0
    verify_data "mv"

    # 4. MATERIALIZED TABLE (Triggers ON, Manual Upsert)
    run_sql -c "SELECT mv_benchmark.generate_data($SCALE_ROWS);"
    run_sql -c "SELECT mv_benchmark.enable_materialized_table_triggers();"
    vacuum_all_tables
    run_strategy_matrix "materialized_table" "$SCALE_ROWS" 0
    verify_data "table"
done

echo ""
echo "BENCHMARK COMPLETE. Results in $RESULTS_FILE"

# --- Analysis & Output ---

#######################################
# Print the TPS column of the first CSV row whose strategy, scale, batch size
# and client count all match exactly; prints nothing when no row matches.
# Globals:   RESULTS_FILE (read)
# Arguments: $1 - strategy, $2 - scale, $3 - batch size, $4 - clients
#######################################
lookup_tps() {
    awk -F, -v s="$1" -v sc="$2" -v b="$3" -v c="$4" \
        '$1 == s && $2 == sc && $3 == b && $4 == c { print $5; exit }' \
        "$RESULTS_FILE"
}

#######################################
# Print value/base rounded to three decimals with an "x" suffix, or "n/a"
# when either input is not a plain decimal number (e.g. "ERROR" or empty) or
# the base is not positive.
# Arguments: $1 - strategy TPS, $2 - baseline TPS
#######################################
relative_tps() {
    local value=$1
    local base=$2
    local number_re='^([0-9]+[.]?[0-9]*|[.][0-9]+)$'

    if [[ $value =~ $number_re && $base =~ $number_re ]]; then
        awk -v v="$value" -v b="$base" \
            'BEGIN { if (b > 0) printf "%.3fx", v / b; else printf "n/a" }'
    else
        printf 'n/a'
    fi
}

#######################################
# Print, per client count, a table of baseline TPS and each strategy's TPS
# relative to that baseline. Rows without a usable baseline are skipped.
# Globals:   CONCURRENCY_LEVELS, SCALE_FACTORS, BATCH_SIZES, RESULTS_FILE (read)
#######################################
print_summary() {
    local clients scale batch base_tps rel_full rel_part rel_tbl

    for clients in $CONCURRENCY_LEVELS; do
        echo "Concurrency: $clients client(s)"
        echo "----------------------------------------------------------------------------------"
        printf "%-10s %6s | %-12s | %-12s %-12s %-12s\n" "Scale" "Batch" "Baseline TPS" "Full (Rel)" "Partial (Rel)" "Table (Rel)"
        printf "%-10s %6s | %-12s | %-12s %-12s %-12s\n" "----------" "------" "------------" "------------" "------------" "------------"

        for scale in $SCALE_FACTORS; do
            for batch in $BATCH_SIZES; do
                base_tps=$(lookup_tps baseline "$scale" "$batch" "$clients")

                # A baseline divided by itself is "n/a" exactly when it is
                # missing, non-numeric or not positive.
                if [[ $(relative_tps "$base_tps" "$base_tps") == "n/a" ]]; then
                    continue
                fi

                # Relative performance (Strategy / Baseline)
                rel_full=$(relative_tps "$(lookup_tps full_refresh "$scale" "$batch" "$clients")" "$base_tps")
                rel_part=$(relative_tps "$(lookup_tps partial_refresh "$scale" "$batch" "$clients")" "$base_tps")
                rel_tbl=$(relative_tps "$(lookup_tps materialized_table "$scale" "$batch" "$clients")" "$base_tps")

                printf "%-10s %6s | %-12.2f | %-12s %-12s %-12s\n" \
                    "$scale" "$batch" "$base_tps" "$rel_full" "$rel_part" "$rel_tbl"
            done
        done
        echo ""
    done
}

print_summary

# --- Graph Generation ---
# Optional step: plot each strategy's TPS relative to the baseline, one panel
# per scale. Skipped when python3 or matplotlib is unavailable so a finished
# benchmark never ends with a failure just because plotting is not possible.

if command -v python3 &> /dev/null; then
    echo "Generating graphs (comparing against Baseline)..."
    mkdir -p graphs

    # The CSV path is handed over as argv[1]; the heredoc delimiter is quoted,
    # so no shell expansion happens inside the Python source.
    python3 - "$RESULTS_FILE" << 'PYTHON_SCRIPT'
import csv
import sys

try:
    import matplotlib
    matplotlib.use('Agg')  # render straight to a file; no display required
    import matplotlib.pyplot as plt
except ImportError:
    print("matplotlib is not installed; skipping graph generation.")
    sys.exit(0)

results_path = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else 'benchmark_results.csv'

# (strategy, scale, batch, clients) -> tps. Rows whose numeric fields do not
# parse (for example tps == 'ERROR') are skipped; the first row per key wins.
tps_by_key = {}
try:
    with open(results_path, 'r', newline='') as f:
        for row in csv.DictReader(f):
            try:
                key = (row['strategy'], int(row['scale']), int(row['batch_size']), int(row['clients']))
                tps = float(row['tps'])
            except (KeyError, TypeError, ValueError):
                continue
            tps_by_key.setdefault(key, tps)
except OSError as exc:
    print(f"Cannot read {results_path}: {exc}", file=sys.stderr)
    sys.exit(1)

if not tps_by_key:
    print("No usable benchmark rows; skipping graph generation.")
    sys.exit(0)

scales = sorted({k[1] for k in tps_by_key})
batches = sorted({k[2] for k in tps_by_key})
clients_list = sorted({k[3] for k in tps_by_key})

colors = {'baseline': '#2ecc71', 'full_refresh': '#e74c3c', 'partial_refresh': '#3498db', 'materialized_table': '#f39c12'}
labels = {'baseline': 'Baseline', 'full_refresh': 'Full Refresh', 'partial_refresh': 'Partial Refresh', 'materialized_table': 'Materialized Table'}
# Colour identifies the strategy; line style tells the client counts apart.
line_styles = ['-', '--', ':', '-.']

# Graph: Relative Performance vs Baseline
fig, axes = plt.subplots(1, len(scales), figsize=(6 * len(scales), 5), squeeze=False)

for idx, scale in enumerate(scales):
    ax = axes[0][idx]
    for c_idx, clients in enumerate(clients_list):
        for strategy in ['full_refresh', 'partial_refresh', 'materialized_table']:
            xs = []
            ratios = []
            # Plot against the batch's position in `batches` so a missing
            # measurement leaves a gap instead of shifting later points left.
            for b_idx, batch in enumerate(batches):
                base = tps_by_key.get(('baseline', scale, batch, clients))
                curr = tps_by_key.get((strategy, scale, batch, clients))
                if base is not None and curr is not None and base > 0:
                    xs.append(b_idx)
                    ratios.append(curr / base)

            if ratios:
                ax.plot(xs, ratios, marker='o',
                        linestyle=line_styles[c_idx % len(line_styles)],
                        label=f'{labels[strategy]} ({clients}c)',
                        color=colors[strategy])

    ax.set_title(f'Scale: {scale:,}', fontsize=12)
    # Ticks come from the full batch list, independent of which series had data.
    ax.set_xticks(range(len(batches)))
    ax.set_xticklabels([str(b) for b in batches])
    ax.set_xlabel('Batch Size')
    ax.set_ylabel('Performance vs Baseline (1.0 = Baseline)')
    ax.axhline(y=1, color='green', linestyle='--', alpha=0.5, label='Baseline')
    ax.grid(True, alpha=0.3)
    if idx == 0:
        ax.legend(fontsize=8)

plt.suptitle('Performance Relative to Baseline (Higher is Better)', fontsize=14, y=1.02)
plt.tight_layout()
plt.savefig('graphs/relative_performance.png', bbox_inches='tight')
print("Saved graphs/relative_performance.png")
PYTHON_SCRIPT
fi
