#![allow(
    clippy::unwrap_used,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation
)]

//! Criterion benchmarks for production-representative load: 1M items.
//!
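//! Exercises the operations covered by the m7p3 performance acceptance criteria:
//! - RETRIEVE p99 < 50ms
//! - SEARCH p99 < 100ms
//! - Signal write p99 < 100µs
//!
//! Criterion reports mean/median estimates rather than percentiles, so these
//! results are a proxy for the p99 targets, not a direct p99 measurement.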
//!
//! A single shared `TidalDb` is built once via `LazyLock` to amortize the
//! 1M-item setup cost across all benchmark runs.
//!
//! Dataset layout:
//! - 1M items, 10K creators (100 items/creator)
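//! - 20 categories, assigned round-robin by item index
//! - 128D random unit vectors (NOTE: not currently written by the setup code in
//!   this file; `DIM` is declared but unused)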
//! - 10% view coverage, 5% like coverage

use std::collections::HashMap;
use std::sync::LazyLock;
use std::time::Duration;

use criterion::{BatchSize, Criterion, SamplingMode, black_box, criterion_group, criterion_main};
use tidaldb::TidalDb;
use tidaldb::query::retrieve::Retrieve;
use tidaldb::query::search::Search;
use tidaldb::schema::{
    DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window,
};
use tidaldb::storage::indexes::filter::FilterExpr;

/// Total number of items written into the shared benchmark database.
const N_ITEMS: u64 = 1_000_000;
/// Number of distinct creators implied by the dataset layout.
const N_CREATORS: u64 = 10_000;
/// Number of consecutive item indices assigned to each creator.
const ITEMS_PER_CREATOR: u64 = 100;
/// Number of categories used for round-robin assignment; also the length of `CATEGORIES`.
const N_CATEGORIES: u64 = 20;
/// Vector dimensionality named in the module docs.
///
/// NOTE(review): nothing in this file reads `DIM` and the setup code writes no
/// vectors — confirm whether vector data is still planned or this can be removed.
const DIM: usize = 128;

// Compile-time guard for the documented layout: creators × items-per-creator
// must cover exactly the item universe, otherwise creator ids drift silently.
const _: () = assert!(
    N_CREATORS * ITEMS_PER_CREATOR == N_ITEMS,
    "dataset layout mismatch: N_CREATORS * ITEMS_PER_CREATOR must equal N_ITEMS"
);

/// Categories pool for round-robin assignment.
///
/// Typed as a fixed-size array of `N_CATEGORIES` entries so that adding or
/// removing a category without updating `N_CATEGORIES` is a compile error
/// rather than an out-of-bounds panic (or a silently skewed distribution) when
/// the pool is indexed with `i % N_CATEGORIES`.
static CATEGORIES: [&str; N_CATEGORIES as usize] = [
    "technology",
    "sports",
    "music",
    "gaming",
    "cooking",
    "travel",
    "science",
    "arts",
    "news",
    "fashion",
    "finance",
    "health",
    "education",
    "entertainment",
    "politics",
    "nature",
    "history",
    "automotive",
    "pets",
    "fitness",
];

/// Builds the schema used by the shared scale-benchmark database.
///
/// Declares two item-level signals with exponential decay and velocity turned
/// off — "view" (7-day half-life; 24h and 7d windows) and "like" (30-day
/// half-life; 24h window) — followed by two text fields: `title`
/// (`TextFieldType::Text`) and `category` (`TextFieldType::Keyword`).
///
/// # Panics
///
/// Panics if `SchemaBuilder::build` fails.
fn scale_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;

    let mut schema = SchemaBuilder::new();

    let view_decay = DecaySpec::Exponential {
        half_life: Duration::from_secs(7 * SECS_PER_DAY),
    };
    // The value returned by `add()` is intentionally discarded.
    let _ = schema
        .signal("view", EntityKind::Item, view_decay)
        .windows(&[Window::TwentyFourHours, Window::SevenDays])
        .velocity(false)
        .add();

    let like_decay = DecaySpec::Exponential {
        half_life: Duration::from_secs(30 * SECS_PER_DAY),
    };
    let _ = schema
        .signal("like", EntityKind::Item, like_decay)
        .windows(&[Window::TwentyFourHours])
        .velocity(false)
        .add();

    schema.text_field("title", TextFieldType::Text);
    schema.text_field("category", TextFieldType::Keyword);

    schema.build().unwrap()
}

/// Populates a fresh ephemeral `TidalDb` with the full benchmark dataset.
///
/// Intended to run a single time, as the initializer of `SCALE_DB`. For each
/// item index `i` in `0..N_ITEMS` it writes `title`, `category` and `creator`
/// metadata under entity id `i + 1`, adds a "view" signal on every 10th index
/// and a "like" signal on every 20th, then pauses and reloads the text index.
///
/// # Panics
///
/// Panics if opening the database, any item/signal write, or the text-index
/// reload fails.
fn build_scale_db() -> TidalDb {
    eprintln!("[scale bench] Building 1M-item database (this takes ~30s)...");
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(scale_schema())
        .open()
        .unwrap();

    // Fixed anchor so the generated signal timestamps are reproducible.
    // NOTE(review): presumably Unix-epoch nanoseconds (mid-February 2024); if
    // windows/decay are evaluated against wall-clock time, these signals may
    // fall outside every window — confirm this is intended.
    let anchor_ns = 1_708_000_000_000_000_000u64;

    for i in 0..N_ITEMS {
        let entity = EntityId::new(i + 1);
        let creator_id = i / ITEMS_PER_CREATOR; // 0..9999
        let category = CATEGORIES[(i % N_CATEGORIES) as usize];

        let metadata = HashMap::from([
            (
                "title".to_string(),
                format!("Content {i} by creator {creator_id} about {category}"),
            ),
            ("category".to_string(), category.to_string()),
            ("creator".to_string(), creator_id.to_string()),
        ]);
        db.write_item_with_metadata(entity, &metadata).unwrap();

        // Per-item offset, bounded to one day's worth of seconds.
        let offset_secs = i % 86400;

        // 10% view coverage: one second of age per offset step.
        if i % 10 == 0 {
            let viewed_at = Timestamp::from_nanos(anchor_ns - offset_secs * 1_000_000_000);
            db.signal("view", entity, 1.0, viewed_at).unwrap();
        }
        // 5% like coverage: two seconds of age per offset step.
        if i % 20 == 0 {
            let liked_at = Timestamp::from_nanos(anchor_ns - offset_secs * 2_000_000_000);
            db.signal("like", entity, 1.0, liked_at).unwrap();
        }
    }

    // Per the original author's note, the text syncer commits every 1000 items
    // (~1000 commits for 1M items), so give it time before reloading.
    // NOTE(review): the fixed 3 s pause is a heuristic; nothing here confirms
    // the syncer has actually finished.
    eprintln!("[scale bench] Waiting for text index to commit...");
    let syncer_grace = Duration::from_millis(3000);
    std::thread::sleep(syncer_grace);
    db.reload_text_index().unwrap();
    eprintln!("[scale bench] Database ready: {N_ITEMS} items.");

    db
}

/// Shared 1M-item DB: built once, reused by all benchmarks.
///
/// Initialized lazily by `build_scale_db` on first access, so whichever
/// benchmark function touches it first pays the full setup cost before its
/// measurements start.
static SCALE_DB: LazyLock<TidalDb> = LazyLock::new(build_scale_db);

// ── RETRIEVE benchmarks ───────────────────────────────────────────────────────

/// RETRIEVE: "for_you" profile — signal-scored ranking over full universe.
fn bench_retrieve_for_you(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let query = Retrieve::builder()
        .profile("for_you")
        .limit(20)
        .build()
        .unwrap();

    let mut group = c.benchmark_group("retrieve_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("for_you", |b| {
        b.iter(|| db.retrieve(black_box(&query)).unwrap());
    });

    group.finish();
}

/// RETRIEVE: "trending" profile — top-viewed items.
fn bench_retrieve_trending(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let query = Retrieve::builder()
        .profile("trending")
        .limit(20)
        .build()
        .unwrap();

    let mut group = c.benchmark_group("retrieve_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("trending", |b| {
        b.iter(|| db.retrieve(black_box(&query)).unwrap());
    });

    group.finish();
}

/// RETRIEVE: "new" profile — recency-filtered by category.
fn bench_retrieve_new_filtered(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let query = Retrieve::builder()
        .profile("new")
        .limit(20)
        .filter(FilterExpr::CategoryEq("technology".into()))
        .build()
        .unwrap();

    let mut group = c.benchmark_group("retrieve_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("new_filtered", |b| {
        b.iter(|| db.retrieve(black_box(&query)).unwrap());
    });

    group.finish();
}

// ── SEARCH benchmarks ─────────────────────────────────────────────────────────

/// SEARCH: text-only query over 1M items.
fn bench_search_text_only(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let query = Search::builder()
        .query("technology content creator")
        .limit(20)
        .build()
        .unwrap();

    let mut group = c.benchmark_group("search_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("text_only", |b| {
        b.iter(|| db.search(black_box(&query)).unwrap());
    });

    group.finish();
}

/// SEARCH: text query with category filter.
fn bench_search_text_filtered(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let query = Search::builder()
        .query("gaming sports fitness")
        .limit(20)
        .filter(FilterExpr::CategoryEq("sports".into()))
        .build()
        .unwrap();

    let mut group = c.benchmark_group("search_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("text_filtered", |b| {
        b.iter(|| db.search(black_box(&query)).unwrap());
    });

    group.finish();
}

// ── Signal write benchmark ────────────────────────────────────────────────────

/// Signal write: amortized cost over rotating entity IDs.
///
/// Measures the hot path for incremental signal ingestion at 1M-item scale.
/// Rotates through 1000 entity IDs to represent a realistic write workload.
fn bench_signal_write(c: &mut Criterion) {
    let db: &TidalDb = &SCALE_DB;

    let mut group = c.benchmark_group("signal_write_1m");
    group.sample_size(10);
    group.measurement_time(Duration::from_secs(30));
    group.sampling_mode(SamplingMode::Flat);

    let mut counter = 0u64;
    let ts = Timestamp::now();

    group.bench_function("write_rotating_1k_entities", |b| {
        b.iter_batched(
            || {
                // Rotate through 1000 entity IDs in the 1M range.
                let id = EntityId::new((counter % 1000) + 1);
                counter += 1;
                id
            },
            |id| db.signal("view", black_box(id), 1.0, ts).unwrap(),
            BatchSize::SmallInput,
        );
    });

    group.finish();
}

// Registers every benchmark in this file under the `scale_benches` group and
// generates the harness entry point. Targets are listed reads-first;
// `bench_signal_write` is the only target that calls `db.signal` on the shared
// DB and comes last.
criterion_group!(
    scale_benches,
    bench_retrieve_for_you,
    bench_retrieve_trending,
    bench_retrieve_new_filtered,
    bench_search_text_only,
    bench_search_text_filtered,
    bench_signal_write,
);
criterion_main!(scale_benches);