"""
Export event-driven orderbook state rows for statistical analysis.

The output is a CSV generated from orderbook_events.jsonl. Each row is one
event frame with per-outcome best bid/ask, spread, top depth, trade fields, and
received/exchange timestamps.
"""
from __future__ import annotations

import csv
import os
import sys
from pathlib import Path
from typing import Any

import click
from loguru import logger

# Make the project root (two directories up from this file) importable so the
# `analysis` package resolves when this file is run directly as a script.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

# Deliberately imported after the sys.path shim above — hence the E402 noqa.
from analysis.lpl_orderbook.event_replay import iter_event_frames  # noqa: E402


def _sum_size(levels: list[dict[str, float]], depth: int) -> float:
    return round(sum(float(level["size"]) for level in levels[:depth]), 6)


def _sum_notional(levels: list[dict[str, float]], depth: int) -> float:
    return round(
        sum(float(level["price"]) * float(level["size"]) for level in levels[:depth]),
        6,
    )


def _row_for_frame(frame: dict[str, Any], token_ids: list[str]) -> dict[str, Any]:
    """Flatten one event frame into a single flat CSV row dict.

    The row carries event metadata, the (possibly absent) trade fields, and
    per-token book aggregates: best bid/ask, spread, and top-1/top-5 depth.
    Missing values are emitted as empty strings so the CSV stays rectangular.
    Key insertion order is significant — it determines the CSV column order.
    """
    trade = frame.get("trade") or {}
    books = frame.get("books") or {}

    row: dict[str, Any] = {}
    # Event-level metadata copied through verbatim.
    for key in (
        "received_at_wall",
        "exchange_ts",
        "delay_ms",
        "event_type",
        "message_index",
        "connection_id",
    ):
        row[key] = frame.get(key, "")
    row["event_asset_id"] = frame.get("asset_id", "")
    row["event_outcome"] = frame.get("outcome", "")
    row["changed_levels_count"] = len(frame.get("changed_levels") or [])

    # Trade fields are blank for non-trade events (empty `trade` dict).
    for field in ("asset_id", "outcome", "side", "price", "size", "transaction_hash"):
        row[f"trade_{field}"] = trade.get(field, "")

    # One column group per outcome token, in the order given by token_ids.
    # NOTE(review): top-5 columns are fixed regardless of the --depth option
    # used during replay — confirm this is intended.
    for position, token_id in enumerate(token_ids, start=1):
        book = books.get(token_id) or {}
        bids = book.get("bids") or []
        asks = book.get("asks") or []
        bid = book.get("best_bid")
        ask = book.get("best_ask")
        col = f"token{position}"
        row[f"{col}_asset_id"] = token_id
        row[f"{col}_outcome"] = book.get("outcome", "")
        row[f"{col}_best_bid"] = bid if bid is not None else ""
        row[f"{col}_best_ask"] = ask if ask is not None else ""
        # Spread is only defined when both sides of the book are present.
        if bid is not None and ask is not None:
            row[f"{col}_spread"] = round(float(ask) - float(bid), 6)
        else:
            row[f"{col}_spread"] = ""
        row[f"{col}_bid_top1_size"] = _sum_size(bids, 1)
        row[f"{col}_ask_top1_size"] = _sum_size(asks, 1)
        row[f"{col}_bid_top5_size"] = _sum_size(bids, 5)
        row[f"{col}_ask_top5_size"] = _sum_size(asks, 5)
        row[f"{col}_bid_top5_notional"] = _sum_notional(bids, 5)
        row[f"{col}_ask_top5_notional"] = _sum_notional(asks, 5)
    return row


def export_event_stats(
    *,
    slug: str,
    data_root: Path,
    output_path: Path | None,
    depth: int,
) -> dict[str, Any]:
    """Replay recorded orderbook events for *slug* and write a stats CSV.

    Args:
        slug: Recorded market slug under *data_root*.
        data_root: Root directory holding per-market recordings.
        output_path: Target CSV path; defaults to
            ``<data_root>/<slug>/event_orderbook_stats.csv`` when None.
        depth: Depth passed through to the event replay.

    Returns:
        Summary dict with the row count, output path, and the first/last
        ``received_at_wall`` timestamps (empty strings when no rows).
    """
    meta, frames = iter_event_frames(slug=slug, data_root=data_root, depth=depth)
    token_ids = [str(tid) for tid in meta.get("token_ids", [])]

    if output_path is None:
        output_path = data_root / slug / "event_orderbook_stats.csv"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    rows = [_row_for_frame(frame, token_ids) for frame in frames]

    # Union of keys across all rows, preserving first-seen order for columns.
    header: dict[str, None] = {}
    for row in rows:
        header.update(dict.fromkeys(row))

    with output_path.open("w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=list(header))
        writer.writeheader()
        writer.writerows(rows)

    summary: dict[str, Any] = {
        "slug": slug,
        "rows": len(rows),
        "output_path": str(output_path),
        "started_at": "",
        "ended_at": "",
    }
    if rows:
        summary["started_at"] = rows[0]["received_at_wall"]
        summary["ended_at"] = rows[-1]["received_at_wall"]
    return summary


@click.command()
@click.option("--slug", required=True, help="Recorded Polymarket market slug")
@click.option(
    "--data-root",
    default="data/lpl",
    show_default=True,
    type=click.Path(file_okay=False, path_type=Path),
)
@click.option(
    "--output",
    "output_path",
    default=None,
    type=click.Path(dir_okay=False, path_type=Path),
    help="Output CSV path. Defaults to data/lpl/<slug>/event_orderbook_stats.csv",
)
@click.option("--depth", default=5, show_default=True, type=int, help="Depth used for top-N stats")
def main(slug: str, data_root: Path, output_path: Path | None, depth: int) -> None:
    """CLI entry point: run the export and log a one-line summary."""
    summary = export_event_stats(
        slug=slug, data_root=data_root, output_path=output_path, depth=depth
    )
    logger.info(
        "event stats exported: "
        f"rows={summary['rows']} "
        f"started_at={summary['started_at']} "
        f"ended_at={summary['ended_at']} "
        f"output={summary['output_path']}"
    )


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
