import json
import time
from pathlib import Path
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
from mitmproxy import http
import pyamf
from pyamf import remoting

# JSON Lines file that every record and summary event is appended to.
OUT_FILE = Path("ninjasage_sessions.jsonl")

# Record keys (compared lower-cased) whose values are replaced before writing.
REDACT_KEYS = {
    "session_key",
    "token",
    "sid",
    "authorization",
    "cookie",
    "set-cookie",
}

# Only this host and its subdomains are recorded in detail.
TARGET_SUFFIX = "ninjasage.id"

# A query parameter whose lower-cased name contains any of these substrings
# has its value redacted when a URL is sanitized.
REDACT_QUERY_SUBSTR = (
    "token",
    "session",
    "sid",
    "authorization",
    "conversation",
    "website_token",
    "jwt",
)

def _is_target_host(host: str) -> bool:
    """Return True when *host* is the target domain or one of its subdomains.

    The comparison is case-insensitive. A falsy host is treated as the empty
    string.
    """
    normalized = host.lower() if host else ""
    if normalized == TARGET_SUFFIX:
        return True
    return normalized.endswith(f".{TARGET_SUFFIX}")

def _looks_like_leaderboard(path: str) -> bool:
    p = (path or "").lower()
    return any(x in p for x in ("leaderboard", "rank", "ranking", "ladder", "arena"))

def extract_from_query(flow):
    """Placeholder extractor for the request query.

    Nothing is collected at present: *flow* is ignored and a new empty dict is
    returned on every call.
    """
    return dict()

def extract_from_body(flow):
    """Placeholder extractor for the message body.

    Nothing is collected at present: *flow* is ignored and a new empty dict is
    returned on every call.
    """
    return dict()

def extract_from_amf_bytes(payload: bytes):
    """Decode an AMF remoting packet and report which targets it addresses.

    Only routing metadata is collected; message bodies are never copied into
    the result.

    Args:
        payload: Raw AMF bytes. A falsy payload (``b""`` or ``None``) yields
            empty results without attempting to decode.

    Returns:
        A ``(meta, found)`` tuple. ``meta`` may contain ``_amf_targets`` (at
        most the first 10 target names), ``_amf_error`` (decoding failed) or
        ``_amf_iter_error`` (walking the decoded bodies failed). ``found`` is
        always an empty dict.
    """
    meta = {}
    if not payload:
        return meta, {}
    try:
        env = remoting.decode(payload)
    except Exception as e:
        # Best effort: a malformed packet is reported, never raised.
        meta["_amf_error"] = str(e)
        return meta, {}

    bodies = getattr(env, "bodies", None)
    if isinstance(bodies, dict):
        entries = bodies.items()
    elif bodies is not None:
        # NOTE(review): PyAMF's Envelope appears to keep ``bodies`` as a list
        # of (name, message) pairs rather than a dict - confirm against the
        # installed version. Walking the list directly also avoids going
        # through the envelope's own iterator.
        entries = bodies
    else:
        entries = env

    targets = []
    try:
        for name, message in entries:
            # Request messages carry the service target; for anything else
            # the entry name is the only identifier available.
            target = getattr(message, "target", None)
            targets.append(str(target or name))
    except Exception as e:
        meta["_amf_iter_error"] = str(e)

    if targets:
        meta["_amf_targets"] = targets[:10]
    return meta, {}

def extract_from_amf(flow):
    """Decode an AMF request to obtain its targets/methods (no credentials are taken).

    A request is treated as AMF when its Content-Type media type is
    ``application/x-amf`` (parameters and letter case are ignored) or when its
    path, without the query string, ends in ``/amf``.

    Args:
        flow: Object exposing ``request.headers``, ``request.path`` and
            ``request.content``.

    Returns:
        The metadata dict produced by ``extract_from_amf_bytes``, or an empty
        dict when the request is not AMF.
    """
    content_type = flow.request.headers.get("Content-Type") or ""
    media_type = content_type.split(";", 1)[0].strip().lower()
    # request.path carries the query string, which must not take part in the
    # suffix test.
    path_only = (flow.request.path or "").split("?", 1)[0]
    if media_type != "application/x-amf" and not path_only.endswith("/amf"):
        return {}
    meta, _found = extract_from_amf_bytes(flow.request.content or b"")
    return dict(meta)

def extract_from_cookies(flow):
    """Placeholder extractor for cookies.

    Nothing is collected at present: *flow* is ignored and a new empty dict is
    returned on every call.
    """
    return dict()

def _redact_record(rec: dict):
    for k in list(rec.keys()):
        if str(k).lower() in REDACT_KEYS:
            rec[k] = "[REDACTED]"
    if "url" in rec and isinstance(rec["url"], str):
        rec["url"] = _sanitize_url(rec["url"])
    return rec

def _sanitize_url(url: str) -> str:
    try:
        parts = urlsplit(url)
        if not parts.query:
            return url
        pairs = parse_qsl(parts.query, keep_blank_values=True)
        new_pairs = []
        for k, v in pairs:
            kl = (k or "").lower()
            if any(s in kl for s in REDACT_QUERY_SUBSTR):
                new_pairs.append((k, "[REDACTED]"))
            else:
                new_pairs.append((k, v))
        new_query = urlencode(new_pairs, doseq=True)
        return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, parts.fragment))
    except Exception:
        return url

class NinjaSageRecorder:
    """mitmproxy addon that logs per-flow metadata for the target host as JSON Lines.

    Every request updates the per-host counters. Flows whose host passes
    ``_is_target_host`` are also written as ``REQUEST`` / ``AMF_REQUEST`` /
    ``RESPONSE`` / ``RESPONSE_META`` records. Periodic and final ``SUMMARY``
    records carry the aggregated counters.

    Recorded paths and endpoint keys never include the query string:
    mitmproxy's ``request.path`` carries it, and only the ``url`` field goes
    through query redaction.
    """

    def __init__(self):
        self.out = OUT_FILE
        self.endpoint_counts = {}
        self.amf_target_counts = {}
        self.host_counts = {}
        self.total_flows = 0  # flows for the target host only
        self.total_all_flows = 0  # every flow seen, regardless of host
        self.started_at = time.time()
        self.last_snapshot_at = 0.0  # 0.0 means the snapshot timer has not started
        self.snapshot_seq = 0
        self._write_event({"type": "RUN_START"})

    @staticmethod
    def _strip_query(path):
        """Return *path* without its query string (``None`` becomes ``""``)."""
        return (path or "").split("?", 1)[0]

    @staticmethod
    def _top(counts, limit=25):
        """Return the *limit* highest ``(key, count)`` pairs, largest first."""
        return sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:limit]

    def _append_line(self, rec: dict):
        """Append *rec* to the output file as one JSON line.

        Recording is best effort: a failure is reported on the console and
        never propagates into the proxy.
        """
        try:
            with self.out.open("a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        except Exception as exc:
            print(f"[warn] could not write record to {self.out}: {exc}")

    def _write_event(self, payload: dict):
        """Timestamp, redact and persist a non-flow event such as RUN_START or SUMMARY."""
        rec = {"ts": int(time.time()), **payload}
        _redact_record(rec)
        self._append_line(rec)

    def _snapshot(self, final: bool):
        """Write a SUMMARY event; the final one is also printed to the console."""
        self.snapshot_seq += 1
        summary = {
            "type": "SUMMARY",
            "final": bool(final),
            "seq": self.snapshot_seq,
            "uptime_s": int(time.time() - self.started_at),
            "total_all_flows": self.total_all_flows,
            "total_flows": self.total_flows,
            "top_hosts": self._top(self.host_counts),
            "top_endpoints": self._top(self.endpoint_counts),
            "top_amf_targets": self._top(self.amf_target_counts),
        }
        self._write_event(summary)
        if final:
            print(json.dumps(summary, ensure_ascii=False))

    def _maybe_snapshot(self):
        """Write a non-final summary at most every 15 seconds once target traffic exists.

        The first eligible call only starts the timer.
        """
        now = time.time()
        if self.total_flows <= 0:
            return
        if self.last_snapshot_at == 0.0:
            self.last_snapshot_at = now
            return
        if (now - self.last_snapshot_at) >= 15.0:
            self.last_snapshot_at = now
            self._snapshot(final=False)

    def request(self, flow: "http.HTTPFlow"):
        """Count the request and, for target hosts, record REQUEST and AMF metadata."""
        host = flow.request.host or ""
        self.total_all_flows += 1
        if host:
            self.host_counts[host] = self.host_counts.get(host, 0) + 1

        if not _is_target_host(host):
            self._maybe_snapshot()
            return

        # Drop the query string so parameter values cannot end up in counter
        # keys, the "path" field or console output.
        path = self._strip_query(flow.request.path)

        self.total_flows += 1
        endpoint_key = f"{flow.request.method} {host}{path}"
        self.endpoint_counts[endpoint_key] = self.endpoint_counts.get(endpoint_key, 0) + 1
        self._maybe_snapshot()

        self.save_record(
            flow,
            {
                "path": path,
                "scheme": flow.request.scheme,
                "http_version": flow.request.http_version,
                "req_bytes": len(flow.request.raw_content or b""),
                "content_type": flow.request.headers.get("Content-Type", ""),
                "is_leaderboard": _looks_like_leaderboard(path),
            },
            "REQUEST",
        )

        amf_data = extract_from_amf(flow)
        if amf_data:
            for t in amf_data.get("_amf_targets", []) or []:
                self.amf_target_counts[t] = self.amf_target_counts.get(t, 0) + 1
            self.save_record(flow, amf_data, "AMF_REQUEST")

    def response(self, flow: "http.HTTPFlow"):
        """Record extracted response data and response metadata for target hosts."""
        host = flow.request.host or ""
        if not _is_target_host(host):
            return

        path = self._strip_query(flow.request.path)

        rec_data = {}
        rec_data.update(extract_from_query(flow))

        body_data = extract_from_body(flow)
        if body_data:
            rec_data.update(body_data)

        cookie_data = extract_from_cookies(flow)
        if cookie_data:
            rec_data.setdefault("cookies", cookie_data)

        # Test the query-less path so "/amf?x=1" is still recognised.
        if path.endswith("/amf") and flow.response and flow.response.content:
            meta, _found = extract_from_amf_bytes(flow.response.content)
            if meta:
                rec_data.setdefault("amf_response", {})
                rec_data["amf_response"].update(meta)

        if rec_data:
            self.save_record(flow, rec_data, "RESPONSE")

        if flow.response:
            self.save_record(
                flow,
                {
                    "path": path,
                    "scheme": flow.request.scheme,
                    "http_version": flow.request.http_version,
                    "status": flow.response.status_code,
                    "resp_bytes": len(flow.response.raw_content or b""),
                    "content_type": flow.response.headers.get("Content-Type", ""),
                    "is_leaderboard": _looks_like_leaderboard(path),
                },
                "RESPONSE_META",
            )

    def save_record(self, flow, extracted_data, source_type):
        """Build, redact and persist one flow record.

        Args:
            flow: The flow the record describes.
            extracted_data: Extra fields merged over the base record; a key
                given here overrides the base value of the same name.
            source_type: Value stored under ``type``. ``REQUEST`` and
                ``RESPONSE_META`` records are also echoed to the console.
        """
        rec = {
            "ts": int(time.time()),
            "type": source_type,
            "host": flow.request.host,
            "url": flow.request.url,
            "method": flow.request.method,
            "status": flow.response.status_code if flow.response else 0,
            **extracted_data
        }

        _redact_record(rec)

        # Save to file
        self._append_line(rec)

        if source_type in ("REQUEST", "RESPONSE_META"):
            tag = "LB" if rec.get("is_leaderboard") else "-"
            if source_type == "REQUEST":
                print(f"[{tag}] {rec.get('method')} {rec.get('host')}{rec.get('path')} ({rec.get('req_bytes')}b)")
            else:
                print(f"[{tag}] {rec.get('status')} {rec.get('method')} {rec.get('host')}{rec.get('path')} ({rec.get('resp_bytes')}b)")

    def done(self):
        """Write and print the final summary."""
        self._snapshot(final=True)

# Module-level addon list (presumably what mitmproxy picks up when it loads this
# script - confirm against the mitmproxy addon docs). Creating the recorder here
# also writes the RUN_START event.
addons = [NinjaSageRecorder()]
